From 135f1d5d650cfc29d64bb83cfcaafcdea883b0b1 Mon Sep 17 00:00:00 2001 From: Guillaume Gelin Date: Fri, 8 May 2026 17:08:15 +0200 Subject: [PATCH] Snowflake: Add support for CREATE FILE FORMAT This PR adds a `Statement::CreateFileFormat` variant and parses the full grammar: `CREATE [ OR REPLACE ] [ { TEMP | TEMPORARY | VOLATILE } ] FILE FORMAT [ IF NOT EXISTS ] <name> [ formatTypeOptions ] [ COMMENT = '<string_literal>' ]` Format options are stored as `KeyValueOptions`, with `COMMENT` split out into its own `Option<String>` field to match `CREATE STAGE`. `IDENTIFIER(?)` style names are handled via `parse_object_name(true)`. Closes #2070. --- src/ast/mod.rs | 47 +++++++++++++ src/ast/spans.rs | 2 + src/dialect/snowflake.rs | 33 +++++++++ tests/sqlparser_snowflake.rs | 133 +++++++++++++++++++++++++++++++++++ 4 files changed, 215 insertions(+) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index c0826f2008..4fda940c5e 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -4487,6 +4487,28 @@ pub enum Statement { comment: Option, }, /// ```sql + /// CREATE [ OR REPLACE ] [ { TEMP | TEMPORARY | VOLATILE } ] FILE FORMAT [ IF NOT EXISTS ] <name> + /// [ TYPE = { CSV | JSON | AVRO | ORC | PARQUET | XML } [ formatTypeOptions ] ] + /// [ COMMENT = '<string_literal>' ] + /// ``` + /// See <https://docs.snowflake.com/en/sql-reference/sql/create-file-format> + CreateFileFormat { + /// `OR REPLACE` flag. + or_replace: bool, + /// Whether file format is temporary. + temporary: bool, + /// Whether file format is volatile. + volatile: bool, + /// `IF NOT EXISTS` flag. + if_not_exists: bool, + /// File format name. + name: ObjectName, + /// Format type options (e.g. `TYPE`, `FIELD_DELIMITER`, `COMPRESSION`, ...). + options: KeyValueOptions, + /// Optional comment.
+ comment: Option, + }, + /// ```sql /// ASSERT [AS ] /// ``` Assert { @@ -6171,6 +6193,31 @@ impl fmt::Display for Statement { } Ok(()) } + Statement::CreateFileFormat { + or_replace, + temporary, + volatile, + if_not_exists, + name, + options, + comment, + } => { + write!( + f, + "CREATE {or_replace}{temp}{volatile}FILE FORMAT {if_not_exists}{name}", + or_replace = if *or_replace { "OR REPLACE " } else { "" }, + temp = if *temporary { "TEMPORARY " } else { "" }, + volatile = if *volatile { "VOLATILE " } else { "" }, + if_not_exists = if *if_not_exists { "IF NOT EXISTS " } else { "" }, + )?; + if !options.options.is_empty() { + write!(f, " {options}")?; + } + if let Some(comment) = comment { + write!(f, " COMMENT='{}'", comment)?; + } + Ok(()) + } Statement::CopyIntoSnowflake { kind, into, diff --git a/src/ast/spans.rs b/src/ast/spans.rs index f6ba895478..31c5c57da4 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -297,6 +297,7 @@ impl Spanned for Values { /// - [Statement::CreateProcedure] /// - [Statement::CreateMacro] /// - [Statement::CreateStage] +/// - [Statement::CreateFileFormat] /// - [Statement::Assert] /// - [Statement::Grant] /// - [Statement::Revoke] @@ -457,6 +458,7 @@ impl Spanned for Statement { Statement::CreateProcedure { .. } => Span::empty(), Statement::CreateMacro { .. } => Span::empty(), Statement::CreateStage { .. } => Span::empty(), + Statement::CreateFileFormat { .. } => Span::empty(), Statement::Assert { .. } => Span::empty(), Statement::Grant { .. } => Span::empty(), Statement::Deny { .. 
} => Span::empty(), diff --git a/src/dialect/snowflake.rs b/src/dialect/snowflake.rs index fda5b7b976..f6cc14a4a0 100644 --- a/src/dialect/snowflake.rs +++ b/src/dialect/snowflake.rs @@ -326,6 +326,10 @@ impl Dialect for SnowflakeDialect { ); } else if parser.parse_keyword(Keyword::DATABASE) { return Some(parse_create_database(or_replace, transient, parser)); + } else if parser.parse_keywords(&[Keyword::FILE, Keyword::FORMAT]) { + return Some(parse_create_file_format( + or_replace, temporary, volatile, parser, + )); } else { // need to go back with the cursor let mut back = 1; @@ -1253,6 +1257,35 @@ pub fn parse_create_stage( }) } +/// Parse a Snowflake `CREATE FILE FORMAT` statement. +/// See <https://docs.snowflake.com/en/sql-reference/sql/create-file-format> +pub fn parse_create_file_format( + or_replace: bool, + temporary: bool, + volatile: bool, + parser: &mut Parser, +) -> Result { + let if_not_exists = parser.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); + let name = parser.parse_object_name(true)?; + let options = parser.parse_key_value_options(false, &[Keyword::COMMENT])?; + let comment = if parser.parse_keyword(Keyword::COMMENT) { + parser.expect_token(&Token::Eq)?; + Some(parser.parse_comment_value()?)
+ } else { + None + }; + + Ok(Statement::CreateFileFormat { + or_replace, + temporary, + volatile, + if_not_exists, + name, + options, + comment, + }) +} + pub fn parse_stage_name_identifier(parser: &mut Parser) -> Result { let mut ident = String::new(); while let Some(next_token) = parser.next_token_no_skip() { diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index e75dcbf786..192ff85cca 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -479,6 +479,16 @@ fn test_snowflake_create_invalid_temporal_table() { ); } +#[test] +fn test_snowflake_create_invalid_temporal_file_format() { + assert_eq!( + snowflake().parse_sql_statements("CREATE TEMPORARY VOLATILE FILE FORMAT my_fmt"), + Err(ParserError::ParserError( + "Expected: an object type after CREATE, found: FILE".to_string() + )) + ); +} + #[test] fn test_snowflake_create_table_if_not_exists() { match snowflake().verified_stmt("CREATE TABLE IF NOT EXISTS my_table (a INT)") { @@ -2160,6 +2170,129 @@ fn test_create_stage_with_copy_options() { assert_eq!(snowflake().verified_stmt(sql).to_string(), sql); } +#[test] +fn test_create_file_format() { + let sql = "CREATE FILE FORMAT my_fmt"; + match snowflake().verified_stmt(sql) { + Statement::CreateFileFormat { + or_replace, + temporary, + volatile, + if_not_exists, + name, + options, + comment, + } => { + assert!(!or_replace); + assert!(!temporary); + assert!(!volatile); + assert!(!if_not_exists); + assert_eq!("my_fmt", name.to_string()); + assert!(options.options.is_empty()); + assert!(comment.is_none()); + } + _ => unreachable!(), + }; + assert_eq!(snowflake().verified_stmt(sql).to_string(), sql); + + let extended_sql = concat!( + "CREATE OR REPLACE TEMPORARY FILE FORMAT IF NOT EXISTS my_fmt ", + "COMMENT='some-comment'" + ); + match snowflake().verified_stmt(extended_sql) { + Statement::CreateFileFormat { + or_replace, + temporary, + if_not_exists, + name, + comment, + .. 
+ } => { + assert!(or_replace); + assert!(temporary); + assert!(if_not_exists); + assert_eq!("my_fmt", name.to_string()); + assert_eq!("some-comment", comment.unwrap()); + } + _ => unreachable!(), + }; + assert_eq!( + snowflake().verified_stmt(extended_sql).to_string(), + extended_sql + ); +} + +#[test] +fn test_create_file_format_with_options() { + let sql = concat!( + "CREATE FILE FORMAT my_fmt ", + "TYPE=CSV FIELD_DELIMITER='|' SKIP_HEADER=1 COMPRESSION=GZIP" + ); + match snowflake().verified_stmt(sql) { + Statement::CreateFileFormat { options, .. } => { + assert!(options.options.contains(&KeyValueOption { + option_name: "TYPE".to_string(), + option_value: KeyValueOptionKind::Single( + Value::Placeholder("CSV".to_string()).with_empty_span() + ), + })); + assert!(options.options.contains(&KeyValueOption { + option_name: "FIELD_DELIMITER".to_string(), + option_value: KeyValueOptionKind::Single( + Value::SingleQuotedString("|".to_string()).with_empty_span() + ), + })); + assert!(options.options.contains(&KeyValueOption { + option_name: "SKIP_HEADER".to_string(), + option_value: KeyValueOptionKind::Single( + Value::Number("1".parse().unwrap(), false).with_empty_span() + ), + })); + assert!(options.options.contains(&KeyValueOption { + option_name: "COMPRESSION".to_string(), + option_value: KeyValueOptionKind::Single( + Value::Placeholder("GZIP".to_string()).with_empty_span() + ), + })); + } + _ => unreachable!(), + }; + assert_eq!(snowflake().verified_stmt(sql).to_string(), sql); +} + +#[test] +fn test_create_file_format_volatile() { + let sql = "CREATE VOLATILE FILE FORMAT my_fmt TYPE=JSON STRIP_OUTER_ARRAY=true"; + match snowflake().verified_stmt(sql) { + Statement::CreateFileFormat { + temporary, + volatile, + options, + .. 
+ } => { + assert!(!temporary); + assert!(volatile); + assert!(options.options.contains(&KeyValueOption { + option_name: "STRIP_OUTER_ARRAY".to_string(), + option_value: KeyValueOptionKind::Single(Value::Boolean(true).with_empty_span()), + })); + } + _ => unreachable!(), + }; + assert_eq!(snowflake().verified_stmt(sql).to_string(), sql); +} + +#[test] +fn test_create_file_format_with_identifier_function() { + // The Snowflake driver emits `CREATE TEMP FILE FORMAT identifier(?) ...` when + // uploading pandas DataFrames. `TEMP` is an alias of `TEMPORARY` and the name + // is a call to the `IDENTIFIER` function with a bind parameter. + snowflake().one_statement_parses_to( + "CREATE TEMP FILE FORMAT identifier(?) TYPE=PARQUET COMPRESSION=auto", + "CREATE TEMPORARY FILE FORMAT identifier(?) TYPE=PARQUET COMPRESSION=auto", + ); +} + #[test] fn test_copy_into() { let sql = concat!(