STET

agent_patch

runs/2026-02-28__18-27-22__gpt-5-1-codex-mini/flux-pr-2170/flux-pr-2170.1-of-1.2026-02-28__18-27-22__gpt-5-1-codex-mini/agent-logs/agent.patch

16613 bytes

Back to adjudication
diff --git a/tmp/agent-patch-flux-pr-2170.1-of-1.2026-02-28__18-27-22__gpt-5-1-codex-mini/app/changelog/0.61.0.md b/app/changelog/0.61.0.md
index 86a9701..428789b 100644
--- a/tmp/agent-patch-flux-pr-2170.1-of-1.2026-02-28__18-27-22__gpt-5-1-codex-mini/app/changelog/0.61.0.md
+++ b/app/changelog/0.61.0.md
@@ -93,6 +93,7 @@ This release consists of 66 commits from 22 contributors. See credits at the end
 - PostgreSQL: Support PostgreSQL ANALYZE with optional table and column [#2187](https://github.com/apache/datafusion-sqlparser-rs/pull/2187) (guan404ming)
 - Add Tokenizer custom token mapper support [#2184](https://github.com/apache/datafusion-sqlparser-rs/pull/2184) (askalt)
 - Fix MAP literals parsing [#2205](https://github.com/apache/datafusion-sqlparser-rs/pull/2205) (Samyak2)
+- Databricks: Support the OPTIMIZE statement (with `FULL`, `WHERE`, and `ZORDER BY`), partitioning columns without specified types, and struct field definitions using colon syntax [#2170](https://github.com/apache/datafusion-sqlparser-rs/pull/2170) (funcpp)
 
 ## Credits
 
@@ -124,4 +125,3 @@ Thank you to everyone who contributed to this release. Here is a breakdown of co
 ```
 
 Thank you also to everyone who contributed in other ways such as filing issues, reviewing PRs, and providing feedback on this release.
-
diff --git a/tmp/agent-patch-flux-pr-2170.1-of-1.2026-02-28__18-27-22__gpt-5-1-codex-mini/app/src/ast/mod.rs b/app/src/ast/mod.rs
index 010a818..1de8db8 100644
--- a/tmp/agent-patch-flux-pr-2170.1-of-1.2026-02-28__18-27-22__gpt-5-1-codex-mini/app/src/ast/mod.rs
+++ b/app/src/ast/mod.rs
@@ -4580,12 +4580,16 @@ pub enum Statement {
     },
     /// ```sql
     /// OPTIMIZE TABLE [db.]name [ON CLUSTER cluster] [PARTITION partition | PARTITION ID 'partition_id'] [FINAL] [DEDUPLICATE [BY expression]]
+    /// OPTIMIZE [db.]name [FULL] [WHERE predicate] [ZORDER BY (col_name [, ...])]
     /// ```
     ///
     /// See ClickHouse <https://clickhouse.com/docs/en/sql-reference/statements/optimize>
+    /// See Databricks <https://docs.databricks.com/en/sql/language-manual/sql-ref-statements/optimize.html>
     OptimizeTable {
         /// Table name to optimize.
         name: ObjectName,
+        /// Whether the original statement included the optional `TABLE` keyword (preserved so `Display` round-trips the input).
+        has_table_keyword: bool,
         /// Optional cluster identifier.
         on_cluster: Option<Ident>,
         /// Optional partition spec.
@@ -4594,6 +4598,12 @@ pub enum Statement {
         include_final: bool,
         /// Optional deduplication settings.
         deduplicate: Option<Deduplicate>,
+        /// Whether `FULL` was specified (Databricks).
+        include_full: bool,
+        /// Optional predicate clause for Databricks.
+        predicate: Option<Expr>,
+        /// Optional `ZORDER BY` expressions for Databricks.
+        zorder_by: Option<Vec<OrderByExpr>>,
     },
     /// ```sql
     /// LISTEN
@@ -6069,12 +6079,20 @@ impl fmt::Display for Statement {
             }
             Statement::OptimizeTable {
                 name,
+                has_table_keyword,
                 on_cluster,
                 partition,
                 include_final,
                 deduplicate,
+                include_full,
+                predicate,
+                zorder_by,
             } => {
-                write!(f, "OPTIMIZE TABLE {name}")?;
+                if *has_table_keyword {
+                    write!(f, "OPTIMIZE TABLE {name}")?;
+                } else {
+                    write!(f, "OPTIMIZE {name}")?;
+                }
                 if let Some(on_cluster) = on_cluster {
                     write!(f, " ON CLUSTER {on_cluster}")?;
                 }
@@ -6087,6 +6105,15 @@ impl fmt::Display for Statement {
                 if let Some(deduplicate) = deduplicate {
                     write!(f, " {deduplicate}")?;
                 }
+                if *include_full {
+                    write!(f, " FULL")?;
+                }
+                if let Some(predicate) = predicate {
+                    write!(f, " WHERE {predicate}")?;
+                }
+                if let Some(zorder_by) = zorder_by {
+                    write!(f, " ZORDER BY ({})", display_comma_separated(zorder_by))?;
+                }
                 Ok(())
             }
             Statement::LISTEN { channel } => {
diff --git a/tmp/agent-patch-flux-pr-2170.1-of-1.2026-02-28__18-27-22__gpt-5-1-codex-mini/app/src/dialect/databricks.rs b/app/src/dialect/databricks.rs
index e903b07..b5207ca 100644
--- a/tmp/agent-patch-flux-pr-2170.1-of-1.2026-02-28__18-27-22__gpt-5-1-codex-mini/app/src/dialect/databricks.rs
+++ b/app/src/dialect/databricks.rs
@@ -71,6 +71,14 @@ impl Dialect for DatabricksDialect {
         true
     }
 
+    fn supports_optimize_without_table_keyword(&self) -> bool {
+        true
+    }
+
+    fn supports_partitioned_by_without_datatypes(&self) -> bool {
+        true
+    }
+
     /// See <https://docs.databricks.com/aws/en/sql/language-manual/sql-ref-syntax-comment>
     fn supports_nested_comments(&self) -> bool {
         true
diff --git a/tmp/agent-patch-flux-pr-2170.1-of-1.2026-02-28__18-27-22__gpt-5-1-codex-mini/app/src/dialect/mod.rs b/app/src/dialect/mod.rs
index d0b87d9..32cf120 100644
--- a/tmp/agent-patch-flux-pr-2170.1-of-1.2026-02-28__18-27-22__gpt-5-1-codex-mini/app/src/dialect/mod.rs
+++ b/app/src/dialect/mod.rs
@@ -1521,6 +1521,20 @@ pub trait Dialect: Debug + Any {
         false
     }
 
+    /// Returns true if this dialect supports the `OPTIMIZE` statement without the `TABLE` keyword.
+    ///
+    /// [Databricks](https://docs.databricks.com/en/sql/language-manual/sql-ref-statements/optimize.html)
+    fn supports_optimize_without_table_keyword(&self) -> bool {
+        false
+    }
+
+    /// Returns true if this dialect allows `PARTITIONED BY` columns without an explicit data type.
+    ///
+    /// [Databricks](https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-ddl-create-table.html)
+    fn supports_partitioned_by_without_datatypes(&self) -> bool {
+        false
+    }
+
     /// Returns true if this dialect supports the `INSTALL` statement.
     ///
     /// Example:
diff --git a/tmp/agent-patch-flux-pr-2170.1-of-1.2026-02-28__18-27-22__gpt-5-1-codex-mini/app/src/parser/mod.rs b/app/src/parser/mod.rs
index 0b152f2..547b511 100644
--- a/tmp/agent-patch-flux-pr-2170.1-of-1.2026-02-28__18-27-22__gpt-5-1-codex-mini/app/src/parser/mod.rs
+++ b/app/src/parser/mod.rs
@@ -694,7 +694,10 @@ impl<'a> Parser<'a> {
                 Keyword::INSTALL if self.dialect.supports_install() => self.parse_install(),
                 Keyword::LOAD => self.parse_load(),
                 // `OPTIMIZE` is clickhouse specific https://clickhouse.tech/docs/en/sql-reference/statements/optimize/
-                Keyword::OPTIMIZE if self.dialect.supports_optimize_table() => {
+                Keyword::OPTIMIZE
+                    if self.dialect.supports_optimize_table()
+                        || self.dialect.supports_optimize_without_table_keyword() =>
+                {
                     self.parse_optimize_table()
                 }
                 // `COMMENT` is snowflake specific https://docs.snowflake.com/en/sql-reference/sql/comment
@@ -3391,17 +3394,23 @@ impl<'a> Parser<'a> {
     ) -> Result<(StructField, MatchedTrailingBracket), ParserError> {
         // Look beyond the next item to infer whether both field name
         // and type are specified.
-        let is_anonymous_field = !matches!(
-            (self.peek_nth_token(0).token, self.peek_nth_token(1).token),
-            (Token::Word(_), Token::Word(_))
+        let next_tokens = (
+            self.peek_nth_token(0).token,
+            self.peek_nth_token(1).token,
+        );
+        let is_named_field = matches!(
+            next_tokens,
+            (Token::Word(_), Token::Word(_)) | (Token::Word(_), Token::Colon)
         );
 
-        let field_name = if is_anonymous_field {
-            None
-        } else {
+        let field_name = if is_named_field {
             Some(self.parse_identifier()?)
+        } else {
+            None
         };
 
+        let _ = self.consume_token(&Token::Colon); // optional ':' separator between field name and type (Databricks colon syntax); absent for anonymous fields
+
         let (field_type, trailing_bracket) = self.parse_data_type_helper()?;
 
         let options = self.maybe_parse_options(Keyword::OPTIONS)?;
@@ -7985,7 +7994,14 @@ impl<'a> Parser<'a> {
     pub fn parse_hive_distribution(&mut self) -> Result<HiveDistributionStyle, ParserError> {
         if self.parse_keywords(&[Keyword::PARTITIONED, Keyword::BY]) {
             self.expect_token(&Token::LParen)?;
-            let columns = self.parse_comma_separated(Parser::parse_column_def)?;
+            let columns = if self
+                .dialect
+                .supports_partitioned_by_without_datatypes()
+            {
+                self.parse_comma_separated(Parser::parse_column_def_allow_missing_datatype)?
+            } else {
+                self.parse_comma_separated(Parser::parse_column_def)?
+            };
             self.expect_token(&Token::RParen)?;
             Ok(HiveDistributionStyle::PARTITIONED { columns })
         } else {
@@ -8809,8 +8825,22 @@ impl<'a> Parser<'a> {
 
     /// Parse column definition.
     pub fn parse_column_def(&mut self) -> Result<ColumnDef, ParserError> {
+        self.parse_column_def_internal(false)
+    }
+
+    /// Parse a column definition where the data type may be omitted.
+    pub fn parse_column_def_allow_missing_datatype(&mut self) -> Result<ColumnDef, ParserError> {
+        self.parse_column_def_internal(true)
+    }
+
+    fn parse_column_def_internal(
+        &mut self,
+        allow_missing_datatype: bool,
+    ) -> Result<ColumnDef, ParserError> {
         let col_name = self.parse_identifier()?;
-        let data_type = if self.is_column_type_sqlite_unspecified() {
+        let skip_data_type = allow_missing_datatype
+            && matches!(self.peek_token().token, Token::Comma | Token::RParen);
+        let data_type = if skip_data_type || self.is_column_type_sqlite_unspecified() {
             DataType::Unspecified
         } else {
             self.parse_data_type()?
@@ -18485,8 +18515,45 @@ impl<'a> Parser<'a> {
     /// ```
     /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/optimize)
     pub fn parse_optimize_table(&mut self) -> Result<Statement, ParserError> {
-        self.expect_keyword_is(Keyword::TABLE)?;
+        let has_table_keyword = self.parse_keyword(Keyword::TABLE);
+        let supports_optimize_table = self.dialect.supports_optimize_table();
+        let supports_databricks_optimize = self.dialect.supports_optimize_without_table_keyword();
+
+        if supports_optimize_table && !has_table_keyword {
+            return self.expected("TABLE", self.peek_token());
+        }
+
         let name = self.parse_object_name(false)?;
+
+        if supports_databricks_optimize {
+            let include_full = self.parse_keyword(Keyword::FULL);
+            let predicate = if self.parse_keyword(Keyword::WHERE) {
+                Some(self.parse_expr()?)
+            } else {
+                None
+            };
+            let zorder_by = if self.parse_keywords(&[Keyword::ZORDER, Keyword::BY]) {
+                self.expect_token(&Token::LParen)?;
+                let sorting = self.parse_comma_separated(Parser::parse_order_by_expr)?;
+                self.expect_token(&Token::RParen)?;
+                Some(sorting)
+            } else {
+                None
+            };
+
+            return Ok(Statement::OptimizeTable {
+                name,
+                has_table_keyword,
+                on_cluster: None,
+                partition: None,
+                include_final: false,
+                deduplicate: None,
+                include_full,
+                predicate,
+                zorder_by,
+            });
+        }
+
         let on_cluster = self.parse_optional_on_cluster()?;
 
         let partition = if self.parse_keyword(Keyword::PARTITION) {
@@ -18512,10 +18579,14 @@ impl<'a> Parser<'a> {
 
         Ok(Statement::OptimizeTable {
             name,
+            has_table_keyword,
             on_cluster,
             partition,
             include_final,
             deduplicate,
+            include_full: false,
+            predicate: None,
+            zorder_by: None,
         })
     }
 
diff --git a/tmp/agent-patch-flux-pr-2170.1-of-1.2026-02-28__18-27-22__gpt-5-1-codex-mini/app/tests/sqlparser_databricks.rs b/app/tests/sqlparser_databricks.rs
index b088afd..db66f73 100644
--- a/tmp/agent-patch-flux-pr-2170.1-of-1.2026-02-28__18-27-22__gpt-5-1-codex-mini/app/tests/sqlparser_databricks.rs
+++ b/app/tests/sqlparser_databricks.rs
@@ -325,6 +325,39 @@ fn parse_databricks_struct_function() {
     );
 }
 
+#[test]
+fn parse_databricks_struct_datatype() {
+    match databricks()
+        .verified_stmt("CREATE TABLE foo (x STRUCT<name: INT, age: INT>)")
+    {
+        Statement::CreateTable(CreateTable { columns, .. }) => {
+            assert_eq!(
+                columns,
+                vec![ColumnDef {
+                    name: Ident::new("x"),
+                    data_type: DataType::Struct(
+                        vec![
+                            StructField {
+                                field_name: Some(Ident::new("name")),
+                                field_type: DataType::Int(None),
+                                options: None
+                            },
+                            StructField {
+                                field_name: Some(Ident::new("age")),
+                                field_type: DataType::Int(None),
+                                options: None
+                            },
+                        ],
+                        StructBracketKind::AngleBrackets
+                    ),
+                    options: vec![],
+                }]
+            );
+        }
+        stmt => panic!("Unexpected statement: {stmt:?}"),
+    }
+}
+
 #[test]
 fn data_type_timestamp_ntz() {
     // Literal
@@ -390,3 +423,73 @@ fn parse_table_time_travel() {
         .parse_sql_statements("SELECT 1 FROM t1 VERSION AS OF 1 - 2",)
         .is_err())
 }
+
+#[test]
+fn parse_databricks_partitioned_by_optional_datatypes() {
+    match databricks()
+        .verified_stmt("CREATE TABLE foo (id INT) PARTITIONED BY (dt, y INT)")
+    {
+        Statement::CreateTable(CreateTable {
+            hive_distribution,
+            ..
+        }) => match hive_distribution {
+            HiveDistributionStyle::PARTITIONED { columns } => {
+                assert_eq!(
+                    columns,
+                    vec![
+                        ColumnDef {
+                            name: Ident::new("dt"),
+                            data_type: DataType::Unspecified,
+                            options: vec![],
+                        },
+                        ColumnDef {
+                            name: Ident::new("y"),
+                            data_type: DataType::Int(None),
+                            options: vec![],
+                        },
+                    ]
+                );
+            }
+            _ => panic!("Expected partitioned representation"),
+        },
+        stmt => panic!("Unexpected statement: {stmt:?}"),
+    }
+}
+
+#[test]
+fn parse_databricks_optimize() {
+    match databricks().verified_stmt(
+        "OPTIMIZE foo FULL WHERE dt = '2024-01-01' ZORDER BY (col1, col2)",
+    ) {
+        Statement::OptimizeTable {
+            name,
+            has_table_keyword,
+            include_full,
+            predicate,
+            zorder_by,
+            ..
+        } => {
+            assert_eq!(name.to_string(), "foo");
+            assert!(!has_table_keyword);
+            assert!(include_full);
+            assert_eq!(
+                predicate,
+                Some(Expr::BinaryOp {
+                    left: Box::new(Expr::Identifier(Ident::new("dt"))),
+                    op: BinaryOperator::Eq,
+                    right: Box::new(Expr::Value(
+                        (Value::SingleQuotedString("2024-01-01".to_string())).with_empty_span()
+                    ))
+                })
+            );
+            assert_eq!(
+                zorder_by,
+                Some(vec![
+                    OrderByExpr::from(Ident::new("col1")),
+                    OrderByExpr::from(Ident::new("col2")),
+                ])
+            );
+        }
+        stmt => panic!("Unexpected statement: {stmt:?}"),
+    }
+}