STET

agent_patch

runs/2026-02-28__18-27-22__gpt-5-3-codex/flux-pr-2170/flux-pr-2170.1-of-1.2026-02-28__18-27-22__gpt-5-3-codex/agent-logs/agent.patch

13087 bytes

Back to adjudication
diff --git a/app/src/ast/mod.rs b/app/src/ast/mod.rs
index 010a818..82ea3c4 100644
--- a/app/src/ast/mod.rs
+++ b/app/src/ast/mod.rs
@@ -4579,11 +4579,19 @@ pub enum Statement {
         options: Vec<CopyLegacyOption>,
     },
     /// ```sql
+    /// -- ClickHouse
     /// OPTIMIZE TABLE [db.]name [ON CLUSTER cluster] [PARTITION partition | PARTITION ID 'partition_id'] [FINAL] [DEDUPLICATE [BY expression]]
+    ///
+    /// -- Databricks
+    /// OPTIMIZE [db.]name [WHERE predicate] [ZORDER BY (col1 [, ...])]
     /// ```
     ///
-    /// See ClickHouse <https://clickhouse.com/docs/en/sql-reference/statements/optimize>
+    /// See:
+    /// - ClickHouse <https://clickhouse.com/docs/en/sql-reference/statements/optimize>
+    /// - Databricks <https://docs.databricks.com/en/sql/language-manual/delta-optimize.html>
     OptimizeTable {
+        /// Whether the `TABLE` keyword was present.
+        table_keyword: bool,
         /// Table name to optimize.
         name: ObjectName,
         /// Optional cluster identifier.
@@ -4594,6 +4602,10 @@ pub enum Statement {
         include_final: bool,
         /// Optional deduplication settings.
         deduplicate: Option<Deduplicate>,
+        /// Optional `WHERE` predicate (Databricks).
+        predicate: Option<Expr>,
+        /// Optional `ZORDER BY (...)` expressions (Databricks).
+        zorder_by: Vec<Expr>,
     },
     /// ```sql
     /// LISTEN
@@ -6068,13 +6080,20 @@ impl fmt::Display for Statement {
                 Ok(())
             }
             Statement::OptimizeTable {
+                table_keyword,
                 name,
                 on_cluster,
                 partition,
                 include_final,
                 deduplicate,
+                predicate,
+                zorder_by,
             } => {
-                write!(f, "OPTIMIZE TABLE {name}")?;
+                write!(f, "OPTIMIZE")?;
+                if *table_keyword {
+                    write!(f, " TABLE")?;
+                }
+                write!(f, " {name}")?;
                 if let Some(on_cluster) = on_cluster {
                     write!(f, " ON CLUSTER {on_cluster}")?;
                 }
@@ -6087,6 +6106,12 @@ impl fmt::Display for Statement {
                 if let Some(deduplicate) = deduplicate {
                     write!(f, " {deduplicate}")?;
                 }
+                if let Some(predicate) = predicate {
+                    write!(f, " WHERE {predicate}")?;
+                }
+                if !zorder_by.is_empty() {
+                    write!(f, " ZORDER BY ({})", display_comma_separated(zorder_by))?;
+                }
                 Ok(())
             }
             Statement::LISTEN { channel } => {
diff --git a/app/src/dialect/databricks.rs b/app/src/dialect/databricks.rs
index e903b07..55e4f56 100644
--- a/app/src/dialect/databricks.rs
+++ b/app/src/dialect/databricks.rs
@@ -85,4 +85,9 @@ impl Dialect for DatabricksDialect {
     fn supports_values_as_table_factor(&self) -> bool {
         true
     }
+
+    /// See <https://docs.databricks.com/en/sql/language-manual/delta-optimize.html>
+    fn supports_optimize_table(&self) -> bool {
+        true
+    }
 }
diff --git a/app/src/parser/mod.rs b/app/src/parser/mod.rs
index 0b152f2..cbee003 100644
--- a/app/src/parser/mod.rs
+++ b/app/src/parser/mod.rs
@@ -3389,19 +3389,20 @@ impl<'a> Parser<'a> {
     fn parse_struct_field_def(
         &mut self,
     ) -> Result<(StructField, MatchedTrailingBracket), ParserError> {
-        // Look beyond the next item to infer whether both field name
-        // and type are specified.
-        let is_anonymous_field = !matches!(
-            (self.peek_nth_token(0).token, self.peek_nth_token(1).token),
-            (Token::Word(_), Token::Word(_))
-        );
-
-        let field_name = if is_anonymous_field {
-            None
-        } else {
-            Some(self.parse_identifier()?)
+        // Infer whether both field name and type are specified.
+        // Databricks also supports `field_name: field_type`.
+        let field_name = match (
+            self.peek_nth_token(0).token,
+            self.peek_nth_token(1).token,
+        ) {
+            (Token::Word(_), Token::Word(_)) | (Token::Word(_), Token::Colon) => {
+                Some(self.parse_identifier()?)
+            }
+            _ => None,
         };
 
+        let _ = self.consume_token(&Token::Colon);
+
         let (field_type, trailing_bracket) = self.parse_data_type_helper()?;
 
         let options = self.maybe_parse_options(Keyword::OPTIONS)?;
@@ -7985,7 +7986,7 @@ impl<'a> Parser<'a> {
     pub fn parse_hive_distribution(&mut self) -> Result<HiveDistributionStyle, ParserError> {
         if self.parse_keywords(&[Keyword::PARTITIONED, Keyword::BY]) {
             self.expect_token(&Token::LParen)?;
-            let columns = self.parse_comma_separated(Parser::parse_column_def)?;
+            let columns = self.parse_comma_separated(Parser::parse_partitioned_column_def)?;
             self.expect_token(&Token::RParen)?;
             Ok(HiveDistributionStyle::PARTITIONED { columns })
         } else {
@@ -7993,6 +7994,22 @@ impl<'a> Parser<'a> {
         }
     }
 
+    fn parse_partitioned_column_def(&mut self) -> Result<ColumnDef, ParserError> {
+        let name = self.parse_identifier()?;
+        let data_type = if dialect_of!(self is DatabricksDialect | GenericDialect)
+            && matches!(self.peek_token().token, Token::Comma | Token::RParen)
+        {
+            DataType::Unspecified
+        } else {
+            self.parse_data_type()?
+        };
+        Ok(ColumnDef {
+            name,
+            data_type,
+            options: vec![],
+        })
+    }
+
     /// Parse Hive formats.
     pub fn parse_hive_formats(&mut self) -> Result<Option<HiveFormat>, ParserError> {
         let mut hive_format: Option<HiveFormat> = None;
@@ -11927,6 +11944,16 @@ impl<'a> Parser<'a> {
                         StructBracketKind::AngleBrackets,
                     ))
                 }
+                Keyword::STRUCT if dialect_is!(dialect is DatabricksDialect) => {
+                    self.prev_token();
+                    let (field_defs, _trailing_bracket) =
+                        self.parse_struct_type_def(Self::parse_struct_field_def)?;
+                    trailing_bracket = _trailing_bracket;
+                    Ok(DataType::Struct(
+                        field_defs,
+                        StructBracketKind::AngleBrackets,
+                    ))
+                }
                 Keyword::UNION if dialect_is!(dialect is DuckDbDialect | GenericDialect) => {
                     self.prev_token();
                     let fields = self.parse_union_type_def()?;
@@ -18481,15 +18508,30 @@ impl<'a> Parser<'a> {
     }
 
     /// ```sql
+    /// -- ClickHouse
     /// OPTIMIZE TABLE [db.]name [ON CLUSTER cluster] [PARTITION partition | PARTITION ID 'partition_id'] [FINAL] [DEDUPLICATE [BY expression]]
+    ///
+    /// -- Databricks
+    /// OPTIMIZE [db.]name [WHERE predicate] [ZORDER BY (col1 [, ...])]
     /// ```
     /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/optimize)
+    /// [Databricks](https://docs.databricks.com/en/sql/language-manual/delta-optimize.html)
     pub fn parse_optimize_table(&mut self) -> Result<Statement, ParserError> {
-        self.expect_keyword_is(Keyword::TABLE)?;
+        let table_keyword = self.parse_keyword(Keyword::TABLE);
+        if !table_keyword && dialect_of!(self is ClickHouseDialect) {
+            return self.expected("TABLE", self.peek_token());
+        }
+
         let name = self.parse_object_name(false)?;
-        let on_cluster = self.parse_optional_on_cluster()?;
+        let on_cluster = if dialect_of!(self is ClickHouseDialect | GenericDialect) {
+            self.parse_optional_on_cluster()?
+        } else {
+            None
+        };
 
-        let partition = if self.parse_keyword(Keyword::PARTITION) {
+        let partition = if dialect_of!(self is ClickHouseDialect | GenericDialect)
+            && self.parse_keyword(Keyword::PARTITION)
+        {
             if self.parse_keyword(Keyword::ID) {
                 Some(Partition::Identifier(self.parse_identifier()?))
             } else {
@@ -18499,8 +18541,11 @@ impl<'a> Parser<'a> {
             None
         };
 
-        let include_final = self.parse_keyword(Keyword::FINAL);
-        let deduplicate = if self.parse_keyword(Keyword::DEDUPLICATE) {
+        let include_final =
+            dialect_of!(self is ClickHouseDialect | GenericDialect) && self.parse_keyword(Keyword::FINAL);
+        let deduplicate = if dialect_of!(self is ClickHouseDialect | GenericDialect)
+            && self.parse_keyword(Keyword::DEDUPLICATE)
+        {
             if self.parse_keyword(Keyword::BY) {
                 Some(Deduplicate::ByExpression(self.parse_expr()?))
             } else {
@@ -18509,13 +18554,30 @@ impl<'a> Parser<'a> {
         } else {
             None
         };
+        let predicate = if dialect_of!(self is DatabricksDialect | GenericDialect)
+            && self.parse_keyword(Keyword::WHERE)
+        {
+            Some(self.parse_expr()?)
+        } else {
+            None
+        };
+        let zorder_by = if dialect_of!(self is DatabricksDialect | GenericDialect)
+            && self.parse_keywords(&[Keyword::ZORDER, Keyword::BY])
+        {
+            self.parse_parenthesized(|p| p.parse_comma_separated(Parser::parse_expr))?
+        } else {
+            vec![]
+        };
 
         Ok(Statement::OptimizeTable {
+            table_keyword,
             name,
             on_cluster,
             partition,
             include_final,
             deduplicate,
+            predicate,
+            zorder_by,
         })
     }
 
diff --git a/app/tests/sqlparser_databricks.rs b/app/tests/sqlparser_databricks.rs
index b088afd..662575f 100644
--- a/app/tests/sqlparser_databricks.rs
+++ b/app/tests/sqlparser_databricks.rs
@@ -325,6 +325,69 @@ fn parse_databricks_struct_function() {
     );
 }
 
+#[test]
+fn parse_databricks_optimize_table() {
+    databricks().verified_stmt("OPTIMIZE table_name");
+    databricks().verified_stmt("OPTIMIZE db.table_name");
+    databricks().verified_stmt("OPTIMIZE table_name WHERE date_col >= '2025-01-01'");
+    databricks().verified_stmt("OPTIMIZE table_name ZORDER BY (id)");
+    databricks()
+        .verified_stmt("OPTIMIZE table_name WHERE date_col >= '2025-01-01' ZORDER BY (id, ts)");
+
+    match databricks().verified_stmt(
+        "OPTIMIZE table_name WHERE date_col >= '2025-01-01' ZORDER BY (id, ts)",
+    ) {
+        Statement::OptimizeTable {
+            table_keyword,
+            name,
+            predicate,
+            zorder_by,
+            ..
+        } => {
+            assert!(!table_keyword);
+            assert_eq!(name.to_string(), "table_name");
+            assert_eq!(zorder_by.len(), 2);
+            assert!(predicate.is_some());
+        }
+        _ => unreachable!(),
+    }
+}
+
+#[test]
+fn parse_databricks_struct_colon_field_types() {
+    match databricks().verified_stmt("CREATE TABLE foo (s STRUCT<a: INT, b: STRING>)") {
+        Statement::CreateTable(CreateTable { columns, .. }) => {
+            assert_eq!(columns.len(), 1);
+            assert_eq!(columns[0].name, Ident::new("s"));
+            assert_eq!(
+                columns[0].data_type,
+                DataType::Struct(
+                    vec![
+                        StructField {
+                            field_name: Some(Ident::new("a")),
+                            field_type: DataType::Int(None),
+                            options: None,
+                        },
+                        StructField {
+                            field_name: Some(Ident::new("b")),
+                            field_type: DataType::String(None),
+                            options: None,
+                        },
+                    ],
+                    StructBracketKind::AngleBrackets,
+                )
+            );
+        }
+        s => panic!("Unexpected statement: {s:?}"),
+    }
+}
+
+#[test]
+fn parse_databricks_partitioned_by_optional_types() {
+    databricks().verified_stmt("CREATE TABLE foo (id INT, p STRING) PARTITIONED BY (p)");
+    databricks().verified_stmt("CREATE TABLE foo (id INT, p STRING) PARTITIONED BY (p STRING)");
+}
+
 #[test]
 fn data_type_timestamp_ntz() {
     // Literal