STET

agent_patch

runs/2026-02-28__18-27-22__gpt-5-4/flux-pr-2170/flux-pr-2170.1-of-1.2026-02-28__18-27-22__gpt-5-4/agent-logs/agent.patch

11171 bytes

Back to adjudication
diff --git a/tmp/agent-patch-flux-pr-2170.1-of-1.2026-02-28__18-27-22__gpt-5-4/app/src/ast/mod.rs b/app/src/ast/mod.rs
index 010a818..28cf1ff 100644
--- a/tmp/agent-patch-flux-pr-2170.1-of-1.2026-02-28__18-27-22__gpt-5-4/app/src/ast/mod.rs
+++ b/app/src/ast/mod.rs
@@ -4580,16 +4580,24 @@ pub enum Statement {
     },
     /// ```sql
     /// OPTIMIZE TABLE [db.]name [ON CLUSTER cluster] [PARTITION partition | PARTITION ID 'partition_id'] [FINAL] [DEDUPLICATE [BY expression]]
+    /// OPTIMIZE [db.]name [WHERE predicate] [ZORDER BY (expr, ...)]
     /// ```
     ///
     /// See ClickHouse <https://clickhouse.com/docs/en/sql-reference/statements/optimize>
+    /// and Databricks <https://docs.databricks.com/en/sql/language-manual/delta-optimize.html>
     OptimizeTable {
+        /// Whether the statement included the `TABLE` keyword (previously mandatory for ClickHouse; Databricks writes `OPTIMIZE name` without it).
+        include_table_keyword: bool,
         /// Table name to optimize.
         name: ObjectName,
         /// Optional cluster identifier.
         on_cluster: Option<Ident>,
         /// Optional partition spec.
         partition: Option<Partition>,
+        /// Optional filter for Databricks OPTIMIZE.
+        where_clause: Option<Expr>,
+        /// Optional ZORDER BY list for Databricks OPTIMIZE.
+        zorder_by: Vec<Expr>,
         /// Whether `FINAL` was specified.
         include_final: bool,
         /// Optional deduplication settings.
@@ -6068,19 +6076,32 @@ impl fmt::Display for Statement {
                 Ok(())
             }
             Statement::OptimizeTable {
+                include_table_keyword,
                 name,
                 on_cluster,
                 partition,
+                where_clause,
+                zorder_by,
                 include_final,
                 deduplicate,
             } => {
-                write!(f, "OPTIMIZE TABLE {name}")?;
+                write!(f, "OPTIMIZE")?;
+                if *include_table_keyword {
+                    write!(f, " TABLE")?;
+                }
+                write!(f, " {name}")?;
                 if let Some(on_cluster) = on_cluster {
                     write!(f, " ON CLUSTER {on_cluster}")?;
                 }
                 if let Some(partition) = partition {
                     write!(f, " {partition}")?;
                 }
+                if let Some(where_clause) = where_clause {
+                    write!(f, " WHERE {where_clause}")?;
+                }
+                if !zorder_by.is_empty() {
+                    write!(f, " ZORDER BY ({})", display_comma_separated(zorder_by))?;
+                }
                 if *include_final {
                     write!(f, " FINAL")?;
                 }
diff --git a/tmp/agent-patch-flux-pr-2170.1-of-1.2026-02-28__18-27-22__gpt-5-4/app/src/dialect/databricks.rs b/app/src/dialect/databricks.rs
index e903b07..ff9ef44 100644
--- a/tmp/agent-patch-flux-pr-2170.1-of-1.2026-02-28__18-27-22__gpt-5-4/app/src/dialect/databricks.rs
+++ b/app/src/dialect/databricks.rs
@@ -81,6 +81,11 @@ impl Dialect for DatabricksDialect {
         true
     }
 
+    /// See <https://docs.databricks.com/en/sql/language-manual/delta-optimize.html>
+    fn supports_optimize_table(&self) -> bool {
+        true
+    }
+
     /// See <https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-values.html>
     fn supports_values_as_table_factor(&self) -> bool {
         true
diff --git a/tmp/agent-patch-flux-pr-2170.1-of-1.2026-02-28__18-27-22__gpt-5-4/app/src/parser/mod.rs b/app/src/parser/mod.rs
index 0b152f2..f9a9038 100644
--- a/tmp/agent-patch-flux-pr-2170.1-of-1.2026-02-28__18-27-22__gpt-5-4/app/src/parser/mod.rs
+++ b/app/src/parser/mod.rs
@@ -3415,6 +3415,29 @@ impl<'a> Parser<'a> {
         ))
     }
 
+    /// Databricks struct field definition.
+    ///
+    /// Syntax:
+    /// ```sql
+    /// field_name: field_type
+    /// ```
+    fn parse_databricks_struct_field_def(
+        &mut self,
+    ) -> Result<(StructField, MatchedTrailingBracket), ParserError> {
+        let field_name = Some(self.parse_identifier()?);
+        self.expect_token(&Token::Colon)?;
+        let (field_type, trailing_bracket) = self.parse_data_type_helper()?;
+
+        Ok((
+            StructField {
+                field_name,
+                field_type,
+                options: None,
+            },
+            trailing_bracket,
+        ))
+    }
+
     /// DuckDB specific: Parse a Union type definition as a sequence of field-value pairs.
     ///
     /// Syntax:
@@ -7985,7 +8008,11 @@ impl<'a> Parser<'a> {
     pub fn parse_hive_distribution(&mut self) -> Result<HiveDistributionStyle, ParserError> {
         if self.parse_keywords(&[Keyword::PARTITIONED, Keyword::BY]) {
             self.expect_token(&Token::LParen)?;
-            let columns = self.parse_comma_separated(Parser::parse_column_def)?;
+            let columns = if dialect_of!(self is DatabricksDialect) {
+                self.parse_comma_separated(Parser::parse_partitioned_by_column_def)?
+            } else {
+                self.parse_comma_separated(Parser::parse_column_def)?
+            };
             self.expect_token(&Token::RParen)?;
             Ok(HiveDistributionStyle::PARTITIONED { columns })
         } else {
@@ -8840,6 +8867,21 @@ impl<'a> Parser<'a> {
         })
     }
 
+    /// Parses a `PARTITIONED BY` column definition. Databricks allows partition columns to be bare identifiers with no data type.
+    fn parse_partitioned_by_column_def(&mut self) -> Result<ColumnDef, ParserError> {
+        let name = self.parse_identifier()?;
+        let data_type = match self.peek_token().token {
+            Token::Comma | Token::RParen => DataType::Unspecified,
+            _ => self.parse_data_type()?,
+        };
+
+        Ok(ColumnDef {
+            name,
+            data_type,
+            options: vec![],
+        })
+    }
+
     fn is_column_type_sqlite_unspecified(&mut self) -> bool {
         if dialect_of!(self is SQLiteDialect) {
             match self.peek_token().token {
@@ -11927,6 +11969,16 @@ impl<'a> Parser<'a> {
                         StructBracketKind::AngleBrackets,
                     ))
                 }
+                Keyword::STRUCT if dialect_is!(dialect is DatabricksDialect) => {
+                    self.prev_token();
+                    let (field_defs, _trailing_bracket) =
+                        self.parse_struct_type_def(Self::parse_databricks_struct_field_def)?;
+                    trailing_bracket = _trailing_bracket;
+                    Ok(DataType::Struct(
+                        field_defs,
+                        StructBracketKind::AngleBrackets,
+                    ))
+                }
                 Keyword::UNION if dialect_is!(dialect is DuckDbDialect | GenericDialect) => {
                     self.prev_token();
                     let fields = self.parse_union_type_def()?;
@@ -18482,10 +18534,12 @@ impl<'a> Parser<'a> {
 
     /// ```sql
     /// OPTIMIZE TABLE [db.]name [ON CLUSTER cluster] [PARTITION partition | PARTITION ID 'partition_id'] [FINAL] [DEDUPLICATE [BY expression]]
+    /// OPTIMIZE [db.]name [WHERE predicate] [ZORDER BY (expr, ...)]
     /// ```
     /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/optimize)
+    /// [Databricks](https://docs.databricks.com/en/sql/language-manual/delta-optimize.html)
     pub fn parse_optimize_table(&mut self) -> Result<Statement, ParserError> {
-        self.expect_keyword_is(Keyword::TABLE)?;
+        let include_table_keyword = self.parse_keyword(Keyword::TABLE);
         let name = self.parse_object_name(false)?;
         let on_cluster = self.parse_optional_on_cluster()?;
 
@@ -18499,6 +18553,22 @@ impl<'a> Parser<'a> {
             None
         };
 
+        let where_clause = if self.parse_keyword(Keyword::WHERE) {
+            Some(self.parse_expr()?)
+        } else {
+            None
+        };
+
+        let zorder_by = if self.parse_keyword(Keyword::ZORDER) {
+            self.expect_keyword_is(Keyword::BY)?;
+            self.expect_token(&Token::LParen)?;
+            let exprs = self.parse_comma_separated(Parser::parse_expr)?;
+            self.expect_token(&Token::RParen)?;
+            exprs
+        } else {
+            vec![]
+        };
+
         let include_final = self.parse_keyword(Keyword::FINAL);
         let deduplicate = if self.parse_keyword(Keyword::DEDUPLICATE) {
             if self.parse_keyword(Keyword::BY) {
@@ -18511,9 +18581,12 @@ impl<'a> Parser<'a> {
         };
 
         Ok(Statement::OptimizeTable {
+            include_table_keyword,
             name,
             on_cluster,
             partition,
+            where_clause,
+            zorder_by,
             include_final,
             deduplicate,
         })
diff --git a/tmp/agent-patch-flux-pr-2170.1-of-1.2026-02-28__18-27-22__gpt-5-4/app/tests/sqlparser_databricks.rs b/app/tests/sqlparser_databricks.rs
index b088afd..eca04b8 100644
--- a/tmp/agent-patch-flux-pr-2170.1-of-1.2026-02-28__18-27-22__gpt-5-4/app/tests/sqlparser_databricks.rs
+++ b/app/tests/sqlparser_databricks.rs
@@ -325,6 +325,60 @@ fn parse_databricks_struct_function() {
     );
 }
 
+#[test]
+fn parse_databricks_optimize() {
+    databricks_and_generic().verified_stmt("OPTIMIZE t");
+    databricks_and_generic().verified_stmt("OPTIMIZE db.t WHERE part = 1");
+    databricks_and_generic().verified_stmt("OPTIMIZE t ZORDER BY (c1, c2)");
+    match databricks_and_generic()
+        .verified_stmt("OPTIMIZE db.t WHERE part = 1 ZORDER BY (c1, c2)")
+    {
+        Statement::OptimizeTable {
+            include_table_keyword,
+            name,
+            where_clause,
+            zorder_by,
+            on_cluster,
+            partition,
+            include_final,
+            deduplicate,
+        } => {
+            assert!(!include_table_keyword);
+            assert_eq!(name.to_string(), "db.t");
+            assert_eq!(
+                where_clause,
+                Some(Expr::BinaryOp {
+                    left: Box::new(Expr::Identifier(Ident::new("part"))),
+                    op: BinaryOperator::Eq,
+                    right: Box::new(Expr::value(number("1"))),
+                })
+            );
+            assert_eq!(
+                zorder_by,
+                vec![
+                    Expr::Identifier(Ident::new("c1")),
+                    Expr::Identifier(Ident::new("c2"))
+                ]
+            );
+            assert_eq!(on_cluster, None);
+            assert_eq!(partition, None);
+            assert!(!include_final);
+            assert_eq!(deduplicate, None);
+        }
+        statement => panic!("Unexpected statement: {statement:?}"),
+    }
+}
+
+#[test]
+fn parse_databricks_struct_type_and_partitioned_by() {
+    databricks().one_statement_parses_to(
+        "CREATE TABLE t (s STRUCT<a: INT, b: STRING>)",
+        "CREATE TABLE t (s STRUCT<a INT, b STRING>)",
+    );
+    databricks().verified_stmt("CREATE TABLE t (x INT) PARTITIONED BY (ds)");
+    databricks().verified_stmt("CREATE TABLE t (x INT) PARTITIONED BY (ds, region STRING)");
+}
+
 #[test]
 fn data_type_timestamp_ntz() {
     // Literal