agent_patch
runs/2026-02-28__18-27-22__gpt-5-4/flux-pr-2170/flux-pr-2170.1-of-1.2026-02-28__18-27-22__gpt-5-4/agent-logs/agent.patch
11171 bytes
diff --git a/tmp/agent-patch-flux-pr-2170.1-of-1.2026-02-28__18-27-22__gpt-5-4/app/src/ast/mod.rs b/app/src/ast/mod.rs
index 010a818..28cf1ff 100644
--- a/tmp/agent-patch-flux-pr-2170.1-of-1.2026-02-28__18-27-22__gpt-5-4/app/src/ast/mod.rs
+++ b/app/src/ast/mod.rs
@@ -4580,16 +4580,24 @@ pub enum Statement {
},
/// ```sql
/// OPTIMIZE TABLE [db.]name [ON CLUSTER cluster] [PARTITION partition | PARTITION ID 'partition_id'] [FINAL] [DEDUPLICATE [BY expression]]
+ /// OPTIMIZE [db.]name [WHERE predicate] [ZORDER BY (expr, ...)]
/// ```
///
/// See ClickHouse <https://clickhouse.com/docs/en/sql-reference/statements/optimize>
+ /// and Databricks <https://docs.databricks.com/en/sql/language-manual/delta-optimize.html>
OptimizeTable {
+ /// Whether the statement included the `TABLE` keyword.
+ include_table_keyword: bool,
/// Table name to optimize.
name: ObjectName,
/// Optional cluster identifier.
on_cluster: Option<Ident>,
/// Optional partition spec.
partition: Option<Partition>,
+ /// Optional filter for Databricks OPTIMIZE.
+ where_clause: Option<Expr>,
+ /// Optional ZORDER BY list for Databricks OPTIMIZE.
+ zorder_by: Vec<Expr>,
/// Whether `FINAL` was specified.
include_final: bool,
/// Optional deduplication settings.
@@ -6068,19 +6076,32 @@ impl fmt::Display for Statement {
Ok(())
}
Statement::OptimizeTable {
+ include_table_keyword,
name,
on_cluster,
partition,
+ where_clause,
+ zorder_by,
include_final,
deduplicate,
} => {
- write!(f, "OPTIMIZE TABLE {name}")?;
+ write!(f, "OPTIMIZE")?;
+ if *include_table_keyword {
+ write!(f, " TABLE")?;
+ }
+ write!(f, " {name}")?;
if let Some(on_cluster) = on_cluster {
write!(f, " ON CLUSTER {on_cluster}")?;
}
if let Some(partition) = partition {
write!(f, " {partition}")?;
}
+ if let Some(where_clause) = where_clause {
+ write!(f, " WHERE {where_clause}")?;
+ }
+ if !zorder_by.is_empty() {
+ write!(f, " ZORDER BY ({})", display_comma_separated(zorder_by))?;
+ }
if *include_final {
write!(f, " FINAL")?;
}
diff --git a/tmp/agent-patch-flux-pr-2170.1-of-1.2026-02-28__18-27-22__gpt-5-4/app/src/dialect/databricks.rs b/app/src/dialect/databricks.rs
index e903b07..ff9ef44 100644
--- a/tmp/agent-patch-flux-pr-2170.1-of-1.2026-02-28__18-27-22__gpt-5-4/app/src/dialect/databricks.rs
+++ b/app/src/dialect/databricks.rs
@@ -81,6 +81,11 @@ impl Dialect for DatabricksDialect {
true
}
+ /// Databricks supports `OPTIMIZE`; see <https://docs.databricks.com/en/sql/language-manual/delta-optimize.html>
+ fn supports_optimize_table(&self) -> bool {
+ true
+ }
+
/// See <https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-values.html>
fn supports_values_as_table_factor(&self) -> bool {
true
diff --git a/tmp/agent-patch-flux-pr-2170.1-of-1.2026-02-28__18-27-22__gpt-5-4/app/src/parser/mod.rs b/app/src/parser/mod.rs
index 0b152f2..f9a9038 100644
--- a/tmp/agent-patch-flux-pr-2170.1-of-1.2026-02-28__18-27-22__gpt-5-4/app/src/parser/mod.rs
+++ b/app/src/parser/mod.rs
@@ -3415,6 +3415,29 @@ impl<'a> Parser<'a> {
))
}
+    /// Parses a single field of a Databricks `STRUCT<...>` type.
+    ///
+    /// Syntax:
+    /// ```sql
+    /// field_name [:] field_type
+    /// ```
+    ///
+    /// The `:` separator is optional in Databricks, and the rendered form of a
+    /// struct type (`STRUCT<a INT, b STRING>`) omits it, so both spellings must
+    /// parse for the output to round-trip. The trailing-bracket state reported
+    /// by `parse_data_type_helper` is passed through unchanged.
+    ///
+    /// See <https://docs.databricks.com/en/sql/language-manual/data-types/struct-type.html>
+    fn parse_databricks_struct_field_def(
+        &mut self,
+    ) -> Result<(StructField, MatchedTrailingBracket), ParserError> {
+        let field_name = Some(self.parse_identifier()?);
+        // Accept `name: type` and `name type` alike; `consume_token` only
+        // advances when the next token is a colon.
+        let _ = self.consume_token(&Token::Colon);
+        let (field_type, trailing_bracket) = self.parse_data_type_helper()?;
+
+        Ok((
+            StructField {
+                field_name,
+                field_type,
+                options: None,
+            },
+            trailing_bracket,
+        ))
+    }
+
/// DuckDB specific: Parse a Union type definition as a sequence of field-value pairs.
///
/// Syntax:
@@ -7985,7 +8008,11 @@ impl<'a> Parser<'a> {
pub fn parse_hive_distribution(&mut self) -> Result<HiveDistributionStyle, ParserError> {
if self.parse_keywords(&[Keyword::PARTITIONED, Keyword::BY]) {
self.expect_token(&Token::LParen)?;
- let columns = self.parse_comma_separated(Parser::parse_column_def)?;
+ let columns = if dialect_of!(self is DatabricksDialect) {
+ self.parse_comma_separated(Parser::parse_partitioned_by_column_def)?
+ } else {
+ self.parse_comma_separated(Parser::parse_column_def)?
+ };
self.expect_token(&Token::RParen)?;
Ok(HiveDistributionStyle::PARTITIONED { columns })
} else {
@@ -8840,6 +8867,21 @@ impl<'a> Parser<'a> {
})
}
+    /// Parses one `PARTITIONED BY` entry: an identifier, optionally followed by a data type.
+    ///
+    /// When the identifier is immediately followed by `,` or `)`, no type was
+    /// written and the column is recorded with `DataType::Unspecified`.
+    fn parse_partitioned_by_column_def(&mut self) -> Result<ColumnDef, ParserError> {
+        let name = self.parse_identifier()?;
+        let type_omitted = matches!(self.peek_token().token, Token::Comma | Token::RParen);
+        let data_type = if type_omitted {
+            DataType::Unspecified
+        } else {
+            self.parse_data_type()?
+        };
+
+        Ok(ColumnDef {
+            name,
+            data_type,
+            options: Vec::new(),
+        })
+    }
+
fn is_column_type_sqlite_unspecified(&mut self) -> bool {
if dialect_of!(self is SQLiteDialect) {
match self.peek_token().token {
@@ -11927,6 +11969,16 @@ impl<'a> Parser<'a> {
StructBracketKind::AngleBrackets,
))
}
+ Keyword::STRUCT if dialect_is!(dialect is DatabricksDialect) => {
+ self.prev_token();
+ let (field_defs, _trailing_bracket) =
+ self.parse_struct_type_def(Self::parse_databricks_struct_field_def)?;
+ trailing_bracket = _trailing_bracket;
+ Ok(DataType::Struct(
+ field_defs,
+ StructBracketKind::AngleBrackets,
+ ))
+ }
Keyword::UNION if dialect_is!(dialect is DuckDbDialect | GenericDialect) => {
self.prev_token();
let fields = self.parse_union_type_def()?;
@@ -18482,10 +18534,12 @@ impl<'a> Parser<'a> {
/// ```sql
/// OPTIMIZE TABLE [db.]name [ON CLUSTER cluster] [PARTITION partition | PARTITION ID 'partition_id'] [FINAL] [DEDUPLICATE [BY expression]]
+ /// OPTIMIZE [db.]name [WHERE predicate] [ZORDER BY (expr, ...)]
/// ```
/// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/optimize)
+ /// [Databricks](https://docs.databricks.com/en/sql/language-manual/delta-optimize.html)
pub fn parse_optimize_table(&mut self) -> Result<Statement, ParserError> {
- self.expect_keyword_is(Keyword::TABLE)?;
+ let include_table_keyword = self.parse_keyword(Keyword::TABLE);
let name = self.parse_object_name(false)?;
let on_cluster = self.parse_optional_on_cluster()?;
@@ -18499,6 +18553,22 @@ impl<'a> Parser<'a> {
None
};
+ let where_clause = if self.parse_keyword(Keyword::WHERE) {
+ Some(self.parse_expr()?)
+ } else {
+ None
+ };
+
+ let zorder_by = if self.parse_keyword(Keyword::ZORDER) {
+ self.expect_keyword_is(Keyword::BY)?;
+ self.expect_token(&Token::LParen)?;
+ let exprs = self.parse_comma_separated(Parser::parse_expr)?;
+ self.expect_token(&Token::RParen)?;
+ exprs
+ } else {
+ vec![]
+ };
+
let include_final = self.parse_keyword(Keyword::FINAL);
let deduplicate = if self.parse_keyword(Keyword::DEDUPLICATE) {
if self.parse_keyword(Keyword::BY) {
@@ -18511,9 +18581,12 @@ impl<'a> Parser<'a> {
};
Ok(Statement::OptimizeTable {
+ include_table_keyword,
name,
on_cluster,
partition,
+ where_clause,
+ zorder_by,
include_final,
deduplicate,
})
diff --git a/tmp/agent-patch-flux-pr-2170.1-of-1.2026-02-28__18-27-22__gpt-5-4/app/tests/sqlparser_databricks.rs b/app/tests/sqlparser_databricks.rs
index b088afd..eca04b8 100644
--- a/tmp/agent-patch-flux-pr-2170.1-of-1.2026-02-28__18-27-22__gpt-5-4/app/tests/sqlparser_databricks.rs
+++ b/app/tests/sqlparser_databricks.rs
@@ -325,6 +325,60 @@ fn parse_databricks_struct_function() {
);
}
+#[test]
+fn parse_databricks_optimize() {
+    // Round-trip the simple forms: bare table, WHERE only, ZORDER BY only.
+    for sql in [
+        "OPTIMIZE t",
+        "OPTIMIZE db.t WHERE part = 1",
+        "OPTIMIZE t ZORDER BY (c1, c2)",
+    ] {
+        databricks_and_generic().verified_stmt(sql);
+    }
+
+    // Combined form: check every field of the resulting AST node.
+    let expected_filter = Expr::BinaryOp {
+        left: Box::new(Expr::Identifier(Ident::new("part"))),
+        op: BinaryOperator::Eq,
+        right: Box::new(Expr::value(number("1"))),
+    };
+    let expected_zorder = vec![
+        Expr::Identifier(Ident::new("c1")),
+        Expr::Identifier(Ident::new("c2")),
+    ];
+
+    let parsed =
+        databricks_and_generic().verified_stmt("OPTIMIZE db.t WHERE part = 1 ZORDER BY (c1, c2)");
+    match parsed {
+        Statement::OptimizeTable {
+            include_table_keyword,
+            name,
+            on_cluster,
+            partition,
+            where_clause,
+            zorder_by,
+            include_final,
+            deduplicate,
+        } => {
+            assert!(!include_table_keyword);
+            assert_eq!(name.to_string(), "db.t");
+            assert_eq!(where_clause, Some(expected_filter));
+            assert_eq!(zorder_by, expected_zorder);
+            // None of the ClickHouse-only options may be set.
+            assert_eq!(on_cluster, None);
+            assert_eq!(partition, None);
+            assert!(!include_final);
+            assert_eq!(deduplicate, None);
+        }
+        statement => panic!("Unexpected statement: {statement:?}"),
+    }
+}
+
+#[test]
+fn parse_databricks_struct_type_and_partitioned_by() {
+    // Colon-separated struct fields are accepted and rendered without the colon.
+    let with_colons = "CREATE TABLE t (s STRUCT<a: INT, b: STRING>)";
+    let canonical = "CREATE TABLE t (s STRUCT<a INT, b STRING>)";
+    databricks().one_statement_parses_to(with_colons, canonical);
+
+    // `PARTITIONED BY` columns round-trip both without and with a data type.
+    for sql in [
+        "CREATE TABLE t (x INT) PARTITIONED BY (ds)",
+        "CREATE TABLE t (x INT) PARTITIONED BY (ds, region STRING)",
+    ] {
+        databricks().verified_stmt(sql);
+    }
+}
+
#[test]
fn data_type_timestamp_ntz() {
// Literal