diff --git a/examples/cli.rs b/examples/cli.rs index a320a00bc..8af6246a0 100644 --- a/examples/cli.rs +++ b/examples/cli.rs @@ -46,6 +46,7 @@ $ cargo run --feature json_example --example cli FILENAME.sql [--dialectname] "--hive" => Box::new(HiveDialect {}), "--redshift" => Box::new(RedshiftSqlDialect {}), "--clickhouse" => Box::new(ClickHouseDialect {}), + "--duckdb" => Box::new(DuckDbDialect {}), "--generic" | "" => Box::new(GenericDialect {}), s => panic!("Unexpected parameter: {s}"), }; diff --git a/src/ast/operator.rs b/src/ast/operator.rs index 0ab9c66a4..b988265ba 100644 --- a/src/ast/operator.rs +++ b/src/ast/operator.rs @@ -85,6 +85,8 @@ pub enum BinaryOperator { BitwiseOr, BitwiseAnd, BitwiseXor, + /// Integer division operator `//` in DuckDB + DuckIntegerDivide, /// MySQL [`DIV`](https://dev.mysql.com/doc/refman/8.0/en/arithmetic-functions.html) integer division MyIntegerDivide, /// Support for custom operators (built by parsers outside this crate) @@ -126,6 +128,7 @@ impl fmt::Display for BinaryOperator { BinaryOperator::BitwiseOr => f.write_str("|"), BinaryOperator::BitwiseAnd => f.write_str("&"), BinaryOperator::BitwiseXor => f.write_str("^"), + BinaryOperator::DuckIntegerDivide => f.write_str("//"), BinaryOperator::MyIntegerDivide => f.write_str("DIV"), BinaryOperator::Custom(s) => f.write_str(s), BinaryOperator::PGBitwiseXor => f.write_str("#"), diff --git a/src/dialect/duckdb.rs b/src/dialect/duckdb.rs new file mode 100644 index 000000000..55f258e53 --- /dev/null +++ b/src/dialect/duckdb.rs @@ -0,0 +1,31 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use crate::dialect::Dialect; + +#[derive(Debug, Default)] +pub struct DuckDbDialect; + +// In most cases the DuckDB dialect is identical to [`PostgreSqlDialect`]. +impl Dialect for DuckDbDialect { + fn is_identifier_start(&self, ch: char) -> bool { + ch.is_alphabetic() || ch == '_' + } + + fn is_identifier_part(&self, ch: char) -> bool { + ch.is_alphabetic() || ch.is_ascii_digit() || ch == '$' || ch == '_' + } + + fn supports_filter_during_aggregation(&self) -> bool { + true + } +} diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 5744ae65e..48357501c 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -13,6 +13,7 @@ mod ansi; mod bigquery; mod clickhouse; +mod duckdb; mod generic; mod hive; mod mssql; @@ -31,6 +32,7 @@ use core::str::Chars; pub use self::ansi::AnsiDialect; pub use self::bigquery::BigQueryDialect; pub use self::clickhouse::ClickHouseDialect; +pub use self::duckdb::DuckDbDialect; pub use self::generic::GenericDialect; pub use self::hive::HiveDialect; pub use self::mssql::MsSqlDialect; @@ -163,6 +165,7 @@ pub fn dialect_from_str(dialect_name: impl AsRef) -> Option Some(Box::new(ClickHouseDialect {})), "bigquery" => Some(Box::new(BigQueryDialect)), "ansi" => Some(Box::new(AnsiDialect {})), + "duckdb" => Some(Box::new(DuckDbDialect {})), _ => None, } } @@ -214,6 +217,8 @@ mod tests { assert!(parse_dialect("BigQuery").is::()); assert!(parse_dialect("ansi").is::()); assert!(parse_dialect("ANSI").is::()); + assert!(parse_dialect("duckdb").is::()); + assert!(parse_dialect("DuckDb").is::()); // error cases 
assert!(dialect_from_str("Unknown").is_none()); diff --git a/src/parser.rs b/src/parser.rs index af577ce5d..734b86708 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -991,7 +991,7 @@ impl<'a> Parser<'a> { /// parse a group by expr. a group by expr can be one of group sets, roll up, cube, or simple /// expr. fn parse_group_by_expr(&mut self) -> Result { - if dialect_of!(self is PostgreSqlDialect | GenericDialect) { + if dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect) { if self.parse_keywords(&[Keyword::GROUPING, Keyword::SETS]) { self.expect_token(&Token::LParen)?; let result = self.parse_comma_separated(|p| p.parse_tuple(false, true))?; @@ -1661,10 +1661,13 @@ impl<'a> Parser<'a> { } Token::Ampersand => Some(BinaryOperator::BitwiseAnd), Token::Div => Some(BinaryOperator::Divide), - Token::ShiftLeft if dialect_of!(self is PostgreSqlDialect | GenericDialect) => { + Token::DuckIntDiv if dialect_of!(self is DuckDbDialect | GenericDialect) => { + Some(BinaryOperator::DuckIntegerDivide) + } + Token::ShiftLeft if dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect) => { Some(BinaryOperator::PGBitwiseShiftLeft) } - Token::ShiftRight if dialect_of!(self is PostgreSqlDialect | GenericDialect) => { + Token::ShiftRight if dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect) => { Some(BinaryOperator::PGBitwiseShiftRight) } Token::Sharp if dialect_of!(self is PostgreSqlDialect) => { @@ -2050,7 +2053,9 @@ impl<'a> Parser<'a> { Token::Caret | Token::Sharp | Token::ShiftRight | Token::ShiftLeft => Ok(22), Token::Ampersand => Ok(23), Token::Plus | Token::Minus => Ok(Self::PLUS_MINUS_PREC), - Token::Mul | Token::Div | Token::Mod | Token::StringConcat => Ok(40), + Token::Mul | Token::Div | Token::DuckIntDiv | Token::Mod | Token::StringConcat => { + Ok(40) + } Token::DoubleColon => Ok(50), Token::Colon => Ok(50), Token::ExclamationMark => Ok(50), @@ -3841,7 +3846,7 @@ impl<'a> Parser<'a> { } else { let column_keyword = 
self.parse_keyword(Keyword::COLUMN); - let if_not_exists = if dialect_of!(self is PostgreSqlDialect | BigQueryDialect | GenericDialect) + let if_not_exists = if dialect_of!(self is PostgreSqlDialect | BigQueryDialect | DuckDbDialect | GenericDialect) { self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]) || if_not_exists @@ -6314,7 +6319,7 @@ impl<'a> Parser<'a> { self.expect_keyword(Keyword::SET)?; let assignments = self.parse_comma_separated(Parser::parse_assignment)?; let from = if self.parse_keyword(Keyword::FROM) - && dialect_of!(self is GenericDialect | PostgreSqlDialect | BigQueryDialect | SnowflakeDialect | RedshiftSqlDialect | MsSqlDialect) + && dialect_of!(self is GenericDialect | PostgreSqlDialect | DuckDbDialect | BigQueryDialect | SnowflakeDialect | RedshiftSqlDialect | MsSqlDialect) { Some(self.parse_table_and_joins()?) } else { @@ -6414,7 +6419,8 @@ impl<'a> Parser<'a> { pub fn parse_wildcard_additional_options( &mut self, ) -> Result { - let opt_exclude = if dialect_of!(self is GenericDialect | SnowflakeDialect) { + let opt_exclude = if dialect_of!(self is GenericDialect | DuckDbDialect | SnowflakeDialect) + { self.parse_optional_select_item_exclude()? 
} else { None diff --git a/src/test_utils.rs b/src/test_utils.rs index d01bbbab9..57b21e1c9 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -168,6 +168,7 @@ pub fn all_dialects() -> TestedDialects { Box::new(MySqlDialect {}), Box::new(BigQueryDialect {}), Box::new(SQLiteDialect {}), + Box::new(DuckDbDialect {}), ], options: None, } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index a550c4f5d..ffa1a96f2 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -35,7 +35,7 @@ use serde::{Deserialize, Serialize}; use sqlparser_derive::{Visit, VisitMut}; use crate::ast::DollarQuotedString; -use crate::dialect::{BigQueryDialect, GenericDialect, SnowflakeDialect}; +use crate::dialect::{BigQueryDialect, DuckDbDialect, GenericDialect, SnowflakeDialect}; use crate::dialect::{Dialect, MySqlDialect}; use crate::keywords::{Keyword, ALL_KEYWORDS, ALL_KEYWORDS_INDEX}; @@ -98,6 +98,8 @@ pub enum Token { Mul, /// Division operator `/` Div, + /// Integer division operator `//` in DuckDB + DuckIntDiv, /// Modulo Operator `%` Mod, /// String concatenation `||` @@ -212,6 +214,7 @@ impl fmt::Display for Token { Token::Minus => f.write_str("-"), Token::Mul => f.write_str("*"), Token::Div => f.write_str("/"), + Token::DuckIntDiv => f.write_str("//"), Token::StringConcat => f.write_str("||"), Token::Mod => f.write_str("%"), Token::LParen => f.write_str("("), @@ -768,6 +771,9 @@ impl<'a> Tokenizer<'a> { comment, }))) } + Some('/') if dialect_of!(self is DuckDbDialect | GenericDialect) => { + self.consume_and_return(chars, Token::DuckIntDiv) + } // a regular '/' operator _ => Ok(Some(Token::Div)), } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index ac5c18c0e..448100eb8 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -24,8 +24,9 @@ use sqlparser::ast::SelectItem::UnnamedExpr; use sqlparser::ast::TableFactor::Pivot; use sqlparser::ast::*; use sqlparser::dialect::{ - AnsiDialect, BigQueryDialect, ClickHouseDialect, GenericDialect, 
HiveDialect, MsSqlDialect, - MySqlDialect, PostgreSqlDialect, RedshiftSqlDialect, SQLiteDialect, SnowflakeDialect, + AnsiDialect, BigQueryDialect, ClickHouseDialect, DuckDbDialect, GenericDialect, HiveDialect, + MsSqlDialect, MySqlDialect, PostgreSqlDialect, RedshiftSqlDialect, SQLiteDialect, + SnowflakeDialect, }; use sqlparser::keywords::ALL_KEYWORDS; use sqlparser::parser::{Parser, ParserError, ParserOptions}; @@ -195,6 +196,7 @@ fn parse_update_set_from() { let dialects = TestedDialects { dialects: vec![ Box::new(GenericDialect {}), + Box::new(DuckDbDialect {}), Box::new(PostgreSqlDialect {}), Box::new(BigQueryDialect {}), Box::new(SnowflakeDialect {}), @@ -941,6 +943,7 @@ fn parse_exponent_in_select() -> Result<(), ParserError> { Box::new(AnsiDialect {}), Box::new(BigQueryDialect {}), Box::new(ClickHouseDialect {}), + Box::new(DuckDbDialect {}), Box::new(GenericDialect {}), // Box::new(HiveDialect {}), Box::new(MsSqlDialect {}), @@ -2053,6 +2056,7 @@ fn parse_array_agg_func() { let supported_dialects = TestedDialects { dialects: vec![ Box::new(GenericDialect {}), + Box::new(DuckDbDialect {}), Box::new(PostgreSqlDialect {}), Box::new(MsSqlDialect {}), Box::new(AnsiDialect {}), @@ -2848,6 +2852,7 @@ fn parse_alter_table_add_column_if_not_exists() { Box::new(PostgreSqlDialect {}), Box::new(BigQueryDialect {}), Box::new(GenericDialect {}), + Box::new(DuckDbDialect {}), ], options: None, }; @@ -6139,6 +6144,7 @@ fn test_placeholder() { let dialects = TestedDialects { dialects: vec![ Box::new(GenericDialect {}), + Box::new(DuckDbDialect {}), Box::new(PostgreSqlDialect {}), Box::new(MsSqlDialect {}), Box::new(AnsiDialect {}), @@ -6873,6 +6879,7 @@ fn parse_non_latin_identifiers() { let supported_dialects = TestedDialects { dialects: vec![ Box::new(GenericDialect {}), + Box::new(DuckDbDialect {}), Box::new(PostgreSqlDialect {}), Box::new(MsSqlDialect {}), Box::new(RedshiftSqlDialect {}), diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs new file mode 
100644 index 000000000..1a4f04c33 --- /dev/null +++ b/tests/sqlparser_duckdb.rs @@ -0,0 +1,70 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#[macro_use] +mod test_utils; + +use test_utils::*; + +use sqlparser::ast::*; +use sqlparser::dialect::{DuckDbDialect, GenericDialect}; + +fn duckdb() -> TestedDialects { + TestedDialects { + dialects: vec![Box::new(DuckDbDialect {})], + options: None, + } +} + +fn duckdb_and_generic() -> TestedDialects { + TestedDialects { + dialects: vec![Box::new(DuckDbDialect {}), Box::new(GenericDialect {})], + options: None, + } +} + +#[test] +fn test_select_wildcard_with_exclude() { + let select = duckdb().verified_only_select("SELECT * EXCLUDE (col_a) FROM data"); + let expected = SelectItem::Wildcard(WildcardAdditionalOptions { + opt_exclude: Some(ExcludeSelectItem::Multiple(vec![Ident::new("col_a")])), + ..Default::default() + }); + assert_eq!(expected, select.projection[0]); + + let select = + duckdb().verified_only_select("SELECT name.* EXCLUDE department_id FROM employee_table"); + let expected = SelectItem::QualifiedWildcard( + ObjectName(vec![Ident::new("name")]), + WildcardAdditionalOptions { + opt_exclude: Some(ExcludeSelectItem::Single(Ident::new("department_id"))), + ..Default::default() + }, + ); + assert_eq!(expected, select.projection[0]); + + let select = duckdb() + .verified_only_select("SELECT * EXCLUDE (department_id, employee_id) FROM employee_table"); + let expected = 
SelectItem::Wildcard(WildcardAdditionalOptions { + opt_exclude: Some(ExcludeSelectItem::Multiple(vec![ + Ident::new("department_id"), + Ident::new("employee_id"), + ])), + ..Default::default() + }); + assert_eq!(expected, select.projection[0]); +} + +#[test] +fn parse_div_infix() { + duckdb_and_generic().verified_stmt(r#"SELECT 5 // 2"#); +}