From 728aa6d7b50578e040626f29bfbfaff47fa577bb Mon Sep 17 00:00:00 2001 From: zhuqi-lucas <821684824@qq.com> Date: Sun, 9 Mar 2025 14:50:50 +0800 Subject: [PATCH 1/6] Config: Add support default sql varchar to view types --- datafusion/common/src/config.rs | 5 +++ .../core/src/execution/session_state.rs | 2 + datafusion/core/tests/dataframe/mod.rs | 40 +++++++++++++++++++ datafusion/sql/src/planner.rs | 18 ++++++++- datafusion/sql/tests/sql_integration.rs | 2 + 5 files changed, 66 insertions(+), 1 deletion(-) diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index 8c093a9db899e..2a9a173f08391 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -252,6 +252,11 @@ config_namespace! { /// string length and thus DataFusion can not enforce such limits. pub support_varchar_with_length: bool, default = true + /// If true, permit `VARCHAR` default convert to `Utf8View` in the logical plan. + /// If false, `VARCHAR` will be converted to `Utf8` in the logical plan. + /// Default is false. + pub support_varchar_to_view_types: bool, default = false + /// When set to true, the source locations relative to the original SQL /// query (i.e. [`Span`](https://docs.rs/sqlparser/latest/sqlparser/tokenizer/struct.Span.html)) will be collected /// and recorded in the logical plan nodes. diff --git a/datafusion/core/src/execution/session_state.rs b/datafusion/core/src/execution/session_state.rs index 0e83156ab53f1..bff115cedf1ee 100644 --- a/datafusion/core/src/execution/session_state.rs +++ b/datafusion/core/src/execution/session_state.rs @@ -489,6 +489,8 @@ impl SessionState { enable_options_value_normalization: sql_parser_options .enable_options_value_normalization, support_varchar_with_length: sql_parser_options.support_varchar_with_length, + support_sql_varchar_to_view_types: sql_parser_options + .support_varchar_to_view_types, collect_spans: sql_parser_options.collect_spans, } } diff --git a/datafusion/core/tests/dataframe/mod.rs b/datafusion/core/tests/dataframe/mod.rs index 43428d6846a17..e258d3d5e0645 100644 --- a/datafusion/core/tests/dataframe/mod.rs +++ b/datafusion/core/tests/dataframe/mod.rs @@ -5673,3 +5673,43 @@ async fn test_fill_null_all_columns() -> Result<()> { assert_batches_sorted_eq!(expected, &results); Ok(()) } + +#[tokio::test] +async fn test_sql_support_sql_to_view_types() -> Result<()> { + // Test that the default behaviour is false + let cfg = SessionConfig::new(); + let ctx = SessionContext::new_with_config(cfg); + + ctx.sql("CREATE TABLE t (a varchar)").await?; + + let df = ctx.sql("describe t").await?.collect().await?; + let expected = [ + "+-------------+-----------+-------------+", + "| column_name | data_type | is_nullable |", + "+-------------+-----------+-------------+", + "| a | Utf8 | YES |", + "+-------------+-----------+-------------+", + ]; + + assert_batches_eq!(expected, &df); + + // Test that the behaviour can be changed to true which will return Utf8View + let mut cfg = SessionConfig::new(); + let opts = cfg.options_mut(); + opts.sql_parser.support_varchar_to_view_types = true; + let ctx = SessionContext::new_with_config(cfg); + + ctx.sql("CREATE TABLE t (a varchar)").await?; + + let df = ctx.sql("describe t").await?.collect().await?; + let expected = [ + "+-------------+-----------+-------------+", + "| column_name | data_type | is_nullable |", + "+-------------+-----------+-------------+", + "| a | Utf8View | YES |", + "+-------------+-----------+-------------+", + ]; + + assert_batches_eq!(expected, &df); + Ok(()) +} diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index bc7c2b7f43771..ca4f2f83677f0 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -54,6 +54,8 @@ pub struct ParserOptions { pub enable_options_value_normalization: bool, /// Whether to collect spans pub collect_spans: bool, + /// Whether support sql varchar to view types + pub support_sql_varchar_to_view_types: bool, } impl ParserOptions { @@ -72,6 +74,7 @@ impl ParserOptions { parse_float_as_decimal: false, enable_ident_normalization: true, support_varchar_with_length: true, + support_sql_varchar_to_view_types: false, enable_options_value_normalization: false, collect_spans: false, } @@ -111,6 +114,12 @@ impl ParserOptions { self } + /// Sets the `support_sql_varchar_to_view_types` option. + pub fn with_support_sql_varchar_to_view_types(mut self, value: bool) -> Self { + self.support_sql_varchar_to_view_types = value; + self + } + /// Sets the `enable_options_value_normalization` option. pub fn with_enable_options_value_normalization(mut self, value: bool) -> Self { self.enable_options_value_normalization = value; @@ -136,6 +145,7 @@ impl From<&SqlParserOptions> for ParserOptions { parse_float_as_decimal: options.parse_float_as_decimal, enable_ident_normalization: options.enable_ident_normalization, support_varchar_with_length: options.support_varchar_with_length, + support_sql_varchar_to_view_types: options.support_varchar_to_view_types, enable_options_value_normalization: options .enable_options_value_normalization, collect_spans: options.collect_spans, @@ -558,7 +568,13 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { SQLDataType::Varchar(length) => { match (length, self.options.support_varchar_with_length) { (Some(_), false) => plan_err!("does not support Varchar with length, please set `support_varchar_with_length` to be true"), - _ => Ok(DataType::Utf8), + _ => { + if self.options.support_sql_varchar_to_view_types { + Ok(DataType::Utf8View) + } else { + Ok(DataType::Utf8) + } + } } } SQLDataType::UnsignedBigInt(_) | SQLDataType::UnsignedInt8(_) => Ok(DataType::UInt64), diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index 1df18302687ec..289cc362d7faa 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -92,6 +92,7 @@ fn parse_decimals() { parse_float_as_decimal: true, enable_ident_normalization: false, support_varchar_with_length: false, + support_sql_varchar_to_view_types: false, enable_options_value_normalization: false, collect_spans: false, }, @@ -148,6 +149,7 @@ fn parse_ident_normalization() { parse_float_as_decimal: false, enable_ident_normalization, support_varchar_with_length: false, + support_sql_varchar_to_view_types: false, enable_options_value_normalization: false, collect_spans: false, }, From a1258e2e1e41fdb209868d33917fd69666c78473 Mon Sep 17 00:00:00 2001 From: zhuqi-lucas <821684824@qq.com> Date: Sun, 9 Mar 2025 16:33:26 +0800 Subject: [PATCH 2/6] Fix test --- datafusion/sqllogictest/test_files/information_schema.slt | 1 + docs/source/user-guide/configs.md | 1 + 2 files changed, 2 insertions(+) diff --git a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt index 454055b53930e..63b1a7a952250 100644 --- a/datafusion/sqllogictest/test_files/information_schema.slt +++ b/datafusion/sqllogictest/test_files/information_schema.slt @@ -265,6 +265,7 @@ datafusion.sql_parser.enable_ident_normalization true datafusion.sql_parser.enable_options_value_normalization false datafusion.sql_parser.parse_float_as_decimal false datafusion.sql_parser.recursion_limit 50 +datafusion.sql_parser.support_varchar_to_view_types false datafusion.sql_parser.support_varchar_with_length true # show all variables with verbose diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md index 635eb2b0a67fd..4aa6aca68b8a8 100644 --- a/docs/source/user-guide/configs.md +++ b/docs/source/user-guide/configs.md @@ -128,5 +128,6 @@ Environment variables are read during `SessionConfig` initialisation so they mus | datafusion.sql_parser.enable_options_value_normalization | false | When set to true, SQL parser will normalize options value (convert value to lowercase). Note that this option is ignored and will be removed in the future. All case-insensitive values are normalized automatically. | | datafusion.sql_parser.dialect | generic | Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, Ansi, DuckDB and Databricks. | | datafusion.sql_parser.support_varchar_with_length | true | If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but ignore the length. If false, error if a `VARCHAR` with a length is specified. The Arrow type system does not have a notion of maximum string length and thus DataFusion can not enforce such limits. | +| datafusion.sql_parser.support_varchar_to_view_types | false | If true, permit `VARCHAR` default convert to `Utf8View` in the logical plan. If false, `VARCHAR` will be converted to `Utf8` in the logical plan. Default is false. | | datafusion.sql_parser.collect_spans | false | When set to true, the source locations relative to the original SQL query (i.e. [`Span`](https://docs.rs/sqlparser/latest/sqlparser/tokenizer/struct.Span.html)) will be collected and recorded in the logical plan nodes. | | datafusion.sql_parser.recursion_limit | 50 | Specifies the recursion depth limit when parsing complex SQL Queries | From 86433db30ee1ea302a1f6cdd664fb4d3f2154034 Mon Sep 17 00:00:00 2001 From: zhuqi-lucas <821684824@qq.com> Date: Sun, 9 Mar 2025 18:30:56 +0800 Subject: [PATCH 3/6] fix test --- datafusion/sqllogictest/test_files/information_schema.slt | 1 + 1 file changed, 1 insertion(+) diff --git a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt index 63b1a7a952250..da827999487b9 100644 --- a/datafusion/sqllogictest/test_files/information_schema.slt +++ b/datafusion/sqllogictest/test_files/information_schema.slt @@ -364,6 +364,7 @@ datafusion.sql_parser.enable_ident_normalization true When set to true, SQL pars datafusion.sql_parser.enable_options_value_normalization false When set to true, SQL parser will normalize options value (convert value to lowercase). Note that this option is ignored and will be removed in the future. All case-insensitive values are normalized automatically. datafusion.sql_parser.parse_float_as_decimal false When set to true, SQL parser will parse float as decimal type datafusion.sql_parser.recursion_limit 50 Specifies the recursion depth limit when parsing complex SQL Queries +datafusion.sql_parser.support_varchar_to_view_types false If true, permit `VARCHAR` default convert to `Utf8View` in the logical plan. If false, `VARCHAR` will be converted to `Utf8` in the logical plan. Default is false. datafusion.sql_parser.support_varchar_with_length true If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but ignore the length. If false, error if a `VARCHAR` with a length is specified. The Arrow type system does not have a notion of maximum string length and thus DataFusion can not enforce such limits. # show_variable_in_config_options From 2d3d83bd70d162d50c21fb84d6200a773172ca0a Mon Sep 17 00:00:00 2001 From: zhuqi-lucas <821684824@qq.com> Date: Sun, 9 Mar 2025 22:28:05 +0800 Subject: [PATCH 4/6] Address comments --- datafusion/common/src/config.rs | 8 ++-- .../core/src/execution/session_state.rs | 3 +- datafusion/core/tests/dataframe/mod.rs | 40 ------------------- datafusion/sql/src/planner.rs | 16 ++++---- datafusion/sql/tests/sql_integration.rs | 4 +- datafusion/sqllogictest/test_files/ddl.slt | 28 +++++++++++++ .../test_files/information_schema.slt | 4 +- docs/source/user-guide/configs.md | 2 +- 8 files changed, 46 insertions(+), 59 deletions(-) diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index 2a9a173f08391..ed61a23f0c016 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -252,10 +252,10 @@ config_namespace! { /// string length and thus DataFusion can not enforce such limits. pub support_varchar_with_length: bool, default = true - /// If true, permit `VARCHAR` default convert to `Utf8View` in the logical plan. - /// If false, `VARCHAR` will be converted to `Utf8` in the logical plan. - /// Default is false. - pub support_varchar_to_view_types: bool, default = false + /// If true, `VARCHAR` is mapped to `Utf8View` during SQL planning. + /// If false, `VARCHAR` is mapped to `Utf8` during SQL planning. + /// Default is false. + pub default_varchar_views: bool, default = false /// When set to true, the source locations relative to the original SQL /// query (i.e. [`Span`](https://docs.rs/sqlparser/latest/sqlparser/tokenizer/struct.Span.html)) will be collected diff --git a/datafusion/core/src/execution/session_state.rs b/datafusion/core/src/execution/session_state.rs index bff115cedf1ee..c92e9c964bfc4 100644 --- a/datafusion/core/src/execution/session_state.rs +++ b/datafusion/core/src/execution/session_state.rs @@ -489,8 +489,7 @@ impl SessionState { enable_options_value_normalization: sql_parser_options .enable_options_value_normalization, support_varchar_with_length: sql_parser_options.support_varchar_with_length, - support_sql_varchar_to_view_types: sql_parser_options - .support_varchar_to_view_types, + default_varchar_views: sql_parser_options.default_varchar_views, collect_spans: sql_parser_options.collect_spans, } } diff --git a/datafusion/core/tests/dataframe/mod.rs b/datafusion/core/tests/dataframe/mod.rs index e258d3d5e0645..43428d6846a17 100644 --- a/datafusion/core/tests/dataframe/mod.rs +++ b/datafusion/core/tests/dataframe/mod.rs @@ -5673,43 +5673,3 @@ async fn test_fill_null_all_columns() -> Result<()> { assert_batches_sorted_eq!(expected, &results); Ok(()) } - -#[tokio::test] -async fn test_sql_support_sql_to_view_types() -> Result<()> { - // Test that the default behaviour is false - let cfg = SessionConfig::new(); - let ctx = SessionContext::new_with_config(cfg); - - ctx.sql("CREATE TABLE t (a varchar)").await?; - - let df = ctx.sql("describe t").await?.collect().await?; - let expected = [ - "+-------------+-----------+-------------+", - "| column_name | data_type | is_nullable |", - "+-------------+-----------+-------------+", - "| a | Utf8 | YES |", - "+-------------+-----------+-------------+", - ]; - - assert_batches_eq!(expected, &df); - - // Test that the behaviour can be changed to true which will return Utf8View - let mut cfg = SessionConfig::new(); - let opts = cfg.options_mut(); - opts.sql_parser.support_varchar_to_view_types = true; - let ctx = SessionContext::new_with_config(cfg); - - ctx.sql("CREATE TABLE t (a varchar)").await?; - - let df = ctx.sql("describe t").await?.collect().await?; - let expected = [ - "+-------------+-----------+-------------+", - "| column_name | data_type | is_nullable |", - "+-------------+-----------+-------------+", - "| a | Utf8View | YES |", - "+-------------+-----------+-------------+", - ]; - - assert_batches_eq!(expected, &df); - Ok(()) -} diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index ca4f2f83677f0..77b0a0bf9b689 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -54,8 +54,8 @@ pub struct ParserOptions { pub enable_options_value_normalization: bool, /// Whether to collect spans pub collect_spans: bool, - /// Whether support sql varchar to view types - pub support_sql_varchar_to_view_types: bool, + /// Whether `VARCHAR` is mapped to `Utf8View` during SQL planning. + pub default_varchar_views: bool, } impl ParserOptions { @@ -74,7 +74,7 @@ impl ParserOptions { parse_float_as_decimal: false, enable_ident_normalization: true, support_varchar_with_length: true, - support_sql_varchar_to_view_types: false, + default_varchar_views: false, enable_options_value_normalization: false, collect_spans: false, } @@ -114,9 +114,9 @@ impl ParserOptions { self } - /// Sets the `support_sql_varchar_to_view_types` option. - pub fn with_support_sql_varchar_to_view_types(mut self, value: bool) -> Self { - self.support_sql_varchar_to_view_types = value; + /// Sets the `default_varchar_views` option. + pub fn with_default_varchar_views(mut self, value: bool) -> Self { + self.default_varchar_views = value; self } @@ -145,7 +145,7 @@ impl From<&SqlParserOptions> for ParserOptions { parse_float_as_decimal: options.parse_float_as_decimal, enable_ident_normalization: options.enable_ident_normalization, support_varchar_with_length: options.support_varchar_with_length, - support_sql_varchar_to_view_types: options.support_varchar_to_view_types, + default_varchar_views: options.default_varchar_views, enable_options_value_normalization: options .enable_options_value_normalization, collect_spans: options.collect_spans, @@ -569,7 +569,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { match (length, self.options.support_varchar_with_length) { (Some(_), false) => plan_err!("does not support Varchar with length, please set `support_varchar_with_length` to be true"), _ => { - if self.options.support_sql_varchar_to_view_types { + if self.options.default_varchar_views { Ok(DataType::Utf8View) } else { Ok(DataType::Utf8) diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index 289cc362d7faa..04a5f4ba40b8c 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -92,7 +92,7 @@ fn parse_decimals() { parse_float_as_decimal: true, enable_ident_normalization: false, support_varchar_with_length: false, - support_sql_varchar_to_view_types: false, + default_varchar_views: false, enable_options_value_normalization: false, collect_spans: false, }, @@ -149,7 +149,7 @@ fn parse_ident_normalization() { parse_float_as_decimal: false, enable_ident_normalization, support_varchar_with_length: false, - support_sql_varchar_to_view_types: false, + default_varchar_views: false, enable_options_value_normalization: false, collect_spans: false, }, diff --git a/datafusion/sqllogictest/test_files/ddl.slt b/datafusion/sqllogictest/test_files/ddl.slt index 6f75a7d7f8fdb..76ee27bff7478 100644 --- a/datafusion/sqllogictest/test_files/ddl.slt +++ b/datafusion/sqllogictest/test_files/ddl.slt @@ -827,3 +827,31 @@ drop table table_with_pk; statement ok set datafusion.catalog.information_schema = false; + +# Test VARCHAR is mapped to Utf8View during SQL planning when setting default_varchar_views to true +statement ok +CREATE TABLE t1(c1 VARCHAR(10) NOT NULL, c2 VARCHAR); + +query TTT +DESCRIBE t1; +---- +c1 Utf8 NO +c2 Utf8 YES + +statement ok +set datafusion.sql_parser.default_varchar_views = true; + +statement ok +CREATE TABLE t2(c1 VARCHAR(10) NOT NULL, c2 VARCHAR); + +query TTT +DESCRIBE t2; +---- +c1 Utf8View NO +c2 Utf8View YES + +statement ok +DROP TABLE t1; + +statement ok +DROP TABLE t2; diff --git a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt index da827999487b9..62b9a9b523f9d 100644 --- a/datafusion/sqllogictest/test_files/information_schema.slt +++ b/datafusion/sqllogictest/test_files/information_schema.slt @@ -260,12 +260,12 @@ datafusion.optimizer.repartition_windows true datafusion.optimizer.skip_failed_rules false datafusion.optimizer.top_down_join_key_reordering true datafusion.sql_parser.collect_spans false +datafusion.sql_parser.default_varchar_views false datafusion.sql_parser.dialect generic datafusion.sql_parser.enable_ident_normalization true datafusion.sql_parser.enable_options_value_normalization false datafusion.sql_parser.parse_float_as_decimal false datafusion.sql_parser.recursion_limit 50 -datafusion.sql_parser.support_varchar_to_view_types false datafusion.sql_parser.support_varchar_with_length true # show all variables with verbose @@ -359,12 +359,12 @@ datafusion.optimizer.repartition_windows true Should DataFusion repartition data datafusion.optimizer.skip_failed_rules false When set to true, the logical plan optimizer will produce warning messages if any optimization rules produce errors and then proceed to the next rule. When set to false, any rules that produce errors will cause the query to fail datafusion.optimizer.top_down_join_key_reordering true When set to true, the physical plan optimizer will run a top down process to reorder the join keys datafusion.sql_parser.collect_spans false When set to true, the source locations relative to the original SQL query (i.e. [`Span`](https://docs.rs/sqlparser/latest/sqlparser/tokenizer/struct.Span.html)) will be collected and recorded in the logical plan nodes. +datafusion.sql_parser.default_varchar_views false If true, `VARCHAR` is mapped to `Utf8View` during SQL planning. If false, `VARCHAR` is mapped to `Utf8` during SQL planning. Default is false. datafusion.sql_parser.dialect generic Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, Ansi, DuckDB and Databricks. datafusion.sql_parser.enable_ident_normalization true When set to true, SQL parser will normalize ident (convert ident to lowercase when not quoted) datafusion.sql_parser.enable_options_value_normalization false When set to true, SQL parser will normalize options value (convert value to lowercase). Note that this option is ignored and will be removed in the future. All case-insensitive values are normalized automatically. datafusion.sql_parser.parse_float_as_decimal false When set to true, SQL parser will parse float as decimal type datafusion.sql_parser.recursion_limit 50 Specifies the recursion depth limit when parsing complex SQL Queries -datafusion.sql_parser.support_varchar_to_view_types false If true, permit `VARCHAR` default convert to `Utf8View` in the logical plan. If false, `VARCHAR` will be converted to `Utf8` in the logical plan. Default is false. datafusion.sql_parser.support_varchar_with_length true If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but ignore the length. If false, error if a `VARCHAR` with a length is specified. The Arrow type system does not have a notion of maximum string length and thus DataFusion can not enforce such limits. # show_variable_in_config_options diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md index 4aa6aca68b8a8..433f9c10da118 100644 --- a/docs/source/user-guide/configs.md +++ b/docs/source/user-guide/configs.md @@ -128,6 +128,6 @@ Environment variables are read during `SessionConfig` initialisation so they mus | datafusion.sql_parser.enable_options_value_normalization | false | When set to true, SQL parser will normalize options value (convert value to lowercase). Note that this option is ignored and will be removed in the future. All case-insensitive values are normalized automatically. | | datafusion.sql_parser.dialect | generic | Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, Ansi, DuckDB and Databricks. | | datafusion.sql_parser.support_varchar_with_length | true | If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but ignore the length. If false, error if a `VARCHAR` with a length is specified. The Arrow type system does not have a notion of maximum string length and thus DataFusion can not enforce such limits. | -| datafusion.sql_parser.support_varchar_to_view_types | false | If true, permit `VARCHAR` default convert to `Utf8View` in the logical plan. If false, `VARCHAR` will be converted to `Utf8` in the logical plan. Default is false. | +| datafusion.sql_parser.default_varchar_views | false | If true, `VARCHAR` is mapped to `Utf8View` during SQL planning. If false, `VARCHAR` is mapped to `Utf8` during SQL planning. Default is false. | | datafusion.sql_parser.collect_spans | false | When set to true, the source locations relative to the original SQL query (i.e. [`Span`](https://docs.rs/sqlparser/latest/sqlparser/tokenizer/struct.Span.html)) will be collected and recorded in the logical plan nodes. | | datafusion.sql_parser.recursion_limit | 50 | Specifies the recursion depth limit when parsing complex SQL Queries | From 07a6d9bddd1bc1496e2128bb3e700f421c81dbd5 Mon Sep 17 00:00:00 2001 From: zhuqi-lucas <821684824@qq.com> Date: Tue, 11 Mar 2025 23:07:05 +0800 Subject: [PATCH 5/6] Address comments --- datafusion/common/src/config.rs | 2 +- datafusion/core/src/execution/session_state.rs | 2 +- datafusion/sql/src/planner.rs | 14 +++++++------- datafusion/sql/tests/sql_integration.rs | 4 ++-- datafusion/sqllogictest/test_files/ddl.slt | 4 ++-- .../sqllogictest/test_files/information_schema.slt | 4 ++-- docs/source/user-guide/configs.md | 2 +- 7 files changed, 16 insertions(+), 16 deletions(-) diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index ed61a23f0c016..b0f17630c910c 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -255,7 +255,7 @@ config_namespace! { /// If true, `VARCHAR` is mapped to `Utf8View` during SQL planning. /// If false, `VARCHAR` is mapped to `Utf8` during SQL planning. /// Default is false. - pub default_varchar_views: bool, default = false + pub map_varchar_to_utf8view: bool, default = false /// When set to true, the source locations relative to the original SQL /// query (i.e. [`Span`](https://docs.rs/sqlparser/latest/sqlparser/tokenizer/struct.Span.html)) will be collected diff --git a/datafusion/core/src/execution/session_state.rs b/datafusion/core/src/execution/session_state.rs index c92e9c964bfc4..f4b0fd0c125ff 100644 --- a/datafusion/core/src/execution/session_state.rs +++ b/datafusion/core/src/execution/session_state.rs @@ -489,7 +489,7 @@ impl SessionState { enable_options_value_normalization: sql_parser_options .enable_options_value_normalization, support_varchar_with_length: sql_parser_options.support_varchar_with_length, - default_varchar_views: sql_parser_options.default_varchar_views, + map_varchar_to_utf8view: sql_parser_options.map_varchar_to_utf8view, collect_spans: sql_parser_options.collect_spans, } } diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index 77b0a0bf9b689..daaf70f953047 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -55,7 +55,7 @@ pub struct ParserOptions { /// Whether to collect spans pub collect_spans: bool, /// Whether `VARCHAR` is mapped to `Utf8View` during SQL planning. - pub default_varchar_views: bool, + pub map_varchar_to_utf8view: bool, } impl ParserOptions { @@ -74,7 +74,7 @@ impl ParserOptions { parse_float_as_decimal: false, enable_ident_normalization: true, support_varchar_with_length: true, - default_varchar_views: false, + map_varchar_to_utf8view: false, enable_options_value_normalization: false, collect_spans: false, } @@ -114,9 +114,9 @@ impl ParserOptions { self } - /// Sets the `default_varchar_views` option. - pub fn with_default_varchar_views(mut self, value: bool) -> Self { - self.default_varchar_views = value; + /// Sets the `map_varchar_to_utf8view` option. + pub fn with_map_varchar_to_utf8view(mut self, value: bool) -> Self { + self.map_varchar_to_utf8view = value; self } @@ -145,7 +145,7 @@ impl From<&SqlParserOptions> for ParserOptions { parse_float_as_decimal: options.parse_float_as_decimal, enable_ident_normalization: options.enable_ident_normalization, support_varchar_with_length: options.support_varchar_with_length, - default_varchar_views: options.default_varchar_views, + map_varchar_to_utf8view: options.map_varchar_to_utf8view, enable_options_value_normalization: options .enable_options_value_normalization, collect_spans: options.collect_spans, @@ -569,7 +569,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { match (length, self.options.support_varchar_with_length) { (Some(_), false) => plan_err!("does not support Varchar with length, please set `support_varchar_with_length` to be true"), _ => { - if self.options.default_varchar_views { + if self.options.map_varchar_to_utf8view { Ok(DataType::Utf8View) } else { Ok(DataType::Utf8) diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index 04a5f4ba40b8c..b98763a9d6b57 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -92,7 +92,7 @@ fn parse_decimals() { parse_float_as_decimal: true, enable_ident_normalization: false, support_varchar_with_length: false, - default_varchar_views: false, + map_varchar_to_utf8view: false, enable_options_value_normalization: false, collect_spans: false, }, @@ -149,7 +149,7 @@ fn parse_ident_normalization() { parse_float_as_decimal: false, enable_ident_normalization, support_varchar_with_length: false, - default_varchar_views: false, + map_varchar_to_utf8view: false, enable_options_value_normalization: false, collect_spans: false, }, diff --git a/datafusion/sqllogictest/test_files/ddl.slt b/datafusion/sqllogictest/test_files/ddl.slt index 76ee27bff7478..bc15f2210380e 100644 --- a/datafusion/sqllogictest/test_files/ddl.slt +++ b/datafusion/sqllogictest/test_files/ddl.slt @@ -828,7 +828,7 @@ drop table table_with_pk; statement ok set datafusion.catalog.information_schema = false; -# Test VARCHAR is mapped to Utf8View during SQL planning when setting default_varchar_views to true +# Test VARCHAR is mapped to Utf8View during SQL planning when setting map_varchar_to_utf8view to true statement ok CREATE TABLE t1(c1 VARCHAR(10) NOT NULL, c2 VARCHAR); @@ -839,7 +839,7 @@ c1 Utf8 NO c2 Utf8 YES statement ok -set datafusion.sql_parser.default_varchar_views = true; +set datafusion.sql_parser.map_varchar_to_utf8view = true; statement ok CREATE TABLE t2(c1 VARCHAR(10) NOT NULL, c2 VARCHAR); diff --git a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt index 62b9a9b523f9d..c8861f65e5e8e 100644 --- a/datafusion/sqllogictest/test_files/information_schema.slt +++ b/datafusion/sqllogictest/test_files/information_schema.slt @@ -260,7 +260,7 @@ datafusion.optimizer.repartition_windows true datafusion.optimizer.skip_failed_rules false datafusion.optimizer.top_down_join_key_reordering true datafusion.sql_parser.collect_spans false -datafusion.sql_parser.default_varchar_views false +datafusion.sql_parser.map_varchar_to_utf8view false datafusion.sql_parser.dialect generic datafusion.sql_parser.enable_ident_normalization true datafusion.sql_parser.enable_options_value_normalization false @@ -359,7 +359,7 @@ datafusion.optimizer.repartition_windows true Should DataFusion repartition data datafusion.optimizer.skip_failed_rules false When set to true, the logical plan optimizer will produce warning messages if any optimization rules produce errors and then proceed to the next rule. When set to false, any rules that produce errors will cause the query to fail datafusion.optimizer.top_down_join_key_reordering true When set to true, the physical plan optimizer will run a top down process to reorder the join keys datafusion.sql_parser.collect_spans false When set to true, the source locations relative to the original SQL query (i.e. [`Span`](https://docs.rs/sqlparser/latest/sqlparser/tokenizer/struct.Span.html)) will be collected and recorded in the logical plan nodes. -datafusion.sql_parser.default_varchar_views false If true, `VARCHAR` is mapped to `Utf8View` during SQL planning. If false, `VARCHAR` is mapped to `Utf8` during SQL planning. Default is false. +datafusion.sql_parser.map_varchar_to_utf8view false If true, `VARCHAR` is mapped to `Utf8View` during SQL planning. If false, `VARCHAR` is mapped to `Utf8` during SQL planning. Default is false. datafusion.sql_parser.dialect generic Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, Ansi, DuckDB and Databricks. datafusion.sql_parser.enable_ident_normalization true When set to true, SQL parser will normalize ident (convert ident to lowercase when not quoted) datafusion.sql_parser.enable_options_value_normalization false When set to true, SQL parser will normalize options value (convert value to lowercase). Note that this option is ignored and will be removed in the future. All case-insensitive values are normalized automatically. diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md index 433f9c10da118..b6b53cfe49b3b 100644 --- a/docs/source/user-guide/configs.md +++ b/docs/source/user-guide/configs.md @@ -128,6 +128,6 @@ Environment variables are read during `SessionConfig` initialisation so they mus | datafusion.sql_parser.enable_options_value_normalization | false | When set to true, SQL parser will normalize options value (convert value to lowercase). Note that this option is ignored and will be removed in the future. All case-insensitive values are normalized automatically. | | datafusion.sql_parser.dialect | generic | Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, Ansi, DuckDB and Databricks. | | datafusion.sql_parser.support_varchar_with_length | true | If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but ignore the length. If false, error if a `VARCHAR` with a length is specified. The Arrow type system does not have a notion of maximum string length and thus DataFusion can not enforce such limits. | -| datafusion.sql_parser.default_varchar_views | false | If true, `VARCHAR` is mapped to `Utf8View` during SQL planning. If false, `VARCHAR` is mapped to `Utf8` during SQL planning. Default is false. | +| datafusion.sql_parser.map_varchar_to_utf8view | false | If true, `VARCHAR` is mapped to `Utf8View` during SQL planning. If false, `VARCHAR` is mapped to `Utf8` during SQL planning. Default is false. | | datafusion.sql_parser.collect_spans | false | When set to true, the source locations relative to the original SQL query (i.e. [`Span`](https://docs.rs/sqlparser/latest/sqlparser/tokenizer/struct.Span.html)) will be collected and recorded in the logical plan nodes. | | datafusion.sql_parser.recursion_limit | 50 | Specifies the recursion depth limit when parsing complex SQL Queries | From 7d0c9b09ac0bcff24afc60c833cf08164f0e046b Mon Sep 17 00:00:00 2001 From: zhuqi-lucas <821684824@qq.com> Date: Tue, 11 Mar 2025 23:31:51 +0800 Subject: [PATCH 6/6] Fix slt test --- datafusion/sqllogictest/test_files/information_schema.slt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt index c8861f65e5e8e..496f24abf6ed7 100644 --- a/datafusion/sqllogictest/test_files/information_schema.slt +++ b/datafusion/sqllogictest/test_files/information_schema.slt @@ -260,10 +260,10 @@ datafusion.optimizer.repartition_windows true datafusion.optimizer.skip_failed_rules false datafusion.optimizer.top_down_join_key_reordering true datafusion.sql_parser.collect_spans false -datafusion.sql_parser.map_varchar_to_utf8view false datafusion.sql_parser.dialect generic datafusion.sql_parser.enable_ident_normalization true datafusion.sql_parser.enable_options_value_normalization false +datafusion.sql_parser.map_varchar_to_utf8view false datafusion.sql_parser.parse_float_as_decimal false datafusion.sql_parser.recursion_limit 50 datafusion.sql_parser.support_varchar_with_length true @@ -359,10 +359,10 @@ datafusion.optimizer.repartition_windows true Should DataFusion repartition data datafusion.optimizer.skip_failed_rules false When set to true, the logical plan optimizer will produce warning messages if any optimization rules produce errors and then proceed to the next rule. When set to false, any rules that produce errors will cause the query to fail datafusion.optimizer.top_down_join_key_reordering true When set to true, the physical plan optimizer will run a top down process to reorder the join keys datafusion.sql_parser.collect_spans false When set to true, the source locations relative to the original SQL query (i.e. [`Span`](https://docs.rs/sqlparser/latest/sqlparser/tokenizer/struct.Span.html)) will be collected and recorded in the logical plan nodes. -datafusion.sql_parser.map_varchar_to_utf8view false If true, `VARCHAR` is mapped to `Utf8View` during SQL planning. If false, `VARCHAR` is mapped to `Utf8` during SQL planning. Default is false. datafusion.sql_parser.dialect generic Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, Ansi, DuckDB and Databricks. datafusion.sql_parser.enable_ident_normalization true When set to true, SQL parser will normalize ident (convert ident to lowercase when not quoted) datafusion.sql_parser.enable_options_value_normalization false When set to true, SQL parser will normalize options value (convert value to lowercase). Note that this option is ignored and will be removed in the future. All case-insensitive values are normalized automatically. +datafusion.sql_parser.map_varchar_to_utf8view false If true, `VARCHAR` is mapped to `Utf8View` during SQL planning. If false, `VARCHAR` is mapped to `Utf8` during SQL planning. Default is false. datafusion.sql_parser.parse_float_as_decimal false When set to true, SQL parser will parse float as decimal type datafusion.sql_parser.recursion_limit 50 Specifies the recursion depth limit when parsing complex SQL Queries datafusion.sql_parser.support_varchar_with_length true If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but ignore the length. If false, error if a `VARCHAR` with a length is specified. The Arrow type system does not have a notion of maximum string length and thus DataFusion can not enforce such limits.