From 323665ffc5aabf0688e3d699c54ad7db0efad616 Mon Sep 17 00:00:00 2001 From: Dharan Aditya Date: Tue, 23 Jul 2024 15:36:24 +0530 Subject: [PATCH 1/3] check struct field names for uniqueness --- datafusion/functions/src/core/named_struct.rs | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/datafusion/functions/src/core/named_struct.rs b/datafusion/functions/src/core/named_struct.rs index 8ccda977f3a42..5707e00b35ecb 100644 --- a/datafusion/functions/src/core/named_struct.rs +++ b/datafusion/functions/src/core/named_struct.rs @@ -20,6 +20,7 @@ use arrow::datatypes::{DataType, Field, Fields}; use datafusion_common::{exec_err, internal_err, Result, ScalarValue}; use datafusion_expr::{ColumnarValue, Expr, ExprSchemable}; use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; +use hashbrown::HashSet; use std::any::Any; use std::sync::Arc; @@ -45,7 +46,6 @@ fn named_struct_expr(args: &[ColumnarValue]) -> Result { .map(|(i, chunk)| { let name_column = &chunk[0]; - let name = match name_column { ColumnarValue::Scalar(ScalarValue::Utf8(Some(name_scalar))) => name_scalar, _ => return exec_err!("named_struct even arguments must be string literals, got {name_column:?} instead at position {}", i * 2) @@ -57,6 +57,15 @@ fn named_struct_expr(args: &[ColumnarValue]) -> Result { .into_iter() .unzip(); + // Check to enforce the uniqueness of struct field name + let unique_field_names_set = names.iter().collect::>(); + + if unique_field_names_set.len() != names.len() { + return exec_err!("named_struct requires unique field names"); + } + + drop(unique_field_names_set); + let arrays = ColumnarValue::values_to_arrays(&values)?; let fields = names From 3a0badca36809ebfaa43b5ee43e6539a9bf442a4 Mon Sep 17 00:00:00 2001 From: Dharan Aditya Date: Tue, 23 Jul 2024 17:11:09 +0530 Subject: [PATCH 2/3] add logic test --- datafusion/sqllogictest/test_files/struct.slt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/datafusion/sqllogictest/test_files/struct.slt b/datafusion/sqllogictest/test_files/struct.slt index a7384fd4d8ad6..caa612f556fed 100644 --- a/datafusion/sqllogictest/test_files/struct.slt +++ b/datafusion/sqllogictest/test_files/struct.slt @@ -122,6 +122,10 @@ physical_plan query error select named_struct(); +# error on duplicate field names +query error +select named_struct('c0': 1, 'c1': 2, 'c1': 3); + # error on odd number of arguments #1 query error DataFusion error: Execution error: named_struct requires an even number of arguments, got 1 instead select named_struct('a'); From 248ca5805c69579b27450ba28c528307ea052237 Mon Sep 17 00:00:00 2001 From: Dharan Aditya Date: Wed, 24 Jul 2024 11:32:32 +0530 Subject: [PATCH 3/3] improve error log --- datafusion/functions/src/core/named_struct.rs | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/datafusion/functions/src/core/named_struct.rs b/datafusion/functions/src/core/named_struct.rs index 5707e00b35ecb..f71b1b00f0fe6 100644 --- a/datafusion/functions/src/core/named_struct.rs +++ b/datafusion/functions/src/core/named_struct.rs @@ -57,15 +57,19 @@ fn named_struct_expr(args: &[ColumnarValue]) -> Result { .into_iter() .unzip(); - // Check to enforce the uniqueness of struct field name - let unique_field_names_set = names.iter().collect::>(); - - if unique_field_names_set.len() != names.len() { - return exec_err!("named_struct requires unique field names"); + { + // Check to enforce the uniqueness of struct field name + let mut unique_field_names = HashSet::new(); + for name in names.iter() { + if unique_field_names.contains(name) { + return exec_err!( + "named_struct requires unique field names. Field {name} is used more than once." + ); + } + unique_field_names.insert(name); + } } - drop(unique_field_names_set); - let arrays = ColumnarValue::values_to_arrays(&values)?; let fields = names