@@ -46,27 +46,56 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
4646 schema : & DFSchema ,
4747 planner_context : & mut PlannerContext ,
4848 ) -> Result < Expr > {
49- // Workaround for https://github.com/apache/arrow-datafusion/issues/4065
50- //
51- // Minimize stack space required in debug builds to plan
52- // deeply nested binary operators by keeping the stack space
53- // needed for sql_expr_to_logical_expr minimal for BinaryOp
54- //
55- // The reason this reduces stack size in debug builds is
56- // explained in the "Technical Backstory" heading of
57- // https://github.com/apache/arrow-datafusion/pull/1047
58- //
59- // A likely better way to support deeply nested expressions
60- // would be to avoid recursion all together and use an
61- // iterative algorithm.
62- match sql {
63- SQLExpr :: BinaryOp { left, op, right } => {
64- self . parse_sql_binary_op ( * left, op, * right, schema, planner_context)
49+ enum StackEntry {
50+ SQLExpr ( Box < SQLExpr > ) ,
51+ Operator ( Operator ) ,
52+ }
53+
54+ // Virtual stack machine to convert SQLExpr to Expr
55+ // This allows visiting the expr tree in a depth-first manner which
56+ // produces expressions in postfix notation, i.e. `a + b` => `a b +`.
57+ // See https://github.com/apache/arrow-datafusion/issues/1444
58+ let mut stack = vec ! [ StackEntry :: SQLExpr ( Box :: new( sql) ) ] ;
59+ let mut eval_stack = vec ! [ ] ;
60+
61+ while let Some ( entry) = stack. pop ( ) {
62+ match entry {
63+ StackEntry :: SQLExpr ( sql_expr) => {
64+ match * sql_expr {
65+ SQLExpr :: BinaryOp { left, op, right } => {
66+ // Note the order that we push the entries to the stack
67+ // is important. We want to visit the left node first.
68+ let op = self . parse_sql_binary_op ( op) ?;
69+ stack. push ( StackEntry :: Operator ( op) ) ;
70+ stack. push ( StackEntry :: SQLExpr ( right) ) ;
71+ stack. push ( StackEntry :: SQLExpr ( left) ) ;
72+ }
73+ _ => {
74+ let expr = self . sql_expr_to_logical_expr_internal (
75+ * sql_expr,
76+ schema,
77+ planner_context,
78+ ) ?;
79+ eval_stack. push ( expr) ;
80+ }
81+ }
82+ }
83+ StackEntry :: Operator ( op) => {
84+ let right = eval_stack. pop ( ) . unwrap ( ) ;
85+ let left = eval_stack. pop ( ) . unwrap ( ) ;
86+ let expr = Expr :: BinaryExpr ( BinaryExpr :: new (
87+ Box :: new ( left) ,
88+ op,
89+ Box :: new ( right) ,
90+ ) ) ;
91+ eval_stack. push ( expr) ;
92+ }
6593 }
66- // since this function requires more space per frame
67- // avoid calling it for binary ops
68- _ => self . sql_expr_to_logical_expr_internal ( sql, schema, planner_context) ,
6994 }
95+
96+ assert_eq ! ( 1 , eval_stack. len( ) ) ;
97+ let expr = eval_stack. pop ( ) . unwrap ( ) ;
98+ Ok ( expr)
7099 }
71100
72101 /// Generate a relational expression from a SQL expression
@@ -574,3 +603,124 @@ fn plan_indexed(expr: Expr, mut keys: Vec<SQLExpr>) -> Result<Expr> {
574603 plan_key ( key) ?,
575604 ) ) )
576605}
606+
#[cfg(test)]
mod tests {
    //! Regression tests checking that planning a long chain of binary
    //! operators (`a OR b OR c ...`) completes without overflowing the stack.

    use super::*;

    use std::collections::HashMap;
    use std::sync::Arc;

    use arrow::datatypes::{DataType, Field, Schema};
    use sqlparser::dialect::GenericDialect;
    use sqlparser::parser::Parser;

    use datafusion_common::config::ConfigOptions;
    use datafusion_expr::logical_plan::builder::LogicalTableSource;
    use datafusion_expr::{AggregateUDF, ScalarUDF, TableSource};

    use crate::TableReference;

    /// Minimal `ContextProvider` backed by an in-memory map of table sources.
    struct TestSchemaProvider {
        options: ConfigOptions,
        tables: HashMap<String, Arc<dyn TableSource>>,
    }

    impl TestSchemaProvider {
        /// Creates a provider exposing a single table, `table1`, which has one
        /// non-nullable `Utf8` column named `column1`.
        pub fn new() -> Self {
            let column = Field::new("column1".to_string(), DataType::Utf8, false);

            let mut tables = HashMap::new();
            tables.insert("table1".to_string(), create_table_source(vec![column]));

            Self {
                options: Default::default(),
                tables,
            }
        }
    }

    impl ContextProvider for TestSchemaProvider {
        /// Looks the table up by its bare table name; unknown names yield a
        /// `DataFusionError::Plan` error.
        fn get_table_provider(
            &self,
            name: TableReference,
        ) -> Result<Arc<dyn TableSource>> {
            self.tables.get(name.table()).cloned().ok_or_else(|| {
                DataFusionError::Plan(format!("Table not found: {}", name.table()))
            })
        }

        /// No scalar UDFs are registered.
        fn get_function_meta(&self, _name: &str) -> Option<Arc<ScalarUDF>> {
            None
        }

        /// No aggregate UDFs are registered.
        fn get_aggregate_meta(&self, _name: &str) -> Option<Arc<AggregateUDF>> {
            None
        }

        /// No variables are known to this provider.
        fn get_variable_type(&self, _variable_names: &[String]) -> Option<DataType> {
            None
        }

        fn options(&self) -> &ConfigOptions {
            &self.options
        }
    }

    /// Wraps `fields` in a metadata-free schema and returns it as a table source.
    fn create_table_source(fields: Vec<Field>) -> Arc<dyn TableSource> {
        let schema = Schema::new_with_metadata(fields, HashMap::new());
        Arc::new(LogicalTableSource::new(Arc::new(schema)))
    }

    /// Parses `num_expr` equality predicates joined with `OR` and plans the
    /// resulting expression, panicking if parsing or planning fails.
    fn plan_or_chain(num_expr: usize) {
        let schema = DFSchema::empty();
        let mut planner_context = PlannerContext::default();

        let predicates: Vec<String> = (0..num_expr)
            .map(|i| format!("column1 = 'value{:?}'", i))
            .collect();
        let expr_str = predicates.join(" OR ");

        let dialect = GenericDialect {};
        let mut parser = Parser::new(&dialect)
            .try_with_sql(expr_str.as_str())
            .unwrap();
        let sql_expr = parser.parse_expr().unwrap();

        let schema_provider = TestSchemaProvider::new();
        let sql_to_rel = SqlToRel::new(&schema_provider);

        // Should not stack overflow
        sql_to_rel
            .sql_expr_to_logical_expr(sql_expr, &schema, &mut planner_context)
            .unwrap();
    }

    /// Generates a `#[test]` named `test_stack_overflow_<N>` that plans an
    /// `OR` chain of `N` predicates.
    macro_rules! test_stack_overflow {
        ($num_expr:expr) => {
            paste::item! {
                #[test]
                fn [<test_stack_overflow_ $num_expr>]() {
                    plan_or_chain($num_expr);
                }
            }
        };
    }

    test_stack_overflow!(64);
    test_stack_overflow!(128);
    test_stack_overflow!(256);
    test_stack_overflow!(512);
    test_stack_overflow!(1024);
    test_stack_overflow!(2048);
    test_stack_overflow!(4096);
    test_stack_overflow!(8192);
}
0 commit comments