Skip to content

Commit 7c6b41a

Browse files
aprimadi and alamb authored
Switch to non-recursive on heap virtual stack when building logical plan from SQL expression (#6360)
* Adding stack overflow test
* Implement heap based non-recursive visitor
* Fix clippy
* Update datafusion/sql/src/expr/mod.rs

  Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>
* Update datafusion/sql/src/expr/mod.rs

  Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>

---------

Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>
1 parent c24830a commit 7c6b41a

3 files changed

Lines changed: 177 additions & 39 deletions

File tree

datafusion/sql/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,4 +47,5 @@ sqlparser = "0.33"
4747
[dev-dependencies]
4848
ctor = "0.2.0"
4949
env_logger = "0.10"
50+
paste = "^1.0"
5051
rstest = "0.17"

datafusion/sql/src/expr/binary_op.rs

Lines changed: 7 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -15,21 +15,14 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
use crate::planner::{ContextProvider, PlannerContext, SqlToRel};
19-
use datafusion_common::{DFSchema, DataFusionError, Result};
20-
use datafusion_expr::{BinaryExpr, Expr, Operator};
21-
use sqlparser::ast::{BinaryOperator, Expr as SQLExpr};
18+
use crate::planner::{ContextProvider, SqlToRel};
19+
use datafusion_common::{DataFusionError, Result};
20+
use datafusion_expr::Operator;
21+
use sqlparser::ast::BinaryOperator;
2222

2323
impl<'a, S: ContextProvider> SqlToRel<'a, S> {
24-
pub(crate) fn parse_sql_binary_op(
25-
&self,
26-
left: SQLExpr,
27-
op: BinaryOperator,
28-
right: SQLExpr,
29-
schema: &DFSchema,
30-
planner_context: &mut PlannerContext,
31-
) -> Result<Expr> {
32-
let operator = match op {
24+
pub(crate) fn parse_sql_binary_op(&self, op: BinaryOperator) -> Result<Operator> {
25+
match op {
3326
BinaryOperator::Gt => Ok(Operator::Gt),
3427
BinaryOperator::GtEq => Ok(Operator::GtEq),
3528
BinaryOperator::Lt => Ok(Operator::Lt),
@@ -56,12 +49,6 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
5649
_ => Err(DataFusionError::NotImplemented(format!(
5750
"Unsupported SQL binary operator {op:?}"
5851
))),
59-
}?;
60-
61-
Ok(Expr::BinaryExpr(BinaryExpr::new(
62-
Box::new(self.sql_expr_to_logical_expr(left, schema, planner_context)?),
63-
operator,
64-
Box::new(self.sql_expr_to_logical_expr(right, schema, planner_context)?),
65-
)))
52+
}
6653
}
6754
}

datafusion/sql/src/expr/mod.rs

Lines changed: 169 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -46,27 +46,56 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
4646
schema: &DFSchema,
4747
planner_context: &mut PlannerContext,
4848
) -> Result<Expr> {
49-
// Workaround for https://github.com/apache/arrow-datafusion/issues/4065
50-
//
51-
// Minimize stack space required in debug builds to plan
52-
// deeply nested binary operators by keeping the stack space
53-
// needed for sql_expr_to_logical_expr minimal for BinaryOp
54-
//
55-
// The reason this reduces stack size in debug builds is
56-
// explained in the "Technical Backstory" heading of
57-
// https://github.com/apache/arrow-datafusion/pull/1047
58-
//
59-
// A likely better way to support deeply nested expressions
60-
// would be to avoid recursion all together and use an
61-
// iterative algorithm.
62-
match sql {
63-
SQLExpr::BinaryOp { left, op, right } => {
64-
self.parse_sql_binary_op(*left, op, *right, schema, planner_context)
49+
enum StackEntry {
50+
SQLExpr(Box<SQLExpr>),
51+
Operator(Operator),
52+
}
53+
54+
// Virtual stack machine to convert SQLExpr to Expr
55+
// This allows visiting the expr tree in a depth-first manner which
56+
// produces expressions in postfix notations, i.e. `a + b` => `a b +`.
57+
// See https://github.com/apache/arrow-datafusion/issues/1444
58+
let mut stack = vec![StackEntry::SQLExpr(Box::new(sql))];
59+
let mut eval_stack = vec![];
60+
61+
while let Some(entry) = stack.pop() {
62+
match entry {
63+
StackEntry::SQLExpr(sql_expr) => {
64+
match *sql_expr {
65+
SQLExpr::BinaryOp { left, op, right } => {
66+
// Note the order that we push the entries to the stack
67+
// is important. We want to visit the left node first.
68+
let op = self.parse_sql_binary_op(op)?;
69+
stack.push(StackEntry::Operator(op));
70+
stack.push(StackEntry::SQLExpr(right));
71+
stack.push(StackEntry::SQLExpr(left));
72+
}
73+
_ => {
74+
let expr = self.sql_expr_to_logical_expr_internal(
75+
*sql_expr,
76+
schema,
77+
planner_context,
78+
)?;
79+
eval_stack.push(expr);
80+
}
81+
}
82+
}
83+
StackEntry::Operator(op) => {
84+
let right = eval_stack.pop().unwrap();
85+
let left = eval_stack.pop().unwrap();
86+
let expr = Expr::BinaryExpr(BinaryExpr::new(
87+
Box::new(left),
88+
op,
89+
Box::new(right),
90+
));
91+
eval_stack.push(expr);
92+
}
6593
}
66-
// since this function requires more space per frame
67-
// avoid calling it for binary ops
68-
_ => self.sql_expr_to_logical_expr_internal(sql, schema, planner_context),
6994
}
95+
96+
assert_eq!(1, eval_stack.len());
97+
let expr = eval_stack.pop().unwrap();
98+
Ok(expr)
7099
}
71100

72101
/// Generate a relational expression from a SQL expression
@@ -574,3 +603,124 @@ fn plan_indexed(expr: Expr, mut keys: Vec<SQLExpr>) -> Result<Expr> {
574603
plan_key(key)?,
575604
)))
576605
}
606+
607+
#[cfg(test)]
608+
mod tests {
609+
use super::*;
610+
611+
use std::collections::HashMap;
612+
use std::sync::Arc;
613+
614+
use arrow::datatypes::{DataType, Field, Schema};
615+
use sqlparser::dialect::GenericDialect;
616+
use sqlparser::parser::Parser;
617+
618+
use datafusion_common::config::ConfigOptions;
619+
use datafusion_expr::logical_plan::builder::LogicalTableSource;
620+
use datafusion_expr::{AggregateUDF, ScalarUDF, TableSource};
621+
622+
use crate::TableReference;
623+
624+
/// Test-only `ContextProvider` backed by an in-memory map of table sources.
struct TestSchemaProvider {
625+
/// Configuration handed out by reference from `ContextProvider::options`.
options: ConfigOptions,
626+
/// Table sources keyed by table name; looked up in `get_table_provider`.
tables: HashMap<String, Arc<dyn TableSource>>,
627+
}
628+
629+
impl TestSchemaProvider {
630+
/// Creates a provider with default options and exactly one registered
/// table, `table1`, whose schema has a single `Utf8` field named `column1`.
pub fn new() -> Self {
631+
let mut tables = HashMap::new();
632+
// Register the only table the stack-overflow tests refer to.
tables.insert(
633+
"table1".to_string(),
634+
create_table_source(vec![Field::new(
635+
"column1".to_string(),
636+
DataType::Utf8,
637+
false,
638+
)]),
639+
);
640+
641+
Self {
642+
options: Default::default(),
643+
tables,
644+
}
645+
}
646+
}
647+
648+
// Minimal `ContextProvider` for the tests: only table lookup and options are
// backed by real data; every function/variable lookup reports "not found".
impl ContextProvider for TestSchemaProvider {
649+
/// Returns a clone of the `Arc` registered under `name.table()`, or a
/// `DataFusionError::Plan` ("Table not found: ...") when no entry exists.
fn get_table_provider(
650+
&self,
651+
name: TableReference,
652+
) -> Result<Arc<dyn TableSource>> {
653+
match self.tables.get(name.table()) {
654+
Some(table) => Ok(table.clone()),
655+
_ => Err(DataFusionError::Plan(format!(
656+
"Table not found: {}",
657+
name.table()
658+
))),
659+
}
660+
}
661+
662+
/// Always `None`: this provider registers no scalar UDFs.
fn get_function_meta(&self, _name: &str) -> Option<Arc<ScalarUDF>> {
663+
None
664+
}
665+
666+
/// Always `None`: this provider registers no aggregate UDFs.
fn get_aggregate_meta(&self, _name: &str) -> Option<Arc<AggregateUDF>> {
667+
None
668+
}
669+
670+
/// Always `None`: this provider knows no variables.
fn get_variable_type(&self, _variable_names: &[String]) -> Option<DataType> {
671+
None
672+
}
673+
674+
/// Borrows the options stored on the provider.
fn options(&self) -> &ConfigOptions {
675+
&self.options
676+
}
677+
}
678+
679+
/// Wraps `fields` in a `Schema` (with an empty metadata map) and returns it
/// as a `LogicalTableSource` behind `Arc<dyn TableSource>`.
fn create_table_source(fields: Vec<Field>) -> Arc<dyn TableSource> {
680+
Arc::new(LogicalTableSource::new(Arc::new(
681+
Schema::new_with_metadata(fields, HashMap::new()),
682+
)))
683+
}
684+
685+
/// Generates a `#[test]` function named `test_stack_overflow_<N>` that plans a
/// SQL expression made of `N` equality predicates chained with `OR`.
///
/// The generated test parses the expression with `GenericDialect` and passes
/// it to `SqlToRel::sql_expr_to_logical_expr`; it fails if parsing or planning
/// returns an error (both are unwrapped).
macro_rules! test_stack_overflow {
686+
($num_expr:expr) => {
687+
// `paste` is used to splice `$num_expr` into the test function's name.
paste::item! {
688+
#[test]
689+
fn [<test_stack_overflow_ $num_expr>]() {
690+
let schema = DFSchema::empty();
691+
let mut planner_context = PlannerContext::default();
692+
693+
// Build "column1 = 'value0' OR column1 = 'value1' OR ..." with
// `$num_expr` terms.
let expr_str = (0..$num_expr)
694+
.map(|i| format!("column1 = 'value{:?}'", i))
695+
.collect::<Vec<String>>()
696+
.join(" OR ");
697+
698+
// Parse the text into a single sqlparser expression.
let dialect = GenericDialect{};
699+
let mut parser = Parser::new(&dialect)
700+
.try_with_sql(expr_str.as_str())
701+
.unwrap();
702+
let sql_expr = parser.parse_expr().unwrap();
703+
704+
let schema_provider = TestSchemaProvider::new();
705+
let sql_to_rel = SqlToRel::new(&schema_provider);
706+
707+
// Should not stack overflow
708+
sql_to_rel.sql_expr_to_logical_expr(
709+
sql_expr,
710+
&schema,
711+
&mut planner_context,
712+
).unwrap();
713+
}
714+
}
715+
};
716+
}
717+
718+
// Instantiate the stack-overflow test for expression counts doubling from
// 64 up to 8192 `OR`-ed predicates.
test_stack_overflow!(64);
719+
test_stack_overflow!(128);
720+
test_stack_overflow!(256);
721+
test_stack_overflow!(512);
722+
test_stack_overflow!(1024);
723+
test_stack_overflow!(2048);
724+
test_stack_overflow!(4096);
725+
test_stack_overflow!(8192);
726+
}

0 commit comments

Comments
 (0)