Skip to content

Commit 0d1cd55

Browse files
authored
remove the type coercion in the simplify_expressions rule (#3657)
1 parent 29b8bbd commit 0d1cd55

2 files changed

Lines changed: 18 additions & 44 deletions

File tree

datafusion/optimizer/src/simplify_expressions.rs

Lines changed: 16 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
//! Simplify expressions optimizer rule
1919
2020
use crate::expr_simplifier::ExprSimplifiable;
21-
use crate::type_coercion::TypeCoercionRewriter;
2221
use crate::{expr_simplifier::SimplifyInfo, OptimizerConfig, OptimizerRule};
2322
use arrow::array::new_null_array;
2423
use arrow::datatypes::{DataType, Field, Schema};
@@ -33,7 +32,6 @@ use datafusion_expr::{
3332
BuiltinScalarFunction, ColumnarValue, Expr, ExprSchemable, Operator, Volatility,
3433
};
3534
use datafusion_physical_expr::{create_physical_expr, execution_props::ExecutionProps};
36-
use std::sync::Arc;
3735

3836
/// Provides simplification information based on schema and properties
3937
pub(crate) struct SimplifyContext<'a, 'b> {
@@ -377,9 +375,6 @@ pub struct ConstEvaluator<'a> {
377375
execution_props: &'a ExecutionProps,
378376
input_schema: DFSchema,
379377
input_batch: RecordBatch,
380-
// Needed until we ensure type coercion is done before any optimizations
381-
// https://github.com/apache/arrow-datafusion/issues/3557
382-
type_coercion_helper: TypeCoercionRewriter,
383378
}
384379

385380
impl<'a> ExprRewriter for ConstEvaluator<'a> {
@@ -434,14 +429,12 @@ impl<'a> ConstEvaluator<'a> {
434429
// Need a single "input" row to produce a single output row
435430
let col = new_null_array(&DataType::Null, 1);
436431
let input_batch = RecordBatch::try_new(std::sync::Arc::new(schema), vec![col])?;
437-
let type_coercion = TypeCoercionRewriter::new(Arc::new(input_schema.clone()));
438432

439433
Ok(Self {
440434
can_evaluate: vec![],
441435
execution_props,
442436
input_schema,
443437
input_batch,
444-
type_coercion_helper: type_coercion,
445438
})
446439
}
447440

@@ -510,15 +503,6 @@ impl<'a> ConstEvaluator<'a> {
510503
return Ok(s);
511504
}
512505

513-
// TODO: https://github.com/apache/arrow-datafusion/issues/3582
514-
// TODO: https://github.com/apache/arrow-datafusion/issues/3556
515-
// Do the type coercion in the simplify expression
516-
// this is just a work around for removing the type coercion in the physical phase
517-
// we need to support eval the result without the physical expr.
518-
// If we don't do the type coercion, we will meet the
519-
// https://github.com/apache/arrow-datafusion/issues/3556 when create the physical expr
520-
// to try evaluate the result.
521-
let expr = expr.rewrite(&mut self.type_coercion_helper)?;
522506
let phys_expr = create_physical_expr(
523507
&expr,
524508
&self.input_schema,
@@ -1223,12 +1207,6 @@ mod tests {
12231207
assert_eq!(simplify(expr_eq), lit(true));
12241208
}
12251209

1226-
#[test]
1227-
fn test_simplify_with_type_coercion() {
1228-
let expr_plus = binary_expr(lit(1_i32), Operator::Plus, lit(1_i64));
1229-
assert_eq!(simplify(expr_plus), lit(2_i64));
1230-
}
1231-
12321210
#[test]
12331211
fn test_simplify_concat_ws_null_separator() {
12341212
fn build_concat_ws_expr(args: &[Expr]) -> Expr {
@@ -1351,13 +1329,13 @@ mod tests {
13511329
// now() --> ts
13521330
test_evaluate_with_start_time(now_expr(), lit_timestamp_nano(ts_nanos), &time);
13531331

1354-
// CAST(now() as int64) + 100 --> ts + 100
1355-
let expr = cast_to_int64_expr(now_expr()) + lit(100);
1332+
// CAST(now() as int64) + 100_i64 --> ts + 100_i64
1333+
let expr = cast_to_int64_expr(now_expr()) + lit(100_i64);
13561334
test_evaluate_with_start_time(expr, lit(ts_nanos + 100), &time);
13571335

1358-
// CAST(now() as int64) < cast(to_timestamp(...) as int64) + 50000 ---> true
1336+
// CAST(now() as int64) < cast(to_timestamp(...) as int64) + 50000_i64 ---> true
13591337
let expr = cast_to_int64_expr(now_expr())
1360-
.lt(cast_to_int64_expr(to_timestamp_expr(ts_string)) + lit(50000));
1338+
.lt(cast_to_int64_expr(to_timestamp_expr(ts_string)) + lit(50000i64));
13611339
test_evaluate_with_start_time(expr, lit(true), &time);
13621340
}
13631341

@@ -2208,19 +2186,21 @@ mod tests {
22082186
let ts_string = "2020-09-08T12:05:00+00:00";
22092187
let time = chrono::Utc.timestamp_nanos(1599566400000000000i64);
22102188

2211-
// cast(now() as int) < cast(to_timestamp(...) as int) + 5000000000
2212-
let plan = LogicalPlanBuilder::from(table_scan)
2213-
.filter(
2214-
cast_to_int64_expr(now_expr())
2215-
.lt(cast_to_int64_expr(to_timestamp_expr(ts_string)) + lit(50000)),
2216-
)
2217-
.unwrap()
2218-
.build()
2219-
.unwrap();
2189+
// cast(now() as int) < cast(to_timestamp(...) as int) + 50000_i64
2190+
let plan =
2191+
LogicalPlanBuilder::from(table_scan)
2192+
.filter(
2193+
cast_to_int64_expr(now_expr())
2194+
.lt(cast_to_int64_expr(to_timestamp_expr(ts_string))
2195+
+ lit(50000_i64)),
2196+
)
2197+
.unwrap()
2198+
.build()
2199+
.unwrap();
22202200

22212201
// Note that constant folder runs and folds the entire
22222202
// expression down to a single constant (true)
2223-
let expected = "Filter: Boolean(true) AS now() < totimestamp(Utf8(\"2020-09-08T12:05:00+00:00\")) + Int32(50000)\
2203+
let expected = "Filter: Boolean(true) AS now() < totimestamp(Utf8(\"2020-09-08T12:05:00+00:00\")) + Int64(50000)\
22242204
\n TableScan: test";
22252205
let actual = get_optimized_plan_formatted(&plan, &time);
22262206

datafusion/optimizer/src/type_coercion.rs

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -117,16 +117,10 @@ fn optimize_internal(
117117
from_plan(plan, &new_expr, &new_inputs)
118118
}
119119

120-
pub(crate) struct TypeCoercionRewriter {
120+
struct TypeCoercionRewriter {
121121
pub(crate) schema: DFSchemaRef,
122122
}
123123

124-
impl TypeCoercionRewriter {
125-
pub(crate) fn new(schema: DFSchemaRef) -> TypeCoercionRewriter {
126-
TypeCoercionRewriter { schema }
127-
}
128-
}
129-
130124
impl ExprRewriter for TypeCoercionRewriter {
131125
fn pre_visit(&mut self, _expr: &Expr) -> Result<RewriteRecursion> {
132126
Ok(RewriteRecursion::Continue)
@@ -796,7 +790,7 @@ mod test {
796790
)
797791
.unwrap(),
798792
);
799-
let mut rewriter = TypeCoercionRewriter::new(schema);
793+
let mut rewriter = TypeCoercionRewriter { schema };
800794
let expr = is_true(lit(12i32).eq(lit(13i64)));
801795
let expected = is_true(
802796
cast(lit(ScalarValue::Int32(Some(12))), DataType::Int64)

0 commit comments

Comments
 (0)