fix push_down_projection push redundant columns.#4487
Conversation
|
cc @liukun4515 |
7d3c02c to
d97d3e7
Compare
| #[tokio::test] | ||
| #[ignore] | ||
| // https://github.com/apache/arrow-datafusion/issues/3635 | ||
| // Test issue: https://github.com/apache/arrow-datafusion/issues/3635 | ||
| async fn multiple_or_predicates() -> Result<()> { |
There was a problem hiding this comment.
Please notice this Test
push_down_projection push redundant columns.push_down_projection push duplicated columns.
push_down_projection push duplicated columns.push_down_projection push redundant columns.
alamb
left a comment
There was a problem hiding this comment.
Thank you @jackwener -- this is epically beautiful -- 3 tickets closed with a very small number of lines changed ❤️
| " ProjectionExec: expr=[c1@0 as c2]", | ||
| " RepartitionExec: partitioning=RoundRobinBatch(9000)", | ||
| " CsvExec: files={1 group: [[ARROW_TEST_DATA/csv/aggregate_test_100.csv]]}, has_header=true, limit=None, projection=[c1, c2]", | ||
| " CsvExec: files={1 group: [[ARROW_TEST_DATA/csv/aggregate_test_100.csv]]}, has_header=true, limit=None, projection=[c1]", |
|
|
||
| let mut new_expr = Vec::new(); | ||
| let mut new_fields = Vec::new(); | ||
| let mut new_required_columns = HashSet::new(); |
There was a problem hiding this comment.
Since this is shadowing a variable in the outer scope, I wonder if it would make sense to comment why we are starting with an empty set in this case rather than the pushed down columns?
There was a problem hiding this comment.
👍Yes, I have added comment to explain it.
| " TableScan: lineitem projection=[l_partkey, l_quantity] [l_partkey:Int64, l_quantity:Decimal128(15, 2)]", | ||
| " TableScan: part projection=[p_partkey, p_brand, p_size] [p_partkey:Int64, p_brand:Utf8, p_size:Int32]", | ||
| " Projection: lineitem.l_partkey, lineitem.l_quantity [l_partkey:Int64, l_quantity:Decimal128(15, 2)]", | ||
| " Filter: (lineitem.l_quantity >= Decimal128(Some(100),15,2)Decimal128(Some(100),15,2)lineitem.l_quantity AND lineitem.l_quantity <= Decimal128(Some(1100),15,2)Decimal128(Some(1100),15,2)lineitem.l_quantity OR lineitem.l_quantity >= Decimal128(Some(1000),15,2)Decimal128(Some(1000),15,2)lineitem.l_quantity AND lineitem.l_quantity <= Decimal128(Some(2000),15,2)Decimal128(Some(2000),15,2)lineitem.l_quantity OR lineitem.l_quantity >= Decimal128(Some(2000),15,2)Decimal128(Some(2000),15,2)lineitem.l_quantity AND lineitem.l_quantity <= Decimal128(Some(3000),15,2)Decimal128(Some(3000),15,2)lineitem.l_quantity) AND (lineitem.l_quantity >= Decimal128(Some(100),15,2) OR lineitem.l_quantity >= Decimal128(Some(1000),15,2)lineitem.l_quantity >= Decimal128(Some(1000),15,2)Decimal128(Some(1000),15,2)lineitem.l_quantitylineitem.l_quantity >= Decimal128(Some(100),15,2)Decimal128(Some(100),15,2)lineitem.l_quantity OR lineitem.l_quantity >= Decimal128(Some(2000),15,2)Decimal128(Some(2000),15,2)lineitem.l_quantity) AND (lineitem.l_quantity >= Decimal128(Some(100),15,2) OR lineitem.l_quantity >= Decimal128(Some(1000),15,2)lineitem.l_quantity >= Decimal128(Some(1000),15,2)Decimal128(Some(1000),15,2)lineitem.l_quantitylineitem.l_quantity >= Decimal128(Some(100),15,2)Decimal128(Some(100),15,2)lineitem.l_quantity OR lineitem.l_quantity <= Decimal128(Some(3000),15,2)Decimal128(Some(3000),15,2)lineitem.l_quantity) AND (lineitem.l_quantity >= Decimal128(Some(100),15,2) OR lineitem.l_quantity <= Decimal128(Some(2000),15,2)lineitem.l_quantity <= Decimal128(Some(2000),15,2)Decimal128(Some(2000),15,2)lineitem.l_quantitylineitem.l_quantity >= Decimal128(Some(100),15,2)Decimal128(Some(100),15,2)lineitem.l_quantity OR lineitem.l_quantity >= Decimal128(Some(2000),15,2)Decimal128(Some(2000),15,2)lineitem.l_quantity) AND (lineitem.l_quantity >= Decimal128(Some(100),15,2) OR lineitem.l_quantity <= Decimal128(Some(2000),15,2)lineitem.l_quantity <= Decimal128(Some(2000),15,2)Decimal128(Some(2000),15,2)lineitem.l_quantitylineitem.l_quantity >= Decimal128(Some(100),15,2)Decimal128(Some(100),15,2)lineitem.l_quantity OR lineitem.l_quantity <= Decimal128(Some(3000),15,2)Decimal128(Some(3000),15,2)lineitem.l_quantity) AND (lineitem.l_quantity <= Decimal128(Some(1100),15,2) OR lineitem.l_quantity >= Decimal128(Some(1000),15,2)lineitem.l_quantity >= Decimal128(Some(1000),15,2)Decimal128(Some(1000),15,2)lineitem.l_quantitylineitem.l_quantity <= Decimal128(Some(1100),15,2)Decimal128(Some(1100),15,2)lineitem.l_quantity OR lineitem.l_quantity >= Decimal128(Some(2000),15,2)Decimal128(Some(2000),15,2)lineitem.l_quantity) AND (lineitem.l_quantity <= Decimal128(Some(1100),15,2) OR lineitem.l_quantity >= Decimal128(Some(1000),15,2)lineitem.l_quantity >= Decimal128(Some(1000),15,2)Decimal128(Some(1000),15,2)lineitem.l_quantitylineitem.l_quantity <= Decimal128(Some(1100),15,2)Decimal128(Some(1100),15,2)lineitem.l_quantity OR lineitem.l_quantity <= Decimal128(Some(3000),15,2)Decimal128(Some(3000),15,2)lineitem.l_quantity) AND (lineitem.l_quantity <= Decimal128(Some(1100),15,2) OR lineitem.l_quantity <= Decimal128(Some(2000),15,2)lineitem.l_quantity <= Decimal128(Some(2000),15,2)Decimal128(Some(2000),15,2)lineitem.l_quantitylineitem.l_quantity <= Decimal128(Some(1100),15,2)Decimal128(Some(1100),15,2)lineitem.l_quantity OR lineitem.l_quantity >= Decimal128(Some(2000),15,2)Decimal128(Some(2000),15,2)lineitem.l_quantity) AND (lineitem.l_quantity <= Decimal128(Some(1100),15,2) OR lineitem.l_quantity <= Decimal128(Some(2000),15,2)lineitem.l_quantity <= Decimal128(Some(2000),15,2)Decimal128(Some(2000),15,2)lineitem.l_quantitylineitem.l_quantity <= Decimal128(Some(1100),15,2)Decimal128(Some(1100),15,2)lineitem.l_quantity OR lineitem.l_quantity <= Decimal128(Some(3000),15,2)Decimal128(Some(3000),15,2)lineitem.l_quantity) [lineitem.l_quantity <= Decimal128(Some(1100),15,2) OR lineitem.l_quantity <= Decimal128(Some(2000),15,2)lineitem.l_quantity <= Decimal128(Some(2000),15,2)Decimal128(Some(2000),15,2)lineitem.l_quantitylineitem.l_quantity <= Decimal128(Some(1100),15,2)Decimal128(Some(1100),15,2)lineitem.l_quantity:Boolean;N, lineitem.l_quantity <= Decimal128(Some(1100),15,2) OR lineitem.l_quantity >= Decimal128(Some(1000),15,2)lineitem.l_quantity >= Decimal128(Some(1000),15,2)Decimal128(Some(1000),15,2)lineitem.l_quantitylineitem.l_quantity <= Decimal128(Some(1100),15,2)Decimal128(Some(1100),15,2)lineitem.l_quantity:Boolean;N, lineitem.l_quantity <= Decimal128(Some(1100),15,2)Decimal128(Some(1100),15,2)lineitem.l_quantity:Boolean;N, lineitem.l_quantity <= Decimal128(Some(2000),15,2)Decimal128(Some(2000),15,2)lineitem.l_quantity:Boolean;N, lineitem.l_quantity <= Decimal128(Some(3000),15,2)Decimal128(Some(3000),15,2)lineitem.l_quantity:Boolean;N, lineitem.l_quantity >= Decimal128(Some(100),15,2) OR lineitem.l_quantity <= Decimal128(Some(2000),15,2)lineitem.l_quantity <= Decimal128(Some(2000),15,2)Decimal128(Some(2000),15,2)lineitem.l_quantitylineitem.l_quantity >= Decimal128(Some(100),15,2)Decimal128(Some(100),15,2)lineitem.l_quantity:Boolean;N, lineitem.l_quantity >= Decimal128(Some(100),15,2) OR lineitem.l_quantity >= Decimal128(Some(1000),15,2)lineitem.l_quantity >= Decimal128(Some(1000),15,2)Decimal128(Some(1000),15,2)lineitem.l_quantitylineitem.l_quantity >= Decimal128(Some(100),15,2)Decimal128(Some(100),15,2)lineitem.l_quantity:Boolean;N, lineitem.l_quantity >= Decimal128(Some(100),15,2)Decimal128(Some(100),15,2)lineitem.l_quantity:Boolean;N, lineitem.l_quantity >= Decimal128(Some(1000),15,2)Decimal128(Some(1000),15,2)lineitem.l_quantity:Boolean;N, lineitem.l_quantity >= Decimal128(Some(2000),15,2)Decimal128(Some(2000),15,2)lineitem.l_quantity:Boolean;N, l_partkey:Int64, l_quantity:Decimal128(15, 2)]", |
There was a problem hiding this comment.
this filter looks pretty nasty -- perhaps it is related to #3903 🤔
On the other hand, there is a lot of pushed down filtering now 👍
cc @Ted-Jiang
There was a problem hiding this comment.
yes, seems not readable, maybe we can try to keep both input and result only show the input one 🤔
|
(there are several conflicts in this PR that need to be solved prior to merge) |
a37e207 to
5a238cd
Compare
|
Resolved conflict and added comment. Thanks @alamb review |
5a238cd to
de6da48
Compare
Sorry for the delay in reviewing -- the PRs are flying fast and furious these days |
|
Benchmark runs are scheduled for baseline = 740a4fa and contender = d9a47d6. d9a47d6 is a master commit associated with this PR. Results will be available as each benchmark for each run completes. |
Which issue does this PR close?
Closes #4486
Closes #4174
Closes #3635.
Rationale for this change
What changes are included in this PR?
Are these changes tested?
Are there any user-facing changes?