fix(prqlc): handle complex append cases#5366
Conversation
Forcing selection in requirements meant compute columns with dependencies sometimes appeared in SELECT statement where those dependencies were not available (not selected in subsequent CTE). Selection status is likely to be properly set before-hand anyway.
Chain of unselected compute columns added a lot of useless requirements so skipping them improve results and avoid edge cases in positional mapping.
…atching Previous version of positional mapping worked on all columns. Now, we stick to selected ones to make sure we don't have extra columns. Also, there was a mistake in positional mapping matching, which meant e.g. RIId 1 was associated with RIId 2, and therefore absurd mappings.
|
Hi @max-sixty! You may want to re-trigger the checks. |
|
Hi @max-sixty! The PR is ready for review. Please, let me know if there is anything I can do to help! |
|
sorry for the delay! I replaced some of the more verbose integration tests with some inline tests — are those OK? (for transparency, I had LLM help; I always footnote this when I get help) |
Fanaen
left a comment
There was a problem hiding this comment.
No worries, thank you very much for looking into it!
They are! The noisiness is real.
The integration tests yielded different results for some dialects and those changes are somewhat related so I added them as suggestion.
|
|
||
| #[test] | ||
| fn test_append_select_compute() { | ||
| // Test for handling complex append with select and compute operations | ||
| assert_snapshot!(compile(r###" | ||
| from invoices | ||
| derive total = case [total < 10 => total * 2, true => total] | ||
| select { customer_id, invoice_id, total } | ||
| take 5 | ||
| append ( | ||
| from invoice_items | ||
| derive unit_price = case [unit_price < 1 => unit_price * 2, true => unit_price] | ||
| select { invoice_line_id, invoice_id, unit_price } | ||
| take 5 | ||
| ) | ||
| select { a = customer_id * 2, b = math.round 1 (invoice_id * total) } | ||
| "###).unwrap(), @r" | ||
| WITH table_1 AS ( | ||
| SELECT | ||
| * | ||
| FROM | ||
| ( | ||
| SELECT | ||
| invoice_id, | ||
| CASE | ||
| WHEN total < 10 THEN total * 2 | ||
| ELSE total | ||
| END AS _expr_0, | ||
| customer_id | ||
| FROM | ||
| invoices | ||
| LIMIT | ||
| 5 | ||
| ) AS table_3 | ||
| UNION | ||
| ALL | ||
| SELECT | ||
| * | ||
| FROM | ||
| ( | ||
| SELECT | ||
| invoice_id, | ||
| CASE | ||
| WHEN unit_price < 1 THEN unit_price * 2 | ||
| ELSE unit_price | ||
| END AS unit_price, | ||
| invoice_line_id | ||
| FROM | ||
| invoice_items | ||
| LIMIT | ||
| 5 | ||
| ) AS table_4 | ||
| ) | ||
| SELECT | ||
| customer_id * 2 AS a, | ||
| ROUND(invoice_id * _expr_0, 1) AS b | ||
| FROM | ||
| table_1 | ||
| "); | ||
| } |
There was a problem hiding this comment.
Included in append_select_compute.prql.
| #[test] | |
| fn test_append_select_compute() { | |
| // Test for handling complex append with select and compute operations | |
| assert_snapshot!(compile(r###" | |
| from invoices | |
| derive total = case [total < 10 => total * 2, true => total] | |
| select { customer_id, invoice_id, total } | |
| take 5 | |
| append ( | |
| from invoice_items | |
| derive unit_price = case [unit_price < 1 => unit_price * 2, true => unit_price] | |
| select { invoice_line_id, invoice_id, unit_price } | |
| take 5 | |
| ) | |
| select { a = customer_id * 2, b = math.round 1 (invoice_id * total) } | |
| "###).unwrap(), @r" | |
| WITH table_1 AS ( | |
| SELECT | |
| * | |
| FROM | |
| ( | |
| SELECT | |
| invoice_id, | |
| CASE | |
| WHEN total < 10 THEN total * 2 | |
| ELSE total | |
| END AS _expr_0, | |
| customer_id | |
| FROM | |
| invoices | |
| LIMIT | |
| 5 | |
| ) AS table_3 | |
| UNION | |
| ALL | |
| SELECT | |
| * | |
| FROM | |
| ( | |
| SELECT | |
| invoice_id, | |
| CASE | |
| WHEN unit_price < 1 THEN unit_price * 2 | |
| ELSE unit_price | |
| END AS unit_price, | |
| invoice_line_id | |
| FROM | |
| invoice_items | |
| LIMIT | |
| 5 | |
| ) AS table_4 | |
| ) | |
| SELECT | |
| customer_id * 2 AS a, | |
| ROUND(invoice_id * _expr_0, 1) AS b | |
| FROM | |
| table_1 | |
| "); | |
| } |
| GROUP BY | ||
| billing_country | ||
| "); | ||
| } |
There was a problem hiding this comment.
Highlighted feature: distinct on support.
| } | |
| } | |
| #[test] | |
| fn test_distinct_on_sort_on_compute_postgres() { | |
| // Test for handling distinct on with sorting on computed columns | |
| assert_snapshot!(compile(r###"prql target:sql.postgres | |
| from invoices | |
| derive code = case [customer_id < 10 => billing_postal_code, true => null] | |
| group {customer_id, billing_city, billing_country} ( | |
| sort {-this.code} | |
| take 1 | |
| ) | |
| filter (customer_id | in [4]) | |
| group {billing_country} (aggregate {total = math.round 2 (sum total)}) | |
| "###).unwrap(), @r" | |
| WITH table_1 AS ( | |
| SELECT | |
| billing_country, | |
| total, | |
| customer_id, | |
| billing_city, | |
| CASE | |
| WHEN customer_id < 10 THEN billing_postal_code | |
| ELSE NULL | |
| END AS _expr_0, | |
| billing_postal_code | |
| FROM | |
| invoices | |
| ), | |
| table_0 AS ( | |
| SELECT | |
| DISTINCT ON (customer_id, billing_city, billing_country) billing_country, | |
| total, | |
| customer_id, | |
| billing_city, | |
| _expr_0 | |
| FROM | |
| table_1 | |
| ORDER BY | |
| customer_id, | |
| billing_city, | |
| billing_country, | |
| _expr_0 DESC | |
| ) | |
| SELECT | |
| billing_country, | |
| ROUND((COALESCE(SUM(total), 0))::numeric, 2) AS total | |
| FROM | |
| table_0 | |
| WHERE | |
| customer_id IN (4) | |
| GROUP BY | |
| billing_country | |
| "); | |
| } |
| total | ||
| "); | ||
| } | ||
|
|
There was a problem hiding this comment.
Highlighted feature: UNION ALL with sub-queries instead of CTEs.
| #[test] | |
| fn test_append_select_multiple_postgres() { | |
| // Test for handling multiple append operations with grouping and aggregation | |
| assert_snapshot!(compile(r###"prql target:sql.postgres | |
| from invoices | |
| select { customer_id, invoice_id, total, useless1, useless2 } | |
| take 5 | |
| append ( | |
| from employees | |
| select { employee_id, employee_id + 1, reports_to, useless3, useless4 } | |
| take 5 | |
| ) | |
| group { customer_id } (aggregate { invoice_id = math.round 1 (sum invoice_id), total = math.round 1 (sum total), useless1 = sum useless1 }) | |
| append ( | |
| from invoice_items | |
| select { invoice_id, invoice_line_id, 0, useless5 } | |
| take 5 | |
| ) | |
| sort { +invoice_id, +total } | |
| select { total, invoice_id } | |
| "###).unwrap(), @r" | |
| WITH table_3 AS ( | |
| ( | |
| SELECT | |
| customer_id, | |
| total, | |
| invoice_id | |
| FROM | |
| invoices | |
| LIMIT | |
| 5 | |
| ) | |
| UNION | |
| ALL ( | |
| SELECT | |
| employee_id, | |
| reports_to, | |
| employee_id + 1 | |
| FROM | |
| employees | |
| LIMIT | |
| 5 | |
| ) | |
| ), table_2 AS ( | |
| SELECT | |
| ROUND((COALESCE(SUM(total), 0))::numeric, 1) AS total, | |
| ROUND((COALESCE(SUM(invoice_id), 0))::numeric, 1) AS invoice_id | |
| FROM | |
| table_3 | |
| GROUP BY | |
| customer_id | |
| UNION | |
| ALL ( | |
| SELECT | |
| invoice_id, | |
| invoice_line_id | |
| FROM | |
| invoice_items | |
| LIMIT | |
| 5 | |
| ) | |
| ) | |
| SELECT | |
| total, | |
| invoice_id | |
| FROM | |
| table_2 | |
| ORDER BY | |
| invoice_id, | |
| total | |
| "); | |
| } |
|
thanks! yes if the results by dialect are different and the differences are important, the first port of call is to do normal snapshot tests with a couple of dialects (fine to add those in another PR if we really want) thanks! I will be faster at reviewing next time! |
Another case of #5354 but more complex this time, and with extra columns in
UNION.I've encountered a really scary query with multiple occurences of all
PRQLverbs short ofloop. Obviously, it triggered several edge cases, some of which, in my own code 😄So, this PR harden the positional mapping and fine-tune the requirement logic without changing the surounding architecture.
test-rust,test-rust-coverageandtest-rust-external-dbson most commits.