diff --git a/prqlc/prqlc/src/ir/pl/lineage.rs b/prqlc/prqlc/src/ir/pl/lineage.rs index 21a64ebfb3d4..15a82c2545ae 100644 --- a/prqlc/prqlc/src/ir/pl/lineage.rs +++ b/prqlc/prqlc/src/ir/pl/lineage.rs @@ -4,7 +4,7 @@ use std::fmt::{Debug, Display, Formatter}; use enum_as_inner::EnumAsInner; use itertools::{Itertools, Position}; use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; +use serde::{Deserialize, Serialize, Serializer}; use super::Ident; @@ -48,10 +48,23 @@ pub enum LineageColumn { /// All columns (including unknown ones) from an input (i.e. `foo_table.*`) All { input_id: usize, + + #[serde(serialize_with = "sorted_set")] except: HashSet, }, } +pub fn sorted_set( + value: &HashSet, + serializer: S, +) -> Result { + value + .iter() + .sorted() + .collect::>() + .serialize(serializer) +} + impl Display for Lineage { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { display_lineage(self, f, false) diff --git a/prqlc/prqlc/src/sql/pq/anchor.rs b/prqlc/prqlc/src/sql/pq/anchor.rs index 0e424b7389e7..5b41a636d212 100644 --- a/prqlc/prqlc/src/sql/pq/anchor.rs +++ b/prqlc/prqlc/src/sql/pq/anchor.rs @@ -75,7 +75,7 @@ pub(super) fn split_off_back( return (None, Vec::new()); } - let mapping_before = compute_positional_mappings(&pipeline); + let mapping_before = compute_positional_mappings(&pipeline, None); log::debug!("traversing pipeline to obtain columns: {output:?}"); @@ -100,7 +100,7 @@ pub(super) fn split_off_back( } // anchor and record all requirements - let required = get_requirements(&transform, &following_transforms); + let required = get_requirements(&transform, &following_transforms, &inputs_required); log::debug!(".. transform {} requires {required:?}", transform.as_str(),); inputs_required = inputs_required.append(required.clone()); @@ -188,10 +188,12 @@ pub(super) fn split_off_back( curr_pipeline_rev.reverse(); // This will compare columns for order sensitive transform and correct it in subsequent relation. 
- let mapping_after = compute_positional_mappings(&curr_pipeline_rev); - for (before, after) in mapping_before.iter().zip(mapping_after.iter()) { - ctx.positional_mapping - .compute_and_store_mapping(before, after); + let mapping_after = compute_positional_mappings(&curr_pipeline_rev, Some(&inputs_required)); + for (riid, after) in mapping_after { + if let Some((_, before)) = mapping_before.iter().find(|(r, _)| &riid == r) { + ctx.positional_mapping + .compute_and_store_mapping(before, &after, &riid); + } } (remaining_pipeline, curr_pipeline_rev) @@ -483,6 +485,14 @@ impl Requirements { } self } + + pub fn is_selected(&self, id: &CId) -> bool { + self.0.iter().any(|r| r.selected && &r.col == id) + } + + pub fn is_required(&self, id: &CId) -> bool { + self.0.iter().any(|r| &r.col == id) + } } impl std::fmt::Debug for Requirement { @@ -496,19 +506,14 @@ impl std::fmt::Debug for Requirement { pub(super) fn get_requirements( transform: &SqlTransform, following: &HashSet, + previous_requirements: &Requirements, ) -> Requirements { use SqlTransform::Super; match transform { - Super(Transform::Aggregate { partition, compute }) => { - let partition_requirements = Requirements::from_cids(partition.iter()); - let compute_requirements = - Requirements::from_cids(compute.iter()).allow_up_to(Complexity::Aggregation); + Super(Transform::Aggregate { partition, .. 
}) => Requirements::from_cids(partition.iter()), - partition_requirements.append(compute_requirements) - } - - Super(Transform::Compute(compute)) => { + Super(Transform::Compute(compute)) if previous_requirements.is_required(&compute.id) => { let requirements = Requirements::from_expr(&compute.expr).allow_up_to( match infer_complexity(compute) { // plain expressions can be included in anything less complex than Aggregation @@ -552,9 +557,11 @@ pub(super) fn get_requirements( .should_select(true) } + SqlTransform::Sort(sorts) if !following.contains("Aggregate") => { + Requirements::from_cids(sorts.iter().map(|s| &s.column)) + } + SqlTransform::DistinctOn(partition) => Requirements::from_cids(partition.iter()) - // Partition columns must be selected in order to push compute columns down CTE. - .should_select(true) // Since there is aggregation anyway, columns can have any complexity .allow_up_to(Complexity::highest()), diff --git a/prqlc/prqlc/src/sql/pq/positional_mapping.rs b/prqlc/prqlc/src/sql/pq/positional_mapping.rs index e58ada0eee8f..b1e087531f41 100644 --- a/prqlc/prqlc/src/sql/pq/positional_mapping.rs +++ b/prqlc/prqlc/src/sql/pq/positional_mapping.rs @@ -1,8 +1,11 @@ use std::collections::HashMap; use crate::{ - ir::rq::{CId, Transform}, - sql::{pq::context::RIId, pq_ast::SqlTransform}, + ir::rq::{CId, Compute, Transform}, + sql::{ + pq::{anchor::Requirements, context::RIId}, + pq_ast::SqlTransform, + }, }; /// State required to properly handle the transforms that are order sensitive like `Union`. @@ -16,29 +19,24 @@ impl PositionalMapper { /// Remember the mapping for this `RIId` to know what to apply for `apply_positional_mapping`. pub(crate) fn activate_mapping(&mut self, riid: &RIId) { self.active_positional_mapping = self.relation_positional_mapping.remove(riid); + log::trace!( + "loading remapping for {riid:?}: {:?}", + self.active_positional_mapping + ); } /// Reorder or remove columns to make `Union` happy. 
pub(crate) fn apply_active_mapping(&mut self, output: Vec) -> Vec { if let Some(mapping) = &self.active_positional_mapping { let new_output = mapping.iter().map(|idx| output[*idx]).collect(); - log::debug!("remapping {output:?} to {new_output:?}"); + log::debug!("remapping {output:?} to {new_output:?} via {mapping:?}"); new_output } else { output } } - pub fn compute_and_store_mapping( - &mut self, - (_, before): &(RIId, Vec), - (riid, after): &(RIId, Vec), - ) { - if after == before { - log::trace!(".. relation {riid:?} is already correctly mapped: {after:?}"); - return; - } - + pub fn compute_and_store_mapping(&mut self, before: &[CId], after: &[CId], riid: &RIId) { let mapping: Vec<_> = after .iter() .flat_map(|a| match before.iter().position(|b| b == a) { @@ -60,28 +58,37 @@ impl PositionalMapper { /// Outputs the columns required for position sensitive transforms in the pipeline. pub fn compute_positional_mappings( pipeline: &[SqlTransform], + requirements: Option<&Requirements>, ) -> Vec<(RIId, Vec)> { let mut constraints = vec![]; let mut columns = vec![]; log::trace!("traversing pipeline to obtain positional mapping:"); + // Only process selected columns to avoid supernumerary ones + let add_columns = |columns: &mut Vec, cids: &[CId]| { + if let Some(requirements) = requirements { + columns.extend(cids.iter().filter(|cid| requirements.is_selected(cid))); + } else { + columns.extend_from_slice(cids); + } + }; + for transform in pipeline { match transform { SqlTransform::Super(s) => match s { - Transform::Compute(compute) => { - if !columns.contains(&compute.id) { - columns.push(compute.id); + Transform::Compute(Compute { id, .. }) => { + if !columns.contains(id) { + add_columns(&mut columns, &[*id]); } } Transform::Select(cids) => { columns.clear(); - columns.extend_from_slice(cids.as_slice()); + add_columns(&mut columns, cids); } - Transform::Aggregate { partition, compute } => { + Transform::Aggregate { compute, .. 
} => { columns.clear(); - columns.extend_from_slice(partition.as_slice()); - columns.extend_from_slice(compute.as_slice()); + add_columns(&mut columns, compute); } _ => (), }, @@ -89,10 +96,17 @@ pub fn compute_positional_mappings( | SqlTransform::Intersect { bottom, .. } | SqlTransform::Union { bottom, .. } => { constraints.push((*bottom, columns.clone())); + log::trace!( + ".. mapping for {}/{bottom:?}: {columns:?}", + transform.as_str() + ); } _ => (), } - log::trace!(".. columns after {}: {columns:?}", transform.as_str()); + log::trace!( + ".. selected columns after {}: {columns:?}", + transform.as_str() + ); } constraints diff --git a/prqlc/prqlc/tests/integration/queries/append_select_compute.prql b/prqlc/prqlc/tests/integration/queries/append_select_compute.prql index ee64ba181c32..9ea6a89f4862 100644 --- a/prqlc/prqlc/tests/integration/queries/append_select_compute.prql +++ b/prqlc/prqlc/tests/integration/queries/append_select_compute.prql @@ -1,8 +1,10 @@ from invoices +derive total = case [total < 10 => total * 2, true => total] select { customer_id, invoice_id, total } take 5 append ( from invoice_items + derive unit_price = case [unit_price < 1 => unit_price * 2, true => unit_price] select { invoice_line_id, invoice_id, unit_price } take 5 ) diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__compile__append_select_compute.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__compile__append_select_compute.snap index 2e94e3fc0458..63ecf95c65c9 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__compile__append_select_compute.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__compile__append_select_compute.snap @@ -1,6 +1,6 @@ --- source: prqlc/prqlc/tests/integration/queries.rs -expression: "from invoices\nselect { customer_id, invoice_id, total }\ntake 5\nappend (\n from invoice_items\n select { invoice_line_id, invoice_id, unit_price }\n take 5\n)\nselect { a = 
customer_id * 2, b = math.round 1 (invoice_id * total) }\n" +expression: "from invoices\nderive total = case [total < 10 => total * 2, true => total]\nselect { customer_id, invoice_id, total }\ntake 5\nappend (\n from invoice_items\n derive unit_price = case [unit_price < 1 => unit_price * 2, true => unit_price]\n select { invoice_line_id, invoice_id, unit_price }\n take 5\n)\nselect { a = customer_id * 2, b = math.round 1 (invoice_id * total) }\n" input_file: prqlc/prqlc/tests/integration/queries/append_select_compute.prql --- WITH table_1 AS ( @@ -10,7 +10,10 @@ WITH table_1 AS ( ( SELECT invoice_id, - total, + CASE + WHEN total < 10 THEN total * 2 + ELSE total + END AS _expr_0, customer_id FROM invoices @@ -25,7 +28,10 @@ WITH table_1 AS ( ( SELECT invoice_id, - unit_price, + CASE + WHEN unit_price < 1 THEN unit_price * 2 + ELSE unit_price + END AS unit_price, invoice_line_id FROM invoice_items @@ -35,6 +41,6 @@ WITH table_1 AS ( ) SELECT customer_id * 2 AS a, - ROUND(invoice_id * total, 1) AS b + ROUND(invoice_id * _expr_0, 1) AS b FROM table_1 diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__compileall__append_select_compute.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__compileall__append_select_compute.snap index a49972b431b3..7fe1d3938ac8 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__compileall__append_select_compute.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__compileall__append_select_compute.snap @@ -1,12 +1,12 @@ --- source: prqlc/prqlc/tests/integration/queries.rs -expression: "from invoices\nselect { customer_id, invoice_id, total }\ntake 5\nappend (\n from invoice_items\n select { invoice_line_id, invoice_id, unit_price }\n take 5\n)\nselect { a = customer_id * 2, b = math.round 1 (invoice_id * total) }\n" +expression: "from invoices\nderive total = case [total < 10 => total * 2, true => total]\nselect { customer_id, invoice_id, total }\ntake 
5\nappend (\n from invoice_items\n derive unit_price = case [unit_price < 1 => unit_price * 2, true => unit_price]\n select { invoice_line_id, invoice_id, unit_price }\n take 5\n)\nselect { a = customer_id * 2, b = math.round 1 (invoice_id * total) }\n" input_file: prqlc/prqlc/tests/integration/queries/append_select_compute.prql --- --- generic +++ glaredb -@@ -23,13 +23,13 @@ - unit_price, +@@ -29,13 +29,13 @@ + END AS unit_price, invoice_line_id FROM invoice_items @@ -16,15 +16,15 @@ input_file: prqlc/prqlc/tests/integration/queries/append_select_compute.prql ) SELECT customer_id * 2 AS a, -- ROUND(invoice_id * total, 1) AS b -+ ROUND((invoice_id * total)::numeric, 1) AS b +- ROUND(invoice_id * _expr_0, 1) AS b ++ ROUND((invoice_id * _expr_0)::numeric, 1) AS b FROM table_1 --- generic +++ postgres -@@ -1,35 +1,28 @@ +@@ -1,41 +1,34 @@ WITH table_1 AS ( - SELECT - * @@ -32,7 +32,10 @@ input_file: prqlc/prqlc/tests/integration/queries/append_select_compute.prql - ( - SELECT - invoice_id, -- total, +- CASE +- WHEN total < 10 THEN total * 2 +- ELSE total +- END AS _expr_0, - customer_id - FROM - invoices @@ -42,7 +45,10 @@ input_file: prqlc/prqlc/tests/integration/queries/append_select_compute.prql + ( + SELECT + invoice_id, -+ total, ++ CASE ++ WHEN total < 10 THEN total * 2 ++ ELSE total ++ END AS _expr_0, + customer_id + FROM + invoices @@ -57,7 +63,10 @@ input_file: prqlc/prqlc/tests/integration/queries/append_select_compute.prql - ( - SELECT - invoice_id, -- unit_price, +- CASE +- WHEN unit_price < 1 THEN unit_price * 2 +- ELSE unit_price +- END AS unit_price, - invoice_line_id - FROM - invoice_items @@ -67,7 +76,10 @@ input_file: prqlc/prqlc/tests/integration/queries/append_select_compute.prql + ALL ( + SELECT + invoice_id, -+ unit_price, ++ CASE ++ WHEN unit_price < 1 THEN unit_price * 2 ++ ELSE unit_price ++ END AS unit_price, + invoice_line_id + FROM + invoice_items @@ -77,7 +89,7 @@ input_file: 
prqlc/prqlc/tests/integration/queries/append_select_compute.prql ) SELECT customer_id * 2 AS a, -- ROUND(invoice_id * total, 1) AS b -+ ROUND((invoice_id * total)::numeric, 1) AS b +- ROUND(invoice_id * _expr_0, 1) AS b ++ ROUND((invoice_id * _expr_0)::numeric, 1) AS b FROM table_1 diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__append_select_compute.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__append_select_compute.snap index e78d7c60000f..2453eedbd9f9 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__append_select_compute.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__append_select_compute.snap @@ -1,357 +1,523 @@ --- source: prqlc/prqlc/tests/integration/queries.rs -expression: "from invoices\nselect { customer_id, invoice_id, total }\ntake 5\nappend (\n from invoice_items\n select { invoice_line_id, invoice_id, unit_price }\n take 5\n)\nselect { a = customer_id * 2, b = math.round 1 (invoice_id * total) }\n" +expression: "from invoices\nderive total = case [total < 10 => total * 2, true => total]\nselect { customer_id, invoice_id, total }\ntake 5\nappend (\n from invoice_items\n derive unit_price = case [unit_price < 1 => unit_price * 2, true => unit_price]\n select { invoice_line_id, invoice_id, unit_price }\n take 5\n)\nselect { a = customer_id * 2, b = math.round 1 (invoice_id * total) }\n" input_file: prqlc/prqlc/tests/integration/queries/append_select_compute.prql --- frames: -- - 1:14-55 +- - 1:14-74 + - columns: + - !All + input_id: 161 + except: [] + - !Single + name: + - total + target_id: 163 + target_name: null + inputs: + - id: 161 + name: invoices + table: + - default_db + - invoices +- - 1:75-116 - columns: - !Single name: - invoices - customer_id - target_id: 144 + target_id: 176 target_name: null - !Single name: - invoices - invoice_id - target_id: 145 + target_id: 177 target_name: null 
- !Single name: - - invoices - total - target_id: 146 + target_id: 178 target_name: null inputs: - - id: 142 + - id: 161 name: invoices table: - default_db - invoices -- - 1:56-62 +- - 1:117-123 - columns: - !Single name: - invoices - customer_id - target_id: 144 + target_id: 176 target_name: null - !Single name: - invoices - invoice_id - target_id: 145 + target_id: 177 target_name: null - !Single name: - - invoices - total - target_id: 146 + target_id: 178 target_name: null inputs: - - id: 142 + - id: 161 name: invoices table: - default_db - invoices -- - 1:95-145 +- - 1:156-235 + - columns: + - !All + input_id: 127 + except: [] + - !Single + name: + - unit_price + target_id: 129 + target_name: null + inputs: + - id: 127 + name: invoice_items + table: + - default_db + - invoice_items +- - 1:238-288 - columns: - !Single name: - invoice_items - invoice_line_id - target_id: 126 + target_id: 142 target_name: null - !Single name: - invoice_items - invoice_id - target_id: 127 + target_id: 143 target_name: null - !Single name: - - invoice_items - unit_price - target_id: 128 + target_id: 144 target_name: null inputs: - - id: 124 + - id: 127 name: invoice_items table: - default_db - invoice_items -- - 1:148-154 +- - 1:291-297 - columns: - !Single name: - invoice_items - invoice_line_id - target_id: 126 + target_id: 142 target_name: null - !Single name: - invoice_items - invoice_id - target_id: 127 + target_id: 143 target_name: null - !Single name: - - invoice_items - unit_price - target_id: 128 + target_id: 144 target_name: null inputs: - - id: 124 + - id: 127 name: invoice_items table: - default_db - invoice_items -- - 1:63-156 +- - 1:124-299 - columns: - !Single name: - invoices - customer_id - target_id: 144 + target_id: 176 target_name: null - !Single name: - invoices - invoice_id - target_id: 145 + target_id: 177 target_name: null - !Single name: - - invoices - total - target_id: 146 + target_id: 178 target_name: null inputs: - - id: 142 + - id: 161 name: invoices 
table: - default_db - invoices -- - 1:157-226 +- - 1:300-369 - columns: - !Single name: - a - target_id: 153 + target_id: 185 target_name: null - !Single name: - b - target_id: 157 + target_id: 189 target_name: null inputs: - - id: 142 + - id: 161 name: invoices table: - default_db - invoices nodes: -- id: 124 +- id: 127 kind: Ident - span: 1:74-92 + span: 1:135-153 ident: !Ident - default_db - invoice_items - parent: 130 -- id: 126 + parent: 141 +- id: 129 + kind: Case + span: 1:176-235 + alias: unit_price + targets: + - 130 + - 134 + - 138 + - 139 + parent: 140 +- id: 130 + kind: RqOperator + span: 1:182-196 + targets: + - 132 + - 133 +- id: 132 kind: Ident - span: 1:104-119 + span: 1:182-192 ident: !Ident - this - invoice_items - - invoice_line_id + - unit_price targets: - - 124 - parent: 129 -- id: 127 + - 127 +- id: 133 + kind: Literal + span: 1:195-196 +- id: 134 + kind: RqOperator + span: 1:200-214 + targets: + - 136 + - 137 +- id: 136 kind: Ident - span: 1:121-131 + span: 1:200-210 ident: !Ident - this - invoice_items - - invoice_id + - unit_price targets: - - 124 - parent: 129 -- id: 128 + - 127 +- id: 137 + kind: Literal + span: 1:213-214 +- id: 138 + kind: Literal + span: 1:216-220 +- id: 139 kind: Ident - span: 1:133-143 + span: 1:224-234 ident: !Ident - this - invoice_items - unit_price targets: - - 124 - parent: 129 -- id: 129 + - 127 +- id: 140 kind: Tuple - span: 1:102-145 + span: 1:176-235 + children: + - 129 + parent: 141 +- id: 141 + kind: 'TransformCall: Derive' + span: 1:156-235 children: - - 126 - 127 - - 128 - parent: 130 -- id: 130 + - 140 + parent: 146 +- id: 142 + kind: Ident + span: 1:247-262 + ident: !Ident + - this + - invoice_items + - invoice_line_id + targets: + - 127 + parent: 145 +- id: 143 + kind: Ident + span: 1:264-274 + ident: !Ident + - this + - invoice_items + - invoice_id + targets: + - 127 + parent: 145 +- id: 144 + kind: Ident + span: 1:276-286 + ident: !Ident + - this + - unit_price + targets: + - 129 + parent: 145 +- id: 
145 + kind: Tuple + span: 1:245-288 + children: + - 142 + - 143 + - 144 + parent: 146 +- id: 146 kind: 'TransformCall: Select' - span: 1:95-145 + span: 1:238-288 children: - - 124 - - 129 - parent: 132 -- id: 132 + - 141 + - 145 + parent: 148 +- id: 148 kind: 'TransformCall: Take' - span: 1:148-154 + span: 1:291-297 children: - - 130 - - 133 - parent: 152 -- id: 133 + - 146 + - 149 + parent: 184 +- id: 149 kind: Literal - parent: 132 -- id: 142 + parent: 148 +- id: 161 kind: Ident span: 1:0-13 ident: !Ident - default_db - invoices - parent: 148 -- id: 144 + parent: 175 +- id: 163 + kind: Case + span: 1:29-74 + alias: total + targets: + - 164 + - 168 + - 172 + - 173 + parent: 174 +- id: 164 + kind: RqOperator + span: 1:35-45 + targets: + - 166 + - 167 +- id: 166 + kind: Ident + span: 1:35-40 + ident: !Ident + - this + - invoices + - total + targets: + - 161 +- id: 167 + kind: Literal + span: 1:43-45 +- id: 168 + kind: RqOperator + span: 1:49-58 + targets: + - 170 + - 171 +- id: 170 + kind: Ident + span: 1:49-54 + ident: !Ident + - this + - invoices + - total + targets: + - 161 +- id: 171 + kind: Literal + span: 1:57-58 +- id: 172 + kind: Literal + span: 1:60-64 +- id: 173 + kind: Ident + span: 1:68-73 + ident: !Ident + - this + - invoices + - total + targets: + - 161 +- id: 174 + kind: Tuple + span: 1:29-74 + children: + - 163 + parent: 175 +- id: 175 + kind: 'TransformCall: Derive' + span: 1:14-74 + children: + - 161 + - 174 + parent: 180 +- id: 176 kind: Ident - span: 1:23-34 + span: 1:84-95 ident: !Ident - this - invoices - customer_id targets: - - 142 - parent: 147 -- id: 145 + - 161 + parent: 179 +- id: 177 kind: Ident - span: 1:36-46 + span: 1:97-107 ident: !Ident - this - invoices - invoice_id targets: - - 142 - parent: 147 -- id: 146 + - 161 + parent: 179 +- id: 178 kind: Ident - span: 1:48-53 + span: 1:109-114 ident: !Ident - this - - invoices - total targets: - - 142 - parent: 147 -- id: 147 + - 163 + parent: 179 +- id: 179 kind: Tuple - span: 1:21-55 + 
span: 1:82-116 children: - - 144 - - 145 - - 146 - parent: 148 -- id: 148 + - 176 + - 177 + - 178 + parent: 180 +- id: 180 kind: 'TransformCall: Select' - span: 1:14-55 + span: 1:75-116 children: - - 142 - - 147 - parent: 150 -- id: 150 + - 175 + - 179 + parent: 182 +- id: 182 kind: 'TransformCall: Take' - span: 1:56-62 + span: 1:117-123 children: - - 148 - - 151 - parent: 152 -- id: 151 + - 180 + - 183 + parent: 184 +- id: 183 kind: Literal - parent: 150 -- id: 152 + parent: 182 +- id: 184 kind: 'TransformCall: Append' - span: 1:63-156 + span: 1:124-299 children: - - 150 - - 132 - parent: 165 -- id: 153 + - 182 + - 148 + parent: 197 +- id: 185 kind: RqOperator - span: 1:170-185 + span: 1:313-328 alias: a targets: - - 155 - - 156 - parent: 164 -- id: 155 + - 187 + - 188 + parent: 196 +- id: 187 kind: Ident - span: 1:170-181 + span: 1:313-324 ident: !Ident - this - invoices - customer_id targets: - - 144 -- id: 156 + - 176 +- id: 188 kind: Literal - span: 1:184-185 -- id: 157 + span: 1:327-328 +- id: 189 kind: RqOperator - span: 1:191-224 + span: 1:334-367 alias: b targets: - - 159 - - 160 - parent: 164 -- id: 159 + - 191 + - 192 + parent: 196 +- id: 191 kind: Literal - span: 1:202-203 -- id: 160 + span: 1:345-346 +- id: 192 kind: RqOperator - span: 1:205-223 + span: 1:348-366 targets: - - 162 - - 163 -- id: 162 + - 194 + - 195 +- id: 194 kind: Ident - span: 1:205-215 + span: 1:348-358 ident: !Ident - this - invoices - invoice_id targets: - - 145 -- id: 163 + - 177 +- id: 195 kind: Ident - span: 1:218-223 + span: 1:361-366 ident: !Ident - this - - invoices - total targets: - - 146 -- id: 164 + - 178 +- id: 196 kind: Tuple - span: 1:164-226 + span: 1:307-369 children: - - 153 - - 157 - parent: 165 -- id: 165 + - 185 + - 189 + parent: 197 +- id: 197 kind: 'TransformCall: Select' - span: 1:157-226 + span: 1:300-369 children: - - 152 - - 164 + - 184 + - 196 ast: name: Project stmts: @@ -374,36 +540,78 @@ ast: - FuncCall: name: Ident: - - select + - derive span: 1:14-20 
args: + - Case: + - condition: + Binary: + left: + Ident: + - total + span: 1:35-40 + op: Lt + right: + Literal: + Integer: 10 + span: 1:43-45 + span: 1:35-45 + value: + Binary: + left: + Ident: + - total + span: 1:49-54 + op: Mul + right: + Literal: + Integer: 2 + span: 1:57-58 + span: 1:49-58 + - condition: + Literal: + Boolean: true + span: 1:60-64 + value: + Ident: + - total + span: 1:68-73 + span: 1:29-74 + alias: total + span: 1:14-74 + - FuncCall: + name: + Ident: + - select + span: 1:75-81 + args: - Tuple: - Ident: - customer_id - span: 1:23-34 + span: 1:84-95 - Ident: - invoice_id - span: 1:36-46 + span: 1:97-107 - Ident: - total - span: 1:48-53 - span: 1:21-55 - span: 1:14-55 + span: 1:109-114 + span: 1:82-116 + span: 1:75-116 - FuncCall: name: Ident: - take - span: 1:56-60 + span: 1:117-121 args: - Literal: Integer: 5 - span: 1:61-62 - span: 1:56-62 + span: 1:122-123 + span: 1:117-123 - FuncCall: name: Ident: - append - span: 1:63-69 + span: 1:124-130 args: - Pipeline: exprs: @@ -411,85 +619,127 @@ ast: name: Ident: - from - span: 1:74-78 + span: 1:135-139 args: - Ident: - invoice_items - span: 1:79-92 - span: 1:74-92 + span: 1:140-153 + span: 1:135-153 + - FuncCall: + name: + Ident: + - derive + span: 1:156-162 + args: + - Case: + - condition: + Binary: + left: + Ident: + - unit_price + span: 1:182-192 + op: Lt + right: + Literal: + Integer: 1 + span: 1:195-196 + span: 1:182-196 + value: + Binary: + left: + Ident: + - unit_price + span: 1:200-210 + op: Mul + right: + Literal: + Integer: 2 + span: 1:213-214 + span: 1:200-214 + - condition: + Literal: + Boolean: true + span: 1:216-220 + value: + Ident: + - unit_price + span: 1:224-234 + span: 1:176-235 + alias: unit_price + span: 1:156-235 - FuncCall: name: Ident: - select - span: 1:95-101 + span: 1:238-244 args: - Tuple: - Ident: - invoice_line_id - span: 1:104-119 + span: 1:247-262 - Ident: - invoice_id - span: 1:121-131 + span: 1:264-274 - Ident: - unit_price - span: 1:133-143 - span: 1:102-145 - span: 
1:95-145 + span: 1:276-286 + span: 1:245-288 + span: 1:238-288 - FuncCall: name: Ident: - take - span: 1:148-152 + span: 1:291-295 args: - Literal: Integer: 5 - span: 1:153-154 - span: 1:148-154 - span: 1:74-154 - span: 1:63-156 + span: 1:296-297 + span: 1:291-297 + span: 1:135-297 + span: 1:124-299 - FuncCall: name: Ident: - select - span: 1:157-163 + span: 1:300-306 args: - Tuple: - Binary: left: Ident: - customer_id - span: 1:170-181 + span: 1:313-324 op: Mul right: Literal: Integer: 2 - span: 1:184-185 - span: 1:170-185 + span: 1:327-328 + span: 1:313-328 alias: a - FuncCall: name: Ident: - math - round - span: 1:191-201 + span: 1:334-344 args: - Literal: Integer: 1 - span: 1:202-203 + span: 1:345-346 - Binary: left: Ident: - invoice_id - span: 1:205-215 + span: 1:348-358 op: Mul right: Ident: - total - span: 1:218-223 - span: 1:205-223 - span: 1:191-224 + span: 1:361-366 + span: 1:348-366 + span: 1:334-367 alias: b - span: 1:164-226 - span: 1:157-226 - span: 1:0-226 - span: 1:0-226 + span: 1:307-369 + span: 1:300-369 + span: 1:0-369 + span: 1:0-369 diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__fmt__append_select_compute.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__fmt__append_select_compute.snap index 9caef69eb576..1870538f72c8 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__fmt__append_select_compute.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__fmt__append_select_compute.snap @@ -1,13 +1,21 @@ --- source: prqlc/prqlc/tests/integration/queries.rs -expression: "from invoices\nselect { customer_id, invoice_id, total }\ntake 5\nappend (\n from invoice_items\n select { invoice_line_id, invoice_id, unit_price }\n take 5\n)\nselect { a = customer_id * 2, b = math.round 1 (invoice_id * total) }\n" +expression: "from invoices\nderive total = case [total < 10 => total * 2, true => total]\nselect { customer_id, invoice_id, total }\ntake 5\nappend (\n from 
invoice_items\n derive unit_price = case [unit_price < 1 => unit_price * 2, true => unit_price]\n select { invoice_line_id, invoice_id, unit_price }\n take 5\n)\nselect { a = customer_id * 2, b = math.round 1 (invoice_id * total) }\n" input_file: prqlc/prqlc/tests/integration/queries/append_select_compute.prql --- from invoices +derive total = case [ + total < 10 => total * 2, + true => total, +] select {customer_id, invoice_id, total} take 5 append ( from invoice_items + derive unit_price = case [ + unit_price < 1 => unit_price * 2, + true => unit_price, + ] select {invoice_line_id, invoice_id, unit_price} take 5 ) diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__lex__append_select_compute.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__lex__append_select_compute.snap index ee9ab54bf862..739d9d6b00c3 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__lex__append_select_compute.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__lex__append_select_compute.snap @@ -9,58 +9,94 @@ Tokens( 0..4: Ident("from"), 5..13: Ident("invoices"), 13..14: NewLine, - 14..20: Ident("select"), - 21..22: Control('{'), - 23..34: Ident("customer_id"), - 34..35: Control(','), - 36..46: Ident("invoice_id"), - 46..47: Control(','), - 48..53: Ident("total"), - 54..55: Control('}'), - 55..56: NewLine, - 56..60: Ident("take"), - 61..62: Literal(Integer(5)), - 62..63: NewLine, - 63..69: Ident("append"), - 70..71: Control('('), - 71..72: NewLine, - 74..78: Ident("from"), - 79..92: Ident("invoice_items"), - 92..93: NewLine, - 95..101: Ident("select"), - 102..103: Control('{'), - 104..119: Ident("invoice_line_id"), - 119..120: Control(','), - 121..131: Ident("invoice_id"), - 131..132: Control(','), - 133..143: Ident("unit_price"), - 144..145: Control('}'), - 145..146: NewLine, - 148..152: Ident("take"), - 153..154: Literal(Integer(5)), - 154..155: NewLine, - 155..156: Control(')'), - 156..157: NewLine, - 
157..163: Ident("select"), - 164..165: Control('{'), - 166..167: Ident("a"), - 168..169: Control('='), - 170..181: Ident("customer_id"), - 182..183: Control('*'), - 184..185: Literal(Integer(2)), - 185..186: Control(','), - 187..188: Ident("b"), - 189..190: Control('='), - 191..195: Ident("math"), - 195..196: Control('.'), - 196..201: Ident("round"), - 202..203: Literal(Integer(1)), - 204..205: Control('('), - 205..215: Ident("invoice_id"), - 216..217: Control('*'), - 218..223: Ident("total"), - 223..224: Control(')'), - 225..226: Control('}'), - 226..227: NewLine, + 14..20: Ident("derive"), + 21..26: Ident("total"), + 27..28: Control('='), + 29..33: Keyword("case"), + 34..35: Control('['), + 35..40: Ident("total"), + 41..42: Control('<'), + 43..45: Literal(Integer(10)), + 46..48: ArrowFat, + 49..54: Ident("total"), + 55..56: Control('*'), + 57..58: Literal(Integer(2)), + 58..59: Control(','), + 60..64: Literal(Boolean(true)), + 65..67: ArrowFat, + 68..73: Ident("total"), + 73..74: Control(']'), + 74..75: NewLine, + 75..81: Ident("select"), + 82..83: Control('{'), + 84..95: Ident("customer_id"), + 95..96: Control(','), + 97..107: Ident("invoice_id"), + 107..108: Control(','), + 109..114: Ident("total"), + 115..116: Control('}'), + 116..117: NewLine, + 117..121: Ident("take"), + 122..123: Literal(Integer(5)), + 123..124: NewLine, + 124..130: Ident("append"), + 131..132: Control('('), + 132..133: NewLine, + 135..139: Ident("from"), + 140..153: Ident("invoice_items"), + 153..154: NewLine, + 156..162: Ident("derive"), + 163..173: Ident("unit_price"), + 174..175: Control('='), + 176..180: Keyword("case"), + 181..182: Control('['), + 182..192: Ident("unit_price"), + 193..194: Control('<'), + 195..196: Literal(Integer(1)), + 197..199: ArrowFat, + 200..210: Ident("unit_price"), + 211..212: Control('*'), + 213..214: Literal(Integer(2)), + 214..215: Control(','), + 216..220: Literal(Boolean(true)), + 221..223: ArrowFat, + 224..234: Ident("unit_price"), + 234..235: 
Control(']'), + 235..236: NewLine, + 238..244: Ident("select"), + 245..246: Control('{'), + 247..262: Ident("invoice_line_id"), + 262..263: Control(','), + 264..274: Ident("invoice_id"), + 274..275: Control(','), + 276..286: Ident("unit_price"), + 287..288: Control('}'), + 288..289: NewLine, + 291..295: Ident("take"), + 296..297: Literal(Integer(5)), + 297..298: NewLine, + 298..299: Control(')'), + 299..300: NewLine, + 300..306: Ident("select"), + 307..308: Control('{'), + 309..310: Ident("a"), + 311..312: Control('='), + 313..324: Ident("customer_id"), + 325..326: Control('*'), + 327..328: Literal(Integer(2)), + 328..329: Control(','), + 330..331: Ident("b"), + 332..333: Control('='), + 334..338: Ident("math"), + 338..339: Control('.'), + 339..344: Ident("round"), + 345..346: Literal(Integer(1)), + 347..348: Control('('), + 348..358: Ident("invoice_id"), + 359..360: Control('*'), + 361..366: Ident("total"), + 366..367: Control(')'), + 368..369: Control('}'), + 369..370: NewLine, ], ) diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__results__append_select_compute.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__results__append_select_compute.snap index 07016729aac5..9e8c92958086 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__results__append_select_compute.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__results__append_select_compute.snap @@ -1,15 +1,15 @@ --- source: prqlc/prqlc/tests/integration/queries.rs -expression: "from invoices\nselect { customer_id, invoice_id, total }\ntake 5\nappend (\n from invoice_items\n select { invoice_line_id, invoice_id, unit_price }\n take 5\n)\nselect { a = customer_id * 2, b = math.round 1 (invoice_id * total) }\n" +expression: "from invoices\nderive total = case [total < 10 => total * 2, true => total]\nselect { customer_id, invoice_id, total }\ntake 5\nappend (\n from invoice_items\n derive unit_price = case [unit_price < 1 => 
unit_price * 2, true => unit_price]\n select { invoice_line_id, invoice_id, unit_price }\n take 5\n)\nselect { a = customer_id * 2, b = math.round 1 (invoice_id * total) }\n" input_file: prqlc/prqlc/tests/integration/queries/append_select_compute.prql --- -4,2 -8,7.9 -16,17.8 -28,35.6 +4,4 +8,15.8 +16,35.6 +28,71.3 46,69.3 -2,1 -4,1 -6,2 -8,2 -10,2 +2,2 +4,2 +6,4 +8,4 +10,4 diff --git a/prqlc/prqlc/tests/integration/sql.rs b/prqlc/prqlc/tests/integration/sql.rs index 0964a69fc7d8..dd3767856fb0 100644 --- a/prqlc/prqlc/tests/integration/sql.rs +++ b/prqlc/prqlc/tests/integration/sql.rs @@ -2518,17 +2518,11 @@ fn test_distinct_on_03() { derive foo = 1 select foo "###).unwrap()), @r" - WITH table_1 AS ( + WITH table_0 AS ( SELECT - DISTINCT ON (col1) col1 + DISTINCT ON (col1) NULL FROM tab1 - ), - table_0 AS ( - SELECT - NULL - FROM - table_1 ) SELECT 1 AS foo @@ -5693,8 +5687,13 @@ fn test_missing_columns_group_complex_compute() { "#, ) .unwrap(), @r" - WITH table_0 AS ( - SELECT + SELECT + DISTINCT ON ( + EXTRACT( + year + from + hire_date + ), CONCAT( 'Year ', EXTRACT( @@ -5702,30 +5701,216 @@ fn test_missing_columns_group_complex_compute() { from hire_date ) - ) AS year_label, + ) + ) CONCAT( + 'Year ', EXTRACT( year from hire_date - ) AS _expr_0, + ) + ) AS year_label + FROM + employees + "); +} + +#[test] +fn test_append_select_compute() { + // Test for handling complex append with select and compute operations + assert_snapshot!(compile(r###" + from invoices + derive total = case [total < 10 => total * 2, true => total] + select { customer_id, invoice_id, total } + take 5 + append ( + from invoice_items + derive unit_price = case [unit_price < 1 => unit_price * 2, true => unit_price] + select { invoice_line_id, invoice_id, unit_price } + take 5 + ) + select { a = customer_id * 2, b = math.round 1 (invoice_id * total) } + "###).unwrap(), @r" + WITH table_1 AS ( + SELECT + * + FROM + ( + SELECT + invoice_id, + CASE + WHEN total < 10 THEN total * 2 + ELSE total + 
END AS _expr_0, + customer_id + FROM + invoices + LIMIT + 5 + ) AS table_3 + UNION + ALL + SELECT + * + FROM + ( + SELECT + invoice_id, + CASE + WHEN unit_price < 1 THEN unit_price * 2 + ELSE unit_price + END AS unit_price, + invoice_line_id + FROM + invoice_items + LIMIT + 5 + ) AS table_4 + ) + SELECT + customer_id * 2 AS a, + ROUND(invoice_id * _expr_0, 1) AS b + FROM + table_1 + "); +} + +#[test] +fn test_append_select_multiple() { + // Test for handling multiple append operations with grouping and aggregation + assert_snapshot!(compile(r###" + from invoices + select { customer_id, invoice_id, total, useless1, useless2 } + take 5 + append ( + from employees + select { employee_id, employee_id + 1, reports_to, useless3, useless4 } + take 5 + ) + group { customer_id } (aggregate { invoice_id = math.round 1 (sum invoice_id), total = math.round 1 (sum total), useless1 = sum useless1 }) + append ( + from invoice_items + select { invoice_id, invoice_line_id, 0, useless5 } + take 5 + ) + sort { +invoice_id, +total } + select { total, invoice_id } + "###).unwrap(), @r" + WITH table_3 AS ( + SELECT + * + FROM + ( + SELECT + customer_id, + total, + invoice_id + FROM + invoices + LIMIT + 5 + ) AS table_6 + UNION + ALL + SELECT + * + FROM + ( + SELECT + employee_id, + reports_to, + employee_id + 1 + FROM + employees + LIMIT + 5 + ) AS table_7 + ), + table_2 AS ( + SELECT + ROUND(COALESCE(SUM(total), 0), 1) AS total, + ROUND(COALESCE(SUM(invoice_id), 0), 1) AS invoice_id + FROM + table_3 + GROUP BY + customer_id + UNION + ALL + SELECT + * + FROM + ( + SELECT + invoice_id, + invoice_line_id + FROM + invoice_items + LIMIT + 5 + ) AS table_8 + ) + SELECT + total, + invoice_id + FROM + table_2 + ORDER BY + invoice_id, + total + "); +} + +#[test] +fn test_distinct_on_sort_on_compute() { + // Test for handling distinct on with sorting on computed columns + assert_snapshot!(compile(r###" + from invoices + derive code = case [customer_id < 10 => billing_postal_code, true => null] + 
group {customer_id, billing_city, billing_country} ( + sort {-this.code} + take 1 + ) + filter (customer_id | in [4]) + group {billing_country} (aggregate {total = math.round 2 (sum total)}) + "###).unwrap(), @r" + WITH table_1 AS ( + SELECT + billing_country, + total, + customer_id, + billing_city, CASE - WHEN city = 'Calgary' THEN 'A city' - ELSE city + WHEN customer_id < 10 THEN billing_postal_code + ELSE NULL END AS _expr_1, - city + billing_postal_code FROM - employees + invoices ), - table_1 AS ( + table_0 AS ( SELECT - DISTINCT ON (_expr_0, year_label) year_label, - _expr_0 + billing_country, + total, + customer_id, + ROW_NUMBER() OVER ( + PARTITION BY customer_id, + billing_city, + billing_country + ORDER BY + _expr_1 DESC + ) AS _expr_0 FROM - table_0 + table_1 ) SELECT - year_label + billing_country, + ROUND(COALESCE(SUM(total), 0), 2) AS total FROM - table_1 + table_0 + WHERE + _expr_0 <= 1 + AND customer_id IN (4) + GROUP BY + billing_country "); }