From 14cf08fd6ffbcf961f36dc0175461d0579aeef1e Mon Sep 17 00:00:00 2001 From: Luka Peschke Date: Mon, 26 May 2025 15:26:10 +0200 Subject: [PATCH 1/3] fix(prqlc): deduplicate selected items in gen_projection fixes #5302 Signed-off-by: Luka Peschke --- prqlc/prqlc/src/sql/gen_projection.rs | 40 ++++++++++++++++ prqlc/prqlc/tests/integration/sql.rs | 68 +++++++++++++++++++++++---- 2 files changed, 98 insertions(+), 10 deletions(-) diff --git a/prqlc/prqlc/src/sql/gen_projection.rs b/prqlc/prqlc/src/sql/gen_projection.rs index 9e7074d9940b..5bc0d0a01209 100644 --- a/prqlc/prqlc/src/sql/gen_projection.rs +++ b/prqlc/prqlc/src/sql/gen_projection.rs @@ -120,6 +120,44 @@ pub(super) fn translate_wildcards(ctx: &AnchorContext, cols: Vec) -> (Vec) { + // Dropping all duplicated identifiers + let mut seen = HashSet::new(); + items.retain(|select_item| match select_item { + SelectItem::UnnamedExpr(expr) => { + if let sql_ast::Expr::CompoundIdentifier(idents) = expr { + // If any of the identifiers hadn't been seen yet, retain the expr + idents.iter().any(|ident| seen.insert(ident.clone())) + } else { + true + } + } + SelectItem::ExprWithAlias { alias, .. } => seen.insert(alias.clone()), + _ => true, + }); + + // Dropping all expressions which are already selected as an alias + let compounds_with_aliases = items + .iter() + .filter_map(|select_item| { + if let SelectItem::ExprWithAlias { expr, .. } = select_item { + if matches!(expr, sql_ast::Expr::CompoundIdentifier(_)) { + Some(expr.clone()) + } else { + None + } + } else { + None + } + }) + .collect::>(); + + items.retain(|select_item| match select_item { + SelectItem::UnnamedExpr(expr) => !compounds_with_aliases.contains(expr), + _ => true, + }); +} + pub(super) fn translate_select_items( cols: Vec, mut excluded: Excluded, @@ -164,6 +202,8 @@ pub(super) fn translate_select_items( }) .try_collect()?; + deduplicate_select_items(&mut res); + if res.is_empty() && !ctx.dialect.supports_zero_columns() { // In some cases, no columns will appear in the projection // for SQL to parse correctly, we inject a `NULL`. diff --git a/prqlc/prqlc/tests/integration/sql.rs b/prqlc/prqlc/tests/integration/sql.rs index a94bb07f7ee4..1e530ad95d9a 100644 --- a/prqlc/prqlc/tests/integration/sql.rs +++ b/prqlc/prqlc/tests/integration/sql.rs @@ -918,6 +918,57 @@ fn test_sort_in_nested_join() { ); } +#[test] +fn test_sort_in_nested_join_with_extra_derive_and_select() { + // #5302 + assert_snapshot!(compile(r#" + from albums + join side:left ( + from artists + derive { + my_new_col = f"artist: {name}" + } + group {my_new_col} (aggregate { first_name = first this.`name`}) + sort {this.my_new_col, first_name} + derive {new_name = first_name} + select {this.my_new_col, this.new_name} + ) (this.id == that.my_new_col) + "#).unwrap(), + @r#" + WITH table_1 AS ( + SELECT + CONCAT('artist: ', name) AS my_new_col, + FIRST_VALUE(name) AS _expr_0 + FROM + artists + GROUP BY + CONCAT('artist: ', name) + ), + table_2 AS ( + SELECT + my_new_col, + _expr_0 AS new_name + FROM + table_1 + ), + table_0 AS ( + SELECT + my_new_col, + new_name + FROM + table_2 + ) + SELECT + albums.*, + table_0.my_new_col, + table_0.new_name + FROM + albums + LEFT OUTER JOIN table_0 ON albums.id = table_0.my_new_col + "# + ); +} + #[test] fn test_sort_in_nested_append() { assert_snapshot!(compile(r#" @@ -3885,7 +3936,6 @@ fn test_name_shadowing() { "###).unwrap(), @r" SELECT - a AS _expr_0, a AS _expr_0, a + 1 AS a FROM @@ -3903,7 +3953,6 @@ fn test_name_shadowing() { "###).unwrap(), @r" SELECT - a AS _expr_0, a AS _expr_0, a + 1, a + 1 + 2 AS a @@ -5098,7 +5147,6 @@ fn test_lineage() { ' 3' AS a ) SELECT - a, a FROM table_0 @@ -5513,22 +5561,22 @@ fn unstable_ordering() { assert_snapshot!(compile(r###" # All lines are mandatory from foo -take 10000 +take 10000 # We need 8+ aliases to trigger the issue derive { a1 = 1, a2 = 1, a3 = 1, a4 = 1, a5 = 1, a6 = 1, a7 = 1, a8 = 1 } # The `select !` itself is required, but its content is not -select !{ a1, a2, a3, a4, a5, a6, a7, a8 } +select !{ a1, a2, a3, a4, a5, a6, a7, a8 } # We may remove `u` from both these statements, but the `select !` must remain select { b, c, u } -select !{ u } +select !{ u } # Aggregate verb seems to not matter -group { b } ( aggregate { c = count c } ) -derive { d = c } -select !{ c } +group { b } ( aggregate { c = count c } ) +derive { d = c } +select !{ c } -group { d } ( aggregate { b = sum b } ) +group { d } ( aggregate { b = sum b } ) sort { d }"###).unwrap(), @r" WITH table_1 AS ( SELECT From af68203b6cfcee57c3ef8f53c355515b8ea50baa Mon Sep 17 00:00:00 2001 From: Luka Peschke Date: Tue, 27 May 2025 10:41:52 +0200 Subject: [PATCH 2/3] refactor: only deduplicate unnamed expressions for now + ensure multiple aliases work as expected in the test Signed-off-by: Luka Peschke --- prqlc/prqlc/src/sql/gen_projection.rs | 21 --------------------- prqlc/prqlc/tests/integration/sql.rs | 15 ++++++++++----- 2 files changed, 10 insertions(+), 26 deletions(-) diff --git a/prqlc/prqlc/src/sql/gen_projection.rs b/prqlc/prqlc/src/sql/gen_projection.rs index 5bc0d0a01209..fbc367f3ccd1 100644 --- a/prqlc/prqlc/src/sql/gen_projection.rs +++ b/prqlc/prqlc/src/sql/gen_projection.rs @@ -135,27 +135,6 @@ fn deduplicate_select_items(items: &mut Vec) { SelectItem::ExprWithAlias { alias, .. } => seen.insert(alias.clone()), _ => true, }); - - // Dropping all expressions which are already selected as an alias - let compounds_with_aliases = items - .iter() - .filter_map(|select_item| { - if let SelectItem::ExprWithAlias { expr, .. } = select_item { - if matches!(expr, sql_ast::Expr::CompoundIdentifier(_)) { - Some(expr.clone()) - } else { - None - } - } else { - None - } - }) - .collect::>(); - - items.retain(|select_item| match select_item { - SelectItem::UnnamedExpr(expr) => !compounds_with_aliases.contains(expr), - _ => true, - }); } pub(super) fn translate_select_items( diff --git a/prqlc/prqlc/tests/integration/sql.rs b/prqlc/prqlc/tests/integration/sql.rs index 1e530ad95d9a..d61fbcd1fd19 100644 --- a/prqlc/prqlc/tests/integration/sql.rs +++ b/prqlc/prqlc/tests/integration/sql.rs @@ -930,8 +930,8 @@ fn test_sort_in_nested_join_with_extra_derive_and_select() { } group {my_new_col} (aggregate { first_name = first this.`name`}) sort {this.my_new_col, first_name} - derive {new_name = first_name} - select {this.my_new_col, this.new_name} + derive {new_name = first_name, other_new_name = first_name} + select {this.my_new_col, this.new_name, this.other_new_name} ) (this.id == that.my_new_col) "#).unwrap(), @r#" @@ -947,21 +947,26 @@ fn test_sort_in_nested_join_with_extra_derive_and_select() { table_2 AS ( SELECT my_new_col, - _expr_0 AS new_name + _expr_0 AS new_name, + _expr_0 AS other_new_name, + _expr_0 FROM table_1 ), table_0 AS ( SELECT my_new_col, - new_name + new_name, + other_new_name, + FIRST_VALUE(name) AS _expr_0 FROM table_2 ) SELECT albums.*, table_0.my_new_col, - table_0.new_name + table_0.new_name, + table_0.other_new_name FROM albums LEFT OUTER JOIN table_0 ON albums.id = table_0.my_new_col From cb44a6a89fb7327e11f3602cd7fa7ecc52239f34 Mon Sep 17 00:00:00 2001 From: Luka Peschke Date: Tue, 27 May 2025 10:45:17 +0200 Subject: [PATCH 3/3] chore: clippy Signed-off-by: Luka Peschke --- prqlc/prqlc/src/sql/gen_projection.rs | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/prqlc/prqlc/src/sql/gen_projection.rs b/prqlc/prqlc/src/sql/gen_projection.rs index fbc367f3ccd1..67be9c45a6dd 100644 --- a/prqlc/prqlc/src/sql/gen_projection.rs +++ b/prqlc/prqlc/src/sql/gen_projection.rs @@ -124,13 +124,9 @@ fn deduplicate_select_items(items: &mut Vec) { // Dropping all duplicated identifiers let mut seen = HashSet::new(); items.retain(|select_item| match select_item { - SelectItem::UnnamedExpr(expr) => { - if let sql_ast::Expr::CompoundIdentifier(idents) = expr { - // If any of the identifiers hadn't been seen yet, retain the expr - idents.iter().any(|ident| seen.insert(ident.clone())) - } else { - true - } + SelectItem::UnnamedExpr(sql_ast::Expr::CompoundIdentifier(idents)) => { + // If any of the identifiers hadn't been seen yet, retain the expr + idents.iter().any(|ident| seen.insert(ident.clone())) } SelectItem::ExprWithAlias { alias, .. } => seen.insert(alias.clone()), _ => true,