diff --git a/CHANGELOG.md b/CHANGELOG.md index 1a52e9651..936ac1faa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,12 @@ - Added panel decorations (grid lines, axes, background) for polar coordinates (#156). - Added `radar` setting to polar coordinates for making radar plots (#418). +### Changed + +- `boxplot`, `violin`, and `range` now support omitting the categorical + aesthetic, matching `bar`. `point` now treats both position aesthetics as + optional. + ## 0.3.2 - 2026-05-05 ### Fixed diff --git a/doc/syntax/layer/type/boxplot.qmd b/doc/syntax/layer/type/boxplot.qmd index 39ecc33f1..712eb21b6 100644 --- a/doc/syntax/layer/type/boxplot.qmd +++ b/doc/syntax/layer/type/boxplot.qmd @@ -9,10 +9,12 @@ Boxplots display a summary of a continuous distribution. In the style of Tukey, The following aesthetics are recognised by the boxplot layer. ### Required -* Primary axis (e.g. `x`): The categorical variable to group by * Secondary axis (e.g. `y`): The continuous variable to summarize ### Optional +* Primary axis (e.g. `x`): The categorical variable to group by. If omitted a + single boxplot is drawn for the whole distribution and the (one-tick) + categorical axis is hidden. * `stroke`: The colour of the box contours, whiskers, median line and outliers. * `fill`: The colour of the box interior. * `colour`: Shorthand for setting `stroke` and `fill` simultaneously. Note that the median line will have bad visibility if `stroke` and `fill` are the same. 
@@ -91,3 +93,12 @@ VISUALISE FROM ggsql:penguins DRAW boxplot MAPPING species AS y, bill_len AS x ``` + +Omit the categorical axis to summarise the whole distribution as a single +boxplot: + +```{ggsql} +VISUALISE FROM ggsql:penguins +DRAW boxplot + MAPPING bill_len AS y +``` diff --git a/doc/syntax/layer/type/point.qmd b/doc/syntax/layer/type/point.qmd index b9fd50163..aa167ca7f 100644 --- a/doc/syntax/layer/type/point.qmd +++ b/doc/syntax/layer/type/point.qmd @@ -10,10 +10,15 @@ The point layer is used to create scatterplots. The scatterplot is most useful f The following aesthetics are recognised by the point layer. ### Required -* Primary axis (e.g. `x`): Position along the primary axis. -* Secondary axis (e.g. `y`): Position along the secondary axis. +The point layer has no required aesthetics. ### Optional +* Primary axis (e.g. `x`): Position along the primary axis. If omitted, all + points are drawn at a single discrete primary-axis position (a strip plot) + and the categorical axis is hidden. +* Secondary axis (e.g. `y`): Position along the secondary axis. Same dummy-axis + treatment as the primary. If both axes are omitted, all rows pile up at a + single point — only useful in combination with `aggregate`. * `size`: The size of each point * `colour`: The default colour of each point * `stroke`: The colour of the stroke around each point (if any). Overrides `colour` diff --git a/doc/syntax/layer/type/range.qmd b/doc/syntax/layer/type/range.qmd index 35771ef9f..cead370ba 100644 --- a/doc/syntax/layer/type/range.qmd +++ b/doc/syntax/layer/type/range.qmd @@ -10,11 +10,13 @@ The range layer displays an interval between two values along the secondary axis The following aesthetics are recognised by the range layer. ### Required -* Primary axis (e.g. `x`): Position along the primary axis. * Secondary axis minimum (e.g. `ymin`): Lower position along the secondary axis. * Secondary axis maximum (e.g. `ymax`): Upper position along the secondary axis. 
### Optional +* Primary axis (e.g. `x`): Position along the primary axis. If omitted a + single interval is drawn over the whole dataset and the (one-tick) + categorical axis is hidden. * `stroke`/`colour`: The colour of the lines in the range. * `opacity`: The opacity of the colour. * `linewidth`: The width of the lines in the range. diff --git a/doc/syntax/layer/type/violin.qmd b/doc/syntax/layer/type/violin.qmd index 37a59560a..fff5f5ba1 100644 --- a/doc/syntax/layer/type/violin.qmd +++ b/doc/syntax/layer/type/violin.qmd @@ -11,10 +11,12 @@ The violins are mirrored kernel density estimates, similar to the [density](dens The following aesthetics are recognised by the violin layer. ### Required -* Primary axis (e.g. `x`): The categorical variable for grouping. * Secondary axis (e.g. `y`): The continuous variable to compute density for. ### Optional +* Primary axis (e.g. `x`): The categorical variable for grouping. If omitted + a single violin is drawn for the whole distribution and the (one-tick) + categorical axis is hidden. * `stroke`: The colour of the contour lines. * `fill`: The colour of the inner area. * `colour`: Shorthand for setting `stroke` and `fill` simultaneously. diff --git a/src/execute/mod.rs b/src/execute/mod.rs index 04963f94b..37c014c66 100644 --- a/src/execute/mod.rs +++ b/src/execute/mod.rs @@ -3048,11 +3048,12 @@ mod tests { ) .unwrap(); - // Query missing required aesthetic 'y' - should show 'y' not 'pos2' + // Query missing required aesthetic 'y' - should show 'y' not 'pos2'. + // Use line, which still requires both x and y (point's x is optional). 
let query = r#" SELECT * FROM test_data VISUALISE - DRAW point MAPPING a AS x + DRAW line MAPPING a AS x "#; let result = prepare_data_with_reader(query, &reader); diff --git a/src/plot/layer/geom/area.rs b/src/plot/layer/geom/area.rs index 6fc357063..266add133 100644 --- a/src/plot/layer/geom/area.rs +++ b/src/plot/layer/geom/area.rs @@ -60,10 +60,6 @@ impl GeomTrait for Area { Some(&["pos1"]) } - fn needs_stat_transform(&self, _aesthetics: &Mappings) -> bool { - true - } - fn apply_stat_transform( &self, query: &str, diff --git a/src/plot/layer/geom/bar.rs b/src/plot/layer/geom/bar.rs index 211e89a08..0efed840b 100644 --- a/src/plot/layer/geom/bar.rs +++ b/src/plot/layer/geom/bar.rs @@ -4,7 +4,7 @@ use std::collections::HashMap; use std::collections::HashSet; use super::stat_aggregate; -use super::types::{get_column_name, POSITION_VALUES}; +use super::types::{get_column_name, wrap_stat_with_dummy_pos1, POSITION_VALUES}; use super::{ has_aggregate_param, DefaultAesthetics, DefaultParamValue, GeomTrait, GeomType, ParamConstraint, ParamDefinition, StatResult, @@ -85,10 +85,6 @@ impl GeomTrait for Bar { Some(&[]) } - fn needs_stat_transform(&self, _aesthetics: &Mappings) -> bool { - true // Bar stat decides COUNT vs identity based on y mapping - } - fn apply_stat_transform( &self, query: &str, @@ -101,7 +97,7 @@ impl GeomTrait for Bar { aesthetic_ctx: &crate::plot::aesthetic::AestheticContext, ) -> Result<StatResult> { if has_aggregate_param(parameters) { - return stat_aggregate::apply( + let aggregated = stat_aggregate::apply( query, schema, aesthetics, @@ -110,7 +106,16 @@ impl GeomTrait for Bar { dialect, aesthetic_ctx, self.aggregate_domain_aesthetics().unwrap_or(&[]), - ); + )?; + // When the user omits the categorical axis, decorate the aggregate + // output with the dummy pos1 column so the writer suppresses the + // (otherwise meaningless) one-tick axis. Composes with whatever + // shape the aggregate stat produced. 
+ return if get_column_name(aesthetics, "pos1").is_none() { + Ok(wrap_stat_with_dummy_pos1(query, aggregated)) + } else { + Ok(aggregated) + }; } stat_bar_count(query, schema, aesthetics, group_by) } diff --git a/src/plot/layer/geom/boxplot.rs b/src/plot/layer/geom/boxplot.rs index 5d99b358c..621c2ffdb 100644 --- a/src/plot/layer/geom/boxplot.rs +++ b/src/plot/layer/geom/boxplot.rs @@ -2,7 +2,7 @@ use std::collections::HashMap; -use super::types::POSITION_VALUES; +use super::types::{wrap_with_dummy_axis, POSITION_VALUES}; use super::{DefaultAesthetics, GeomTrait, GeomType}; use crate::{ naming, @@ -26,7 +26,10 @@ impl GeomTrait for Boxplot { fn aesthetics(&self) -> DefaultAesthetics { DefaultAesthetics { defaults: &[ - ("pos1", DefaultAestheticValue::Required), + // pos1 is optional - if omitted, stat_boxplot synthesises a + // dummy categorical axis so the geom renders a single boxplot + // of the whole pos2 distribution. + ("pos1", DefaultAestheticValue::Null), ("pos2", DefaultAestheticValue::Required), ("stroke", DefaultAestheticValue::String("black")), ("fill", DefaultAestheticValue::String("white")), @@ -46,10 +49,6 @@ impl GeomTrait for Boxplot { &["pos2"] } - fn needs_stat_transform(&self, _aesthetics: &Mappings) -> bool { - true - } - fn default_params(&self) -> &'static [super::ParamDefinition] { const PARAMS: &[ParamDefinition] = &[ ParamDefinition { @@ -79,6 +78,7 @@ impl GeomTrait for Boxplot { fn default_remappings(&self) -> DefaultAesthetics { DefaultAesthetics { defaults: &[ + ("pos1", DefaultAestheticValue::Column("pos1")), ("pos2", DefaultAestheticValue::Column("value")), ("pos2end", DefaultAestheticValue::Column("value2")), ("type", DefaultAestheticValue::Column("type")), @@ -117,9 +117,17 @@ fn stat_boxplot( let y = get_column_name(aesthetics, "pos2").ok_or_else(|| { GgsqlError::ValidationError("Boxplot requires 'y' aesthetic mapping".to_string()) })?; - let x = get_column_name(aesthetics, "pos1").ok_or_else(|| { - 
GgsqlError::ValidationError("Boxplot requires 'x' aesthetic mapping".to_string()) - })?; + + // pos1 is optional. When the user omits it, wrap the input query with a + // synthetic dummy categorical column and group by that column, so the + // existing GROUP BY / summary pipeline collapses to a single boxplot. + let (working_query, x, use_dummy) = match get_column_name(aesthetics, "pos1") { + Some(col) => (query.to_string(), col, false), + None => { + let dummy_col = naming::stat_column("pos1"); + (wrap_with_dummy_axis(query, "pos1"), dummy_col, true) + } + }; // Get coef parameter (validated by ParamConstraint::number_min) let ParameterValue::Number(coef) = parameters.get("coef").unwrap() else { @@ -148,17 +156,25 @@ fn stat_boxplot( } // Query for boxplot summary statistics - let summary = boxplot_sql_compute_summary(query, &groups, &value_col, coef, dialect); - let stats_query = boxplot_sql_append_outliers(&summary, &groups, &value_col, query, outliers); + let summary = boxplot_sql_compute_summary(&working_query, &groups, &value_col, coef, dialect); + let stats_query = + boxplot_sql_append_outliers(&summary, &groups, &value_col, &working_query, outliers); + + let mut stat_columns = vec![ + "type".to_string(), + "value".to_string(), + "value2".to_string(), + ]; + let mut dummy_columns: Vec<String> = vec![]; + if use_dummy { + stat_columns.push("pos1".to_string()); + dummy_columns.push("pos1".to_string()); + } Ok(StatResult::Transformed { query: stats_query, - stat_columns: vec![ - "type".to_string(), - "value".to_string(), - "value2".to_string(), - ], - dummy_columns: vec![], + stat_columns, + dummy_columns, consumed_aesthetics: vec!["pos2".to_string()], }) } @@ -517,9 +533,10 @@ mod tests { let boxplot = Boxplot; let aes = boxplot.aesthetics(); - assert!(aes.is_required("pos1")); + // pos1 is optional (omit → dummy categorical axis); pos2 is required. 
+ assert!(!aes.is_required("pos1")); assert!(aes.is_required("pos2")); - assert_eq!(aes.required().len(), 2); + assert_eq!(aes.required(), vec!["pos2"]); } #[test] @@ -575,7 +592,10 @@ mod tests { let boxplot = Boxplot; let remappings = boxplot.default_remappings(); - assert_eq!(remappings.defaults.len(), 3); + assert_eq!(remappings.defaults.len(), 4); + assert!(remappings + .defaults + .contains(&("pos1", DefaultAestheticValue::Column("pos1")))); assert!(remappings .defaults .contains(&("pos2", DefaultAestheticValue::Column("value")))); @@ -587,6 +607,48 @@ mod tests { .contains(&("type", DefaultAestheticValue::Column("type")))); } + #[test] + fn test_boxplot_dummy_pos1_when_unmapped() { + use crate::plot::AestheticValue; + let mut aesthetics = Mappings::new(); + aesthetics.insert( + "pos2".to_string(), + AestheticValue::standard_column("value".to_string()), + ); + let mut parameters: HashMap<String, ParameterValue> = HashMap::new(); + parameters.insert("coef".to_string(), ParameterValue::Number(1.5)); + parameters.insert("outliers".to_string(), ParameterValue::Boolean(true)); + + let result = stat_boxplot( + "SELECT * FROM data", + &aesthetics, + &[], + &parameters, + &AnsiDialect, + ) + .expect("stat_boxplot should succeed without pos1"); + + match result { + StatResult::Transformed { + query, + stat_columns, + dummy_columns, + consumed_aesthetics, + } => { + // The wrapped input introduces a synthetic pos1 column that the + // GROUP BY then collapses to a single boxplot. 
+ assert!(query.contains("__ggsql_stat_dummy")); + assert!(query.contains("__ggsql_stat_pos1")); + assert!(stat_columns.contains(&"pos1".to_string())); + assert!(stat_columns.contains(&"type".to_string())); + assert!(stat_columns.contains(&"value".to_string())); + assert_eq!(dummy_columns, vec!["pos1".to_string()]); + assert_eq!(consumed_aesthetics, vec!["pos2".to_string()]); + } + _ => panic!("expected Transformed"), + } + } + #[test] fn test_boxplot_stat_consumed_aesthetics() { let boxplot = Boxplot; @@ -596,13 +658,6 @@ mod tests { assert_eq!(consumed[0], "pos2"); } - #[test] - fn test_boxplot_needs_stat_transform() { - let boxplot = Boxplot; - let aesthetics = Mappings::new(); - assert!(boxplot.needs_stat_transform(&aesthetics)); - } - #[test] fn test_boxplot_display() { let boxplot = Boxplot; diff --git a/src/plot/layer/geom/density.rs b/src/plot/layer/geom/density.rs index 3fe62f9af..8032f5f47 100644 --- a/src/plot/layer/geom/density.rs +++ b/src/plot/layer/geom/density.rs @@ -54,10 +54,6 @@ impl GeomTrait for Density { } } - fn needs_stat_transform(&self, _aesthetics: &Mappings) -> bool { - true - } - fn default_params(&self) -> &'static [ParamDefinition] { const PARAMS: &[ParamDefinition] = &[ ParamDefinition { diff --git a/src/plot/layer/geom/histogram.rs b/src/plot/layer/geom/histogram.rs index bfb800502..fc37c10f6 100644 --- a/src/plot/layer/geom/histogram.rs +++ b/src/plot/layer/geom/histogram.rs @@ -84,10 +84,6 @@ impl GeomTrait for Histogram { &["pos1"] } - fn needs_stat_transform(&self, _aesthetics: &Mappings) -> bool { - true - } - fn apply_stat_transform( &self, query: &str, diff --git a/src/plot/layer/geom/line.rs b/src/plot/layer/geom/line.rs index 624034586..b40b975f5 100644 --- a/src/plot/layer/geom/line.rs +++ b/src/plot/layer/geom/line.rs @@ -48,10 +48,6 @@ impl GeomTrait for Line { Some(&["pos1"]) } - fn needs_stat_transform(&self, _aesthetics: &Mappings) -> bool { - true - } - fn apply_stat_transform( &self, query: &str, diff --git 
a/src/plot/layer/geom/mod.rs b/src/plot/layer/geom/mod.rs index 74004da5c..1d57f78f3 100644 --- a/src/plot/layer/geom/mod.rs +++ b/src/plot/layer/geom/mod.rs @@ -20,6 +20,7 @@ //! assert!(point.aesthetics().is_required("pos1")); //! ``` +use crate::plot::types::DefaultAestheticValue; use crate::{DataFrame, Mappings, Result}; use serde::{Deserialize, Serialize}; use std::collections::HashMap; @@ -193,11 +194,6 @@ pub trait GeomTrait: std::fmt::Debug + std::fmt::Display + Send + Sync { &[] } - /// Check if this geom requires a statistical transformation - fn needs_stat_transform(&self, _aesthetics: &Mappings) -> bool { - false - } - /// Whether the Aggregate stat applies to this geom, and which aesthetics /// stay as group keys when it does. /// @@ -224,9 +220,16 @@ pub trait GeomTrait: std::fmt::Debug + std::fmt::Display + Send + Sync { /// Apply statistical transformation to the layer query. /// - /// The default implementation dispatches to the Aggregate stat when - /// `supports_aggregate()` is true and the `aggregate` parameter is set; - /// otherwise returns identity (no transformation). + /// The default implementation: + /// 1. Dispatches to the Aggregate stat when `supports_aggregate()` is + /// true and the `aggregate` parameter is set. + /// 2. For each position axis (`pos1`, `pos2`) declared as `Null` in + /// `aesthetics()`, post-wraps the result with a dummy categorical + /// column when *no* aesthetic in the axis's family (e.g. `pos1`, + /// `pos1min`, `pos1max`, …) is mapped. The writer then suppresses + /// the (otherwise one-tick) axis. Geoms whose bespoke stat already + /// synthesises positions (e.g. `bar`, `boxplot`, `violin`, + /// `histogram`) override `apply_stat_transform` and are unaffected. 
#[allow(clippy::too_many_arguments)] fn apply_stat_transform( &self, @@ -239,11 +242,11 @@ pub trait GeomTrait: std::fmt::Debug + std::fmt::Display + Send + Sync { dialect: &dyn SqlDialect, aesthetic_ctx: &AestheticContext, ) -> Result<StatResult> { - if let (Some(domain), true) = ( + let mut result = if let (Some(domain), true) = ( self.aggregate_domain_aesthetics(), has_aggregate_param(parameters), ) { - return stat_aggregate::apply( + stat_aggregate::apply( query, schema, aesthetics, @@ -252,9 +255,20 @@ pub trait GeomTrait: std::fmt::Debug + std::fmt::Display + Send + Sync { dialect, aesthetic_ctx, domain, - ); + )? + } else { + StatResult::Identity + }; + + let aes = self.aesthetics(); + for axis in ["pos1", "pos2"] { + let optional = matches!(aes.get(axis), Some(DefaultAestheticValue::Null)); + if optional && !types::axis_family_has_mapping(aesthetics, axis) { + result = types::wrap_stat_with_dummy_axis(query, result, axis); + } } - Ok(StatResult::Identity) + + Ok(result) } /// Post-process the DataFrame after stat query execution. @@ -466,11 +480,6 @@ impl Geom { self.0.stat_consumed_aesthetics() } - /// Check if stat transform is needed - pub fn needs_stat_transform(&self, aesthetics: &Mappings) -> bool { - self.0.needs_stat_transform(aesthetics) - } - /// Apply stat transform #[allow(clippy::too_many_arguments)] pub fn apply_stat_transform( @@ -621,8 +630,9 @@ mod tests { fn test_geom_aesthetics() { let point = Geom::point(); let aes = point.aesthetics(); - assert!(aes.is_required("pos1")); - assert!(aes.is_required("pos2")); + // Both axes are optional - omitted axes become dummy categorical axes. 
+ assert!(!aes.is_required("pos1")); + assert!(!aes.is_required("pos2")); } #[test] diff --git a/src/plot/layer/geom/point.rs b/src/plot/layer/geom/point.rs index f6b454c9e..13cbfabcc 100644 --- a/src/plot/layer/geom/point.rs +++ b/src/plot/layer/geom/point.rs @@ -18,8 +18,13 @@ impl GeomTrait for Point { fn aesthetics(&self) -> DefaultAesthetics { DefaultAesthetics { defaults: &[ - ("pos1", DefaultAestheticValue::Required), - ("pos2", DefaultAestheticValue::Required), + // Both axes are optional. Whichever the user omits is + // synthesised as a dummy categorical column by the default + // `apply_stat_transform`; the writer then hides that axis. + // Mapping neither degrades to all points overlapping at a + // single dummy spot — useful only with `aggregate`. + ("pos1", DefaultAestheticValue::Null), + ("pos2", DefaultAestheticValue::Null), ("size", DefaultAestheticValue::Number(3.0)), ("stroke", DefaultAestheticValue::String("black")), ("fill", DefaultAestheticValue::String("black")), @@ -30,6 +35,19 @@ impl GeomTrait for Point { } } + fn default_remappings(&self) -> DefaultAesthetics { + DefaultAesthetics { + defaults: &[ + ("pos1", DefaultAestheticValue::Column("pos1")), + ("pos2", DefaultAestheticValue::Column("pos2")), + ], + } + } + + fn valid_stat_columns(&self) -> &'static [&'static str] { + &["pos1", "pos2"] + } + fn default_params(&self) -> &'static [ParamDefinition] { const PARAMS: &[ParamDefinition] = &[ ParamDefinition { diff --git a/src/plot/layer/geom/range.rs b/src/plot/layer/geom/range.rs index d547187b6..e1239a8ef 100644 --- a/src/plot/layer/geom/range.rs +++ b/src/plot/layer/geom/range.rs @@ -18,7 +18,10 @@ impl GeomTrait for Range { fn aesthetics(&self) -> DefaultAesthetics { DefaultAesthetics { defaults: &[ - ("pos1", DefaultAestheticValue::Required), + // pos1 is optional - if no aesthetic in the pos1 family is + // mapped, the default `apply_stat_transform` synthesises a + // dummy categorical axis and the writer hides it. 
+ ("pos1", DefaultAestheticValue::Null), ("pos2min", DefaultAestheticValue::Required), ("pos2max", DefaultAestheticValue::Required), ("stroke", DefaultAestheticValue::String("black")), @@ -29,6 +32,16 @@ impl GeomTrait for Range { } } + fn default_remappings(&self) -> DefaultAesthetics { + DefaultAesthetics { + defaults: &[("pos1", DefaultAestheticValue::Column("pos1"))], + } + } + + fn valid_stat_columns(&self) -> &'static [&'static str] { + &["pos1"] + } + fn default_params(&self) -> &'static [ParamDefinition] { const PARAMS: &[ParamDefinition] = &[ ParamDefinition { diff --git a/src/plot/layer/geom/ribbon.rs b/src/plot/layer/geom/ribbon.rs index 5b3ca13a3..47f9bc26d 100644 --- a/src/plot/layer/geom/ribbon.rs +++ b/src/plot/layer/geom/ribbon.rs @@ -47,10 +47,6 @@ impl GeomTrait for Ribbon { Some(&["pos1"]) } - fn needs_stat_transform(&self, _aesthetics: &Mappings) -> bool { - true - } - fn apply_stat_transform( &self, query: &str, diff --git a/src/plot/layer/geom/smooth.rs b/src/plot/layer/geom/smooth.rs index c523201a4..d032ec099 100644 --- a/src/plot/layer/geom/smooth.rs +++ b/src/plot/layer/geom/smooth.rs @@ -78,10 +78,6 @@ impl GeomTrait for Smooth { PARAMS } - fn needs_stat_transform(&self, _aesthetics: &Mappings) -> bool { - true - } - fn default_remappings(&self) -> DefaultAesthetics { DefaultAesthetics { defaults: &[ diff --git a/src/plot/layer/geom/spatial.rs b/src/plot/layer/geom/spatial.rs index 3ce1df9a4..97766ce92 100644 --- a/src/plot/layer/geom/spatial.rs +++ b/src/plot/layer/geom/spatial.rs @@ -23,10 +23,6 @@ impl GeomTrait for Spatial { } } - fn needs_stat_transform(&self, _aesthetics: &Mappings) -> bool { - true - } - fn apply_stat_transform( &self, query: &str, diff --git a/src/plot/layer/geom/tile.rs b/src/plot/layer/geom/tile.rs index b4a639022..b166a6222 100644 --- a/src/plot/layer/geom/tile.rs +++ b/src/plot/layer/geom/tile.rs @@ -97,11 +97,6 @@ impl GeomTrait for Tile { ] } - fn needs_stat_transform(&self, _aesthetics: &Mappings) -> bool { 
- // Always apply stat transform to validate and consolidate parameters - true - } - /// Every spatial slot is pinned as a group key — the rectangle's position /// and size *define* the group, they are never the thing being summarised. /// Material aesthetics (fill, stroke, opacity, …) pass through to the diff --git a/src/plot/layer/geom/types.rs b/src/plot/layer/geom/types.rs index 8b390547a..d676cb948 100644 --- a/src/plot/layer/geom/types.rs +++ b/src/plot/layer/geom/types.rs @@ -223,6 +223,95 @@ pub fn wrap_with_order_by(input_query: &str, result: StatResult, aesthetic: &str } } +/// Wrap `query` so it produces a literal categorical column carrying the +/// dummy-axis sentinel value for `axis` (`"pos1"` or `"pos2"`). Used by +/// geoms that should still render sensibly when the user omits a position +/// aesthetic. +/// +/// The wrapped query has shape: +/// ```sql +/// SELECT '__ggsql_stat_dummy' AS "__ggsql_stat_<axis>", * +/// FROM (<query>) AS "__ggsql_dummy_src__" +/// ``` +/// +/// This composes with any stat: pre-wrap the input (when the geom's stat +/// groups by the dummied axis, e.g. boxplot/violin) so the existing +/// `GROUP BY` collapses to a single group, or post-wrap a stat output +/// (aggregate / identity) so the dummy column is just decoration. +pub fn wrap_with_dummy_axis(query: &str, axis: &str) -> String { + let stat_col = naming::stat_column(axis); + let dummy_v = naming::stat_column("dummy"); + format!( + "SELECT '{val}' AS {col}, * FROM ({q}) AS \"__ggsql_dummy_src__\"", + val = dummy_v, + col = naming::quote_ident(&stat_col), + q = query, + ) +} + +/// Post-wrap a `StatResult` to add a dummy column for `axis` (`"pos1"` or +/// `"pos2"`). +/// +/// Wraps the inner query via [`wrap_with_dummy_axis`] (turning `Identity` +/// into a `Transformed` over the original input) and appends `axis` to +/// both `stat_columns` and `dummy_columns` so `execute/layer.rs` flips +/// `is_dummy: true` on the resulting aesthetic. 
+pub fn wrap_stat_with_dummy_axis(input_query: &str, result: StatResult, axis: &str) -> StatResult { + match result { + StatResult::Identity => StatResult::Transformed { + query: wrap_with_dummy_axis(input_query, axis), + stat_columns: vec![axis.to_string()], + dummy_columns: vec![axis.to_string()], + consumed_aesthetics: vec![], + }, + StatResult::Transformed { + query, + mut stat_columns, + mut dummy_columns, + consumed_aesthetics, + } => { + let wrapped = wrap_with_dummy_axis(&query, axis); + if !stat_columns.iter().any(|s| s == axis) { + stat_columns.push(axis.to_string()); + } + if !dummy_columns.iter().any(|s| s == axis) { + dummy_columns.push(axis.to_string()); + } + StatResult::Transformed { + query: wrapped, + stat_columns, + dummy_columns, + consumed_aesthetics, + } + } + } +} + +/// Convenience wrapper for the common case of dummying `pos1`. +pub fn wrap_stat_with_dummy_pos1(input_query: &str, result: StatResult) -> StatResult { + wrap_stat_with_dummy_axis(input_query, result, "pos1") +} + +/// Returns true when at least one aesthetic in the same axis family as +/// `axis` (e.g. `pos1`, `pos1min`, `pos1max`, `pos1end`, `pos1offset`) is +/// mapped to a column. +/// +/// Used by the default `apply_stat_transform` to decide whether to fill in +/// a dummy categorical column for an unmapped axis. 
+pub fn axis_family_has_mapping(aesthetics: &Mappings, axis: &str) -> bool { + use crate::plot::aesthetic::parse_position; + let Some((target_slot, _)) = parse_position(axis) else { + return false; + }; + aesthetics + .aesthetics + .iter() + .any(|(name, value)| match parse_position(name) { + Some((slot, _)) => slot == target_slot && value.column_name().is_some(), + None => false, + }) +} + /// Helper to extract column name from aesthetic value pub fn get_column_name(aesthetics: &Mappings, aesthetic: &str) -> Option<String> { use crate::AestheticValue; @@ -358,6 +447,87 @@ mod tests { } } + #[test] + fn wrap_with_dummy_pos1_produces_expected_sql() { + let wrapped = wrap_with_dummy_axis("SELECT * FROM t", "pos1"); + assert_eq!( + wrapped, + "SELECT '__ggsql_stat_dummy' AS \"__ggsql_stat_pos1\", * FROM (SELECT * FROM t) AS \"__ggsql_dummy_src__\"" + ); + } + + #[test] + fn wrap_stat_with_dummy_pos1_promotes_identity() { + let result = wrap_stat_with_dummy_pos1("SELECT * FROM raw", StatResult::Identity); + match result { + StatResult::Transformed { + query, + stat_columns, + dummy_columns, + consumed_aesthetics, + } => { + assert!(query.contains("__ggsql_stat_dummy")); + assert!(query.contains("__ggsql_stat_pos1")); + assert!(query.contains("SELECT * FROM raw")); + assert_eq!(stat_columns, vec!["pos1".to_string()]); + assert_eq!(dummy_columns, vec!["pos1".to_string()]); + assert!(consumed_aesthetics.is_empty()); + } + _ => panic!("expected Transformed"), + } + } + + #[test] + fn wrap_stat_with_dummy_pos1_extends_transformed_metadata() { + let inner = StatResult::Transformed { + query: "SELECT 1 AS x".to_string(), + stat_columns: vec!["count".to_string()], + dummy_columns: vec![], + consumed_aesthetics: vec!["weight".to_string()], + }; + let result = wrap_stat_with_dummy_pos1("SELECT * FROM raw", inner); + match result { + StatResult::Transformed { + query, + stat_columns, + dummy_columns, + consumed_aesthetics, + } => { + assert!(query.contains("__ggsql_stat_dummy")); + 
assert!(query.contains("__ggsql_stat_pos1")); + assert!(query.contains("SELECT 1 AS x")); + assert_eq!(stat_columns, vec!["count".to_string(), "pos1".to_string()]); + assert_eq!(dummy_columns, vec!["pos1".to_string()]); + assert_eq!(consumed_aesthetics, vec!["weight".to_string()]); + } + _ => panic!("expected Transformed"), + } + } + + #[test] + fn wrap_stat_with_dummy_pos1_idempotent_on_pos1() { + // Caller already had pos1 in stat_columns/dummy_columns; helper must + // not duplicate. + let inner = StatResult::Transformed { + query: "SELECT 1".to_string(), + stat_columns: vec!["pos1".to_string()], + dummy_columns: vec!["pos1".to_string()], + consumed_aesthetics: vec![], + }; + let result = wrap_stat_with_dummy_pos1("SELECT *", inner); + match result { + StatResult::Transformed { + stat_columns, + dummy_columns, + .. + } => { + assert_eq!(stat_columns, vec!["pos1".to_string()]); + assert_eq!(dummy_columns, vec!["pos1".to_string()]); + } + _ => panic!("expected Transformed"), + } + } + #[test] fn test_color_alias_requires_stroke_or_fill() { // Geom with neither stroke nor fill: color alias should NOT be supported diff --git a/src/plot/layer/geom/violin.rs b/src/plot/layer/geom/violin.rs index 6ee8d95b6..41d3b7644 100644 --- a/src/plot/layer/geom/violin.rs +++ b/src/plot/layer/geom/violin.rs @@ -1,6 +1,6 @@ //! Violin geom implementation -use super::types::POSITION_VALUES; +use super::types::{wrap_with_dummy_axis, POSITION_VALUES}; use super::{DefaultAesthetics, GeomTrait, GeomType, StatResult}; use crate::{ naming, @@ -38,7 +38,10 @@ impl GeomTrait for Violin { fn aesthetics(&self) -> DefaultAesthetics { DefaultAesthetics { defaults: &[ - ("pos1", DefaultAestheticValue::Required), + // pos1 is optional - if omitted, stat_violin synthesises a + // dummy categorical axis so the geom renders a single violin + // of the whole pos2 distribution. 
+ ("pos1", DefaultAestheticValue::Null), ("pos2", DefaultAestheticValue::Required), ("weight", DefaultAestheticValue::Null), ("fill", DefaultAestheticValue::String("black")), @@ -51,10 +54,6 @@ impl GeomTrait for Violin { } } - fn needs_stat_transform(&self, _aesthetics: &Mappings) -> bool { - true - } - fn default_params(&self) -> &'static [ParamDefinition] { const PARAMS: &[ParamDefinition] = &[ ParamDefinition { @@ -100,6 +99,7 @@ impl GeomTrait for Violin { fn default_remappings(&self) -> DefaultAesthetics { DefaultAesthetics { defaults: &[ + ("pos1", DefaultAestheticValue::Column("pos1")), ("pos2", DefaultAestheticValue::Column("pos2")), ("offset", DefaultAestheticValue::Column("density")), ], @@ -212,28 +212,64 @@ fn stat_violin( )); } + // pos1 is optional. When the user omits it, wrap the source with a + // synthetic dummy categorical column and group by that column so the + // density stat collapses to a single violin spanning the whole dataset. let mut group_by = group_by.to_vec(); - if let Some(x_col) = get_column_name(aesthetics, "pos1") { - // We want to ensure x is included as a grouping - if !group_by.contains(&x_col) { - group_by.push(x_col); + let (working_query, use_dummy) = match get_column_name(aesthetics, "pos1") { + Some(x_col) => { + if !group_by.contains(&x_col) { + group_by.push(x_col); + } + (query.to_string(), false) } - } else { - return Err(GgsqlError::ValidationError( - "Violin requires 'x' aesthetic mapping (categorical)".to_string(), - )); - } + None => { + let dummy_col = naming::stat_column("pos1"); + group_by.push(dummy_col); + (wrap_with_dummy_axis(query, "pos1"), true) + } + }; // Violin uses tails parameter from user (default 3.0 set in default_params) - super::density::stat_density( - query, + let inner = super::density::stat_density( + &working_query, aesthetics, "pos2", None, group_by.as_slice(), parameters, dialect, - ) + )?; + + if !use_dummy { + return Ok(inner); + } + + // Density returned its own Transformed result; tag 
it with the dummy + // column metadata so execute/layer.rs marks the resulting pos1 aesthetic + // as a dummy and the writer suppresses the axis. + match inner { + StatResult::Identity => unreachable!("stat_density always returns Transformed"), + StatResult::Transformed { + query, + mut stat_columns, + mut dummy_columns, + consumed_aesthetics, + } => { + if !stat_columns.iter().any(|s| s == "pos1") { + stat_columns.push("pos1".to_string()); + } + if !dummy_columns.iter().any(|s| s == "pos1") { + dummy_columns.push("pos1".to_string()); + } + Ok(StatResult::Transformed { + query, + stat_columns, + dummy_columns, + consumed_aesthetics, + }) + } + } } #[cfg(test)] @@ -581,6 +617,55 @@ mod tests { assert!((values[2] - 0.3).abs() < 1e-6, "1.0 should become 0.3"); } + #[test] + fn test_violin_dummy_pos1_when_unmapped() { + // pos2 only - pos1 omitted should produce a single violin via dummy x. + let query = "SELECT flipper_length FROM penguins"; + let mut aesthetics = Mappings::new(); + aesthetics.insert( + "pos2".to_string(), + AestheticValue::standard_column("flipper_length".to_string()), + ); + let groups: Vec<String> = vec![]; + let mut parameters = HashMap::new(); + parameters.insert("bandwidth".to_string(), ParameterValue::Number(5.0)); + parameters.insert( + "kernel".to_string(), + ParameterValue::String("gaussian".to_string()), + ); + + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + let setup_sql = "CREATE TABLE penguins AS SELECT * FROM (VALUES + (181.0), (186.0), (195.0), (217.0), (221.0), (230.0), (192.0) + ) AS t(flipper_length)"; + reader.execute_sql(setup_sql).unwrap(); + let execute = |sql: &str| reader.execute_sql(sql); + + let result = stat_violin(query, &aesthetics, &groups, &parameters, &AnsiDialect) + .expect("stat_violin should succeed without pos1"); + + match result { + StatResult::Transformed { + query: stat_query, + stat_columns, + dummy_columns, + .. 
+ } => { + assert!(stat_columns.contains(&"pos1".to_string())); + assert_eq!(dummy_columns, vec!["pos1".to_string()]); + assert!(stat_query.contains("__ggsql_stat_dummy")); + assert!(stat_query.contains("__ggsql_stat_pos1")); + + let df = execute(&stat_query).expect("Generated SQL should execute"); + assert!(df.height() > 0); + let pos1_col = df.column("__ggsql_stat_pos1").unwrap(); + let unique = count_unique_strings(pos1_col); + assert_eq!(unique, 1, "dummy pos1 should collapse to one group"); + } + _ => panic!("Expected Transformed result"), + } + } + #[test] fn test_violin_post_process_no_offset_column() { use crate::df; diff --git a/src/plot/layer/orientation.rs b/src/plot/layer/orientation.rs index 797b06821..cca3cbd9e 100644 --- a/src/plot/layer/orientation.rs +++ b/src/plot/layer/orientation.rs @@ -23,7 +23,7 @@ //! - For two-axis geoms (bar, boxplot): if pos1 is continuous and pos2 is discrete → "transposed" //! - For single-axis geoms (histogram, density): if pos2 has a scale but pos1 doesn't → "transposed" -use super::geom::GeomType; +use super::geom::{Geom, GeomType}; use super::Layer; use crate::plot::aesthetic::{is_position_aesthetic, AestheticContext}; use crate::plot::scale::ScaleTypeKind; @@ -125,7 +125,7 @@ pub fn geom_has_implicit_orientation(geom: &GeomType) -> bool { /// 4. **Default**: Primary fn detect_from_scales( scales: &[Scale], - _geom: &GeomType, + geom: &GeomType, mappings: &Mappings, remappings: &Mappings, ) -> &'static str { @@ -152,12 +152,24 @@ fn detect_from_scales( let has_pos1 = pos1_scale.is_some(); let has_pos2 = pos2_scale.is_some(); - // Rule 1: Single scale present - that axis is primary + // Rule 1: Single scale present - that axis is primary. // Only apply when there are explicit position mappings; otherwise the user // is just customizing a scale (e.g., SCALE y SETTING expand) without intending // to change orientation. The geom's default_remappings will define orientation. 
+ // + // For geoms with optional pos1 *and* a required pos2-side aesthetic + // (boxplot/violin/range): mapping only the value axis means "single + // distribution / interval", not "horizontal orientation". Leave the layer + // aligned and let the stat synthesise the dummy categorical axis. Bar + // (whose pos1 *and* pos2 are both optional) keeps its historical + // behaviour of transposing when only pos2 is mapped. if has_pos1_mapping || has_pos2_mapping { - if has_pos2 && !has_pos1 { + let aes = Geom::from_type(*geom).aesthetics(); + let pos1_optional = !aes.is_required("pos1"); + let pos2_required = + aes.is_required("pos2") || aes.is_required("pos2min") || aes.is_required("pos2max"); + let dummy_axis_geom = pos1_optional && pos2_required; + if has_pos2 && !has_pos1 && (!dummy_axis_geom || has_pos1_mapping) { return TRANSPOSED; } if has_pos1 && !has_pos2 { diff --git a/src/plot/main.rs b/src/plot/main.rs index 07018e0c8..4535c0bbf 100644 --- a/src/plot/main.rs +++ b/src/plot/main.rs @@ -366,10 +366,11 @@ mod tests { assert!(valid_point.validate_mapping(&None, false).is_ok()); - let invalid_point = Layer::new(Geom::point()) + // Line still requires both pos1 and pos2 - mapping only one fails. + let invalid_line = Layer::new(Geom::line()) .with_aesthetic("pos1".to_string(), AestheticValue::standard_column("x")); - assert!(invalid_point.validate_mapping(&None, false).is_err()); + assert!(invalid_line.validate_mapping(&None, false).is_err()); let valid_ribbon = Layer::new(Geom::ribbon()) .with_aesthetic("pos1".to_string(), AestheticValue::standard_column("x")) @@ -488,7 +489,9 @@ mod tests { assert!(point.is_supported("size")); assert!(point.is_supported("shape")); assert!(!point.is_supported("linetype")); - assert_eq!(point.required(), &["pos1", "pos2"]); + // Both axes are optional - omitted axes become dummy categorical + // axes (strip plot, or single dot when both omitted + aggregate). 
+ assert!(point.required().is_empty()); // Line geom let line = Geom::line().aesthetics(); @@ -526,10 +529,10 @@ mod tests { &["pos1", "pos2", "pos1end", "pos2end"] ); - // Range requires pos1, pos2min, pos2max + // Range requires pos2min, pos2max; pos1 is optional (omit → dummy axis). assert_eq!( Geom::range().aesthetics().required(), - &["pos1", "pos2min", "pos2max"] + &["pos2min", "pos2max"] ); } diff --git a/src/reader/mod.rs b/src/reader/mod.rs index 01ecc5481..84a54f2da 100644 --- a/src/reader/mod.rs +++ b/src/reader/mod.rs @@ -1344,6 +1344,180 @@ mod tests { ); } + #[test] + fn test_boxplot_dummy_x() { + // Boxplot with only y mapped: should render a single boxplot of the + // whole distribution and suppress the categorical x axis. + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + let query = r#" + VISUALISE FROM ggsql:penguins + DRAW boxplot MAPPING bill_len AS y + "#; + + let spec = reader.execute(query).unwrap(); + let writer = VegaLiteWriter::new(); + let result = writer.render(&spec).unwrap(); + + let json: serde_json::Value = serde_json::from_str(&result).unwrap(); + // Boxplot is a composite renderer (multiple sub-layers). Check that + // the first layer's x encoding suppresses its axis. + let layer = data_layer(&json, 0); + let encoding = &layer["encoding"]; + assert!( + encoding["x"]["axis"].is_null(), + "Boxplot dummy x should have axis: null. Encoding: {}", + serde_json::to_string_pretty(encoding).unwrap() + ); + } + + #[test] + fn test_violin_dummy_x() { + // Violin with only y mapped: single violin spanning the whole dataset. 
+ let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + let query = r#" + VISUALISE FROM ggsql:penguins + DRAW violin MAPPING bill_len AS y + "#; + + let spec = reader.execute(query).unwrap(); + let writer = VegaLiteWriter::new(); + let result = writer.render(&spec).unwrap(); + + let json: serde_json::Value = serde_json::from_str(&result).unwrap(); + let layer = data_layer(&json, 0); + let encoding = &layer["encoding"]; + assert!( + encoding["x"]["axis"].is_null(), + "Violin dummy x should have axis: null. Encoding: {}", + serde_json::to_string_pretty(encoding).unwrap() + ); + } + + #[test] + fn test_point_dummy_x() { + // Point with only y mapped: strip plot at a single dummy x position. + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + let query = r#" + VISUALISE FROM ggsql:penguins + DRAW point MAPPING bill_len AS y + "#; + + let spec = reader.execute(query).unwrap(); + let writer = VegaLiteWriter::new(); + let result = writer.render(&spec).unwrap(); + + let json: serde_json::Value = serde_json::from_str(&result).unwrap(); + let layer = data_layer(&json, 0); + let encoding = &layer["encoding"]; + assert!( + encoding["x"]["axis"].is_null(), + "Point dummy x should have axis: null. Encoding: {}", + serde_json::to_string_pretty(encoding).unwrap() + ); + } + + #[test] + fn test_range_dummy_x() { + // Range with only ymin/ymax mapped: a single vertical interval. 
+ let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + let query = r#" + SELECT 10.0 AS lo, 20.0 AS hi + VISUALISE + DRAW range MAPPING lo AS ymin, hi AS ymax + "#; + + let spec = reader.execute(query).unwrap(); + let writer = VegaLiteWriter::new(); + let result = writer.render(&spec).unwrap(); + + let json: serde_json::Value = serde_json::from_str(&result).unwrap(); + let layer = data_layer(&json, 0); + let encoding = &layer["encoding"]; + assert!( + encoding["x"]["axis"].is_null(), + "Range dummy x should have axis: null. Encoding: {}", + serde_json::to_string_pretty(encoding).unwrap() + ); + } + + #[test] + fn test_point_dummy_y() { + // Symmetric to test_point_dummy_x: only x mapped means dummy y. + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + let query = r#" + VISUALISE FROM ggsql:penguins + DRAW point MAPPING bill_len AS x + "#; + + let spec = reader.execute(query).unwrap(); + let writer = VegaLiteWriter::new(); + let result = writer.render(&spec).unwrap(); + + let json: serde_json::Value = serde_json::from_str(&result).unwrap(); + let layer = data_layer(&json, 0); + let encoding = &layer["encoding"]; + assert!( + encoding["y"]["axis"].is_null(), + "Point dummy y should have axis: null. Encoding: {}", + serde_json::to_string_pretty(encoding).unwrap() + ); + } + + #[test] + fn test_point_dummy_both_with_aggregate() { + // Both axes omitted, but aggregate gives the single point meaning: + // a count of all rows at the dummy x/y intersection. 
+ let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + let query = r#" + VISUALISE FROM ggsql:penguins + DRAW point MAPPING bill_len AS size + SETTING aggregate => 'size:count' + "#; + + let spec = reader.execute(query).unwrap(); + let writer = VegaLiteWriter::new(); + let result = writer.render(&spec).unwrap(); + + let json: serde_json::Value = serde_json::from_str(&result).unwrap(); + let layer = data_layer(&json, 0); + let encoding = &layer["encoding"]; + assert!( + encoding["x"]["axis"].is_null(), + "Both-dummy point should hide x axis. Encoding: {}", + serde_json::to_string_pretty(encoding).unwrap() + ); + assert!( + encoding["y"]["axis"].is_null(), + "Both-dummy point should hide y axis. Encoding: {}", + serde_json::to_string_pretty(encoding).unwrap() + ); + } + + #[test] + fn test_point_dummy_x_with_aggregate() { + // Point with aggregate SETTING and no x mapping: should aggregate the + // whole dataset to a single point and suppress the dummy x axis. + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + let query = r#" + VISUALISE FROM ggsql:penguins + DRAW point MAPPING bill_len AS y + SETTING aggregate => 'mean' + "#; + + let spec = reader.execute(query).unwrap(); + let writer = VegaLiteWriter::new(); + let result = writer.render(&spec).unwrap(); + + let json: serde_json::Value = serde_json::from_str(&result).unwrap(); + let layer = data_layer(&json, 0); + let encoding = &layer["encoding"]; + assert!( + encoding["x"]["axis"].is_null(), + "Aggregated point with dummy x should have axis: null. 
Encoding: {}", + serde_json::to_string_pretty(encoding).unwrap() + ); + } + #[test] fn test_bar_chart_with_expand_setting() { // Test bar chart with SCALE y SETTING expand - should work even when y is stat-derived diff --git a/src/validate.rs b/src/validate.rs index 8c7e715c6..d4e2245bf 100644 --- a/src/validate.rs +++ b/src/validate.rs @@ -315,9 +315,9 @@ mod tests { #[test] fn test_validate_missing_required_aesthetic() { - // Point requires x and y, but we only provide x + // Line requires both x and y; mapping only x is invalid. let validated = - validate("SELECT 1 as x, 2 as y VISUALISE DRAW point MAPPING x AS x").unwrap(); + validate("SELECT 1 as x, 2 as y VISUALISE DRAW line MAPPING x AS x").unwrap(); assert!(!validated.valid()); assert!(!validated.errors().is_empty()); assert!(validated.errors()[0].message.contains("y"));