From 1f6ec7ceb5dd9fd7fc57f9a63ea6d41952038d35 Mon Sep 17 00:00:00 2001 From: Thomas Lin Pedersen Date: Fri, 8 May 2026 10:29:27 +0200 Subject: [PATCH 1/5] add side to jitter and boxplot --- CHANGELOG.md | 10 +- doc/syntax/layer/position/jitter.qmd | 14 + doc/syntax/layer/type/boxplot.qmd | 12 + src/plot/layer/geom/boxplot.rs | 16 +- src/plot/layer/geom/types.rs | 5 + src/plot/layer/geom/violin.rs | 4 +- src/plot/layer/position/jitter.rs | 273 +++++++++++++++++++- src/writer/vegalite/layer.rs | 365 ++++++++++++++++++++++++--- 8 files changed, 648 insertions(+), 51 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1a52e9651..4252045a2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,9 +5,17 @@ - New `aggregate` SETTING on Identity-stat layers (point, line, area, bar, ribbon, range, segment, arrow, rule, text). By default it collapses each group to a single row by replacing every numeric mapping in place with its aggregated - value. See the `DRAW` documentation for details. + value. See the `DRAW` documentation for details (#384). - Added panel decorations (grid lines, axes, background) for polar coordinates (#156). - Added `radar` setting to polar coordinates for making radar plots (#418). +- New `side` SETTING on the `boxplot` layer and the `jitter` position, mirroring + the existing `violin` setting (#337). + +### Fixed + +- Dodging of horizontal violin plots were broken due to a bad orientation + assumption in the VegaLite writer. We now correctly use the orientation to + dodge in the correct dimension. 
## 0.3.2 - 2026-05-05 diff --git a/doc/syntax/layer/position/jitter.qmd b/doc/syntax/layer/position/jitter.qmd index 9ff949eda..57f129b43 100644 --- a/doc/syntax/layer/position/jitter.qmd +++ b/doc/syntax/layer/position/jitter.qmd @@ -23,6 +23,12 @@ Apart from the settings of the layer type, setting `position => 'jitter'` will a If `distribution` is either `'density'` or `'intensity'` then one of the axes must be continuous * `bandwidth`: Smoothing bandwidth for the `'density'` and `'intensity'` distributions (must be > 0). If absent (default), the bandwidth will be computed using Silverman's rule of thumb. * `adjust`: Multiplier for the `bandwidth` setting (must be > 0). Defaults to 1. +* `side`: Constrains the jitter to one side of the original position by folding the sample into half of the width. Dodge centers and per-group widths are computed from the full `width`, so a one-sided jitter sits inside half of the same allocated band that a two-sided jitter would fill — pairing cleanly with a half-violin or half-boxplot on the other side. One of: + * `'both'` (default) jitters in both directions equally. + * `'left'` or `'bottom'` jitters only toward negative offsets. + * `'right'` or `'top'` jitters only toward positive offsets. + + When both axes are jittered, `side` applies independently to each axis (e.g. `'right'` produces non-negative offsets on both axes). 
## Examples When plotting points on a discrete axis they are all placed in the middle @@ -65,3 +71,11 @@ DRAW point SCALE BINNED fill SETTING breaks => 4, pretty => false ``` + +Pair a half-violin with one-sided jittered points by setting opposite `side` values: + +```{ggsql} +VISUALISE species AS x, bill_dep AS y FROM ggsql:penguins +DRAW violin SETTING side => 'left' +DRAW point SETTING position => 'jitter', side => 'right', width => 0.4 +``` diff --git a/doc/syntax/layer/type/boxplot.qmd b/doc/syntax/layer/type/boxplot.qmd index 39ecc33f1..601f77663 100644 --- a/doc/syntax/layer/type/boxplot.qmd +++ b/doc/syntax/layer/type/boxplot.qmd @@ -27,6 +27,10 @@ The following aesthetics are recognised by the boxplot layer. * `outliers`: Whether to display outliers as points. Defaults to `true`. * `coef`: Length of the whiskers as a multiple of the IQR (must be >= 0). Defaults to `1.5`. * `width`: Relative width of the boxes (0 to 1). Defaults to `0.9`. +* `side`: Determines the sides of the centerline where the box is displayed. Only the box and median tick shift to the chosen side; whiskers and outliers remain on the centerline. The full `width` is preserved for dodge calculations, so a half-box pairs cleanly with a half-violin or one-sided jitter on the same band. One of: + * `'both'` (default) displays a complete box on both sides of the centerline. + * `'left'` or `'bottom'` displays only the half-box on the left side or bottom side. + * `'right'` or `'top'` displays only the half-box on the right side or top side. ## Data transformation Per group, data will be divided into 4 quartiles and summary statistics will be derived from their extremes. 
@@ -91,3 +95,11 @@ VISUALISE FROM ggsql:penguins DRAW boxplot MAPPING species AS y, bill_len AS x ``` + +Pair a half-violin with a half-boxplot on the same category by setting opposite `side` values: + +```{ggsql} +VISUALISE bill_len AS x, species AS y FROM ggsql:penguins +DRAW violin SETTING side => 'top' +DRAW boxplot SETTING side => 'bottom', width => 0.3 +``` diff --git a/src/plot/layer/geom/boxplot.rs b/src/plot/layer/geom/boxplot.rs index 5d99b358c..1d8b832f2 100644 --- a/src/plot/layer/geom/boxplot.rs +++ b/src/plot/layer/geom/boxplot.rs @@ -2,7 +2,7 @@ use std::collections::HashMap; -use super::types::POSITION_VALUES; +use super::types::{POSITION_VALUES, SIDE_VALUES}; use super::{DefaultAesthetics, GeomTrait, GeomType}; use crate::{ naming, @@ -72,6 +72,11 @@ impl GeomTrait for Boxplot { default: DefaultParamValue::String("dodge"), constraint: ParamConstraint::string_option(POSITION_VALUES), }, + ParamDefinition { + name: "side", + default: DefaultParamValue::String("both"), + constraint: ParamConstraint::string_option(SIDE_VALUES), + }, ]; PARAMS } @@ -539,7 +544,7 @@ mod tests { let boxplot = Boxplot; let params = boxplot.default_params(); - assert_eq!(params.len(), 4); + assert_eq!(params.len(), 5); // Find and verify outliers param let outliers_param = params.iter().find(|p| p.name == "outliers").unwrap(); @@ -566,6 +571,13 @@ mod tests { position_param.default, DefaultParamValue::String("dodge") )); + + // Find and verify side param (defaults to both) + let side_param = params.iter().find(|p| p.name == "side").unwrap(); + assert!(matches!( + side_param.default, + DefaultParamValue::String("both") + )); } #[test] diff --git a/src/plot/layer/geom/types.rs b/src/plot/layer/geom/types.rs index 8b390547a..f0ff1591e 100644 --- a/src/plot/layer/geom/types.rs +++ b/src/plot/layer/geom/types.rs @@ -18,6 +18,11 @@ pub const POSITION_VALUES: &[&str] = &["identity", "stack", "dodge", "jitter"]; /// Closed interval side values for binned data pub const 
CLOSED_VALUES: &[&str] = &["left", "right"]; +/// Standard `side` parameter values for layers and positions that can render +/// either both halves of a symmetric shape (or jitter range) or just one of +/// them. Used by violin, boxplot, and jitter. +pub const SIDE_VALUES: &[&str] = &["both", "left", "top", "right", "bottom"]; + /// Aesthetic aliases: user-facing names that resolve to concrete aesthetics. /// /// An alias is considered supported if any of its target aesthetics are supported diff --git a/src/plot/layer/geom/violin.rs b/src/plot/layer/geom/violin.rs index 6ee8d95b6..4e9cb20a3 100644 --- a/src/plot/layer/geom/violin.rs +++ b/src/plot/layer/geom/violin.rs @@ -1,6 +1,6 @@ //! Violin geom implementation -use super::types::POSITION_VALUES; +use super::types::{POSITION_VALUES, SIDE_VALUES}; use super::{DefaultAesthetics, GeomTrait, GeomType, StatResult}; use crate::{ naming, @@ -24,8 +24,6 @@ const KERNEL_VALUES: &[&str] = &[ "cosine", ]; -const SIDE_VALUES: &[&str] = &["both", "left", "top", "right", "bottom"]; - /// Violin geom - violin plots (mirrored density) #[derive(Debug, Clone, Copy)] pub struct Violin; diff --git a/src/plot/layer/position/jitter.rs b/src/plot/layer/position/jitter.rs index 27cf4f3fa..b386aaf15 100644 --- a/src/plot/layer/position/jitter.rs +++ b/src/plot/layer/position/jitter.rs @@ -19,6 +19,7 @@ use super::{ Layer, PositionTrait, PositionType, }; use crate::array_util::{as_f64, cast_array, new_f64_array_non_null}; +use crate::plot::layer::geom::types::SIDE_VALUES; use crate::plot::types::{DefaultParamValue, ParamConstraint, ParamDefinition, ParameterValue}; use crate::{naming, DataFrame, GgsqlError, Plot, Result}; use arrow::array::Array; @@ -28,6 +29,40 @@ use rand::Rng; /// Valid distribution types for jitter position const DISTRIBUTION_VALUES: &[&str] = &["uniform", "normal", "density", "intensity"]; +/// Which side(s) of the original position the jitter offset is allowed to occupy. 
+/// +/// `Both` is the default: the sampled offset is used as drawn, ranging across +/// `[-width/2, width/2]`. `Positive` and `Negative` fold the sample so it lies +/// in `[0, width/2]` or `[-width/2, 0]` respectively, leaving the other half +/// of the band empty for a complementary half-shape (half-violin, half-box, +/// or oppositely-sided jitter) to occupy. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum JitterSide { + Both, + Positive, + Negative, +} + +impl JitterSide { + fn from_str(s: &str) -> Self { + match s { + "right" | "top" => Self::Positive, + "left" | "bottom" => Self::Negative, + _ => Self::Both, + } + } + + /// Fold a raw sample (which the underlying distribution drew symmetrically + /// around 0) into the half-range corresponding to this side. + fn fold(self, raw: f64) -> f64 { + match self { + Self::Both => raw, + Self::Positive => raw.abs(), + Self::Negative => -raw.abs(), + } + } +} + /// Jitter distribution type #[derive(Debug, Clone, Copy, PartialEq, Eq)] enum JitterDistribution { @@ -298,6 +333,11 @@ impl PositionTrait for Jitter { default: DefaultParamValue::Number(1.0), constraint: ParamConstraint::number_min_exclusive(0.0), }, + ParamDefinition { + name: "side", + default: DefaultParamValue::String("both"), + constraint: ParamConstraint::string_option(SIDE_VALUES), + }, ]; PARAMS } @@ -474,6 +514,22 @@ fn apply_jitter(df: DataFrame, layer: &Layer, spec: &Plot) -> Result }) .unwrap_or(JitterDistribution::Uniform); + // Get side parameter (default "both"). When set to one side, the sampled + // offset is folded into that half of the range so the jitter occupies only + // one side of the original position. The full `width` is still used to + // compute dodge centers and per-group `adjusted_width`, so a half-jitter + // sits inside half of the same allocated band that a `side: 'both'` jitter + // would fill — this is what allows it to compose cleanly with a + // half-violin or half-boxplot on the other side. 
+ let side = layer + .parameters + .get("side") + .and_then(|v| match v { + ParameterValue::String(s) => Some(JitterSide::from_str(s.as_str())), + _ => None, + }) + .unwrap_or(JitterSide::Both); + // Density/intensity distribution validation: requires exactly one continuous axis // (one discrete axis to jitter along, one continuous axis for density) let pos1_continuous = !jitter_pos1; @@ -560,16 +616,17 @@ fn apply_jitter(df: DataFrame, layer: &Layer, spec: &Plot) -> Result None }; - // Helper to generate jitter with optional density scaling + // Helper to generate jitter with optional density scaling and side folding let make_jitter = |rng: &mut rand::rngs::ThreadRng, jitter_width: f64, count: usize| -> Vec { (0..count) .map(|i| { - let jitter = distribution.sample(rng, jitter_width); + let raw = distribution.sample(rng, jitter_width); + let folded = side.fold(raw); if let Some(ref scales) = density_scales { - jitter * scales[i] + folded * scales[i] } else { - jitter + folded } }) .collect() @@ -978,7 +1035,7 @@ mod tests { fn test_jitter_default_params() { let jitter = Jitter; let params = jitter.default_params(); - assert_eq!(params.len(), 5); + assert_eq!(params.len(), 6); assert_eq!(params[0].name, "width"); assert!(matches!(params[0].default, DefaultParamValue::Number(0.9))); assert_eq!(params[1].name, "dodge"); @@ -996,6 +1053,11 @@ mod tests { assert!(matches!(params[3].default, DefaultParamValue::Null)); assert_eq!(params[4].name, "adjust"); assert!(matches!(params[4].default, DefaultParamValue::Number(1.0))); + assert_eq!(params[5].name, "side"); + assert!(matches!( + params[5].default, + DefaultParamValue::String("both") + )); } #[test] @@ -1571,4 +1633,205 @@ mod tests { let q75 = super::quantile_cont(&sorted, 0.75); assert!((q75 - 4.0).abs() < 1e-10); } + + #[test] + fn test_jitter_side_value_mapping() { + // The sign assignment is symmetric across both axes: "right" / "top" + // → positive, "left" / "bottom" → negative, anything else → both. 
+ assert_eq!(JitterSide::from_str("right"), JitterSide::Positive); + assert_eq!(JitterSide::from_str("top"), JitterSide::Positive); + assert_eq!(JitterSide::from_str("left"), JitterSide::Negative); + assert_eq!(JitterSide::from_str("bottom"), JitterSide::Negative); + assert_eq!(JitterSide::from_str("both"), JitterSide::Both); + assert_eq!(JitterSide::from_str("anything-else"), JitterSide::Both); + + assert_eq!(JitterSide::Positive.fold(0.3), 0.3); + assert_eq!(JitterSide::Positive.fold(-0.3), 0.3); + assert_eq!(JitterSide::Negative.fold(0.3), -0.3); + assert_eq!(JitterSide::Negative.fold(-0.3), -0.3); + assert_eq!(JitterSide::Both.fold(0.3), 0.3); + assert_eq!(JitterSide::Both.fold(-0.3), -0.3); + } + + #[test] + fn test_jitter_side_pos1_one_sided_no_dodge() { + // dodge=false so the pos1offset is exclusively the jitter sample — + // we can directly assert the sign matches `side`. + let jitter = Jitter; + let mut spec = Plot::new(); + spec.scales.push(make_discrete_scale("pos1")); + spec.scales.push(make_continuous_scale("pos2")); + + for (side, expect_positive) in [ + ("right", true), + ("top", true), + ("left", false), + ("bottom", false), + ] { + let mut layer = make_test_layer(); + layer.partition_by = vec![]; // disable groups so dodge is a no-op + layer + .parameters + .insert("dodge".to_string(), ParameterValue::Boolean(false)); + layer + .parameters + .insert("side".to_string(), ParameterValue::String(side.to_string())); + + let (result, _) = jitter + .apply_adjustment(make_test_df(), &layer, &spec) + .unwrap(); + + let offset_col = result.column("__ggsql_aes_pos1offset__").unwrap(); + let offsets = as_f64(offset_col).unwrap(); + for i in 0..offsets.len() { + let v = offsets.value(i); + if expect_positive { + assert!( + v >= 0.0, + "side={} should produce non-negative pos1offset, got {}", + side, + v + ); + } else { + assert!( + v <= 0.0, + "side={} should produce non-positive pos1offset, got {}", + side, + v + ); + } + assert!( + v.abs() <= 0.45 + 1e-9, + 
"magnitude should stay within width/2" + ); + } + } + } + + #[test] + fn test_jitter_side_pos2_one_sided_no_dodge() { + // Jitter pos2 (vertical axis) with side. Same per-axis mapping. + let jitter = Jitter; + let mut spec = Plot::new(); + spec.scales.push(make_continuous_scale("pos1")); + spec.scales.push(make_discrete_scale("pos2")); + + for (side, expect_positive) in [ + ("top", true), + ("right", true), + ("bottom", false), + ("left", false), + ] { + let mut layer = make_test_layer(); + layer.partition_by = vec![]; + layer + .parameters + .insert("dodge".to_string(), ParameterValue::Boolean(false)); + layer + .parameters + .insert("side".to_string(), ParameterValue::String(side.to_string())); + + let (result, _) = jitter + .apply_adjustment(make_test_df(), &layer, &spec) + .unwrap(); + + let offset_col = result.column("__ggsql_aes_pos2offset__").unwrap(); + let offsets = as_f64(offset_col).unwrap(); + for i in 0..offsets.len() { + let v = offsets.value(i); + if expect_positive { + assert!(v >= 0.0, "side={} pos2offset should be ≥0, got {}", side, v); + } else { + assert!(v <= 0.0, "side={} pos2offset should be ≤0, got {}", side, v); + } + } + } + } + + #[test] + fn test_jitter_side_with_dodge_keeps_full_band_width() { + // Load-bearing test for the dodge-composition invariant: a half-jitter + // must occupy half of the per-group dodge band, with the other half + // empty for a complementary half-shape (half-violin or half-box) to + // fill. Specifically: the total range remains [-0.45, +0.45] (width + // 0.9 / 2 groups) — same as `side: 'both'` — but each row's offset + // sits on the same side of its group's centerline. 
+ let jitter = Jitter; + let df = make_test_df(); + let mut spec = Plot::new(); + spec.scales.push(make_discrete_scale("pos1")); + spec.scales.push(make_continuous_scale("pos2")); + + let mut layer = make_test_layer(); // partition_by = ["fill"] => 2 dodge groups + layer.parameters.insert( + "side".to_string(), + ParameterValue::String("right".to_string()), + ); + + let (result, _) = jitter.apply_adjustment(df, &layer, &spec).unwrap(); + let offsets = as_f64(result.column("__ggsql_aes_pos1offset__").unwrap()).unwrap(); + let fill = as_str(result.column("__ggsql_aes_fill__").unwrap()).unwrap(); + + // Group X dodge center is at -0.225, group Y at +0.225. With side=right + // the jitter is folded to non-negative, so each row sits at center + + // [0, 0.225]. + for i in 0..offsets.len() { + let v = offsets.value(i); + let group = fill.value(i); + let center = if group == "X" { -0.225 } else { 0.225 }; + assert!( + (v - center) >= -1e-9, + "row {i} group {group}: offset {v} should be ≥ center {center}" + ); + assert!( + (v - center) <= 0.225 + 1e-9, + "row {i} group {group}: offset {v} should be ≤ center + width/2" + ); + } + + // And the overall range still spans the full 0.9 width across groups. + let max_offset = (0..offsets.len()) + .map(|i| offsets.value(i)) + .fold(f64::MIN, f64::max); + assert!( + max_offset <= 0.45 + 1e-9, + "max offset {} should not exceed full-width upper bound 0.45", + max_offset + ); + } + + #[test] + fn test_jitter_side_normal_distribution() { + // Folding works for the normal distribution too — it produces a + // half-normal sample with the same σ. 
+ let jitter = Jitter; + let mut spec = Plot::new(); + spec.scales.push(make_discrete_scale("pos1")); + spec.scales.push(make_continuous_scale("pos2")); + + let mut layer = make_test_layer(); + layer.partition_by = vec![]; + layer + .parameters + .insert("dodge".to_string(), ParameterValue::Boolean(false)); + layer.parameters.insert( + "distribution".to_string(), + ParameterValue::String("normal".to_string()), + ); + layer.parameters.insert( + "side".to_string(), + ParameterValue::String("right".to_string()), + ); + + let (result, _) = jitter + .apply_adjustment(make_test_df(), &layer, &spec) + .unwrap(); + let offsets = as_f64(result.column("__ggsql_aes_pos1offset__").unwrap()).unwrap(); + for i in 0..offsets.len() { + assert!( + offsets.value(i) >= 0.0, + "normal+side=right should yield non-negative offsets" + ); + } + } } diff --git a/src/writer/vegalite/layer.rs b/src/writer/vegalite/layer.rs index dce96d292..68f8c61dd 100644 --- a/src/writer/vegalite/layer.rs +++ b/src/writer/vegalite/layer.rs @@ -55,6 +55,19 @@ pub fn geom_to_mark(geom: &Geom) -> Value { }) } +/// Map a `side` value to a positive/negative sign in the orientation-aware way +/// shared by violin, boxplot, and (effectively) jitter rendering. Returns true +/// if `side` falls on the positive offset half (right/top of a vertical layer, +/// bottom/left of a horizontal layer). Caller is responsible for handling +/// `"both"` separately. +fn side_is_positive(side: &str, is_horizontal: bool) -> bool { + if is_horizontal { + matches!(side, "bottom" | "left") + } else { + matches!(side, "top" | "right") + } +} + /// Validate column references for a single layer against its specific DataFrame pub fn validate_layer_columns( layer: &Layer, @@ -1551,12 +1564,7 @@ impl GeomRenderer for ViolinRenderer { // It'll be implemented as an offset. 
let violin_offset = match layer.parameters.get("side") { Some(ParameterValue::String(side)) if side != "both" => { - let positive = if is_horizontal { - matches!(side.as_str(), "bottom" | "left") - } else { - matches!(side.as_str(), "top" | "right") - }; - if positive { + if side_is_positive(side, is_horizontal) { format!("[datum.{offset}]", offset = offset_col) } else { format!("[-datum.{offset}]", offset = offset_col) @@ -1589,8 +1597,16 @@ impl GeomRenderer for ViolinRenderer { .cloned() .unwrap_or_default(); - // Check if pos1offset exists (from dodging) - we'll combine it with violin offset - let pos1offset_col = naming::aesthetic_column("pos1offset"); + // Combine the violin offset with the dodge offset (if any). Dodge + // stores its per-row offset in the *categorical* axis offset column, + // which is pos1offset in vertical orientation and pos2offset in + // horizontal orientation (where the categorical axis has been + // flipped to pos2 by orientation resolution). + let dodge_offset_col = if is_horizontal { + naming::aesthetic_column("pos2offset") + } else { + naming::aesthetic_column("pos1offset") + }; let mut transforms = existing_transforms; transforms.extend(vec![ @@ -1604,11 +1620,11 @@ impl GeomRenderer for ViolinRenderer { "as": ["__violin_offset"] }), json!({ - // Add pos1offset (dodge displacement) if it exists, otherwise use violin offset directly - // This positions the violin correctly when dodging + // Add the dodge displacement (if any) so the violin is + // positioned correctly within its dodged group. "calculate": format!( - "datum.{pos1offset} != null ? datum.__violin_offset + datum.{pos1offset} : datum.__violin_offset", - pos1offset = pos1offset_col + "datum.{dodge} != null ? 
datum.__violin_offset + datum.{dodge} : datum.__violin_offset", + dodge = dodge_offset_col ), "as": "__final_offset" }), @@ -1924,7 +1940,7 @@ impl BoxplotRenderer { GgsqlError::WriterError("Boxplot requires 'y' aesthetic mapping".to_string()) })?; - let (pos1, pos1_end, _, pos2, pos2_end, _) = &context.channels; + let (pos1, pos1_end, pos1_offset, pos2, pos2_end, pos2_offset) = &context.channels; let value_var1 = if is_horizontal { pos1 } else { pos2 }; let value_var2 = if is_horizontal { pos1_end } else { pos2_end }; let axis = if is_horizontal { pos2 } else { pos1 }; @@ -2037,46 +2053,115 @@ impl BoxplotRenderer { upper_whiskers["encoding"][value_var1] = y_encoding.clone(); upper_whiskers["encoding"][value_var2] = y2_encoding.clone(); + // Resolve the `side` parameter. When `side != "both"`, the box and + // median tick render at half their normal size and anchor to the + // categorical centerline so they only occupy one side of each band. + // Whiskers and outliers stay on the centerline. The dodge-aware + // `width` value is unchanged so a half-box still occupies half of + // its dodge group's allocated space — this is what lets a half-box + // pair with a half-violin or one-sided jitter on the same band. + let side = layer + .parameters + .get("side") + .and_then(|v| match v { + ParameterValue::String(s) => Some(s.as_str()), + _ => None, + }) + .unwrap_or("both"); + let half_side = side != "both"; + let side_positive = half_side && side_is_positive(side, is_horizontal); + + // For `side != "both"`, halve the bar width and shift the bar to + // one side of the band. We reuse the same mechanism dodge already + // uses to shift bars within a band: an `xOffset` (or `yOffset`) + // encoding pointing at a numeric column whose value is interpreted + // as a band-fraction shift via a scale with domain `[-0.5, 0.5]`. 
+ // + // The box/median needs to be shifted by `±w/4` of the bandwidth so + // a half-width bar (`bandwidth*w/2`) sits centred on the middle of + // its chosen half-band. When dodge is also active there is already + // a `pos1offset` (or `pos2offset`) column carrying the dodge shift + // — we add the side shift to that and point the box/median at the + // combined column. Whiskers and outliers continue to use the + // original pos1offset (or none), so they stay centred on the + // dodge centerline as required. + let box_size = if half_side { + json!({"expr": format!("bandwidth('{}') * {} / 2", axis, width)}) + } else { + width_value.clone() + }; + let side_shift: f64 = if side_positive { + width / 4.0 + } else { + -width / 4.0 + }; + let size_key = if is_horizontal { "height" } else { "width" }; + + // Pick the categorical-axis offset channel and column based on + // orientation. The categorical axis is pos1 (vertical) or pos2 + // (horizontal); the offset channel is pos1_offset / pos2_offset. + let (offset_channel, base_offset_col) = if is_horizontal { + (pos2_offset.as_str(), naming::aesthetic_column("pos2offset")) + } else { + (pos1_offset.as_str(), naming::aesthetic_column("pos1offset")) + }; + let combined_offset_col = format!("__ggsql_box_side_offset__"); + + // Helper that, when half_side is true, adds a calculate transform + // to combine the dodge offset (if any) with the side shift, then + // points the layer's offset encoding at the combined column. + let apply_side_shift = |layer_spec: &mut Value| { + if !half_side { + return; + } + let calc_expr = format!( + "(datum[\"{base}\"] != null ? 
datum[\"{base}\"] : 0) + {shift}", + base = base_offset_col, + shift = side_shift, + ); + let existing = layer_spec + .get("transform") + .and_then(|t| t.as_array()) + .cloned() + .unwrap_or_default(); + let mut transforms = existing; + transforms.push(json!({ + "calculate": calc_expr, + "as": combined_offset_col, + })); + layer_spec["transform"] = json!(transforms); + + layer_spec["encoding"][offset_channel] = json!({ + "field": combined_offset_col, + "type": "quantitative", + "scale": {"domain": [-0.5, 0.5]}, + }); + }; + // Box (bar from y to y2, where y=q1 and y2=q3) let mut box_part = create_layer( &summary_prototype, "box", - if is_horizontal { - json!({ - "type": "bar", - "height": width_value, - "baseline": "middle" - }) - } else { - json!({ - "type": "bar", - "width": width_value, - "align": "center" - }) - }, + json!({ + "type": "bar", + size_key: box_size, + }), ); box_part["encoding"][value_var1] = y_encoding.clone(); box_part["encoding"][value_var2] = y2_encoding.clone(); + apply_side_shift(&mut box_part); - // Median line (tick at y, where y=median) + // Median line (tick at y, where y=median) — same width and shift as the box let mut median_line = create_layer( &summary_prototype, "median", - if is_horizontal { - json!({ - "type": "tick", - "height": width_value, - "baseline": "middle" - }) - } else { - json!({ - "type": "tick", - "width": width_value, - "align": "center" - }) - }, + json!({ + "type": "tick", + size_key: box_size, + }), ); median_line["encoding"][value_var1] = y_encoding; + apply_side_shift(&mut median_line); layers.push(lower_whiskers); layers.push(upper_whiskers); @@ -3757,6 +3842,206 @@ mod tests { ); } + #[test] + fn test_boxplot_side_parameter() { + use crate::plot::ParameterValue; + use crate::AestheticValue; + + // Render the boxplot layers with the given `side` and orientation, + // then return the box, median, lower-whisker, upper-whisker, and + // outlier mark specs as a tuple for assertion. 
+ fn render_marks( + side: Option<&str>, + is_horizontal: bool, + ) -> (Value, Value, Value, Value, Value) { + let mut layer = Layer::new(crate::plot::Geom::boxplot()); + layer + .mappings + .insert("pos1", AestheticValue::standard_column("species")); + layer + .mappings + .insert("pos2", AestheticValue::standard_column("bill_len")); + if let Some(s) = side { + layer + .parameters + .insert("side".to_string(), ParameterValue::String(s.to_string())); + } + if is_horizontal { + layer.parameters.insert( + "orientation".to_string(), + ParameterValue::String("transposed".to_string()), + ); + } + + // Build a minimal prototype with encoding fields the renderer reads. + let prototype = if is_horizontal { + json!({ + "mark": {"type": "point"}, + "encoding": { + "x": {"field": naming::aesthetic_column("pos1"), "type": "quantitative"}, + "y": {"field": "species", "type": "nominal"}, + }, + }) + } else { + json!({ + "mark": {"type": "point"}, + "encoding": { + "x": {"field": "species", "type": "nominal"}, + "y": {"field": naming::aesthetic_column("pos2"), "type": "quantitative"}, + }, + }) + }; + + let layers = BoxplotRenderer + .render_layers( + prototype, + &layer, + "__ggsql_layer_0__", + true, // include outliers + &RenderContext::default_for_test(), + ) + .unwrap(); + + // Order set by render_layers: outlier (if has_outliers), lower_whisker, + // upper_whisker, box, median. + ( + layers[3].clone(), // box + layers[4].clone(), // median + layers[1].clone(), // lower_whisker + layers[2].clone(), // upper_whisker + layers[0].clone(), // outlier + ) + } + + // Helper: extract the band-fraction shift from the offset encoding's + // calculate transform. Returns 0.0 if no side shift transform is present. 
+ fn extract_side_shift(layer_spec: &Value) -> f64 { + let transforms = match layer_spec.get("transform").and_then(|t| t.as_array()) { + Some(t) => t, + None => return 0.0, + }; + for t in transforms { + if let Some(as_field) = t.get("as").and_then(|s| s.as_str()) { + if as_field == "__ggsql_box_side_offset__" { + let calc = t.get("calculate").and_then(|c| c.as_str()).unwrap_or(""); + let after = match calc.split(") + ").nth(1) { + Some(s) => s, + None => return 0.0, + }; + return after.parse::().unwrap_or(0.0); + } + } + } + 0.0 + } + + // Default width=0.9 → side shift magnitude = 0.225 (= w/4). + let shift_mag = 0.225; + + // Default ("both") in vertical orientation: full width, no side shift. + let (box_v, _median_v, low_v, up_v, out_v) = render_marks(None, false); + assert_eq!(extract_side_shift(&box_v), 0.0); + // Box width should be the full-width expression (not halved). + let box_full_width = box_v["mark"]["width"].clone(); + assert!( + box_full_width + .get("expr") + .and_then(|e| e.as_str()) + .map(|s| !s.contains("/ 2")) + .unwrap_or(false), + "default box width should be full bandwidth*w, got {}", + box_full_width + ); + + // Vertical "right" / "top" → positive shift (box on right half of band). + for s in ["right", "top"] { + let (b, m, low, up, out) = render_marks(Some(s), false); + let shift = extract_side_shift(&b); + assert!( + (shift - shift_mag).abs() < 1e-9, + "side={s}: expected shift +{shift_mag}, got {shift}" + ); + assert!( + (extract_side_shift(&m) - shift_mag).abs() < 1e-9, + "side={s} median shift mismatch" + ); + // Box width is halved. + let bw = b["mark"]["width"]["expr"].as_str().unwrap_or(""); + assert!(bw.contains("/ 2"), "side={s} expected halved width, got {bw}"); + // xOffset encoding points at the combined column. + assert_eq!( + b["encoding"]["xOffset"]["field"], + json!("__ggsql_box_side_offset__"), + "side={s}" + ); + // Whiskers and outliers unchanged. 
+ assert_eq!(low["mark"], low_v["mark"], "lower whisker side={s}"); + assert_eq!(up["mark"], up_v["mark"], "upper whisker side={s}"); + assert_eq!(out["mark"], out_v["mark"], "outlier side={s}"); + assert!( + low.get("transform") + .and_then(|t| t.as_array()) + .map(|arr| arr.iter().all(|tr| tr + .get("as") + .and_then(|s| s.as_str()) + != Some("__ggsql_box_side_offset__"))) + .unwrap_or(true), + "side={s} lower whisker should not have side-shift transform" + ); + } + + // Vertical "left" / "bottom" → negative shift (box on left half of band). + for s in ["left", "bottom"] { + let (b, m, _, _, _) = render_marks(Some(s), false); + let shift = extract_side_shift(&b); + assert!( + (shift + shift_mag).abs() < 1e-9, + "side={s}: expected shift -{shift_mag}, got {shift}" + ); + assert!( + (extract_side_shift(&m) + shift_mag).abs() < 1e-9, + "side={s} median shift mismatch" + ); + } + + // Horizontal "both": full height, no shift. + let (box_h, _, _, _, _) = render_marks(None, true); + assert_eq!(extract_side_shift(&box_h), 0.0); + + // Horizontal "bottom" / "left" → positive (per violin convention, + // mapped to positive yOffset which renders below the centerline). + for s in ["bottom", "left"] { + let (b, m, _, _, _) = render_marks(Some(s), true); + assert!( + (extract_side_shift(&b) - shift_mag).abs() < 1e-9, + "side={s}: expected +{shift_mag}" + ); + assert!( + (extract_side_shift(&m) - shift_mag).abs() < 1e-9, + "side={s} median" + ); + // Horizontal uses yOffset, not xOffset. + assert_eq!( + b["encoding"]["yOffset"]["field"], + json!("__ggsql_box_side_offset__"), + "side={s}" + ); + } + + // Horizontal "top" / "right" → negative. 
+ for s in ["top", "right"] { + let (b, m, _, _, _) = render_marks(Some(s), true); + assert!( + (extract_side_shift(&b) + shift_mag).abs() < 1e-9, + "side={s}: expected -{shift_mag}" + ); + assert!( + (extract_side_shift(&m) + shift_mag).abs() < 1e-9, + "side={s} median" + ); + } + } + #[test] fn test_render_context_get_extent() { use crate::plot::{ArrayElement, Scale}; From 3f82649b23d4e5ca14477920ebc9dd27407cfb92 Mon Sep 17 00:00:00 2001 From: Thomas Lin Pedersen Date: Fri, 8 May 2026 10:31:22 +0200 Subject: [PATCH 2/5] update link to PR --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4252045a2..149aca60e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,13 +9,13 @@ - Added panel decorations (grid lines, axes, background) for polar coordinates (#156). - Added `radar` setting to polar coordinates for making radar plots (#418). - New `side` SETTING on the `boxplot` layer and the `jitter` position, mirroring - the existing `violin` setting (#337). + the existing `violin` setting (#439). ### Fixed - Dodging of horizontal violin plots were broken due to a bad orientation assumption in the VegaLite writer. We now correctly use the orientation to - dodge in the correct dimension. + dodge in the correct dimension (#439). ## 0.3.2 - 2026-05-05 From ca619d4c8a0b94847a4dbcb00eefef93378a4846 Mon Sep 17 00:00:00 2001 From: Thomas Lin Pedersen Date: Fri, 8 May 2026 10:31:35 +0200 Subject: [PATCH 3/5] reformat --- src/writer/vegalite/layer.rs | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/writer/vegalite/layer.rs b/src/writer/vegalite/layer.rs index 68f8c61dd..dd2f7837a 100644 --- a/src/writer/vegalite/layer.rs +++ b/src/writer/vegalite/layer.rs @@ -3967,7 +3967,10 @@ mod tests { ); // Box width is halved. 
let bw = b["mark"]["width"]["expr"].as_str().unwrap_or(""); - assert!(bw.contains("/ 2"), "side={s} expected halved width, got {bw}"); + assert!( + bw.contains("/ 2"), + "side={s} expected halved width, got {bw}" + ); // xOffset encoding points at the combined column. assert_eq!( b["encoding"]["xOffset"]["field"], @@ -3981,10 +3984,10 @@ mod tests { assert!( low.get("transform") .and_then(|t| t.as_array()) - .map(|arr| arr.iter().all(|tr| tr - .get("as") - .and_then(|s| s.as_str()) - != Some("__ggsql_box_side_offset__"))) + .map( + |arr| arr.iter().all(|tr| tr.get("as").and_then(|s| s.as_str()) + != Some("__ggsql_box_side_offset__")) + ) .unwrap_or(true), "side={s} lower whisker should not have side-shift transform" ); From 87bcf130161313ead85d64fc8129ff800deec677 Mon Sep 17 00:00:00 2001 From: Thomas Lin Pedersen Date: Fri, 8 May 2026 13:04:29 +0200 Subject: [PATCH 4/5] add more examples --- doc/syntax/layer/type/violin.qmd | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/doc/syntax/layer/type/violin.qmd b/doc/syntax/layer/type/violin.qmd index 37a59560a..1c95dc252 100644 --- a/doc/syntax/layer/type/violin.qmd +++ b/doc/syntax/layer/type/violin.qmd @@ -117,4 +117,16 @@ To achieve this outcome, you can set the `side` setting and adjust `width` to ta VISUALISE Temp AS x, Month AS y FROM ggsql:airquality DRAW violin SETTING width => 4, side => 'top' SCALE ORDINAL y -``` \ No newline at end of file +``` + +The same facilities can be used to create violins where each side encodes a different subset. + +```{ggsql} +VISUALISE body_mass AS y, species AS x, sex AS fill FROM ggsql:penguins +DRAW violin + SETTING side => 'left' + FILTER sex = 'male' +DRAW violin + SETTING side => 'right' + FILTER sex = 'female' +``` From 23ee3f86732dd1343b08faad27eb96c13b4a632c Mon Sep 17 00:00:00 2001 From: Thomas Lin Pedersen Date: Fri, 8 May 2026 13:06:21 +0200 Subject: [PATCH 5/5] appease clippy --- src/writer/vegalite/layer.rs | 2 +- 1 file changed, 1
insertion(+), 1 deletion(-) diff --git a/src/writer/vegalite/layer.rs b/src/writer/vegalite/layer.rs index dd2f7837a..2ac08834a 100644 --- a/src/writer/vegalite/layer.rs +++ b/src/writer/vegalite/layer.rs @@ -2105,7 +2105,7 @@ impl BoxplotRenderer { } else { (pos1_offset.as_str(), naming::aesthetic_column("pos1offset")) }; - let combined_offset_col = format!("__ggsql_box_side_offset__"); + let combined_offset_col = "__ggsql_box_side_offset__".to_string(); // Helper that, when half_side is true, adds a calculate transform // to combine the dodge offset (if any) with the side shift, then