From 3eec61b1d716c41b954f68f3d30a03d1a672cd24 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 7 Aug 2023 13:17:38 -0400 Subject: [PATCH 1/3] Add Expr::field, Expr::index, and Expr::slice`, add docs --- datafusion/expr/src/expr.rs | 98 ++++++++++++++++++++++++--- datafusion/sql/src/expr/identifier.rs | 11 +-- 2 files changed, 93 insertions(+), 16 deletions(-) diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 2fb65472c24ee..bb02e8ace91c0 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -115,8 +115,8 @@ pub enum Expr { IsNotUnknown(Box), /// arithmetic negation of an expression, the operand must be of a signed numeric data type Negative(Box), - /// Returns the field of a [`arrow::array::ListArray`] or [`arrow::array::StructArray`] by key - /// + /// Returns the field of a [`arrow::array::ListArray`] or + /// [`arrow::array::StructArray`] by key or key range GetIndexedField(GetIndexedField), /// Whether an expression is between a given range. Between(Between), @@ -359,19 +359,20 @@ impl ScalarUDF { } } +/// Access a sub field of a nested type, such as `Field` or `List` #[derive(Clone, PartialEq, Eq, Hash, Debug)] pub enum GetFieldAccess { - /// returns the field `struct[field]`. For example `struct["name"]` + /// Named field, For example `struct["name"]` NamedStructField { name: ScalarValue }, - /// single list index - // list[i] + /// Single list index, for example: `list[i]` ListIndex { key: Box }, - /// list range `list[i:j]` + /// List range, for example `list[i:j]` ListRange { start: Box, stop: Box }, } -/// Returns the field of a [`arrow::array::ListArray`] or [`arrow::array::StructArray`] by `key`. -/// If `extra_key` is not `None`, returns the slice of a [`arrow::array::ListArray`] in the range from `key` to `extra_key`. +/// Returns the field of a [`arrow::array::ListArray`] or +/// [`arrow::array::StructArray`] by `key`. See [`GetFieldAccess`] for +/// details. 
#[derive(Clone, PartialEq, Eq, Hash, Debug)] pub struct GetIndexedField { /// The expression to take the field from @@ -925,6 +926,87 @@ impl Expr { )) } + /// Return access to the named field. Example `expr["name"]` + /// + /// ## Access field "my_field" from column "c1" + /// + /// For example if column "c1" holds documents like this + /// + /// ```json + /// { + /// "my_field": 123.34, + /// "other_field": "Boston", + /// } + /// ``` + /// + /// You can access column "my_field" with + /// + /// ``` + /// # use datafusion_expr::{lit, col, Expr}; + /// let expr = col("c1") + /// .field("my_field"); + /// ``` + pub fn field(self, name: impl Into) -> Self { + Expr::GetIndexedField(GetIndexedField { + expr: Box::new(self), + field: GetFieldAccess::NamedStructField { + name: ScalarValue::Utf8(Some(name.into())), + }, + }) + } + + /// Return access to the element field. Example `expr["name"]` + /// + /// ## Example Access element 2 from column "c1" + /// + /// For example if column "c1" holds documents like this + /// + /// ```json + /// [10, 20, 30, 40] + /// ``` + /// + /// You can access the value "30" with + /// + /// ``` + /// # use datafusion_expr::{lit, col, Expr}; + /// let expr = col("c1") + /// .index(lit(3)); + /// ``` + pub fn index(self, key: Expr) -> Self { + Expr::GetIndexedField(GetIndexedField { + expr: Box::new(self), + field: GetFieldAccess::ListIndex { key: Box::new(key) }, + }) + } + + /// Return element at `1` based index field. 
Example + /// `expr["name"]` + /// + /// ## Example: Access element 2 from column "c1" + /// + /// For example if column "c1" holds documents like this + /// + /// ```json + /// [10, 20, 30, 40] + /// ``` + /// + /// You can access the value "[20, 30, 40]" with + /// + /// ``` + /// # use datafusion_expr::{lit, col, Expr}; + /// let expr = col("c1") + /// .slice(lit(30)); + /// ``` + pub fn slice(self, start: Expr, stop: Expr) -> Self { + Expr::GetIndexedField(GetIndexedField { + expr: Box::new(self), + field: GetFieldAccess::ListRange { + start: Box::new(start), + stop: Box::new(stop), + }, + }) + } + pub fn try_into_col(&self) -> Result { match self { Expr::Column(it) => Ok(it.clone()), diff --git a/datafusion/sql/src/expr/identifier.rs b/datafusion/sql/src/expr/identifier.rs index 82e4c959ed7e4..94faa08e51b00 100644 --- a/datafusion/sql/src/expr/identifier.rs +++ b/datafusion/sql/src/expr/identifier.rs @@ -17,9 +17,9 @@ use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; use datafusion_common::{ - Column, DFField, DFSchema, DataFusionError, Result, ScalarValue, TableReference, + Column, DFField, DFSchema, DataFusionError, Result, TableReference, }; -use datafusion_expr::{Case, Expr, GetFieldAccess, GetIndexedField}; +use datafusion_expr::{Case, Expr}; use sqlparser::ast::{Expr as SQLExpr, Ident}; impl<'a, S: ContextProvider> SqlToRel<'a, S> { @@ -136,12 +136,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { ))); } let nested_name = nested_names[0].to_string(); - Ok(Expr::GetIndexedField(GetIndexedField::new( - Box::new(Expr::Column(field.qualified_column())), - GetFieldAccess::NamedStructField { - name: ScalarValue::Utf8(Some(nested_name)), - }, - ))) + Ok(Expr::Column(field.qualified_column()).field(nested_name)) } // found matching field with no spare identifier(s) Some((field, _nested_names)) => { From e56fb208b3e2f36d783cc1384b913c7febf53693 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 7 Aug 2023 13:25:52 -0400 Subject: [PATCH 2/3] 
tweak --- datafusion/expr/src/expr.rs | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index bb02e8ace91c0..7ab228c935526 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -942,9 +942,10 @@ impl Expr { /// You can access column "my_field" with /// /// ``` - /// # use datafusion_expr::{lit, col, Expr}; + /// # use datafusion_expr::{col}; /// let expr = col("c1") /// .field("my_field"); + /// assert_eq!(expr.display_name().unwrap(), "c1[my_field]"); /// ``` pub fn field(self, name: impl Into) -> Self { Expr::GetIndexedField(GetIndexedField { @@ -971,6 +972,7 @@ impl Expr { /// # use datafusion_expr::{lit, col, Expr}; /// let expr = col("c1") /// .index(lit(3)); + /// assert_eq!(expr.display_name().unwrap(), "c1[Int32(3)]"); /// ``` pub fn index(self, key: Expr) -> Self { Expr::GetIndexedField(GetIndexedField { @@ -979,10 +981,10 @@ impl Expr { }) } - /// Return element at `1` based index field. 
Example - /// `expr["name"]` + /// Return elements between `1` based `start` and `stop`, for + /// example `expr[1:3]` /// - /// ## Example: Access element 2 from column "c1" + /// ## Example: Access elements 2, 3, 4 from column "c1" /// /// For example if column "c1" holds documents like this /// @@ -990,14 +992,15 @@ impl Expr { /// [10, 20, 30, 40] /// ``` /// - /// You can access the value "[20, 30, 40]" with + /// You can access the value `[20, 30, 40]` with /// /// ``` - /// # use datafusion_expr::{lit, col, Expr}; + /// # use datafusion_expr::{lit, col}; /// let expr = col("c1") - /// .slice(lit(30)); + /// .range(lit(2), lit(4)); + /// assert_eq!(expr.display_name().unwrap(), "c1[Int32(2):Int32(4)]"); /// ``` - pub fn slice(self, start: Expr, stop: Expr) -> Self { + pub fn range(self, start: Expr, stop: Expr) -> Self { Expr::GetIndexedField(GetIndexedField { expr: Box::new(self), field: GetFieldAccess::ListRange { From a589c9ed49f220b219d79f85abde88c578cf40cc Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 8 Aug 2023 07:06:46 -0400 Subject: [PATCH 3/3] Apply suggestions from code review Co-authored-by: Igor Izvekov --- datafusion/expr/src/expr.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 7ab228c935526..a0cfb6e1b00a2 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -116,7 +116,7 @@ pub enum Expr { /// arithmetic negation of an expression, the operand must be of a signed numeric data type Negative(Box), /// Returns the field of a [`arrow::array::ListArray`] or - /// [`arrow::array::StructArray`] by key or key range + /// [`arrow::array::StructArray`] by index or range GetIndexedField(GetIndexedField), /// Whether an expression is between a given range.
Between(Between), @@ -362,7 +362,7 @@ impl ScalarUDF { /// Access a sub field of a nested type, such as `Field` or `List` #[derive(Clone, PartialEq, Eq, Hash, Debug)] pub enum GetFieldAccess { - /// Named field, For example `struct["name"]` + /// Named field, for example `struct["name"]` NamedStructField { name: ScalarValue }, /// Single list index, for example: `list[i]` ListIndex { key: Box },