diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs
index 2fb65472c24ee..a0cfb6e1b00a2 100644
--- a/datafusion/expr/src/expr.rs
+++ b/datafusion/expr/src/expr.rs
@@ -115,8 +115,8 @@ pub enum Expr {
     IsNotUnknown(Box<Expr>),
     /// arithmetic negation of an expression, the operand must be of a signed numeric data type
     Negative(Box<Expr>),
-    /// Returns the field of a [`arrow::array::ListArray`] or [`arrow::array::StructArray`] by key
-    ///
+    /// Returns the field of a [`arrow::array::StructArray`] by name, or the
+    /// element(s) of a [`arrow::array::ListArray`] by index or range
     GetIndexedField(GetIndexedField),
     /// Whether an expression is between a given range.
     Between(Between),
@@ -359,19 +359,20 @@ impl ScalarUDF {
     }
 }
 
+/// Access a sub field of a nested type, such as `Struct` or `List`
 #[derive(Clone, PartialEq, Eq, Hash, Debug)]
 pub enum GetFieldAccess {
-    /// returns the field `struct[field]`. For example `struct["name"]`
+    /// Named field, for example `struct["name"]`
     NamedStructField { name: ScalarValue },
-    /// single list index
-    // list[i]
+    /// Single list index, for example: `list[i]`
     ListIndex { key: Box<Expr> },
-    /// list range `list[i:j]`
+    /// List range, for example `list[i:j]`
     ListRange { start: Box<Expr>, stop: Box<Expr> },
 }
 
-/// Returns the field of a [`arrow::array::ListArray`] or [`arrow::array::StructArray`] by `key`.
-/// If `extra_key` is not `None`, returns the slice of a [`arrow::array::ListArray`] in the range from `key` to `extra_key`.
+/// Returns the field of a [`arrow::array::ListArray`] or
+/// [`arrow::array::StructArray`] by `key`. See [`GetFieldAccess`] for
+/// details.
 #[derive(Clone, PartialEq, Eq, Hash, Debug)]
 pub struct GetIndexedField {
     /// The expression to take the field from
@@ -925,6 +926,90 @@ impl Expr {
         ))
     }
 
+    /// Return access to the named field. Example `expr["name"]`
+    ///
+    /// ## Example: Access field "my_field" from column "c1"
+    ///
+    /// For example if column "c1" holds documents like this
+    ///
+    /// ```json
+    /// {
+    ///   "my_field": 123.34,
+    ///   "other_field": "Boston"
+    /// }
+    /// ```
+    ///
+    /// You can access field "my_field" with
+    ///
+    /// ```
+    /// # use datafusion_expr::col;
+    /// let expr = col("c1")
+    ///    .field("my_field");
+    /// assert_eq!(expr.display_name().unwrap(), "c1[my_field]");
+    /// ```
+    pub fn field(self, name: impl Into<String>) -> Self {
+        Expr::GetIndexedField(GetIndexedField {
+            expr: Box::new(self),
+            field: GetFieldAccess::NamedStructField {
+                name: ScalarValue::Utf8(Some(name.into())),
+            },
+        })
+    }
+
+    /// Return access to the list element at `1` based index `key`. Example `expr[3]`
+    ///
+    /// ## Example: Access element 3 from column "c1"
+    ///
+    /// For example if column "c1" holds documents like this
+    ///
+    /// ```json
+    /// [10, 20, 30, 40]
+    /// ```
+    ///
+    /// You can access the value "30" with
+    ///
+    /// ```
+    /// # use datafusion_expr::{lit, col};
+    /// let expr = col("c1")
+    ///    .index(lit(3));
+    /// assert_eq!(expr.display_name().unwrap(), "c1[Int32(3)]");
+    /// ```
+    pub fn index(self, key: Expr) -> Self {
+        Expr::GetIndexedField(GetIndexedField {
+            expr: Box::new(self),
+            field: GetFieldAccess::ListIndex { key: Box::new(key) },
+        })
+    }
+
+    /// Return elements between `1` based `start` and `stop`, for
+    /// example `expr[1:3]`
+    ///
+    /// ## Example: Access element 2, 3, 4 from column "c1"
+    ///
+    /// For example if column "c1" holds documents like this
+    ///
+    /// ```json
+    /// [10, 20, 30, 40]
+    /// ```
+    ///
+    /// You can access the value `[20, 30, 40]` with
+    ///
+    /// ```
+    /// # use datafusion_expr::{lit, col};
+    /// let expr = col("c1")
+    ///    .range(lit(2), lit(4));
+    /// assert_eq!(expr.display_name().unwrap(), "c1[Int32(2):Int32(4)]");
+    /// ```
+    pub fn range(self, start: Expr, stop: Expr) -> Self {
+        Expr::GetIndexedField(GetIndexedField {
+            expr: Box::new(self),
+            field: GetFieldAccess::ListRange {
+                start: Box::new(start),
+                stop: Box::new(stop),
+            },
+        })
+    }
+
     pub fn try_into_col(&self) -> Result<Column> {
         match self {
             Expr::Column(it) => Ok(it.clone()),
diff --git a/datafusion/sql/src/expr/identifier.rs b/datafusion/sql/src/expr/identifier.rs
index 82e4c959ed7e4..94faa08e51b00 100644
--- a/datafusion/sql/src/expr/identifier.rs
+++ b/datafusion/sql/src/expr/identifier.rs
@@ -17,9 +17,9 @@
 
 use crate::planner::{ContextProvider, PlannerContext, SqlToRel};
 use datafusion_common::{
-    Column, DFField, DFSchema, DataFusionError, Result, ScalarValue, TableReference,
+    Column, DFField, DFSchema, DataFusionError, Result, TableReference,
 };
-use datafusion_expr::{Case, Expr, GetFieldAccess, GetIndexedField};
+use datafusion_expr::{Case, Expr};
 use sqlparser::ast::{Expr as SQLExpr, Ident};
 
 impl<'a, S: ContextProvider> SqlToRel<'a, S> {
@@ -136,12 +136,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
                         )));
                     }
                     let nested_name = nested_names[0].to_string();
-                    Ok(Expr::GetIndexedField(GetIndexedField::new(
-                        Box::new(Expr::Column(field.qualified_column())),
-                        GetFieldAccess::NamedStructField {
-                            name: ScalarValue::Utf8(Some(nested_name)),
-                        },
-                    )))
+                    Ok(Expr::Column(field.qualified_column()).field(nested_name))
                 }
                 // found matching field with no spare identifier(s)
                 Some((field, _nested_names)) => {