Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 93 additions & 8 deletions datafusion/expr/src/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -115,8 +115,8 @@ pub enum Expr {
IsNotUnknown(Box<Expr>),
/// arithmetic negation of an expression, the operand must be of a signed numeric data type
Negative(Box<Expr>),
/// Returns the field of a [`arrow::array::ListArray`] or [`arrow::array::StructArray`] by key
///
/// Returns the field of a [`arrow::array::ListArray`] or
/// [`arrow::array::StructArray`] by index or range
GetIndexedField(GetIndexedField),
/// Whether an expression is between a given range.
Between(Between),
Expand Down Expand Up @@ -359,19 +359,20 @@ impl ScalarUDF {
}
}

/// Access a sub field of a nested type, such as a struct or a list.
///
/// Each variant describes one way of addressing into the nested value.
#[derive(Clone, PartialEq, Eq, Hash, Debug)]
pub enum GetFieldAccess {
/// Named field of a struct, for example `struct["name"]`.
/// `name` carries the field name as a [`ScalarValue`].
NamedStructField { name: ScalarValue },
/// Single list index, for example `list[i]`.
/// `key` is the expression that produces the index.
ListIndex { key: Box<Expr> },
/// List range, for example `list[i:j]`.
/// `start` and `stop` are the expressions that produce the two bounds.
ListRange { start: Box<Expr>, stop: Box<Expr> },
}

/// Returns the field of a [`arrow::array::ListArray`] or [`arrow::array::StructArray`] by `key`.
/// If `extra_key` is not `None`, returns the slice of a [`arrow::array::ListArray`] in the range from `key` to `extra_key`.
/// Returns the field of a [`arrow::array::ListArray`] or
/// [`arrow::array::StructArray`] by `key`. See [`GetFieldAccess`] for
/// details.
#[derive(Clone, PartialEq, Eq, Hash, Debug)]
pub struct GetIndexedField {
/// The expression to take the field from
Expand Down Expand Up @@ -925,6 +926,90 @@ impl Expr {
))
}

/// Return access to the named field. Example `expr["name"]`
///
/// ## Access field "my_field" from column "c1"
///
/// For example if column "c1" holds documents like this
///
/// ```json
/// {
/// "my_field": 123.34,
/// "other_field": "Boston",
/// }
/// ```
///
/// You can access column "my_field" with
///
/// ```
/// # use datafusion_expr::{col};
/// let expr = col("c1")
/// .field("my_field");
/// assert_eq!(expr.display_name().unwrap(), "c1[my_field]");
/// ```
pub fn field(self, name: impl Into<String>) -> Self {
Expr::GetIndexedField(GetIndexedField {
expr: Box::new(self),
field: GetFieldAccess::NamedStructField {
name: ScalarValue::Utf8(Some(name.into())),
},
})
}

/// Return access to a single list element selected by `key`, for
/// example `expr[3]`.
///
/// As the example below shows, positions are `1` based: index `3`
/// selects the third element.
///
/// # Example: access element 3 from column "c1"
///
/// For example if column "c1" holds lists like this
///
/// ```json
/// [10, 20, 30, 40]
/// ```
///
/// You can access the value `30` with
///
/// ```
/// # use datafusion_expr::{lit, col};
/// let expr = col("c1")
///    .index(lit(3));
/// assert_eq!(expr.display_name().unwrap(), "c1[Int32(3)]");
/// ```
pub fn index(self, key: Expr) -> Self {
    Expr::GetIndexedField(GetIndexedField {
        expr: Box::new(self),
        // `key` is itself an expression, so the index may be computed per row.
        field: GetFieldAccess::ListIndex { key: Box::new(key) },
    })
}

/// Return elements between `1` based `start` and `stop`, for
/// example `expr[1:3]`
///
/// ## Example: Access element 2, 3, 4 from column "c1"
///
/// For example if column "c1" holds documents like this
///
/// ```json
/// [10, 20, 30, 40]
/// ```
///
/// You can access the value `[20, 30, 40]` with
///
/// ```
/// # use datafusion_expr::{lit, col};
/// let expr = col("c1")
/// .range(lit(2), lit(4));
/// assert_eq!(expr.display_name().unwrap(), "c1[Int32(2):Int32(4)]");
/// ```
pub fn range(self, start: Expr, stop: Expr) -> Self {
Expr::GetIndexedField(GetIndexedField {
expr: Box::new(self),
field: GetFieldAccess::ListRange {
start: Box::new(start),
stop: Box::new(stop),
},
})
}

pub fn try_into_col(&self) -> Result<Column> {
match self {
Expr::Column(it) => Ok(it.clone()),
Expand Down
11 changes: 3 additions & 8 deletions datafusion/sql/src/expr/identifier.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@

use crate::planner::{ContextProvider, PlannerContext, SqlToRel};
use datafusion_common::{
Column, DFField, DFSchema, DataFusionError, Result, ScalarValue, TableReference,
Column, DFField, DFSchema, DataFusionError, Result, TableReference,
};
use datafusion_expr::{Case, Expr, GetFieldAccess, GetIndexedField};
use datafusion_expr::{Case, Expr};
use sqlparser::ast::{Expr as SQLExpr, Ident};

impl<'a, S: ContextProvider> SqlToRel<'a, S> {
Expand Down Expand Up @@ -136,12 +136,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
)));
}
let nested_name = nested_names[0].to_string();
Ok(Expr::GetIndexedField(GetIndexedField::new(
Box::new(Expr::Column(field.qualified_column())),
GetFieldAccess::NamedStructField {
name: ScalarValue::Utf8(Some(nested_name)),
},
)))
Ok(Expr::Column(field.qualified_column()).field(nested_name))
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This shows the kind of API cleanup that is possible using this new API

}
// found matching field with no spare identifier(s)
Some((field, _nested_names)) => {
Expand Down