diff --git a/datafusion/core/tests/sql/expr.rs b/datafusion/core/tests/sql/expr.rs index 093ab3433a3e5..4fa1f54d22bfc 100644 --- a/datafusion/core/tests/sql/expr.rs +++ b/datafusion/core/tests/sql/expr.rs @@ -644,6 +644,16 @@ async fn test_struct_literals() -> Result<()> { Ok(()) } +#[tokio::test] +async fn binary_bitwise_shift() -> Result<()> { + test_expression!("2 << 10", "2048"); + test_expression!("2048 >> 10", "2"); + test_expression!("2048 << NULL", "NULL"); + test_expression!("2048 >> NULL", "NULL"); + + Ok(()) +} + #[tokio::test] async fn test_interval_expressions() -> Result<()> { // day nano intervals diff --git a/datafusion/expr/src/binary_rule.rs b/datafusion/expr/src/binary_rule.rs index d6994d68847a3..f71e97e498906 100644 --- a/datafusion/expr/src/binary_rule.rs +++ b/datafusion/expr/src/binary_rule.rs @@ -55,7 +55,10 @@ pub fn binary_operator_data_type( | Operator::IsDistinctFrom | Operator::IsNotDistinctFrom => Ok(DataType::Boolean), // bitwise operations return the common coerced type - Operator::BitwiseAnd | Operator::BitwiseOr => Ok(result_type), + Operator::BitwiseAnd + | Operator::BitwiseOr + | Operator::BitwiseShiftLeft + | Operator::BitwiseShiftRight => Ok(result_type), // math operations return the same value as the common coerced type Operator::Plus | Operator::Minus @@ -76,9 +79,10 @@ pub fn coerce_types( ) -> Result { // This result MUST be compatible with `binary_coerce` let result = match op { - Operator::BitwiseAnd | Operator::BitwiseOr => { - bitwise_coercion(lhs_type, rhs_type) - } + Operator::BitwiseAnd + | Operator::BitwiseOr + | Operator::BitwiseShiftRight + | Operator::BitwiseShiftLeft => bitwise_coercion(lhs_type, rhs_type), Operator::And | Operator::Or => match (lhs_type, rhs_type) { // logical binary boolean operators can only be evaluated in bools (DataType::Boolean, DataType::Boolean) => Some(DataType::Boolean), @@ -135,12 +139,14 @@ pub fn coerce_types( fn bitwise_coercion(left_type: &DataType, right_type: &DataType) -> Option { use arrow::datatypes::DataType::*; - if !is_numeric(left_type) || !is_numeric(right_type) { + if !both_numeric_or_null_and_numeric(left_type, right_type) { return None; } + if left_type == right_type && !is_dictionary(left_type) { return Some(left_type.clone()); } + // TODO support other data type match (left_type, right_type) { (Int64, _) | (_, Int64) => Some(Int64), diff --git a/datafusion/expr/src/operator.rs b/datafusion/expr/src/operator.rs index d22cb85694eb7..f2cdb3555f824 100644 --- a/datafusion/expr/src/operator.rs +++ b/datafusion/expr/src/operator.rs @@ -71,6 +71,10 @@ pub enum Operator { BitwiseAnd, /// Bitwise or, like `|` BitwiseOr, + /// Bitwise right, like `>>` + BitwiseShiftRight, + /// Bitwise left, like `<<` + BitwiseShiftLeft, /// String concat StringConcat, } @@ -101,6 +105,8 @@ impl fmt::Display for Operator { Operator::IsNotDistinctFrom => "IS NOT DISTINCT FROM", Operator::BitwiseAnd => "&", Operator::BitwiseOr => "|", + Operator::BitwiseShiftRight => ">>", + Operator::BitwiseShiftLeft => "<<", Operator::StringConcat => "||", }; write!(f, "{}", display) diff --git a/datafusion/physical-expr/src/expressions/binary.rs b/datafusion/physical-expr/src/expressions/binary.rs index 64e35311625af..6769032bff6de 100644 --- a/datafusion/physical-expr/src/expressions/binary.rs +++ b/datafusion/physical-expr/src/expressions/binary.rs @@ -50,7 +50,10 @@ use arrow::compute::kernels::comparison::{ use adapter::{eq_dyn, gt_dyn, gt_eq_dyn, lt_dyn, lt_eq_dyn, neq_dyn}; use arrow::compute::kernels::concat_elements::concat_elements_utf8; -use kernels::{bitwise_and, bitwise_and_scalar, bitwise_or, bitwise_or_scalar}; +use kernels::{ + bitwise_and, bitwise_and_scalar, bitwise_or, bitwise_or_scalar, bitwise_shift_left, + bitwise_shift_left_scalar, bitwise_shift_right, bitwise_shift_right_scalar, +}; use kernels_arrow::{ add_decimal, add_decimal_scalar, divide_decimal, divide_decimal_scalar, eq_decimal_scalar, gt_decimal_scalar, gt_eq_decimal_scalar, is_distinct_from, @@ -740,6 +743,12 @@ impl BinaryExpr { ), Operator::BitwiseAnd => bitwise_and_scalar(array, scalar.clone()), Operator::BitwiseOr => bitwise_or_scalar(array, scalar.clone()), + Operator::BitwiseShiftRight => { + bitwise_shift_right_scalar(array, scalar.clone()) + } + Operator::BitwiseShiftLeft => { + bitwise_shift_left_scalar(array, scalar.clone()) + } // if scalar operation is not supported - fallback to array implementation _ => None, }; @@ -850,6 +859,8 @@ impl BinaryExpr { } Operator::BitwiseAnd => bitwise_and(left, right), Operator::BitwiseOr => bitwise_or(left, right), + Operator::BitwiseShiftRight => bitwise_shift_right(left, right), + Operator::BitwiseShiftLeft => bitwise_shift_left(left, right), Operator::StringConcat => { binary_string_array_op!(left, right, concat_elements) } @@ -2481,6 +2492,34 @@ mod tests { Ok(()) } + #[test] + fn bitwise_shift_array_test() -> Result<()> { + let input = Arc::new(Int32Array::from(vec![Some(2), None, Some(10)])) as ArrayRef; + let modules = + Arc::new(Int32Array::from(vec![Some(2), Some(4), Some(8)])) as ArrayRef; + let mut result = bitwise_shift_left(input.clone(), modules.clone())?; + + let expected = Int32Array::from(vec![Some(8), None, Some(2560)]); + assert_eq!(result.as_ref(), &expected); + + result = bitwise_shift_right(result.clone(), modules.clone())?; + assert_eq!(result.as_ref(), &input); + + Ok(()) + } + + #[test] + fn bitwise_shift_array_overflow_test() -> Result<()> { + let input = Arc::new(Int32Array::from(vec![Some(2)])) as ArrayRef; + let modules = Arc::new(Int32Array::from(vec![Some(100)])) as ArrayRef; + let result = bitwise_shift_left(input.clone(), modules.clone())?; + + let expected = Int32Array::from(vec![Some(32)]); + assert_eq!(result.as_ref(), &expected); + + Ok(()) + } + #[test] fn bitwise_scalar_test() -> Result<()> { let left = Arc::new(Int32Array::from(vec![Some(12), None, Some(11)])) as ArrayRef; @@ -2494,4 +2533,19 @@ mod tests { assert_eq!(result.as_ref(), &expected); Ok(()) } + + #[test] + fn bitwise_shift_scalar_test() -> Result<()> { + let input = Arc::new(Int32Array::from(vec![Some(2), None, Some(4)])) as ArrayRef; + let module = ScalarValue::from(10i32); + let mut result = bitwise_shift_left_scalar(&input, module.clone()).unwrap()?; + + let expected = Int32Array::from(vec![Some(2048), None, Some(4096)]); + assert_eq!(result.as_ref(), &expected); + + result = bitwise_shift_right_scalar(&result, module).unwrap()?; + assert_eq!(result.as_ref(), &input); + + Ok(()) + } } diff --git a/datafusion/physical-expr/src/expressions/binary/kernels.rs b/datafusion/physical-expr/src/expressions/binary/kernels.rs index a89957447e00c..3ca4a447c875a 100644 --- a/datafusion/physical-expr/src/expressions/binary/kernels.rs +++ b/datafusion/physical-expr/src/expressions/binary/kernels.rs @@ -21,13 +21,14 @@ use arrow::array::*; use arrow::datatypes::DataType; use datafusion_common::{DataFusionError, Result, ScalarValue}; use datafusion_expr::Operator; + use std::sync::Arc; /// The binary_bitwise_array_op macro only evaluates for integer types /// like int64, int32. /// It is used to do bitwise operation. macro_rules! binary_bitwise_array_op { - ($LEFT:expr, $RIGHT:expr, $OP:tt, $ARRAY_TYPE:ident, $TYPE:ty) => {{ + ($LEFT:expr, $RIGHT:expr, $METHOD:expr, $ARRAY_TYPE:ident) => {{ let len = $LEFT.len(); let left = $LEFT.as_any().downcast_ref::<$ARRAY_TYPE>().unwrap(); let right = $RIGHT.as_any().downcast_ref::<$ARRAY_TYPE>().unwrap(); @@ -37,7 +38,7 @@ macro_rules! binary_bitwise_array_op { if left.is_null(i) || right.is_null(i) { None } else { - Some(left.value(i) $OP right.value(i)) + Some($METHOD(left.value(i), right.value(i))) } }) .collect::<$ARRAY_TYPE>(); @@ -49,7 +50,7 @@ macro_rules! binary_bitwise_array_op { /// like int64, int32. /// It is used to do bitwise operation on an array with a scalar. macro_rules! binary_bitwise_array_scalar { - ($LEFT:expr, $RIGHT:expr, $OP:tt, $ARRAY_TYPE:ident, $TYPE:ty) => {{ + ($LEFT:expr, $RIGHT:expr, $METHOD:expr, $ARRAY_TYPE:ident, $TYPE:ty) => {{ let len = $LEFT.len(); let array = $LEFT.as_any().downcast_ref::<$ARRAY_TYPE>().unwrap(); let scalar = $RIGHT; @@ -63,7 +64,7 @@ macro_rules! binary_bitwise_array_scalar { if array.is_null(i) { None } else { - Some(array.value(i) $OP right) + Some($METHOD(array.value(i), right)) } }) .collect::<$ARRAY_TYPE>(); @@ -75,16 +76,16 @@ macro_rules! binary_bitwise_array_scalar { pub(crate) fn bitwise_and(left: ArrayRef, right: ArrayRef) -> Result { match &left.data_type() { DataType::Int8 => { - binary_bitwise_array_op!(left, right, &, Int8Array, i8) + binary_bitwise_array_op!(left, right, |a, b| a & b, Int8Array) } DataType::Int16 => { - binary_bitwise_array_op!(left, right, &, Int16Array, i16) + binary_bitwise_array_op!(left, right, |a, b| a & b, Int16Array) } DataType::Int32 => { - binary_bitwise_array_op!(left, right, &, Int32Array, i32) + binary_bitwise_array_op!(left, right, |a, b| a & b, Int32Array) } DataType::Int64 => { - binary_bitwise_array_op!(left, right, &, Int64Array, i64) + binary_bitwise_array_op!(left, right, |a, b| a & b, Int64Array) } other => Err(DataFusionError::Internal(format!( "Data type {:?} not supported for binary operation '{}' on dyn arrays", @@ -94,19 +95,103 @@ pub(crate) fn bitwise_and(left: ArrayRef, right: ArrayRef) -> Result { } } +pub(crate) fn bitwise_shift_right(left: ArrayRef, right: ArrayRef) -> Result { + match &left.data_type() { + DataType::Int8 => { + binary_bitwise_array_op!( + left, + right, + |a: i8, b: i8| a.wrapping_shr(b as u32), + Int8Array + ) + } + DataType::Int16 => { + binary_bitwise_array_op!( + left, + right, + |a: i16, b: i16| a.wrapping_shr(b as u32), + Int16Array + ) + } + DataType::Int32 => { + binary_bitwise_array_op!( + left, + right, + |a: i32, b: i32| a.wrapping_shr(b as u32), + Int32Array + ) + } + DataType::Int64 => { + binary_bitwise_array_op!( + left, + right, + |a: i64, b: i64| a.wrapping_shr(b as u32), + Int64Array + ) + } + other => Err(DataFusionError::Internal(format!( + "Data type {:?} not supported for binary operation '{}' on dyn arrays", + other, + Operator::BitwiseShiftRight + ))), + } +} + +pub(crate) fn bitwise_shift_left(left: ArrayRef, right: ArrayRef) -> Result { + match &left.data_type() { + DataType::Int8 => { + binary_bitwise_array_op!( + left, + right, + |a: i8, b: i8| a.wrapping_shl(b as u32), + Int8Array + ) + } + DataType::Int16 => { + binary_bitwise_array_op!( + left, + right, + |a: i16, b: i16| a.wrapping_shl(b as u32), + Int16Array + ) + } + DataType::Int32 => { + binary_bitwise_array_op!( + left, + right, + |a: i32, b: i32| a.wrapping_shl(b as u32), + Int32Array + ) + } + DataType::Int64 => { + binary_bitwise_array_op!( + left, + right, + |a: i64, b: i64| a.wrapping_shl(b as u32), + Int64Array + ) + } + other => Err(DataFusionError::Internal(format!( + "Data type {:?} not supported for binary operation '{}' on dyn arrays", + other, + Operator::BitwiseShiftLeft + ))), + } +} + pub(crate) fn bitwise_or(left: ArrayRef, right: ArrayRef) -> Result { match &left.data_type() { DataType::Int8 => { - binary_bitwise_array_op!(left, right, |, Int8Array, i8) + binary_bitwise_array_op!(left, right, |a, b| a | b, Int8Array) } DataType::Int16 => { - binary_bitwise_array_op!(left, right, |, Int16Array, i16) + binary_bitwise_array_op!(left, right, |a, b| a | b, Int16Array) } DataType::Int32 => { - binary_bitwise_array_op!(left, right, |, Int32Array, i32) + binary_bitwise_array_op!(left, right, |a, b| a | b, Int32Array) } DataType::Int64 => { - binary_bitwise_array_op!(left, right, |, Int64Array, i64) + binary_bitwise_array_op!(left, right, |a, b| a | b, Int64Array) } other => Err(DataFusionError::Internal(format!( "Data type {:?} not supported for binary operation '{}' on dyn arrays", @@ -122,16 +207,16 @@ pub(crate) fn bitwise_and_scalar( ) -> Option> { let result = match array.data_type() { DataType::Int8 => { - binary_bitwise_array_scalar!(array, scalar, &, Int8Array, i8) + binary_bitwise_array_scalar!(array, scalar, |a, b| a & b, Int8Array, i8) } DataType::Int16 => { - binary_bitwise_array_scalar!(array, scalar, &, Int16Array, i16) + binary_bitwise_array_scalar!(array, scalar, |a, b| a & b, Int16Array, i16) } DataType::Int32 => { - binary_bitwise_array_scalar!(array, scalar, &, Int32Array, i32) + binary_bitwise_array_scalar!(array, scalar, |a, b| a & b, Int32Array, i32) } DataType::Int64 => { - binary_bitwise_array_scalar!(array, scalar, &, Int64Array, i64) + binary_bitwise_array_scalar!(array, scalar, |a, b| a & b, Int64Array, i64) } other => Err(DataFusionError::Internal(format!( "Data type {:?} not supported for binary operation '{}' on dyn arrays", @@ -148,16 +233,16 @@ pub(crate) fn bitwise_or_scalar( ) -> Option> { let result = match array.data_type() { DataType::Int8 => { - binary_bitwise_array_scalar!(array, scalar, |, Int8Array, i8) + binary_bitwise_array_scalar!(array, scalar, |a, b| a | b, Int8Array, i8) } DataType::Int16 => { - binary_bitwise_array_scalar!(array, scalar, |, Int16Array, i16) + binary_bitwise_array_scalar!(array, scalar, |a, b| a | b, Int16Array, i16) } DataType::Int32 => { - binary_bitwise_array_scalar!(array, scalar, |, Int32Array, i32) + binary_bitwise_array_scalar!(array, scalar, |a, b| a | b, Int32Array, i32) } DataType::Int64 => { - binary_bitwise_array_scalar!(array, scalar, |, Int64Array, i64) + binary_bitwise_array_scalar!(array, scalar, |a, b| a | b, Int64Array, i64) } other => Err(DataFusionError::Internal(format!( "Data type {:?} not supported for binary operation '{}' on dyn arrays", @@ -167,3 +252,103 @@ pub(crate) fn bitwise_or_scalar( }; Some(result) } + +pub(crate) fn bitwise_shift_right_scalar( + array: &dyn Array, + scalar: ScalarValue, +) -> Option> { + let result = match array.data_type() { + DataType::Int8 => { + binary_bitwise_array_scalar!( + array, + scalar, + |a: i8, b: i8| a.wrapping_shr(b as u32), + Int8Array, + i8 + ) + } + DataType::Int16 => { + binary_bitwise_array_scalar!( + array, + scalar, + |a: i16, b: i16| a.wrapping_shr(b as u32), + Int16Array, + i16 + ) + } + DataType::Int32 => { + binary_bitwise_array_scalar!( + array, + scalar, + |a: i32, b: i32| a.wrapping_shr(b as u32), + Int32Array, + i32 + ) + } + DataType::Int64 => { + binary_bitwise_array_scalar!( + array, + scalar, + |a: i64, b: i64| a.wrapping_shr(b as u32), + Int64Array, + i64 + ) + } + other => Err(DataFusionError::Internal(format!( + "Data type {:?} not supported for binary operation '{}' on dyn arrays", + other, + Operator::BitwiseShiftRight + ))), + }; + Some(result) +} + +pub(crate) fn bitwise_shift_left_scalar( + array: &dyn Array, + scalar: ScalarValue, +) -> Option> { + let result = match array.data_type() { + DataType::Int8 => { + binary_bitwise_array_scalar!( + array, + scalar, + |a: i8, b: i8| a.wrapping_shl(b as u32), + Int8Array, + i8 + ) + } + DataType::Int16 => { + binary_bitwise_array_scalar!( + array, + scalar, + |a: i16, b: i16| a.wrapping_shl(b as u32), + Int16Array, + i16 + ) + } + DataType::Int32 => { + binary_bitwise_array_scalar!( + array, + scalar, + |a: i32, b: i32| a.wrapping_shl(b as u32), + Int32Array, + i32 + ) + } + DataType::Int64 => { + binary_bitwise_array_scalar!( + array, + scalar, + |a: i64, b: i64| a.wrapping_shl(b as u32), + Int64Array, + i64 + ) + } + other => Err(DataFusionError::Internal(format!( + "Data type {:?} not supported for binary operation '{}' on dyn arrays", + other, + Operator::BitwiseShiftLeft + ))), + }; + Some(result) +} diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index daa4753e4bfdf..ddf9198e5407c 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -1535,6 +1535,8 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { BinaryOperator::PGRegexNotIMatch => Ok(Operator::RegexNotIMatch), BinaryOperator::BitwiseAnd => Ok(Operator::BitwiseAnd), BinaryOperator::BitwiseOr => Ok(Operator::BitwiseOr), + BinaryOperator::PGBitwiseShiftRight => Ok(Operator::BitwiseShiftRight), + BinaryOperator::PGBitwiseShiftLeft => Ok(Operator::BitwiseShiftLeft), BinaryOperator::StringConcat => Ok(Operator::StringConcat), _ => Err(DataFusionError::NotImplemented(format!( "Unsupported SQL binary operator {:?}",