diff --git a/arrow-array/src/array/primitive_array.rs b/arrow-array/src/array/primitive_array.rs index b463e016c852..f857e26c7f89 100644 --- a/arrow-array/src/array/primitive_array.rs +++ b/arrow-array/src/array/primitive_array.rs @@ -1063,12 +1063,12 @@ impl PrimitiveArray { } /// Construct a timestamp array with an optional timezone - pub fn with_timezone_opt(&self, timezone: Option) -> Self { + pub fn with_timezone_opt>>(&self, timezone: Option) -> Self { let array_data = unsafe { self.data .clone() .into_builder() - .data_type(DataType::Timestamp(T::UNIT, timezone)) + .data_type(DataType::Timestamp(T::UNIT, timezone.map(Into::into))) .build_unchecked() }; PrimitiveArray::from(array_data) diff --git a/arrow-array/src/builder/primitive_builder.rs b/arrow-array/src/builder/primitive_builder.rs index 1c2cd908ca26..6688d07b7055 100644 --- a/arrow-array/src/builder/primitive_builder.rs +++ b/arrow-array/src/builder/primitive_builder.rs @@ -575,8 +575,7 @@ mod tests { assert_eq!(array.precision(), 1); assert_eq!(array.scale(), 2); - let data_type = - DataType::Timestamp(TimeUnit::Nanosecond, Some("+00:00".to_string())); + let data_type = DataType::Timestamp(TimeUnit::Nanosecond, Some("+00:00".into())); let mut builder = TimestampNanosecondBuilder::new().with_data_type(data_type.clone()); builder.append_value(1); diff --git a/arrow-array/src/builder/struct_builder.rs b/arrow-array/src/builder/struct_builder.rs index 51b4c7cfcdc6..5f362036a8cd 100644 --- a/arrow-array/src/builder/struct_builder.rs +++ b/arrow-array/src/builder/struct_builder.rs @@ -491,7 +491,7 @@ mod tests { Field::new("f1", DataType::Decimal128(1, 2), false), Field::new( "f2", - DataType::Timestamp(TimeUnit::Millisecond, Some("+00:00".to_string())), + DataType::Timestamp(TimeUnit::Millisecond, Some("+00:00".into())), false, ), ]; diff --git a/arrow-cast/src/cast.rs b/arrow-cast/src/cast.rs index 806ff8771573..31a42b384fda 100644 --- a/arrow-cast/src/cast.rs +++ b/arrow-cast/src/cast.rs @@ -540,7 +540,7 @@ macro_rules! cast_list_to_string { fn make_timestamp_array( array: &PrimitiveArray, unit: TimeUnit, - tz: Option, + tz: Option>, ) -> ArrayRef { match unit { TimeUnit::Second => Arc::new( @@ -2620,7 +2620,7 @@ fn cast_string_to_timestamp< TimestampType: ArrowTimestampType, >( array: &dyn Array, - to_tz: &Option, + to_tz: &Option>, cast_options: &CastOptions, ) -> Result { let string_array = array @@ -7913,7 +7913,7 @@ mod tests { let b = cast( &b, - &DataType::Timestamp(TimeUnit::Nanosecond, Some("+00:00".to_string())), + &DataType::Timestamp(TimeUnit::Nanosecond, Some("+00:00".into())), ) .unwrap(); let v = b.as_primitive::(); @@ -7923,7 +7923,7 @@ mod tests { let b = cast( &b, - &DataType::Timestamp(TimeUnit::Millisecond, Some("+02:00".to_string())), + &DataType::Timestamp(TimeUnit::Millisecond, Some("+02:00".into())), ) .unwrap(); let v = b.as_primitive::(); @@ -7934,7 +7934,7 @@ mod tests { #[test] fn test_cast_utf8_to_timestamp() { - fn test_tz(tz: String) { + fn test_tz(tz: Arc) { let valid = StringArray::from(vec![ "2023-01-01 04:05:06.789000-08:00", "2023-01-01 04:05:06.789000-07:00", @@ -7970,8 +7970,8 @@ mod tests { assert_eq!(1672531200000000000, c.value(8)); } - test_tz("+00:00".to_owned()); - test_tz("+02:00".to_owned()); + test_tz("+00:00".into()); + test_tz("+02:00".into()); } #[test] @@ -7998,11 +7998,11 @@ mod tests { let array = Arc::new(valid) as ArrayRef; let b = cast( &array, - &DataType::Timestamp(TimeUnit::Nanosecond, Some("+00:00".to_owned())), + &DataType::Timestamp(TimeUnit::Nanosecond, Some("+00:00".into())), ) .unwrap(); - let expect = DataType::Timestamp(TimeUnit::Nanosecond, Some("+00:00".to_owned())); + let expect = DataType::Timestamp(TimeUnit::Nanosecond, Some("+00:00".into())); assert_eq!(b.data_type(), &expect); let c = b diff --git a/arrow-csv/src/reader/mod.rs b/arrow-csv/src/reader/mod.rs index 262c057d4283..46e97b1f848f 100644 --- a/arrow-csv/src/reader/mod.rs +++ b/arrow-csv/src/reader/mod.rs @@ -1739,7 +1739,7 @@ mod tests { } fn test_parse_timestamp_impl( - timezone: Option, + timezone: Option>, expected: &[i64], ) { let csv = [ @@ -1775,23 +1775,23 @@ mod tests { &[0, 0, -7_200_000_000_000], ); test_parse_timestamp_impl::( - Some("+00:00".to_string()), + Some("+00:00".into()), &[0, 0, -7_200_000_000_000], ); test_parse_timestamp_impl::( - Some("-05:00".to_string()), + Some("-05:00".into()), &[18_000_000_000_000, 0, -7_200_000_000_000], ); test_parse_timestamp_impl::( - Some("-03".to_string()), + Some("-03".into()), &[10_800_000_000, 0, -7_200_000_000], ); test_parse_timestamp_impl::( - Some("-03".to_string()), + Some("-03".into()), &[10_800_000, 0, -7_200_000], ); test_parse_timestamp_impl::( - Some("-03".to_string()), + Some("-03".into()), &[10_800, 0, -7_200], ); } diff --git a/arrow-csv/src/writer.rs b/arrow-csv/src/writer.rs index 28a939d88f34..946803decf90 100644 --- a/arrow-csv/src/writer.rs +++ b/arrow-csv/src/writer.rs @@ -624,7 +624,7 @@ sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555000000,23:46:03,foo let schema = Schema::new(vec![ Field::new( "c1", - DataType::Timestamp(TimeUnit::Millisecond, Some("+00:00".to_string())), + DataType::Timestamp(TimeUnit::Millisecond, Some("+00:00".into())), true, ), Field::new("c2", DataType::Timestamp(TimeUnit::Millisecond, None), true), diff --git a/arrow-integration-test/src/datatype.rs b/arrow-integration-test/src/datatype.rs index ece64e16eb08..d0f4ca66fda9 100644 --- a/arrow-integration-test/src/datatype.rs +++ b/arrow-integration-test/src/datatype.rs @@ -89,7 +89,7 @@ pub fn data_type_from_json(json: &serde_json::Value) -> Result { }; let tz = match map.get("timezone") { None => Ok(None), - Some(serde_json::Value::String(tz)) => Ok(Some(tz.clone())), + Some(Value::String(tz)) => Ok(Some(tz.as_str().into())), _ => Err(ArrowError::ParseError( "timezone must be a string".to_string(), )), diff --git a/arrow-integration-test/src/lib.rs b/arrow-integration-test/src/lib.rs index 6f9e8a4eb1aa..0b890ea33657 100644 --- a/arrow-integration-test/src/lib.rs +++ b/arrow-integration-test/src/lib.rs @@ -1104,10 +1104,10 @@ mod tests { #[test] fn test_arrow_data_equality() { - let secs_tz = Some("Europe/Budapest".to_string()); - let millis_tz = Some("America/New_York".to_string()); - let micros_tz = Some("UTC".to_string()); - let nanos_tz = Some("Africa/Johannesburg".to_string()); + let secs_tz = Some("Europe/Budapest".into()); + let millis_tz = Some("America/New_York".into()); + let micros_tz = Some("UTC".into()); + let nanos_tz = Some("Africa/Johannesburg".into()); let schema = Schema::new(vec![ Field::new("bools-with-metadata-map", DataType::Boolean, true).with_metadata( diff --git a/arrow-integration-test/src/schema.rs b/arrow-integration-test/src/schema.rs index 8147589390a3..bb17b1adb1ac 100644 --- a/arrow-integration-test/src/schema.rs +++ b/arrow-integration-test/src/schema.rs @@ -131,14 +131,14 @@ mod tests { Field::new("c15", DataType::Timestamp(TimeUnit::Second, None), false), Field::new( "c16", - DataType::Timestamp(TimeUnit::Millisecond, Some("UTC".to_string())), + DataType::Timestamp(TimeUnit::Millisecond, Some("UTC".into())), false, ), Field::new( "c17", DataType::Timestamp( TimeUnit::Microsecond, - Some("Africa/Johannesburg".to_string()), + Some("Africa/Johannesburg".into()), ), false, ), diff --git a/arrow-ipc/src/convert.rs b/arrow-ipc/src/convert.rs index aede8a448a06..8f8593cfd8f1 100644 --- a/arrow-ipc/src/convert.rs +++ b/arrow-ipc/src/convert.rs @@ -290,7 +290,7 @@ pub(crate) fn get_data_type(field: crate::Field, may_be_dictionary: bool) -> Dat } crate::Type::Timestamp => { let timestamp = field.type_as_timestamp().unwrap(); - let timezone: Option = timestamp.timezone().map(|tz| tz.to_string()); + let timezone: Option<_> = timestamp.timezone().map(|tz| tz.into()); match timestamp.unit() { crate::TimeUnit::SECOND => { DataType::Timestamp(TimeUnit::Second, timezone) @@ -636,8 +636,8 @@ pub(crate) fn get_fb_field_type<'a>( } } Timestamp(unit, tz) => { - let tz = tz.clone().unwrap_or_default(); - let tz_str = fbb.create_string(tz.as_str()); + let tz = tz.as_deref().unwrap_or_default(); + let tz_str = fbb.create_string(tz); let mut builder = crate::TimestampBuilder::new(fbb); let time_unit = match unit { TimeUnit::Second => crate::TimeUnit::SECOND, @@ -882,7 +882,7 @@ mod tests { "timestamp[us]", DataType::Timestamp( TimeUnit::Microsecond, - Some("Africa/Johannesburg".to_string()), + Some("Africa/Johannesburg".into()), ), false, ), diff --git a/arrow-json/src/raw/mod.rs b/arrow-json/src/raw/mod.rs index 21e6191ac7b2..b63763159a99 100644 --- a/arrow-json/src/raw/mod.rs +++ b/arrow-json/src/raw/mod.rs @@ -843,7 +843,7 @@ mod tests { {"c": "1997-01-31T14:26:56.123-05:00", "d": "1997-01-31"} "#; - let with_timezone = DataType::Timestamp(T::UNIT, Some("+08:00".to_string())); + let with_timezone = DataType::Timestamp(T::UNIT, Some("+08:00".into())); let schema = Arc::new(Schema::new(vec![ Field::new("a", T::DATA_TYPE, true), Field::new("b", T::DATA_TYPE, true), @@ -1092,7 +1092,7 @@ mod tests { do_test(DataType::Decimal128(2, 1)); do_test(DataType::Timestamp( TimeUnit::Microsecond, - Some("+00:00".to_string()), + Some("+00:00".into()), )); } } diff --git a/arrow-row/src/lib.rs b/arrow-row/src/lib.rs index 2f0defe5268a..56b3ec2b36b0 100644 --- a/arrow-row/src/lib.rs +++ b/arrow-row/src/lib.rs @@ -1533,12 +1533,12 @@ mod tests { // Construct dictionary with a timezone let dict = a.finish(); let values = TimestampNanosecondArray::from(dict.values().to_data()); - let dict_with_tz = dict.with_values(&values.with_timezone("+02:00".to_string())); + let dict_with_tz = dict.with_values(&values.with_timezone("+02:00")); let d = DataType::Dictionary( Box::new(DataType::Int32), Box::new(DataType::Timestamp( TimeUnit::Nanosecond, - Some("+02:00".to_string()), + Some("+02:00".into()), )), ); diff --git a/arrow-schema/Cargo.toml b/arrow-schema/Cargo.toml index 89e82a0ff164..2ef08072a00d 100644 --- a/arrow-schema/Cargo.toml +++ b/arrow-schema/Cargo.toml @@ -38,7 +38,7 @@ path = "src/lib.rs" bench = false [dependencies] -serde = { version = "1.0", default-features = false, features = ["derive", "std"], optional = true } +serde = { version = "1.0", default-features = false, features = ["derive", "std", "rc"], optional = true } bitflags = { version = "2.0.0", default-features = false, optional = true } [features] diff --git a/arrow-schema/src/datatype.rs b/arrow-schema/src/datatype.rs index 56eb6e8cef16..bcfea5a91023 100644 --- a/arrow-schema/src/datatype.rs +++ b/arrow-schema/src/datatype.rs @@ -16,6 +16,7 @@ // under the License. use std::fmt; +use std::sync::Arc; use crate::field::Field; @@ -131,7 +132,14 @@ pub enum DataType { /// empty to "Europe/Paris" would require converting the timestamp values /// from "Europe/Paris" to "UTC", which seems counter-intuitive but is /// nevertheless correct). - Timestamp(TimeUnit, Option), + /// + /// ``` + /// # use arrow_schema::{DataType, TimeUnit}; + /// DataType::Timestamp(TimeUnit::Second, None); + /// DataType::Timestamp(TimeUnit::Second, Some("literal".into())); + /// DataType::Timestamp(TimeUnit::Second, Some("string".to_string().into())); + /// ``` + Timestamp(TimeUnit, Option>), /// A 32-bit date representing the elapsed time since UNIX epoch (1970-01-01) /// in days (32 bits). Date32, @@ -476,7 +484,7 @@ impl DataType { | DataType::Decimal128(_, _) | DataType::Decimal256(_, _) => 0, DataType::Timestamp(_, s) => { - s.as_ref().map(|s| s.capacity()).unwrap_or_default() + s.as_ref().map(|s| s.len()).unwrap_or_default() } DataType::List(field) | DataType::FixedSizeList(field, _) diff --git a/arrow-schema/src/ffi.rs b/arrow-schema/src/ffi.rs index 058febbdd35c..e830f39052eb 100644 --- a/arrow-schema/src/ffi.rs +++ b/arrow-schema/src/ffi.rs @@ -36,6 +36,7 @@ use crate::{ArrowError, DataType, Field, Schema, TimeUnit, UnionMode}; use bitflags::bitflags; +use std::sync::Arc; use std::{ collections::HashMap, ffi::{c_char, c_void, CStr, CString}, @@ -514,16 +515,16 @@ impl TryFrom<&FFI_ArrowSchema> for DataType { ["tsu", ""] => DataType::Timestamp(TimeUnit::Microsecond, None), ["tsn", ""] => DataType::Timestamp(TimeUnit::Nanosecond, None), ["tss", tz] => { - DataType::Timestamp(TimeUnit::Second, Some(tz.to_string())) + DataType::Timestamp(TimeUnit::Second, Some(Arc::from(*tz))) } ["tsm", tz] => { - DataType::Timestamp(TimeUnit::Millisecond, Some(tz.to_string())) + DataType::Timestamp(TimeUnit::Millisecond, Some(Arc::from(*tz))) } ["tsu", tz] => { - DataType::Timestamp(TimeUnit::Microsecond, Some(tz.to_string())) + DataType::Timestamp(TimeUnit::Microsecond, Some(Arc::from(*tz))) } ["tsn", tz] => { - DataType::Timestamp(TimeUnit::Nanosecond, Some(tz.to_string())) + DataType::Timestamp(TimeUnit::Nanosecond, Some(Arc::from(*tz))) } _ => { return Err(ArrowError::CDataInterface(format!( diff --git a/arrow-select/src/take.rs b/arrow-select/src/take.rs index 316f78d62f43..76909587db76 100644 --- a/arrow-select/src/take.rs +++ b/arrow-select/src/take.rs @@ -1364,7 +1364,7 @@ mod tests { let result = take_impl(&input, &index, None).unwrap(); match result.data_type() { DataType::Timestamp(TimeUnit::Nanosecond, tz) => { - assert_eq!(tz.clone(), Some("UTC".to_owned())) + assert_eq!(tz.clone(), Some("UTC".into())) } _ => panic!(), } diff --git a/arrow/tests/array_cast.rs b/arrow/tests/array_cast.rs index 33695e2edeb6..8a6f996a9d86 100644 --- a/arrow/tests/array_cast.rs +++ b/arrow/tests/array_cast.rs @@ -350,7 +350,7 @@ fn create_decimal_array( // Get a selection of datatypes to try and cast to fn get_all_types() -> Vec { use DataType::*; - let tz_name = String::from("+08:00"); + let tz_name: Arc = Arc::from("+08:00"); let mut types = vec![ Null, diff --git a/arrow/tests/csv.rs b/arrow/tests/csv.rs index dbb399948302..3ee319101757 100644 --- a/arrow/tests/csv.rs +++ b/arrow/tests/csv.rs @@ -25,10 +25,7 @@ fn test_export_csv_timestamps() { let schema = Schema::new(vec![ Field::new( "c1", - DataType::Timestamp( - TimeUnit::Millisecond, - Some("Australia/Sydney".to_string()), - ), + DataType::Timestamp(TimeUnit::Millisecond, Some("Australia/Sydney".into())), true, ), Field::new("c2", DataType::Timestamp(TimeUnit::Millisecond, None), true), @@ -68,10 +65,7 @@ fn test_export_csv_timestamps_using_rfc3339() { let schema = Schema::new(vec![ Field::new( "c1", - DataType::Timestamp( - TimeUnit::Millisecond, - Some("Australia/Sydney".to_string()), - ), + DataType::Timestamp(TimeUnit::Millisecond, Some("Australia/Sydney".into())), true, ), Field::new("c2", DataType::Timestamp(TimeUnit::Millisecond, None), true), @@ -85,7 +79,7 @@ fn test_export_csv_timestamps_using_rfc3339() { // vec![Some(1555584887378), Some(1635577147000)], ) - .with_timezone("Australia/Sydney".to_string()); + .with_timezone("Australia/Sydney"); let c2 = TimestampMillisecondArray::from(vec![Some(1555584887378), Some(1635577147000)]); let batch = diff --git a/parquet/src/arrow/schema/mod.rs b/parquet/src/arrow/schema/mod.rs index a000a4656bf9..a63d859aaf7b 100644 --- a/parquet/src/arrow/schema/mod.rs +++ b/parquet/src/arrow/schema/mod.rs @@ -327,7 +327,7 @@ fn arrow_to_parquet_type(field: &Field) -> Result { Type::primitive_type_builder(name, PhysicalType::INT64) .with_logical_type(Some(LogicalType::Timestamp { // If timezone set, values are normalized to UTC timezone - is_adjusted_to_u_t_c: matches!(tz, Some(z) if !z.as_str().is_empty()), + is_adjusted_to_u_t_c: matches!(tz, Some(z) if !z.as_ref().is_empty()), unit: match time_unit { TimeUnit::Second => unreachable!(), TimeUnit::Millisecond => { @@ -507,7 +507,9 @@ fn arrow_to_parquet_type(field: &Field) -> Result { let dict_field = Field::new(name, *value.clone(), field.is_nullable()); arrow_to_parquet_type(&dict_field) } - DataType::RunEndEncoded(_, _) => Err(arrow_err!("Converting RunEndEncodedType to parquet not supported",)) + DataType::RunEndEncoded(_, _) => Err(arrow_err!( + "Converting RunEndEncodedType to parquet not supported", + )), } } @@ -641,7 +643,7 @@ mod tests { ProjectionMask::all(), None, ) - .unwrap(); + .unwrap(); assert_eq!(&arrow_fields, converted_arrow_schema.fields()); } @@ -1317,7 +1319,7 @@ mod tests { ), Field::new( "ts_nano", - DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".to_string())), + DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".into())), false, ), Field::new( @@ -1343,20 +1345,9 @@ mod tests { ))), false, ), - Field::new( - "decimal_int32", - DataType::Decimal128(8, 2), - false, - ), - Field::new( - "decimal_int64", - DataType::Decimal128(16, 2), - false, - ), - Field::new( - "decimal_fix_length", - DataType::Decimal128(30, 2), - false, ), + Field::new("decimal_int32", DataType::Decimal128(8, 2), false), + Field::new("decimal_int64", DataType::Decimal128(16, 2), false), + Field::new("decimal_fix_length", DataType::Decimal128(30, 2), false), ]; assert_eq!(arrow_fields, converted_arrow_fields); @@ -1447,27 +1438,27 @@ mod tests { ), Field::new( "ts_seconds", - DataType::Timestamp(TimeUnit::Second, Some("UTC".to_string())), + DataType::Timestamp(TimeUnit::Second, Some("UTC".into())), false, ), Field::new( "ts_micro_utc", - DataType::Timestamp(TimeUnit::Microsecond, Some("UTC".to_string())), + DataType::Timestamp(TimeUnit::Microsecond, Some("UTC".into())), false, ), Field::new( "ts_millis_zero_offset", - DataType::Timestamp(TimeUnit::Millisecond, Some("+00:00".to_string())), + DataType::Timestamp(TimeUnit::Millisecond, Some("+00:00".into())), false, ), Field::new( "ts_millis_zero_negative_offset", - DataType::Timestamp(TimeUnit::Millisecond, Some("-00:00".to_string())), + DataType::Timestamp(TimeUnit::Millisecond, Some("-00:00".into())), false, ), Field::new( "ts_micro_non_utc", - DataType::Timestamp(TimeUnit::Microsecond, Some("+01:00".to_string())), + DataType::Timestamp(TimeUnit::Microsecond, Some("+01:00".into())), false, ), Field::new( @@ -1492,18 +1483,9 @@ mod tests { DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)), false, ), - Field::new( - "decimal_int32", - DataType::Decimal128(8, 2), - false), - Field::new("decimal_int64", - DataType::Decimal128(16, 2), - false), - Field::new( - "decimal_fix_length", - DataType::Decimal128(30, 2), - false, - ), + Field::new("decimal_int32", DataType::Decimal128(8, 2), false), + Field::new("decimal_int64", DataType::Decimal128(16, 2), false), + Field::new("decimal_fix_length", DataType::Decimal128(30, 2), false), ]; let arrow_schema = Schema::new(arrow_fields); let converted_arrow_schema = arrow_to_parquet_schema(&arrow_schema).unwrap(); @@ -1594,14 +1576,14 @@ mod tests { Field::new("c15", DataType::Timestamp(TimeUnit::Second, None), false), Field::new( "c16", - DataType::Timestamp(TimeUnit::Millisecond, Some("UTC".to_string())), + DataType::Timestamp(TimeUnit::Millisecond, Some("UTC".into())), false, ), Field::new( "c17", DataType::Timestamp( TimeUnit::Microsecond, - Some("Africa/Johannesburg".to_string()), + Some("Africa/Johannesburg".into()), ), false, ), diff --git a/parquet/src/arrow/schema/primitive.rs b/parquet/src/arrow/schema/primitive.rs index e95db2b033e5..6565f7eaeefb 100644 --- a/parquet/src/arrow/schema/primitive.rs +++ b/parquet/src/arrow/schema/primitive.rs @@ -198,7 +198,7 @@ fn from_int64(info: &BasicTypeInfo, scale: i32, precision: i32) -> Result TimeUnit::Nanosecond, }, if is_adjusted_to_u_t_c { - Some("UTC".to_string()) + Some("UTC".into()) } else { None },