|
19 | 19 | //! |
20 | 20 | //! This file essentially exists to ease the transition onto arrow2 |
21 | 21 |
|
22 | | -use arrow::array::{ArrayData, PrimitiveArray}; |
23 | | -use arrow::buffer::Buffer; |
24 | | -use arrow::datatypes::ArrowPrimitiveType; |
| 22 | +use arrow::array::{ |
| 23 | + ArrayData, BinaryOffsetSizeTrait, BooleanArray, GenericBinaryArray, |
| 24 | + GenericStringArray, PrimitiveArray, StringOffsetSizeTrait, |
| 25 | +}; |
| 26 | +use arrow::buffer::{Buffer, MutableBuffer}; |
| 27 | +use arrow::datatypes::{ArrowPrimitiveType, DataType}; |
| 28 | +use arrow::util::bit_util; |
25 | 29 |
|
26 | 30 | /// A trait to define `from_slice` constructors for arrow array types (implemented in this file for primitive, binary, utf8 and boolean arrays) |
27 | | -pub trait FromSlice<T>
| 31 | +pub trait FromSlice<S, E>
28 | 32 | where
29 | | - T: ArrowPrimitiveType,
| 33 | + S: AsRef<[E]>, // `S` is any value that can be borrowed as a slice of elements `E`
30 | 34 | {
31 | 35 | /// convert a slice-like value into an array; the implementations in this file attach no null bitmap
32 | | - fn from_slice(slice: &[T::Native]) -> PrimitiveArray<T>;
| 36 | + fn from_slice(slice: S) -> Self;
33 | 37 | }
34 | 38 |
|
35 | | -/// default implementation for primitive types |
36 | | -// #[cfg(test)]
37 | | -impl<T: ArrowPrimitiveType> FromSlice<T> for PrimitiveArray<T> {
38 | | - fn from_slice(slice: &[T::Native]) -> PrimitiveArray<T> {
| 39 | +/// default implementation for primitive array types, adapted from `From<Vec<_>>`; builds an array of `slice.len()` values of `T::DATA_TYPE` backed by a single buffer and no null bitmap
| 40 | +impl<S, T> FromSlice<S, T::Native> for PrimitiveArray<T>
| 41 | +where
| 42 | + T: ArrowPrimitiveType,
| 43 | + S: AsRef<[T::Native]>,
| 44 | +{
| 45 | + fn from_slice(slice: S) -> Self {
| 46 | + let slice = slice.as_ref(); // borrow the input as `&[T::Native]`
39 | 47 | let array_data = ArrayData::builder(T::DATA_TYPE)
40 | 48 | .len(slice.len())
41 | 49 | .add_buffer(Buffer::from_slice_ref(&slice)); // the only buffer: the native values taken from the slice
42 | 50 | let array_data = unsafe { array_data.build_unchecked() }; // SAFETY: validation is skipped; `len` and the buffer both derive from the same slice -- NOTE(review): confirm this satisfies `build_unchecked`'s contract
43 | | - PrimitiveArray::<T>::from(array_data)
| 51 | + Self::from(array_data)
| 52 | + }
| 53 | +}
| 54 | + |
| 55 | +/// default implementation for binary array types, adapted from `From<Vec<_>>`; concatenates every item's bytes into one values buffer and records the cumulative byte length after each item in an offsets buffer (no null bitmap is attached)
| 56 | +impl<S, I, OffsetSize> FromSlice<S, I> for GenericBinaryArray<OffsetSize>
| 57 | +where
| 58 | + OffsetSize: BinaryOffsetSizeTrait,
| 59 | + S: AsRef<[I]>,
| 60 | + I: AsRef<[u8]>,
| 61 | +{
| 62 | + fn from_slice(slice: S) -> Self {
| 63 | + let slice = slice.as_ref();
| 64 | + let mut offsets = Vec::with_capacity(slice.len() + 1); // one offset per item plus the leading zero
| 65 | + let mut values = Vec::new(); // all item bytes, back to back
| 66 | + let mut length_so_far: OffsetSize = OffsetSize::zero();
| 67 | + offsets.push(length_so_far); // first offset is always zero
| 68 | + for s in slice {
| 69 | + let s = s.as_ref();
| 70 | + length_so_far += OffsetSize::from_usize(s.len()).unwrap(); // panics if the item length cannot be converted to `OffsetSize`
| 71 | + offsets.push(length_so_far); // end offset of this item == start offset of the next
| 72 | + values.extend_from_slice(s);
| 73 | + }
| 74 | + let array_data = ArrayData::builder(OffsetSize::DATA_TYPE)
| 75 | + .len(slice.len())
| 76 | + .add_buffer(Buffer::from_slice_ref(&offsets)) // buffer 0: offsets
| 77 | + .add_buffer(Buffer::from_slice_ref(&values)); // buffer 1: concatenated bytes
| 78 | + let array_data = unsafe { array_data.build_unchecked() }; // SAFETY: validation is skipped; offsets start at zero, never decrease and end at `values.len()` unless the `+=` above overflowed -- NOTE(review): confirm against `build_unchecked`'s contract
| 79 | + Self::from(array_data)
| 80 | + }
| 81 | +}
| 82 | + |
| 83 | +/// default implementation for utf8 array types, adapted from `From<Vec<_>>`; concatenates every string's bytes into one values buffer and records the cumulative byte length after each string in an offsets buffer (no null bitmap is attached)
| 84 | +impl<S, I, OffsetSize> FromSlice<S, I> for GenericStringArray<OffsetSize>
| 85 | +where
| 86 | + OffsetSize: StringOffsetSizeTrait,
| 87 | + S: AsRef<[I]>,
| 88 | + I: AsRef<str>,
| 89 | +{
| 90 | + fn from_slice(slice: S) -> Self {
| 91 | + let slice = slice.as_ref();
| 92 | + let mut offsets =
| 93 | + MutableBuffer::new((slice.len() + 1) * std::mem::size_of::<OffsetSize>()); // room for `len + 1` offsets -- presumably a byte capacity, confirm against `MutableBuffer::new`
| 94 | + let mut values = MutableBuffer::new(0); // grows as string bytes are appended
| 95 | +
| 96 | + let mut length_so_far = OffsetSize::zero();
| 97 | + offsets.push(length_so_far); // first offset is always zero
| 98 | +
| 99 | + for s in slice {
| 100 | + let s = s.as_ref();
| 101 | + length_so_far += OffsetSize::from_usize(s.len()).unwrap(); // `s.len()` is the byte length; panics if it cannot be converted to `OffsetSize`
| 102 | + offsets.push(length_so_far); // end offset of this string == start offset of the next
| 103 | + values.extend_from_slice(s.as_bytes());
| 104 | + }
| 105 | + let array_data = ArrayData::builder(OffsetSize::DATA_TYPE)
| 106 | + .len(slice.len())
| 107 | + .add_buffer(offsets.into()) // buffer 0: offsets
| 108 | + .add_buffer(values.into()); // buffer 1: concatenated string bytes
| 109 | + let array_data = unsafe { array_data.build_unchecked() }; // SAFETY: validation is skipped; the bytes come from `&str` values and the offsets are their cumulative lengths -- NOTE(review): confirm against `build_unchecked`'s contract
| 110 | + Self::from(array_data)
| 111 | + }
| 112 | +}
| 113 | + |
| 114 | +/// default implementation for boolean array type, adapted from `From<Vec<bool>>`; packs the bools into a bitmap where bit `i` is set when `slice[i]` is true (no null bitmap is attached)
| 115 | +impl<S> FromSlice<S, bool> for BooleanArray
| 116 | +where
| 117 | + S: AsRef<[bool]>,
| 118 | +{
| 119 | + fn from_slice(slice: S) -> Self {
| 120 | + let slice = slice.as_ref();
| 121 | + let mut mut_buf = MutableBuffer::new_null(slice.len()); // presumably a zero-filled bitmap with room for `slice.len()` bits -- confirm against `MutableBuffer::new_null`
| 122 | + {
| 123 | + let mut_slice = mut_buf.as_slice_mut();
| 124 | + for (i, b) in slice.iter().enumerate() {
| 125 | + if *b {
| 126 | + bit_util::set_bit(mut_slice, i); // only `true` entries touch the bitmap
| 127 | + }
| 128 | + }
| 129 | + }
| 130 | + let array_data = ArrayData::builder(DataType::Boolean)
| 131 | + .len(slice.len())
| 132 | + .add_buffer(mut_buf.into()); // the only buffer: the packed value bits
| 133 | +
| 134 | + let array_data = unsafe { array_data.build_unchecked() }; // SAFETY: validation is skipped; `len` and the bitmap size both derive from `slice.len()` -- NOTE(review): confirm against `build_unchecked`'s contract
| 135 | + Self::from(array_data)
44 | 136 | }
45 | 137 | }
0 commit comments