From a66cbdfcdfd82d76f30ccf8cc67cc0e7af8ad5e1 Mon Sep 17 00:00:00 2001
From: Wes McKinney
Date: Sat, 27 Jun 2020 17:08:56 -0500
Subject: [PATCH] Expose total_values_length functions on BinaryArray, LargeBinaryArray in Python

---
 python/pyarrow/array.pxi             | 14 ++++++++++++++
 python/pyarrow/includes/libarrow.pxd |  2 ++
 python/pyarrow/tests/test_array.py   | 12 ++++++++++++
 3 files changed, 28 insertions(+)

diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index 9ad841560df..f5478f93944 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -1789,12 +1789,26 @@ cdef class BinaryArray(Array):
     """
     Concrete class for Arrow arrays of variable-sized binary data type.
     """
+    @property
+    def total_values_length(self):
+        """
+        The number of bytes from beginning to end of the data buffer addressed
+        by the offsets of this BinaryArray.
+        """
+        return (<CBinaryArray*> self.ap).total_values_length()
 
 
 cdef class LargeBinaryArray(Array):
     """
     Concrete class for Arrow arrays of large variable-sized binary data type.
     """
+    @property
+    def total_values_length(self):
+        """
+        The number of bytes from beginning to end of the data buffer addressed
+        by the offsets of this LargeBinaryArray.
+        """
+        return (<CLargeBinaryArray*> self.ap).total_values_length()
 
 
 cdef class DictionaryArray(Array):
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index 7f416cbe7fd..93c43ca99b4 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -593,12 +593,14 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
         shared_ptr[CBuffer] value_data()
         int32_t value_offset(int64_t i)
         int32_t value_length(int64_t i)
+        int32_t total_values_length()
 
     cdef cppclass CLargeBinaryArray" arrow::LargeBinaryArray"(CArray):
         const uint8_t* GetValue(int i, int64_t* length)
         shared_ptr[CBuffer] value_data()
         int64_t value_offset(int64_t i)
         int64_t value_length(int64_t i)
+        int64_t total_values_length()
 
     cdef cppclass CStringArray" arrow::StringArray"(CBinaryArray):
         CStringArray(int64_t length, shared_ptr[CBuffer] value_offsets,
diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py
index 5d806c9956e..c7b4116257d 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -110,6 +110,18 @@ def test_binary_format():
     assert result == expected
 
 
+def test_binary_total_values_length():
+    arr = pa.array([b'0000', None, b'11111', b'222222', b'3333333'],
+                   type='binary')
+    large_arr = pa.array([b'0000', None, b'11111', b'222222', b'3333333'],
+                         type='large_binary')
+
+    assert arr.total_values_length == 22
+    assert arr.slice(1, 3).total_values_length == 11
+    assert large_arr.total_values_length == 22
+    assert large_arr.slice(1, 3).total_values_length == 11
+
+
 def test_to_numpy_zero_copy():
     arr = pa.array(range(10))
 