From cbdbba54a7a7131a0ddf214cc4656cc4e4691fd6 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Thu, 25 Feb 2021 19:30:21 +0100 Subject: [PATCH] ARROW-11662: [C++] Support sorting decimal and fixed size binary data Also enable nth_to_indices on decimal and fixed size binary data. --- c_glib/test/test-decimal128-data-type.rb | 4 +- .../arrow/compute/kernels/codegen_internal.cc | 9 + .../arrow/compute/kernels/codegen_internal.h | 32 ++ cpp/src/arrow/compute/kernels/vector_sort.cc | 79 ++-- .../arrow/compute/kernels/vector_sort_test.cc | 385 ++++++++++++------ cpp/src/arrow/testing/gtest_util.h | 2 + cpp/src/arrow/testing/random.cc | 42 +- cpp/src/arrow/testing/random.h | 11 + cpp/src/arrow/type.cc | 2 +- cpp/src/arrow/type.h | 4 +- cpp/src/arrow/type_test.cc | 6 +- docs/source/cpp/compute.rst | 3 +- .../test/test-decimal128-data-type.rb | 4 +- 13 files changed, 432 insertions(+), 151 deletions(-) diff --git a/c_glib/test/test-decimal128-data-type.rb b/c_glib/test/test-decimal128-data-type.rb index a02e3badca0..b27e1cad1ea 100644 --- a/c_glib/test/test-decimal128-data-type.rb +++ b/c_glib/test/test-decimal128-data-type.rb @@ -23,12 +23,12 @@ def test_type def test_name data_type = Arrow::Decimal128DataType.new(2, 0) - assert_equal("decimal", data_type.name) + assert_equal("decimal128", data_type.name) end def test_to_s data_type = Arrow::Decimal128DataType.new(2, 0) - assert_equal("decimal(2, 0)", data_type.to_s) + assert_equal("decimal128(2, 0)", data_type.to_s) end def test_precision diff --git a/cpp/src/arrow/compute/kernels/codegen_internal.cc b/cpp/src/arrow/compute/kernels/codegen_internal.cc index b321ff3fc8b..ad43b7a3aa9 100644 --- a/cpp/src/arrow/compute/kernels/codegen_internal.cc +++ b/cpp/src/arrow/compute/kernels/codegen_internal.cc @@ -48,6 +48,7 @@ std::vector> g_numeric_types; std::vector> g_base_binary_types; std::vector> g_temporal_types; std::vector> g_primitive_types; +std::vector g_decimal_type_ids; static std::once_flag codegen_static_initialized; template @@ -71,6 +72,9 @@ static void InitStaticData() { // Floating point types g_floating_types = {float32(), float64()}; + // Decimal types + g_decimal_type_ids = {Type::DECIMAL128, Type::DECIMAL256}; + // Numeric types Extend(g_int_types, &g_numeric_types); Extend(g_floating_types, &g_numeric_types); @@ -132,6 +136,11 @@ const std::vector>& FloatingPointTypes() { return g_floating_types; } +const std::vector& DecimalTypeIds() { + std::call_once(codegen_static_initialized, InitStaticData); + return g_decimal_type_ids; +} + const std::vector& AllTimeUnits() { static std::vector units = {TimeUnit::SECOND, TimeUnit::MILLI, TimeUnit::MICRO, TimeUnit::NANO}; diff --git a/cpp/src/arrow/compute/kernels/codegen_internal.h b/cpp/src/arrow/compute/kernels/codegen_internal.h index 8c49e796623..9e2ed82a426 100644 --- a/cpp/src/arrow/compute/kernels/codegen_internal.h +++ b/cpp/src/arrow/compute/kernels/codegen_internal.h @@ -188,6 +188,16 @@ struct GetViewType { } }; +template <> +struct GetViewType { + using T = Decimal256; + using PhysicalType = util::string_view; + + static T LogicalValue(PhysicalType value) { + return Decimal256(reinterpret_cast(value.data())); + } +}; + template struct GetOutputType; @@ -206,6 +216,11 @@ struct GetOutputType { using T = Decimal128; }; +template <> +struct GetOutputType { + using T = Decimal256; +}; + // ---------------------------------------------------------------------- // Iteration / value access utilities @@ -396,6 +411,7 @@ const std::vector>& SignedIntTypes(); const std::vector>& UnsignedIntTypes(); const std::vector>& IntTypes(); const std::vector>& FloatingPointTypes(); +const std::vector& DecimalTypeIds(); ARROW_EXPORT const std::vector& AllTimeUnits(); @@ -1185,6 +1201,22 @@ ArrayKernelExec GenerateTemporal(detail::GetTypeId get_id) { } } +// Generate a kernel given a templated functor for decimal types +// +// See "Numeric" above for description of the generator functor +template