diff --git a/cpp/src/arrow/compute/row/compare_internal_avx2.cc b/cpp/src/arrow/compute/row/compare_internal_avx2.cc index ec511aa03a6..23238a3691c 100644 --- a/cpp/src/arrow/compute/row/compare_internal_avx2.cc +++ b/cpp/src/arrow/compute/row/compare_internal_avx2.cc @@ -180,6 +180,40 @@ uint32_t KeyCompare::NullUpdateColumnToRowImp_avx2( } } +namespace { + +// Intrinsics `_mm256_i32gather_epi32/64` treat the `vindex` as signed integer, and we +// are using `uint32_t` to represent the offset, in range of [0, 4G), within the row +// table. When the offset is larger than `0x80000000` (2GB), those intrinsics will treat +// it as negative offset and gather the data from undesired address. To avoid this issue, +// we normalize the addresses by translating `base` `0x80000000` higher, and `offset` +// `0x80000000` lower. This way, the offset is always in range of [-2G, 2G) and those +// intrinsics are safe. + +constexpr uint64_t kTwoGB = 0x80000000ull; + +template +inline __m256i UnsignedOffsetSafeGather32(int const* base, __m256i offset) { + int const* normalized_base = base + kTwoGB / sizeof(int); + __m256i normalized_offset = + _mm256_sub_epi32(offset, _mm256_set1_epi32(static_cast(kTwoGB / kScale))); + return _mm256_i32gather_epi32(normalized_base, normalized_offset, + static_cast(kScale)); +} + +template +inline __m256i UnsignedOffsetSafeGather64(arrow::util::int64_for_gather_t const* base, + __m128i offset) { + arrow::util::int64_for_gather_t const* normalized_base = + base + kTwoGB / sizeof(arrow::util::int64_for_gather_t); + __m128i normalized_offset = + _mm_sub_epi32(offset, _mm_set1_epi32(static_cast(kTwoGB / kScale))); + return _mm256_i32gather_epi64(normalized_base, normalized_offset, + static_cast(kScale)); +} + +} // namespace + template uint32_t KeyCompare::CompareBinaryColumnToRowHelper_avx2( uint32_t offset_within_row, uint32_t num_rows_to_compare, @@ -236,10 +270,8 @@ uint32_t KeyCompare::CompareBinaryColumnToRowHelper_avx2( irow_right = _mm256_loadu_si256(reinterpret_cast(left_to_right_map) + i); } - // TODO: Need to test if this gather is OK when irow_right is larger than - // 0x80000000u. __m256i offset_right = - _mm256_i32gather_epi32((const int*)offsets_right, irow_right, 4); + UnsignedOffsetSafeGather32<4>((int const*)offsets_right, irow_right); offset_right = _mm256_add_epi32(offset_right, _mm256_set1_epi32(offset_within_row)); reinterpret_cast(match_bytevector)[i] = @@ -253,40 +285,6 @@ uint32_t KeyCompare::CompareBinaryColumnToRowHelper_avx2( } } -namespace { - -// Intrinsics `_mm256_i32gather_epi32/64` treat the `vindex` as signed integer, and we -// are using `uint32_t` to represent the offset, in range of [0, 4G), within the row -// table. When the offset is larger than `0x80000000` (2GB), those intrinsics will treat -// it as negative offset and gather the data from undesired address. To avoid this issue, -// we normalize the addresses by translating `base` `0x80000000` higher, and `offset` -// `0x80000000` lower. This way, the offset is always in range of [-2G, 2G) and those -// intrinsics are safe. - -constexpr uint64_t kTwoGB = 0x80000000ull; - -template -inline __m256i UnsignedOffsetSafeGather32(int const* base, __m256i offset) { - int const* normalized_base = base + kTwoGB / sizeof(int); - __m256i normalized_offset = - _mm256_sub_epi32(offset, _mm256_set1_epi32(static_cast(kTwoGB / kScale))); - return _mm256_i32gather_epi32(normalized_base, normalized_offset, - static_cast(kScale)); -} - -template -inline __m256i UnsignedOffsetSafeGather64(arrow::util::int64_for_gather_t const* base, - __m128i offset) { - arrow::util::int64_for_gather_t const* normalized_base = - base + kTwoGB / sizeof(arrow::util::int64_for_gather_t); - __m128i normalized_offset = - _mm_sub_epi32(offset, _mm_set1_epi32(static_cast(kTwoGB / kScale))); - return _mm256_i32gather_epi64(normalized_base, normalized_offset, - static_cast(kScale)); -} - -} // namespace - template inline uint64_t CompareSelected8_avx2(const uint8_t* left_base, const uint8_t* right_base, __m256i irow_left, __m256i offset_right, diff --git a/cpp/src/arrow/compute/row/row_internal.cc b/cpp/src/arrow/compute/row/row_internal.cc index 469205e9b00..97697afea93 100644 --- a/cpp/src/arrow/compute/row/row_internal.cc +++ b/cpp/src/arrow/compute/row/row_internal.cc @@ -325,6 +325,7 @@ Status RowTableImpl::AppendSelectionFrom(const RowTableImpl& from, // Varying-length rows auto from_offsets = reinterpret_cast(from.offsets_->data()); auto to_offsets = reinterpret_cast(offsets_->mutable_data()); + // TODO(GH-43202): The following two variables are possibly overflowing. uint32_t total_length = to_offsets[num_rows_]; uint32_t total_length_to_append = 0; for (uint32_t i = 0; i < num_rows_to_append; ++i) {