Skip to content

Commit 2973fcc

Browse files
committed
Address code review comments
1 parent e7e624b commit 2973fcc

2 files changed

Lines changed: 59 additions & 59 deletions

File tree

cpp/src/arrow/compute/kernels/hash-test.cc

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@
4343
#include "arrow/compute/kernels/util-internal.h"
4444
#include "arrow/compute/test-util.h"
4545

46+
#include "arrow/ipc/json-simple.h"
47+
4648
using std::shared_ptr;
4749
using std::vector;
4850

@@ -72,12 +74,13 @@ void CheckValueCountsNull(FunctionContext* ctx, const shared_ptr<DataType>& type
7274
input.value =
7375
ArrayData::Make(type, 0 /* length */, std::move(data_buffers), 0 /* null_count */);
7476

75-
shared_ptr<Array> ex_values = _MakeArray<Type, T>(type, {}, {});
76-
shared_ptr<Array> ex_counts = _MakeArray<Int64Type, int64_t>(int64(), {}, {});
77+
shared_ptr<Array> ex_values = ArrayFromJSON(type, "[]");
78+
shared_ptr<Array> ex_counts = ArrayFromJSON(int64(), "[]");
7779

7880
shared_ptr<Array> result;
7981
ASSERT_OK(ValueCounts(ctx, input, &result));
8082
auto result_struct = std::dynamic_pointer_cast<StructArray>(result);
83+
ASSERT_NE(result_struct->GetFieldByName(kValuesFieldName), nullptr);
8184
// TODO: We probably shouldn't rely on value ordering.
8285
ASSERT_ARRAYS_EQUAL(*ex_values, *result_struct->GetFieldByName(kValuesFieldName));
8386
ASSERT_ARRAYS_EQUAL(*ex_counts, *result_struct->GetFieldByName(kCountsFieldName));
@@ -94,7 +97,7 @@ void CheckValueCounts(FunctionContext* ctx, const shared_ptr<DataType>& type,
9497
_MakeArray<Int64Type, int64_t>(int64(), out_counts, out_is_valid);
9598

9699
shared_ptr<Array> result;
97-
ASSERT_OK(ValueCounts(ctx, Datum(input), &result));
100+
ASSERT_OK(ValueCounts(ctx, input, &result));
98101
auto result_struct = std::dynamic_pointer_cast<StructArray>(result);
99102
// TODO: We probably shouldn't rely on value ordering.
100103
ASSERT_ARRAYS_EQUAL(*ex_values, *result_struct->field(kValuesFieldIndex));
@@ -453,7 +456,7 @@ TEST_F(TestHashKernel, ChunkedArrayInvoke) {
453456

454457
// Unique counts
455458
shared_ptr<Array> counts_array;
456-
ASSERT_OK(ValueCounts(&this->ctx_, Datum(carr), &counts_array));
459+
ASSERT_OK(ValueCounts(&this->ctx_, carr, &counts_array));
457460
auto counts_struct = std::dynamic_pointer_cast<StructArray>(counts_array);
458461
ASSERT_ARRAYS_EQUAL(*ex_dict, *counts_struct->field(0));
459462
ASSERT_ARRAYS_EQUAL(*ex_counts, *counts_struct->field(1));

cpp/src/arrow/compute/kernels/hash.cc

Lines changed: 52 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -82,17 +82,13 @@ class UniqueAction : public ActionBase {
8282

8383
Status Reserve(const int64_t length) { return Status::OK(); }
8484

85-
Status ObserveNull() { return Status::OK(); }
85+
void ObserveNull() {}
8686

8787
template <class Index>
88-
Status ObserveFound(Index index) {
89-
return Status::OK();
90-
}
88+
void ObserveFound(Index index) {}
9189

9290
template <class Index>
93-
Status ObserveNotFound(Index index) {
94-
return Status::OK();
95-
}
91+
void ObserveNotFound(Index index, Status* err_status) {}
9692

9793
Status Flush(Datum* out) { return Status::OK(); }
9894

@@ -115,12 +111,13 @@ class ValueCountsAction : ActionBase {
115111
// builder size is independent of input array size.
116112
return Status::OK();
117113
}
114+
118115
Status Reset() {
119116
count_builder_.Reset();
120117
return Status::OK();
121118
}
122119

123-
// Don't do anything on flush beceause we don't want to finalize the builder
120+
// Don't do anything on flush because we don't want to finalize the builder
124121
// or incur the cost of memory copies.
125122
Status Flush(Datum* out) { return Status::OK(); }
126123

@@ -134,17 +131,19 @@ class ValueCountsAction : ActionBase {
134131
return Status::OK();
135132
}
136133

137-
Status ObserveNull() { return Status::OK(); }
134+
void ObserveNull() {}
138135

139136
template <class Index>
140-
Status ObserveFound(Index slot) {
137+
void ObserveFound(Index slot) {
141138
count_builder_[slot]++;
142-
return Status::OK();
143139
}
144140

145141
template <class Index>
146-
Status ObserveNotFound(Index slot) {
147-
return count_builder_.Append(1);
142+
void ObserveNotFound(Index slot, Status* status) {
143+
Status s = count_builder_.Append(1);
144+
if (ARROW_PREDICT_FALSE(!s.ok())) {
145+
*status = s;
146+
}
148147
}
149148

150149
private:
@@ -166,20 +165,16 @@ class DictEncodeAction : public ActionBase {
166165

167166
Status Reserve(const int64_t length) { return indices_builder_.Reserve(length); }
168167

169-
Status ObserveNull() {
170-
indices_builder_.UnsafeAppendNull();
171-
return Status::OK();
172-
}
168+
void ObserveNull() { indices_builder_.UnsafeAppendNull(); }
173169

174170
template <class Index>
175-
Status ObserveFound(Index index) {
171+
void ObserveFound(Index index) {
176172
indices_builder_.UnsafeAppend(index);
177-
return Status::OK();
178173
}
179174

180175
template <class Index>
181-
Status ObserveNotFound(Index index) {
182-
return ObserveFound(index);
176+
void ObserveNotFound(Index index, Status* status) {
177+
ObserveFound(index);
183178
}
184179

185180
Status Flush(Datum* out) {
@@ -268,15 +263,17 @@ class RegularHashKernelImpl : public HashKernelImpl {
268263
0 /* start_offset */, out);
269264
}
270265

271-
Status VisitNull() { return action_.ObserveNull(); }
266+
Status VisitNull() {
267+
action_.ObserveNull();
268+
return Status::Status::OK();
269+
}
272270

273271
Status VisitValue(const Scalar& value) {
272+
auto on_found = [this](int32_t memo_index) { action_.ObserveFound(memo_index); };
273+
274274
Status status;
275-
auto on_found = [this, &status](int32_t memo_index) {
276-
status = action_.ObserveFound(memo_index);
277-
};
278275
auto on_not_found = [this, &status](int32_t memo_index) {
279-
status = action_.ObserveNotFound(memo_index);
276+
action_.ObserveNotFound(memo_index, &status);
280277
};
281278
memo_table_->GetOrInsert(value, on_found, on_not_found);
282279
return status;
@@ -307,7 +304,7 @@ class NullHashKernelImpl : public HashKernelImpl {
307304
Status Append(const ArrayData& arr) override {
308305
RETURN_NOT_OK(action_.Reserve(arr.length));
309306
for (int64_t i = 0; i < arr.length; ++i) {
310-
RETURN_NOT_OK(action_.ObserveNull());
307+
action_.ObserveNull();
311308
}
312309
return Status::OK();
313310
}
@@ -363,27 +360,27 @@ struct HashKernelTraits<Type, Action, enable_if_fixed_size_binary<Type>> {
363360

364361
} // namespace
365362

366-
#define PROCESS_SUPPORTED_HASH_TYPES \
367-
PROCESS(NullType) \
368-
PROCESS(BooleanType) \
369-
PROCESS(UInt8Type) \
370-
PROCESS(Int8Type) \
371-
PROCESS(UInt16Type) \
372-
PROCESS(Int16Type) \
373-
PROCESS(UInt32Type) \
374-
PROCESS(Int32Type) \
375-
PROCESS(UInt64Type) \
376-
PROCESS(Int64Type) \
377-
PROCESS(FloatType) \
378-
PROCESS(DoubleType) \
379-
PROCESS(Date32Type) \
380-
PROCESS(Date64Type) \
381-
PROCESS(Time32Type) \
382-
PROCESS(Time64Type) \
383-
PROCESS(TimestampType) \
384-
PROCESS(BinaryType) \
385-
PROCESS(StringType) \
386-
PROCESS(FixedSizeBinaryType) \
363+
#define PROCESS_SUPPORTED_HASH_TYPES(PROCESS) \
364+
PROCESS(NullType) \
365+
PROCESS(BooleanType) \
366+
PROCESS(UInt8Type) \
367+
PROCESS(Int8Type) \
368+
PROCESS(UInt16Type) \
369+
PROCESS(Int16Type) \
370+
PROCESS(UInt32Type) \
371+
PROCESS(Int32Type) \
372+
PROCESS(UInt64Type) \
373+
PROCESS(Int64Type) \
374+
PROCESS(FloatType) \
375+
PROCESS(DoubleType) \
376+
PROCESS(Date32Type) \
377+
PROCESS(Date64Type) \
378+
PROCESS(Time32Type) \
379+
PROCESS(Time64Type) \
380+
PROCESS(TimestampType) \
381+
PROCESS(BinaryType) \
382+
PROCESS(StringType) \
383+
PROCESS(FixedSizeBinaryType) \
387384
PROCESS(Decimal128Type)
388385

389386
Status GetUniqueKernel(FunctionContext* ctx, const std::shared_ptr<DataType>& type,
@@ -396,7 +393,7 @@ Status GetUniqueKernel(FunctionContext* ctx, const std::shared_ptr<DataType>& ty
396393
type, ctx->memory_pool())); \
397394
break;
398395

399-
PROCESS_SUPPORTED_HASH_TYPES
396+
PROCESS_SUPPORTED_HASH_TYPES(PROCESS)
400397
#undef PROCESS
401398
default:
402399
break;
@@ -421,7 +418,7 @@ Status GetDictionaryEncodeKernel(FunctionContext* ctx,
421418
type, ctx->memory_pool())); \
422419
break;
423420

424-
PROCESS_SUPPORTED_HASH_TYPES
421+
PROCESS_SUPPORTED_HASH_TYPES(PROCESS)
425422
#undef PROCESS
426423
default:
427424
break;
@@ -447,7 +444,7 @@ Status GetValueCountsKernel(FunctionContext* ctx, const std::shared_ptr<DataType
447444
type, ctx->memory_pool())); \
448445
break;
449446

450-
PROCESS_SUPPORTED_HASH_TYPES
447+
PROCESS_SUPPORTED_HASH_TYPES(PROCESS)
451448
#undef PROCESS
452449
default:
453450
break;
@@ -506,8 +503,8 @@ Status DictionaryEncode(FunctionContext* ctx, const Datum& value, Datum* out) {
506503
return Status::OK();
507504
}
508505

509-
const char kValuesFieldName[] = "Values";
510-
const char kCountsFieldName[] = "Counts";
506+
const char kValuesFieldName[] = "values";
507+
const char kCountsFieldName[] = "counts";
511508
const int32_t kValuesFieldIndex = 0;
512509
const int32_t kCountsFieldIndex = 1;
513510
Status ValueCounts(FunctionContext* ctx, const Datum& value,
@@ -524,8 +521,8 @@ Status ValueCounts(FunctionContext* ctx, const Datum& value,
524521
RETURN_NOT_OK(func->FlushFinal(&value_counts));
525522

526523
auto data_type = std::make_shared<StructType>(std::vector<std::shared_ptr<Field>>{
527-
std::make_shared<Field>("Values", uniques->type()),
528-
std::make_shared<Field>("Counts", int64())});
524+
std::make_shared<Field>(kValuesFieldName, uniques->type()),
525+
std::make_shared<Field>(kCountsFieldName, int64())});
529526
*counts = std::make_shared<StructArray>(
530527
data_type, uniques->length(),
531528
std::vector<std::shared_ptr<Array>>{uniques, MakeArray(value_counts.array())});

0 commit comments

Comments
 (0)