From f1ace777b56cba104026b59dad0d1616e8caab0f Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Fri, 28 Feb 2025 17:16:10 -0800 Subject: [PATCH 1/3] Update [ghstack-poisoned] --- kernels/portable/cpu/op_argmax.cpp | 5 ++++- kernels/portable/cpu/op_argmin.cpp | 12 +++++++++++- kernels/test/op_argmax_test.cpp | 13 +++++++++++++ kernels/test/op_argmin_test.cpp | 13 +++++++++++++ 4 files changed, 41 insertions(+), 2 deletions(-) diff --git a/kernels/portable/cpu/op_argmax.cpp b/kernels/portable/cpu/op_argmax.cpp index 5eb656d5b76..7e95c305e89 100644 --- a/kernels/portable/cpu/op_argmax.cpp +++ b/kernels/portable/cpu/op_argmax.cpp @@ -49,7 +49,10 @@ Tensor& argmax_out( for (size_t out_ix = 0; out_ix < out.numel(); ++out_ix) { std::tuple acc = reduce_over_dim( [](CTYPE v, long ix, CTYPE acc_val, long acc_ix) { - if (!std::isnan(acc_val) && (std::isnan(v) || v > acc_val)) { + // the below condition as written is equivalent to + // !isnan(accval) && (isnan(v) || v > acc_val). See + // argument in op_argmin.cpp. + if (!std::isnan(acc_val) && !(v <= acc_val)) { acc_val = v; acc_ix = ix; } diff --git a/kernels/portable/cpu/op_argmin.cpp b/kernels/portable/cpu/op_argmin.cpp index 1c4a2572ea8..8a94994c4be 100644 --- a/kernels/portable/cpu/op_argmin.cpp +++ b/kernels/portable/cpu/op_argmin.cpp @@ -49,7 +49,17 @@ Tensor& argmin_out( for (size_t out_ix = 0; out_ix < out.numel(); ++out_ix) { std::tuple acc = reduce_over_dim( [](CTYPE v, long ix, CTYPE acc_val, long acc_ix) { - if (!std::isnan(acc_val) && (std::isnan(v) || v < acc_val)) { + // the below condition as written is equivalent to !isnan(accval) && + // (isnan(v) || v < acc_val). cases: + // - if neither acc_val nor v is NaN, !(v >= acc_val) is + // trivially equivalent to v < acc_val. + // - if acc_val is NaN, the whole thing is trivially false. + // - if acc_val is not NaN and v is NaN, then v >= acc_val + // - is false because all comparisons involving NaN are + // - false, so the result is true. The result is trivially + // - true for the above condition that uses isnan(v) as + // - well. + if (!std::isnan(acc_val) && !(v >= acc_val)) { acc_val = v; acc_ix = ix; } diff --git a/kernels/test/op_argmax_test.cpp b/kernels/test/op_argmax_test.cpp index 66c79cefff7..4d68dfe88be 100644 --- a/kernels/test/op_argmax_test.cpp +++ b/kernels/test/op_argmax_test.cpp @@ -90,3 +90,16 @@ TEST_F(OpArgmaxTest, SanityCheckNullDim) { EXPECT_TENSOR_EQ(out, expected); // clang-format on } + +TEST_F(OpArgmaxTest, FirstNaNWins) { + TensorFactory tf_float; + Tensor in = tf_float.make({4}, {1, NAN, -4, NAN}); + + TensorFactory tf_long; + Tensor out = tf_long.zeros({}); + Tensor expected = tf_long.make({}, {1}); + + Tensor ret = op_argmax_out(in, {}, false, out); + EXPECT_TENSOR_EQ(out, ret); + EXPECT_TENSOR_EQ(out, expected); +} diff --git a/kernels/test/op_argmin_test.cpp b/kernels/test/op_argmin_test.cpp index 250fe4f7e1e..a0b2699a28f 100644 --- a/kernels/test/op_argmin_test.cpp +++ b/kernels/test/op_argmin_test.cpp @@ -90,3 +90,16 @@ TEST_F(OpArgminTest, SanityCheckNullDim) { EXPECT_TENSOR_EQ(out, expected); // clang-format on } + +TEST_F(OpArgminTest, FirstNaNWins) { + TensorFactory tf_float; + Tensor in = tf_float.make({4}, {1, NAN, -4, NAN}); + + TensorFactory tf_long; + Tensor out = tf_long.zeros({}); + Tensor expected = tf_long.make({}, {1}); + + Tensor ret = op_argmin_out(in, {}, false, out); + EXPECT_TENSOR_EQ(out, ret); + EXPECT_TENSOR_EQ(out, expected); +} From d3a0f675767cf527ac680f32bbaa0b0fe97e8804 Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Fri, 28 Feb 2025 17:16:14 -0800 Subject: [PATCH 2/3] Update [ghstack-poisoned] --- .../cpu/util/delinearized_indexes_range.h | 108 ++++++++++++++++++ kernels/portable/cpu/util/targets.bzl | 13 +++ kernels/portable/cpu/util/test/CMakeLists.txt | 4 +- .../test/delinearized_indexes_range_test.cpp | 69 +++++++++++ kernels/portable/cpu/util/test/targets.bzl | 10 ++ test/utils/OSSTestConfig.json | 4 +- 6 files changed, 206 insertions(+), 2 deletions(-) create mode 100644 kernels/portable/cpu/util/delinearized_indexes_range.h create mode 100644 kernels/portable/cpu/util/test/delinearized_indexes_range_test.cpp diff --git a/kernels/portable/cpu/util/delinearized_indexes_range.h b/kernels/portable/cpu/util/delinearized_indexes_range.h new file mode 100644 index 00000000000..c6a9cc91bfb --- /dev/null +++ b/kernels/portable/cpu/util/delinearized_indexes_range.h @@ -0,0 +1,108 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include +#include +#include + +#include +#include + +namespace torch::executor { + +namespace internal { +class DelinearizedIndexesIterator { + public: + using difference_type = ssize_t; + using value_type = std::array; + using reference = const value_type&; + using pointer = const value_type*; + using iterator_category = std::forward_iterator_tag; + + DelinearizedIndexesIterator() = default; + + explicit DelinearizedIndexesIterator(const Tensor& t) + : idx_(0), dim_(t.dim()), shape_(t.sizes()) { + } + + struct make_end_t { + explicit constexpr make_end_t() = default; + }; + + DelinearizedIndexesIterator(make_end_t, const Tensor& t) + : idx_(t.numel()) {} + + bool operator==(const DelinearizedIndexesIterator& rhs) const { + return idx_ == rhs.idx_; + } + + bool operator!=(const DelinearizedIndexesIterator& rhs) const { + return !operator==(rhs); + } + + reference operator*() const { + return repr_; + } + + pointer operator->() const { + return &repr_; + } + + DelinearizedIndexesIterator& operator++() { + idx_++; + for (auto ii = dim_ - 1; ii >= 0; --ii) { + repr_[ii]++; + ET_DCHECK(repr_[ii] <= shape_[ii]); + if ET_LIKELY (repr_[ii] < shape_[ii]) { + break; + } else { + repr_[ii] = 0; + } + } + return *this; + } + + DelinearizedIndexesIterator operator++(int) { + auto it = *this; + operator++(); + return it; + } + + difference_type operator-(const DelinearizedIndexesIterator& rhs) const { + return difference_type(idx_ - rhs.idx_); + } + + private: + std::size_t idx_ = 0; + value_type repr_ = {0,}; + ssize_t dim_; + ArrayRef shape_; +}; +} // namespace internal + +class DelinearizedIndexesRange { + public: + using iterator = internal::DelinearizedIndexesIterator; + + DelinearizedIndexesRange(const Tensor& t) : + tensor_(t) {} + + iterator begin() const { + return iterator(tensor_); + } + + iterator end() { + return iterator(iterator::make_end_t(), tensor_); + } + private: + const Tensor& tensor_; +}; +} // namespace torch::executor diff --git a/kernels/portable/cpu/util/targets.bzl b/kernels/portable/cpu/util/targets.bzl index eef765d5eec..95970b600ef 100644 --- a/kernels/portable/cpu/util/targets.bzl +++ b/kernels/portable/cpu/util/targets.bzl @@ -280,6 +280,19 @@ def define_common_targets(): visibility = ["//executorch/kernels/portable/cpu/..."], ) + runtime.cxx_library( + name = "delinearized_indexes_range", + exported_headers = ["delinearized_indexes_range.h"], + deps = [ + "//executorch/runtime/core/exec_aten:lib", + "//executorch/runtime/core/exec_aten/util:tensor_dimension_limit", + ], + visibility = [ + "//executorch/...", + "@EXECUTORCH_CLIENTS", + ], + ) + # Utility functions that can be used by operators that perform reduction for aten_mode in get_aten_mode_options(): suffix = "_aten" if aten_mode else "" diff --git a/kernels/portable/cpu/util/test/CMakeLists.txt b/kernels/portable/cpu/util/test/CMakeLists.txt index 5f81e4b6aec..76c53ea8448 100644 --- a/kernels/portable/cpu/util/test/CMakeLists.txt +++ b/kernels/portable/cpu/util/test/CMakeLists.txt @@ -19,7 +19,9 @@ set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../../../../..) include(${EXECUTORCH_ROOT}/build/Test.cmake) -set(_test_srcs broadcast_test.cpp reduce_test.cpp) +set(_test_srcs broadcast_test.cpp delinearized_indexes_range_test.cpp + reduce_test.cpp +) et_cxx_test( kernels_portable_cpu_util_test SOURCES ${_test_srcs} EXTRA_LIBS diff --git a/kernels/portable/cpu/util/test/delinearized_indexes_range_test.cpp b/kernels/portable/cpu/util/test/delinearized_indexes_range_test.cpp new file mode 100644 index 00000000000..395e1a74d98 --- /dev/null +++ b/kernels/portable/cpu/util/test/delinearized_indexes_range_test.cpp @@ -0,0 +1,69 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include +#include + +#include + +using executorch::aten::ScalarType; +using executorch::aten::Tensor; +using executorch::runtime::testing::TensorFactory; +using torch::executor::DelinearizedIndexesRange; + +TEST(DelinearizedIndexesRangeTest, Empty) { + TensorFactory tf; + + Tensor a = tf.make({0}, {}); + ASSERT_EQ(a.numel(), 0); + bool loop_entered = false; + for (auto _ : DelinearizedIndexesRange(a)) { + loop_entered = true; + } + EXPECT_FALSE(loop_entered); +} + +TEST(DelinearizedIndexesRangeTest, OneD) { + TensorFactory tf; + + Tensor a = tf.zeros({5}); + DelinearizedIndexesRange r(a); + std::vector v(r.begin(), r.end()); + int idx = 0; + for (const auto& elem: v) { + EXPECT_EQ(elem[0], idx++); + } +} + +TEST(DelinearizedIndexesRangeTest, ThreeD) { + TensorFactory tf; + Tensor a = tf.zeros({3, 2, 3}); + DelinearizedIndexesRange r(a); + std::vector v(r.begin(), r.end()); + std::vector expected = { + {0, 0, 0}, + {0, 0, 1}, + {0, 0, 2}, + {0, 1, 0}, + {0, 1, 1}, + {0, 1, 2}, + {1, 0, 0}, + {1, 0, 1}, + {1, 0, 2}, + {1, 1, 0}, + {1, 1, 1}, + {1, 1, 2}, + {2, 0, 0}, + {2, 0, 1}, + {2, 0, 2}, + {2, 1, 0}, + {2, 1, 1}, + {2, 1, 2}, + }; + EXPECT_EQ(v, expected); +} diff --git a/kernels/portable/cpu/util/test/targets.bzl b/kernels/portable/cpu/util/test/targets.bzl index 28988b90dcc..25e16237361 100644 --- a/kernels/portable/cpu/util/test/targets.bzl +++ b/kernels/portable/cpu/util/test/targets.bzl @@ -12,6 +12,16 @@ def define_common_targets(): ], ) + runtime.cxx_test( + name = "delinearized_indexes_range_test", + srcs = ["delinearized_indexes_range_test.cpp"], + deps = [ + "//executorch/kernels/portable/cpu/util:delinearized_indexes_range", + "//executorch/runtime/core/exec_aten:lib", + "//executorch/runtime/core/exec_aten/testing_util:tensor_util", + ], + ) + runtime.cxx_test( name = "reduce_test", srcs = ["reduce_test.cpp"], diff --git a/test/utils/OSSTestConfig.json b/test/utils/OSSTestConfig.json index 70cb2d2e44f..b94f11ea94e 100644 --- a/test/utils/OSSTestConfig.json +++ b/test/utils/OSSTestConfig.json @@ -7,7 +7,8 @@ "op_fast_hadamard_transform_test.cpp" ], "additional_libs": [ - "custom_ops" + "custom_ops", + "dumb_fht" ] }, { @@ -62,6 +63,7 @@ "directory": "kernels/portable/cpu/util/test", "sources": [ "broadcast_test.cpp", + "delinearized_indexes_range_test.cpp", "reduce_test.cpp" ], "additional_libs": [ From 14a55be16b0f20ecb614aa51b1333d42f56f8e80 Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Tue, 4 Mar 2025 10:27:18 -0800 Subject: [PATCH 3/3] Update [ghstack-poisoned] --- kernels/portable/cpu/util/broadcast_indexes_range.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernels/portable/cpu/util/broadcast_indexes_range.h b/kernels/portable/cpu/util/broadcast_indexes_range.h index bebf5a056e6..7ee4f3fb2b1 100644 --- a/kernels/portable/cpu/util/broadcast_indexes_range.h +++ b/kernels/portable/cpu/util/broadcast_indexes_range.h @@ -158,7 +158,8 @@ class BroadcastIndexesIterator { // output_dim. This is straightforwardly implementable with an // adjusted stride array that contains 0s where the padded input // shape would contain 1s. - std::array effective_input_broadcast_strides_ = {{0}}; + std::array effective_input_broadcast_strides_ = { + {{0}}}; }; } // namespace internal