diff --git a/doc/reference/ufunc.rst b/doc/reference/ufunc.rst index 2dffca15e889..a5b64852bd42 100644 --- a/doc/reference/ufunc.rst +++ b/doc/reference/ufunc.rst @@ -105,10 +105,12 @@ Comparison functions dpnp.less_equal dpnp.not_equal dpnp.equal + dpnp.logical_and dpnp.logical_or dpnp.logical_xor dpnp.logical_not + dpnp.maximum dpnp.minimum dpnp.fmax diff --git a/dpnp/backend/extensions/ufunc/CMakeLists.txt b/dpnp/backend/extensions/ufunc/CMakeLists.txt index 1d140b066584..077710cb55c2 100644 --- a/dpnp/backend/extensions/ufunc/CMakeLists.txt +++ b/dpnp/backend/extensions/ufunc/CMakeLists.txt @@ -26,6 +26,8 @@ set(_elementwise_sources ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/common.cpp ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/fabs.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/fmax.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/fmin.cpp ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/fmod.cpp ) diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/common.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/common.cpp index b915f9a299a8..e4af134f46db 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/common.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/common.cpp @@ -26,6 +26,8 @@ #include #include "fabs.hpp" +#include "fmax.hpp" +#include "fmin.hpp" #include "fmod.hpp" namespace py = pybind11; @@ -38,6 +40,8 @@ namespace dpnp::extensions::ufunc void init_elementwise_functions(py::module_ m) { init_fabs(m); + init_fmax(m); + init_fmin(m); init_fmod(m); } } // namespace dpnp::extensions::ufunc diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/fmax.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/fmax.cpp new file mode 100644 index 000000000000..64f68d146be8 --- /dev/null +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/fmax.cpp @@ -0,0 +1,137 @@ +//***************************************************************************** +// Copyright (c) 2024, 
Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** + +#include + +#include "dpctl4pybind11.hpp" + +#include "fmax.hpp" +#include "kernels/elementwise_functions/fmax.hpp" +#include "populate.hpp" + +// include a local copy of elementwise common header from dpctl tensor: +// dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp +// TODO: replace by including dpctl header once available +#include "../../elementwise_functions/elementwise_functions.hpp" + +// dpctl tensor headers +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/maximum.hpp" +#include "utils/type_dispatch.hpp" + +namespace py = pybind11; + +namespace dpnp::extensions::ufunc +{ +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +namespace max_ns = dpctl::tensor::kernels::maximum; +namespace py_int = dpnp::extensions::py_internal; +namespace td_ns = dpctl::tensor::type_dispatch; + +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +namespace impl +{ +// Supports the same types table as for maximum function in dpctl +template +using OutputType = max_ns::MaximumOutputType; + +using dpnp::kernels::fmax::FmaxFunctor; + +template +using ContigFunctor = + ew_cmn_ns::BinaryContigFunctor, + vec_sz, + n_vecs, + enable_sg_loadstore>; + +template +using StridedFunctor = + ew_cmn_ns::BinaryStridedFunctor>; + +using ew_cmn_ns::binary_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_strided_impl_fn_ptr_t; + +static binary_contig_impl_fn_ptr_t fmax_contig_dispatch_table[td_ns::num_types] + [td_ns::num_types]; +static int fmax_output_typeid_table[td_ns::num_types][td_ns::num_types]; +static binary_strided_impl_fn_ptr_t + fmax_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + 
+MACRO_POPULATE_DISPATCH_TABLES(fmax); +} // namespace impl + +void init_fmax(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_fmax_dispatch_tables(); + using impl::fmax_contig_dispatch_table; + using impl::fmax_output_typeid_table; + using impl::fmax_strided_dispatch_table; + + auto fmax_pyapi = [&](const arrayT &src1, const arrayT &src2, + const arrayT &dst, sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_int::py_binary_ufunc( + src1, src2, dst, exec_q, depends, fmax_output_typeid_table, + fmax_contig_dispatch_table, fmax_strided_dispatch_table, + // no support of C-contig row with broadcasting in OneMKL + td_ns::NullPtrTable< + impl:: + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + td_ns::NullPtrTable< + impl:: + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + m.def("_fmax", fmax_pyapi, "", py::arg("src1"), py::arg("src2"), + py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + + auto fmax_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_int::py_binary_ufunc_result_type( + dtype1, dtype2, fmax_output_typeid_table); + }; + m.def("_fmax_result_type", fmax_result_type_pyapi); + } +} +} // namespace dpnp::extensions::ufunc diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/fmax.hpp b/dpnp/backend/extensions/ufunc/elementwise_functions/fmax.hpp new file mode 100644 index 000000000000..70d0baac314c --- /dev/null +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/fmax.hpp @@ -0,0 +1,35 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include + +namespace py = pybind11; + +namespace dpnp::extensions::ufunc +{ +void init_fmax(py::module_ m); +} // namespace dpnp::extensions::ufunc diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/fmin.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/fmin.cpp new file mode 100644 index 000000000000..0972ffde9226 --- /dev/null +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/fmin.cpp @@ -0,0 +1,137 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** + +#include + +#include "dpctl4pybind11.hpp" + +#include "fmin.hpp" +#include "kernels/elementwise_functions/fmin.hpp" +#include "populate.hpp" + +// include a local copy of elementwise common header from dpctl tensor: +// dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp +// TODO: replace by including dpctl header once available +#include "../../elementwise_functions/elementwise_functions.hpp" + +// dpctl tensor headers +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/minimum.hpp" +#include "utils/type_dispatch.hpp" + +namespace py = pybind11; + +namespace dpnp::extensions::ufunc +{ +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +namespace min_ns = dpctl::tensor::kernels::minimum; +namespace py_int = dpnp::extensions::py_internal; +namespace td_ns = dpctl::tensor::type_dispatch; + +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +namespace impl +{ +// Supports the same types table as for minimum function in dpctl +template +using OutputType = min_ns::MinimumOutputType; + +using dpnp::kernels::fmin::FminFunctor; + +template +using ContigFunctor = + ew_cmn_ns::BinaryContigFunctor, + vec_sz, + n_vecs, + enable_sg_loadstore>; + +template +using StridedFunctor = + ew_cmn_ns::BinaryStridedFunctor>; + +using ew_cmn_ns::binary_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_strided_impl_fn_ptr_t; + +static binary_contig_impl_fn_ptr_t fmin_contig_dispatch_table[td_ns::num_types] + [td_ns::num_types]; +static int fmin_output_typeid_table[td_ns::num_types][td_ns::num_types]; +static binary_strided_impl_fn_ptr_t + fmin_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + 
+MACRO_POPULATE_DISPATCH_TABLES(fmin); +} // namespace impl + +void init_fmin(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_fmin_dispatch_tables(); + using impl::fmin_contig_dispatch_table; + using impl::fmin_output_typeid_table; + using impl::fmin_strided_dispatch_table; + + auto fmin_pyapi = [&](const arrayT &src1, const arrayT &src2, + const arrayT &dst, sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_int::py_binary_ufunc( + src1, src2, dst, exec_q, depends, fmin_output_typeid_table, + fmin_contig_dispatch_table, fmin_strided_dispatch_table, + // no support of C-contig row with broadcasting in OneMKL + td_ns::NullPtrTable< + impl:: + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + td_ns::NullPtrTable< + impl:: + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + m.def("_fmin", fmin_pyapi, "", py::arg("src1"), py::arg("src2"), + py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + + auto fmin_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_int::py_binary_ufunc_result_type( + dtype1, dtype2, fmin_output_typeid_table); + }; + m.def("_fmin_result_type", fmin_result_type_pyapi); + } +} +} // namespace dpnp::extensions::ufunc diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/fmin.hpp b/dpnp/backend/extensions/ufunc/elementwise_functions/fmin.hpp new file mode 100644 index 000000000000..9c2ca9baab34 --- /dev/null +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/fmin.hpp @@ -0,0 +1,35 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** + +#pragma once + +#include + +namespace py = pybind11; + +namespace dpnp::extensions::ufunc +{ +void init_fmin(py::module_ m); +} // namespace dpnp::extensions::ufunc diff --git a/dpnp/backend/extensions/vm/CMakeLists.txt b/dpnp/backend/extensions/vm/CMakeLists.txt index 0a7646cfc57e..159ca57993ce 100644 --- a/dpnp/backend/extensions/vm/CMakeLists.txt +++ b/dpnp/backend/extensions/vm/CMakeLists.txt @@ -43,6 +43,8 @@ set(_elementwise_sources ${CMAKE_CURRENT_SOURCE_DIR}/exp2.cpp ${CMAKE_CURRENT_SOURCE_DIR}/expm1.cpp ${CMAKE_CURRENT_SOURCE_DIR}/floor.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/fmax.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/fmin.cpp ${CMAKE_CURRENT_SOURCE_DIR}/fmod.cpp ${CMAKE_CURRENT_SOURCE_DIR}/hypot.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ln.cpp diff --git a/dpnp/backend/extensions/vm/fmax.cpp b/dpnp/backend/extensions/vm/fmax.cpp new file mode 100644 index 000000000000..b711516f6797 --- /dev/null +++ b/dpnp/backend/extensions/vm/fmax.cpp @@ -0,0 +1,161 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#include +#include + +#include "dpctl4pybind11.hpp" + +#include "common.hpp" +#include "fmax.hpp" + +// include a local copy of elementwise common header from dpctl tensor: +// dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp +// TODO: replace by including dpctl header once available +#include "../elementwise_functions/elementwise_functions.hpp" + +// dpctl tensor headers +#include "kernels/elementwise_functions/common.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" + +namespace dpnp::extensions::vm +{ +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +namespace py = pybind11; +namespace py_int = dpnp::extensions::py_internal; +namespace td_ns = dpctl::tensor::type_dispatch; +namespace tu_ns = dpctl::tensor::type_utils; + +namespace impl +{ +// OneMKL namespace with VM functions +namespace mkl_vm = oneapi::mkl::vm; + +/** + * @brief A factory to define pairs of supported types for which + * MKL VM library provides support in oneapi::mkl::vm::fmax function. + * + * @tparam T Type of input vectors `a` and `b` and of result vector `y`. 
+ */ +template +struct OutputType +{ + using value_type = typename std::disjunction< + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::DefaultResultEntry>::result_type; +}; + +template +static sycl::event fmax_contig_impl(sycl::queue &exec_q, + std::size_t in_n, + const char *in_a, + py::ssize_t a_offset, + const char *in_b, + py::ssize_t b_offset, + char *out_y, + py::ssize_t out_offset, + const std::vector &depends) +{ + tu_ns::validate_type_for_device(exec_q); + tu_ns::validate_type_for_device(exec_q); + + if ((a_offset != 0) || (b_offset != 0) || (out_offset != 0)) { + throw std::runtime_error("Arrays offsets have to be equals to 0"); + } + + std::int64_t n = static_cast(in_n); + const T1 *a = reinterpret_cast(in_a); + const T2 *b = reinterpret_cast(in_b); + + using resTy = typename OutputType::value_type; + resTy *y = reinterpret_cast(out_y); + + return mkl_vm::fmax(exec_q, + n, // number of elements to be calculated + a, // pointer `a` containing 1st input vector of size n + b, // pointer `b` containing 2nd input vector of size n + y, // pointer `y` to the output vector of size n + depends); +} + +using ew_cmn_ns::binary_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_strided_impl_fn_ptr_t; + +static int output_typeid_vector[td_ns::num_types][td_ns::num_types]; +static binary_contig_impl_fn_ptr_t contig_dispatch_vector[td_ns::num_types] + [td_ns::num_types]; + +MACRO_POPULATE_DISPATCH_TABLES(fmax); +} // namespace impl + +void init_fmax(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + + impl::populate_dispatch_tables(); + using impl::contig_dispatch_vector; + using impl::output_typeid_vector; + + auto fmax_pyapi = [&](sycl::queue &exec_q, const arrayT &src1, + const arrayT &src2, const arrayT &dst, + const event_vecT &depends = {}) { + 
return py_int::py_binary_ufunc( + src1, src2, dst, exec_q, depends, output_typeid_vector, + contig_dispatch_vector, + // no support of strided implementation in OneMKL + td_ns::NullPtrTable{}, + // no support of C-contig row with broadcasting in OneMKL + td_ns::NullPtrTable< + impl:: + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + td_ns::NullPtrTable< + impl:: + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + m.def("_fmax", fmax_pyapi, + "Call `fmax` function from OneMKL VM library to perform element " + "by element computation of the maximum function of vector `src1` " + "with respect to vector `src2` to resulting vector `dst`", + py::arg("sycl_queue"), py::arg("src1"), py::arg("src2"), + py::arg("dst"), py::arg("depends") = py::list()); + + auto fmax_need_to_call_pyapi = [&](sycl::queue &exec_q, const arrayT &src1, + const arrayT &src2, const arrayT &dst) { + return py_internal::need_to_call_binary_ufunc(exec_q, src1, src2, dst, + output_typeid_vector, + contig_dispatch_vector); + }; + m.def("_mkl_fmax_to_call", fmax_need_to_call_pyapi, + "Check input arguments to answer if `fmax` function from " + "OneMKL VM library can be used", + py::arg("sycl_queue"), py::arg("src1"), py::arg("src2"), + py::arg("dst")); +} +} // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/fmax.hpp b/dpnp/backend/extensions/vm/fmax.hpp new file mode 100644 index 000000000000..13d8ccad9ff5 --- /dev/null +++ b/dpnp/backend/extensions/vm/fmax.hpp @@ -0,0 +1,35 @@ +//***************************************************************************** +// Copyright (c) 2023-2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. 
+// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include + +namespace py = pybind11; + +namespace dpnp::extensions::vm +{ +void init_fmax(py::module_ m); +} // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/fmin.cpp b/dpnp/backend/extensions/vm/fmin.cpp new file mode 100644 index 000000000000..3b288216c921 --- /dev/null +++ b/dpnp/backend/extensions/vm/fmin.cpp @@ -0,0 +1,161 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. 
+// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** + +#include +#include + +#include "dpctl4pybind11.hpp" + +#include "common.hpp" +#include "fmin.hpp" + +// include a local copy of elementwise common header from dpctl tensor: +// dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp +// TODO: replace by including dpctl header once available +#include "../elementwise_functions/elementwise_functions.hpp" + +// dpctl tensor headers +#include "kernels/elementwise_functions/common.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" + +namespace dpnp::extensions::vm +{ +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +namespace py = pybind11; +namespace py_int = dpnp::extensions::py_internal; +namespace td_ns = dpctl::tensor::type_dispatch; +namespace tu_ns = dpctl::tensor::type_utils; + +namespace impl +{ +// OneMKL namespace with VM functions +namespace mkl_vm = oneapi::mkl::vm; + +/** + * @brief A factory to define pairs of supported types for which + * MKL VM library provides support in oneapi::mkl::vm::fmin function. + * + * @tparam T Type of input vectors `a` and `b` and of result vector `y`. 
+ */ +template +struct OutputType +{ + using value_type = typename std::disjunction< + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::DefaultResultEntry>::result_type; +}; + +template +static sycl::event fmin_contig_impl(sycl::queue &exec_q, + std::size_t in_n, + const char *in_a, + py::ssize_t a_offset, + const char *in_b, + py::ssize_t b_offset, + char *out_y, + py::ssize_t out_offset, + const std::vector &depends) +{ + tu_ns::validate_type_for_device(exec_q); + tu_ns::validate_type_for_device(exec_q); + + if ((a_offset != 0) || (b_offset != 0) || (out_offset != 0)) { + throw std::runtime_error("Arrays offsets have to be equals to 0"); + } + + std::int64_t n = static_cast(in_n); + const T1 *a = reinterpret_cast(in_a); + const T2 *b = reinterpret_cast(in_b); + + using resTy = typename OutputType::value_type; + resTy *y = reinterpret_cast(out_y); + + return mkl_vm::fmin(exec_q, + n, // number of elements to be calculated + a, // pointer `a` containing 1st input vector of size n + b, // pointer `b` containing 2nd input vector of size n + y, // pointer `y` to the output vector of size n + depends); +} + +using ew_cmn_ns::binary_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_strided_impl_fn_ptr_t; + +static int output_typeid_vector[td_ns::num_types][td_ns::num_types]; +static binary_contig_impl_fn_ptr_t contig_dispatch_vector[td_ns::num_types] + [td_ns::num_types]; + +MACRO_POPULATE_DISPATCH_TABLES(fmin); +} // namespace impl + +void init_fmin(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + + impl::populate_dispatch_tables(); + using impl::contig_dispatch_vector; + using impl::output_typeid_vector; + + auto fmin_pyapi = [&](sycl::queue &exec_q, const arrayT &src1, + const arrayT &src2, const arrayT &dst, + const event_vecT &depends = {}) { + 
return py_int::py_binary_ufunc( + src1, src2, dst, exec_q, depends, output_typeid_vector, + contig_dispatch_vector, + // no support of strided implementation in OneMKL + td_ns::NullPtrTable{}, + // no support of C-contig row with broadcasting in OneMKL + td_ns::NullPtrTable< + impl:: + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + td_ns::NullPtrTable< + impl:: + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + m.def("_fmin", fmin_pyapi, + "Call `fmin` function from OneMKL VM library to perform element " + "by element computation of the minimum function of vector `src1` " + "with respect to vector `src2` to resulting vector `dst`", + py::arg("sycl_queue"), py::arg("src1"), py::arg("src2"), + py::arg("dst"), py::arg("depends") = py::list()); + + auto fmin_need_to_call_pyapi = [&](sycl::queue &exec_q, const arrayT &src1, + const arrayT &src2, const arrayT &dst) { + return py_internal::need_to_call_binary_ufunc(exec_q, src1, src2, dst, + output_typeid_vector, + contig_dispatch_vector); + }; + m.def("_mkl_fmin_to_call", fmin_need_to_call_pyapi, + "Check input arguments to answer if `fmin` function from " + "OneMKL VM library can be used", + py::arg("sycl_queue"), py::arg("src1"), py::arg("src2"), + py::arg("dst")); +} +} // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/fmin.hpp b/dpnp/backend/extensions/vm/fmin.hpp new file mode 100644 index 000000000000..d1eefe5eccb2 --- /dev/null +++ b/dpnp/backend/extensions/vm/fmin.hpp @@ -0,0 +1,35 @@ +//***************************************************************************** +// Copyright (c) 2023-2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. 
+// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** + +#pragma once + +#include + +namespace py = pybind11; + +namespace dpnp::extensions::vm +{ +void init_fmin(py::module_ m); +} // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/vm_py.cpp b/dpnp/backend/extensions/vm/vm_py.cpp index b78ae51ddc30..4491703957a7 100644 --- a/dpnp/backend/extensions/vm/vm_py.cpp +++ b/dpnp/backend/extensions/vm/vm_py.cpp @@ -46,6 +46,8 @@ #include "exp2.hpp" #include "expm1.hpp" #include "floor.hpp" +#include "fmax.hpp" +#include "fmin.hpp" #include "fmod.hpp" #include "hypot.hpp" #include "ln.hpp" @@ -87,6 +89,8 @@ PYBIND11_MODULE(_vm_impl, m) vm_ns::init_exp2(m); vm_ns::init_expm1(m); vm_ns::init_floor(m); + vm_ns::init_fmax(m); + vm_ns::init_fmin(m); vm_ns::init_fmod(m); vm_ns::init_hypot(m); vm_ns::init_ln(m); diff --git a/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp b/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp index e5a2c924653a..11aed0ebac25 100644 --- a/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp +++ b/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp @@ -103,28 +103,6 @@ #endif -MACRO_2ARG_3TYPES_OP( - dpnp_fmod_c, - dispatch_fmod_op(input1_elem, input2_elem), - dispatch_fmod_op(x1, x2), - MACRO_UNPACK_TYPES(std::int32_t, std::int64_t, float, double), - oneapi::mkl::vm::fmod, - MACRO_UNPACK_TYPES(float, double)) - -MACRO_2ARG_3TYPES_OP(dpnp_maximum_c, - sycl::max(input1_elem, input2_elem), - nullptr, - std::false_type, - oneapi::mkl::vm::fmax, - MACRO_UNPACK_TYPES(float, double)) - -MACRO_2ARG_3TYPES_OP(dpnp_minimum_c, - sycl::min(input1_elem, input2_elem), - nullptr, - std::false_type, - oneapi::mkl::vm::fmin, - MACRO_UNPACK_TYPES(float, double)) - // "multiply" needs to be standalone kernel (not autogenerated) due to complex // algorithm. This is not an element wise. 
pytest // "tests/third_party/cupy/creation_tests/test_ranges.py::TestMgrid::test_mgrid3" diff --git a/dpnp/backend/include/dpnp_iface_fptr.hpp b/dpnp/backend/include/dpnp_iface_fptr.hpp index aaaf90c27bb0..9f9b7a89143f 100644 --- a/dpnp/backend/include/dpnp_iface_fptr.hpp +++ b/dpnp/backend/include/dpnp_iface_fptr.hpp @@ -100,15 +100,11 @@ enum class DPNPFuncName : size_t DPNP_FN_INITVAL_EXT, /**< Used in numpy ones, ones_like, zeros, zeros_like impls */ DPNP_FN_MAX, /**< Used in numpy.max() impl */ - DPNP_FN_MAXIMUM_EXT, /**< Used in numpy.fmax() impl , requires extra - parameters */ DPNP_FN_MEAN, /**< Used in numpy.mean() impl */ DPNP_FN_MEDIAN, /**< Used in numpy.median() impl */ DPNP_FN_MEDIAN_EXT, /**< Used in numpy.median() impl, requires extra parameters */ DPNP_FN_MIN, /**< Used in numpy.min() impl */ - DPNP_FN_MINIMUM_EXT, /**< Used in numpy.fmax() impl, requires extra - parameters */ DPNP_FN_MODF, /**< Used in numpy.modf() impl */ DPNP_FN_MODF_EXT, /**< Used in numpy.modf() impl, requires extra parameters */ diff --git a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp index e3797bd22e6e..75413cc5e60d 100644 --- a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp @@ -1026,45 +1026,6 @@ static void func_map_init_elemwise_1arg_1type(func_map_t &fmap) #include -template -static void func_map_elemwise_2arg_3type_short_core(func_map_t &fmap) -{ - ((fmap[DPNPFuncName::DPNP_FN_MAXIMUM_EXT][FT1][FTs] = - {get_floating_res_type(), - (void *)dpnp_maximum_c_ext< - func_type_map_t::find_type()>, - func_type_map_t::find_type, - func_type_map_t::find_type>, - get_floating_res_type(), - (void *)dpnp_maximum_c_ext< - func_type_map_t::find_type()>, - func_type_map_t::find_type, - func_type_map_t::find_type>}), - ...); - ((fmap[DPNPFuncName::DPNP_FN_MINIMUM_EXT][FT1][FTs] = - {get_floating_res_type(), - (void *)dpnp_minimum_c_ext< - func_type_map_t::find_type()>, - 
func_type_map_t::find_type, - func_type_map_t::find_type>, - get_floating_res_type(), - (void *)dpnp_minimum_c_ext< - func_type_map_t::find_type()>, - func_type_map_t::find_type, - func_type_map_t::find_type>}), - ...); -} - -template -static void func_map_elemwise_2arg_3type_short_helper(func_map_t &fmap) -{ - ((func_map_elemwise_2arg_3type_short_core(fmap)), ...); -} - static void func_map_init_elemwise_2arg_3type(func_map_t &fmap) { // Used in dpnp_dot_c @@ -1170,9 +1131,6 @@ static void func_map_init_elemwise_2arg_3type(func_map_t &fmap) (void *)dpnp_multiply_c_default< std::complex, std::complex, std::complex>}; - func_map_elemwise_2arg_3type_short_helper(fmap); - return; } diff --git a/dpnp/backend/kernels/elementwise_functions/fmax.hpp b/dpnp/backend/kernels/elementwise_functions/fmax.hpp new file mode 100644 index 000000000000..6b0ebb81ec6f --- /dev/null +++ b/dpnp/backend/kernels/elementwise_functions/fmax.hpp @@ -0,0 +1,83 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include + +// dpctl tensor headers +#include "utils/math_utils.hpp" +#include "utils/type_utils.hpp" + +namespace dpnp::kernels::fmax +{ +namespace mu_ns = dpctl::tensor::math_utils; +namespace tu_ns = dpctl::tensor::type_utils; + +template +struct FmaxFunctor +{ + using supports_sg_loadstore = std::negation< + std::disjunction, tu_ns::is_complex>>; + using supports_vec = + std::conjunction, + std::disjunction, + std::is_same>>; + + resT operator()(const argT1 &in1, const argT2 &in2) const + { + if constexpr (std::is_integral_v && std::is_integral_v) { + return in1 >= in2 ? 
in1 : in2; + } + else if constexpr (tu_ns::is_complex::value && + tu_ns::is_complex::value) + { + static_assert(std::is_same_v); + + using realT = typename argT1::value_type; + const realT in2r = std::real(in2); + const realT in2i = std::imag(in2); + + if (sycl::isnan(in2r) || sycl::isnan(in2i) || + mu_ns::greater_equal_complex(in1, in2)) + { + return in1; + } + return in2; + } + else { + return sycl::fmax(in1, in2); + } + } + + template + sycl::vec + operator()(const sycl::vec &in1, + const sycl::vec &in2) const + { + return sycl::fmax(in1, in2); + } +}; +} // namespace dpnp::kernels::fmax diff --git a/dpnp/backend/kernels/elementwise_functions/fmin.hpp b/dpnp/backend/kernels/elementwise_functions/fmin.hpp new file mode 100644 index 000000000000..30e4af8884f4 --- /dev/null +++ b/dpnp/backend/kernels/elementwise_functions/fmin.hpp @@ -0,0 +1,83 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include + +// dpctl tensor headers +#include "utils/math_utils.hpp" +#include "utils/type_utils.hpp" + +namespace dpnp::kernels::fmin +{ +namespace mu_ns = dpctl::tensor::math_utils; +namespace tu_ns = dpctl::tensor::type_utils; + +template +struct FminFunctor +{ + using supports_sg_loadstore = std::negation< + std::disjunction, tu_ns::is_complex>>; + using supports_vec = + std::conjunction, + std::disjunction, + std::is_same>>; + + resT operator()(const argT1 &in1, const argT2 &in2) const + { + if constexpr (std::is_integral_v && std::is_integral_v) { + return in1 <= in2 ? 
in1 : in2; + } + else if constexpr (tu_ns::is_complex::value && + tu_ns::is_complex::value) + { + static_assert(std::is_same_v); + + using realT = typename argT1::value_type; + const realT in2r = std::real(in2); + const realT in2i = std::imag(in2); + + if (sycl::isnan(in2r) || sycl::isnan(in2i) || + mu_ns::less_equal_complex(in1, in2)) + { + return in1; + } + return in2; + } + else { + return sycl::fmin(in1, in2); + } + } + + template + sycl::vec + operator()(const sycl::vec &in1, + const sycl::vec &in2) const + { + return sycl::fmin(in1, in2); + } +}; +} // namespace dpnp::kernels::fmin diff --git a/dpnp/backend/kernels/elementwise_functions/fmod.hpp b/dpnp/backend/kernels/elementwise_functions/fmod.hpp index e97b257cb066..bf60bd095642 100644 --- a/dpnp/backend/kernels/elementwise_functions/fmod.hpp +++ b/dpnp/backend/kernels/elementwise_functions/fmod.hpp @@ -38,8 +38,7 @@ struct FmodFunctor resT operator()(const argT1 &in1, const argT2 &in2) const { - if constexpr (std::is_integral::value && - std::is_integral::value) { + if constexpr (std::is_integral_v && std::is_integral_v) { if (in2 == argT2(0)) { return resT(0); } diff --git a/dpnp/dpnp_algo/dpnp_algo.pxd b/dpnp/dpnp_algo/dpnp_algo.pxd index 0c8bd1134a78..3b5b23832260 100644 --- a/dpnp/dpnp_algo/dpnp_algo.pxd +++ b/dpnp/dpnp_algo/dpnp_algo.pxd @@ -41,9 +41,7 @@ cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName": # need this na DPNP_FN_ERF_EXT DPNP_FN_FFT_FFT_EXT DPNP_FN_FFT_RFFT_EXT - DPNP_FN_MAXIMUM_EXT DPNP_FN_MEDIAN_EXT - DPNP_FN_MINIMUM_EXT DPNP_FN_MODF_EXT DPNP_FN_PARTITION_EXT DPNP_FN_RADIANS_EXT @@ -170,15 +168,6 @@ cpdef dpnp_descriptor dpnp_isclose(dpnp_descriptor input1, dpnp_descriptor input double rtol=*, double atol=*, cpp_bool equal_nan=*) -""" -Mathematical functions -""" -cpdef dpnp_descriptor dpnp_fmax(dpnp_descriptor x1_obj, dpnp_descriptor x2_obj, object dtype=*, - dpnp_descriptor out=*, object where=*) -cpdef dpnp_descriptor dpnp_fmin(dpnp_descriptor x1_obj, dpnp_descriptor 
x2_obj, object dtype=*, - dpnp_descriptor out=*, object where=*) - - """ Trigonometric functions """ diff --git a/dpnp/dpnp_algo/dpnp_algo.pyx b/dpnp/dpnp_algo/dpnp_algo.pyx index 4c560d50e0b3..d304f1d32d35 100644 --- a/dpnp/dpnp_algo/dpnp_algo.pyx +++ b/dpnp/dpnp_algo/dpnp_algo.pyx @@ -219,99 +219,3 @@ cdef utils.dpnp_descriptor call_fptr_1in_1out_strides(DPNPFuncName fptr_name, c_dpctl.DPCTLEvent_Delete(event_ref) return result - - -cdef utils.dpnp_descriptor call_fptr_2in_1out_strides(DPNPFuncName fptr_name, - utils.dpnp_descriptor x1_obj, - utils.dpnp_descriptor x2_obj, - object dtype=None, - utils.dpnp_descriptor out=None, - object where=True, - func_name=None): - - # Convert type (x1_obj.dtype) to C enum DPNPFuncType - cdef DPNPFuncType x1_c_type = dpnp_dtype_to_DPNPFuncType(x1_obj.dtype) - cdef DPNPFuncType x2_c_type = dpnp_dtype_to_DPNPFuncType(x2_obj.dtype) - - # get the FPTR data structure - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(fptr_name, x1_c_type, x2_c_type) - - result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(x1_obj, x2_obj) - - # get FPTR function and return type - cdef (DPNPFuncType, void *) ret_type_and_func = utils.get_ret_type_and_func(kernel_data, - result_sycl_device.has_aspect_fp64) - cdef DPNPFuncType return_type = ret_type_and_func[0] - cdef fptr_2in_1out_strides_t func = < fptr_2in_1out_strides_t > ret_type_and_func[1] - - # Create result array - cdef shape_type_c x1_shape = x1_obj.shape - - cdef shape_type_c x1_strides = utils.strides_to_vector(x1_obj.strides, x1_shape) - cdef shape_type_c x2_shape = x2_obj.shape - cdef shape_type_c x2_strides = utils.strides_to_vector(x2_obj.strides, x2_shape) - - cdef shape_type_c result_shape = utils.get_common_shape(x1_shape, x2_shape) - cdef utils.dpnp_descriptor result - - # check 'out' parameter data - if out is not None: - if out.shape != result_shape: - utils.checker_throw_value_error(func_name, 'out.shape', out.shape, result_shape) - - 
utils.get_common_usm_allocation(x1_obj, out) # check USM allocation is common - - if out is None or out.is_array_overlapped(x1_obj) or out.is_array_overlapped(x2_obj) or not out.match_ctype(return_type): - """ - Create result array with type given by FPTR data. - If 'out' array has another dtype than expected or overlaps a memory from any input array, - we have to create a temporary array and to copy data from the temporary into 'out' array, - once the computation is completed. - Otherwise simultaneously access to the same memory may cause a race condition issue - which will result into undefined behaviour. - """ - is_result_memory_allocated = True - result = utils.create_output_descriptor(result_shape, - return_type, - None, - device=result_sycl_device, - usm_type=result_usm_type, - sycl_queue=result_sycl_queue) - else: - is_result_memory_allocated = False - result = out - - cdef shape_type_c result_strides = utils.strides_to_vector(result.strides, result_shape) - - result_obj = result.get_array() - - cdef c_dpctl.SyclQueue q = < c_dpctl.SyclQueue > result_obj.sycl_queue - cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() - - """ Call FPTR function """ - cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, - result.get_data(), - result.size, - result.ndim, - result_shape.data(), - result_strides.data(), - x1_obj.get_data(), - x1_obj.size, - x1_obj.ndim, - x1_shape.data(), - x1_strides.data(), - x2_obj.get_data(), - x2_obj.size, - x2_obj.ndim, - x2_shape.data(), - x2_strides.data(), - NULL, - NULL) # dep_events_ref) - - with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) - c_dpctl.DPCTLEvent_Delete(event_ref) - - if out is not None and is_result_memory_allocated: - return out.get_result_desc(result) - - return result.get_result_desc() diff --git a/dpnp/dpnp_algo/dpnp_algo_mathematical.pxi b/dpnp/dpnp_algo/dpnp_algo_mathematical.pxi index 28b89ce60a1a..84b004856bda 100644 --- a/dpnp/dpnp_algo/dpnp_algo_mathematical.pxi +++ 
b/dpnp/dpnp_algo/dpnp_algo_mathematical.pxi @@ -37,8 +37,6 @@ and the rest of the library __all__ += [ "dpnp_ediff1d", - "dpnp_fmax", - "dpnp_fmin", "dpnp_modf", ] @@ -104,22 +102,6 @@ cpdef utils.dpnp_descriptor dpnp_ediff1d(utils.dpnp_descriptor x1): return result -cpdef utils.dpnp_descriptor dpnp_fmax(utils.dpnp_descriptor x1_obj, - utils.dpnp_descriptor x2_obj, - object dtype=None, - utils.dpnp_descriptor out=None, - object where=True): - return call_fptr_2in_1out_strides(DPNP_FN_MAXIMUM_EXT, x1_obj, x2_obj, dtype, out, where) - - -cpdef utils.dpnp_descriptor dpnp_fmin(utils.dpnp_descriptor x1_obj, - utils.dpnp_descriptor x2_obj, - object dtype=None, - utils.dpnp_descriptor out=None, - object where=True): - return call_fptr_2in_1out_strides(DPNP_FN_MINIMUM_EXT, x1_obj, x2_obj, dtype, out, where) - - cpdef tuple dpnp_modf(utils.dpnp_descriptor x1): """ Convert string type names (array.dtype) to C enum DPNPFuncType """ cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(x1.dtype) diff --git a/dpnp/dpnp_iface.py b/dpnp/dpnp_iface.py index b3103869e8d3..3402f7d23a8f 100644 --- a/dpnp/dpnp_iface.py +++ b/dpnp/dpnp_iface.py @@ -438,11 +438,6 @@ def get_dpnp_descriptor( if use_origin_backend(): return False - # It's required to keep track of input object if a non-strided copy is - # going to be created. Thus there will be an extra descriptor allocated - # to refer on original input. - orig_desc = None - # If input object is a scalar, it means it was allocated on host memory. # We need to copy it to USM memory according to compute follows data. 
if isscalar(ext_obj): @@ -473,7 +468,6 @@ def get_dpnp_descriptor( ext_obj_offset = 0 if ext_obj.strides != shape_offsets or ext_obj_offset != 0: - orig_desc = dpnp_descriptor(ext_obj) ext_obj = array(ext_obj, order="C") # while dpnp functions are based on DPNP_QUEUE @@ -490,7 +484,7 @@ def get_dpnp_descriptor( if not queue_is_default: ext_obj = array(ext_obj, sycl_queue=default_queue) - dpnp_desc = dpnp_descriptor(ext_obj, orig_desc) + dpnp_desc = dpnp_descriptor(ext_obj) if dpnp_desc.is_valid: # pylint: disable=using-constant-test return dpnp_desc diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py index 1caf1359be3e..51d7f2ceddc9 100644 --- a/dpnp/dpnp_iface_mathematical.py +++ b/dpnp/dpnp_iface_mathematical.py @@ -61,8 +61,6 @@ from .backend.extensions.sycl_ext import _sycl_ext_impl from .dpnp_algo import ( dpnp_ediff1d, - dpnp_fmax, - dpnp_fmin, dpnp_modf, ) from .dpnp_algo.dpnp_elementwise_common import ( @@ -1537,232 +1535,174 @@ def ediff1d(x1, to_end=None, to_begin=None): ) -def fmax(x1, x2, /, out=None, *, where=True, dtype=None, subok=True, **kwargs): - """ - Element-wise maximum of array elements. +_FMAX_DOCSTRING = """ +Compares two input arrays `x1` and `x2` and returns a new array containing the +element-wise maxima. - For full documentation refer to :obj:`numpy.fmax`. +If one of the elements being compared is a NaN, then the non-nan element is +returned. If both elements are NaNs then the first is returned. The latter +distinction is important for complex NaNs, which are defined as at least one of +the real or imaginary parts being a NaN. The net effect is that NaNs are +ignored when possible. - Returns - ------- - out : dpnp.ndarray - The maximum of `x1` and `x2`, element-wise, ignoring NaNs. +For full documentation refer to :obj:`numpy.fmax`. 
- Limitations - ----------- - Parameters `x1` and `x2` are supported as either scalar, - :class:`dpnp.ndarray` or :class:`dpctl.tensor.usm_ndarray`, but both `x1` - and `x2` can not be scalars at the same time. - Parameters `where`, `dtype` and `subok` are supported with their default - values. - Keyword argument `kwargs` is currently unsupported. - Otherwise the function will be executed sequentially on CPU. - Input array data types are limited by real-valued data types. +Parameters +---------- +x1 : {dpnp.ndarray, usm_ndarray, scalar} + First input array, expected to have numeric data type. + Both inputs `x1` and `x2` can not be scalars at the same time. +x2 : {dpnp.ndarray, usm_ndarray, scalar} + Second input array, also expected to have numeric data type. + Both inputs `x1` and `x2` can not be scalars at the same time. +out : {None, dpnp.ndarray, usm_ndarray}, optional + Output array to populate. + Array must have the correct shape and the expected data type. + Default: ``None``. +order : {"C", "F", "A", "K"}, optional + Memory layout of the newly output array, if parameter `out` is ``None``. + Default: ``"K"``. - See Also - -------- - :obj:`dpnp.maximum` : Element-wise maximum of array elements, propagates - NaNs. - :obj:`dpnp.fmin` : Element-wise minimum of array elements, ignores NaNs. - :obj:`dpnp.max` : The maximum value of an array along a given axis, - propagates NaNs.. - :obj:`dpnp.nanmax` : The maximum value of an array along a given axis, - ignores NaNs. - :obj:`dpnp.minimum` : Element-wise minimum of array elements, propagates - NaNs. - :obj:`dpnp.fmod` : Calculate the element-wise remainder of division. +Returns +------- +out : dpnp.ndarray + An array containing the element-wise maxima. The data type of + the returned array is determined by the Type Promotion Rules. 
- Examples - -------- - >>> import dpnp as np - >>> x1 = np.array([2, 3, 4]) - >>> x2 = np.array([1, 5, 2]) - >>> np.fmax(x1, x2) - array([2, 5, 4]) - - >>> x1 = np.eye(2) - >>> x2 = np.array([0.5, 2]) - >>> np.fmax(x1, x2) # broadcasting - array([[1. , 2. ], - [0.5, 2. ]]) - - >>> x1 = np.array([np.nan, 0, np.nan]) - >>> x2 = np.array([0, np.nan, np.nan]) - >>> np.fmax(x1, x2) - array([ 0., 0., nan]) +Limitations +----------- +Parameters `where` and `subok` are supported with their default values. +Keyword argument `kwargs` is currently unsupported. +Otherwise ``NotImplementedError`` exception will be raised. - """ +See Also +-------- +:obj:`dpnp.fmin` : Element-wise minimum of two arrays, ignores NaNs. +:obj:`dpnp.maximum` : Element-wise maximum of two arrays, propagates NaNs. +:obj:`dpnp.max` : The maximum value of an array along a given axis, propagates NaNs. +:obj:`dpnp.nanmax` : The maximum value of an array along a given axis, ignores NaNs. +:obj:`dpnp.minimum` : Element-wise minimum of two arrays, propagates NaNs. +:obj:`dpnp.min` : The minimum value of an array along a given axis, propagates NaNs. +:obj:`dpnp.nanmin` : The minimum value of an array along a given axis, ignores NaNs. - if kwargs: - pass - elif where is not True: - pass - elif dtype is not None: - pass - elif subok is not True: - pass - elif dpnp.isscalar(x1) and dpnp.isscalar(x2): - # at least either x1 or x2 has to be an array - pass - else: - # get USM type and queue to copy scalar from the host memory - # into a USM allocation - usm_type, queue = ( - get_usm_allocations([x1, x2]) - if dpnp.isscalar(x1) or dpnp.isscalar(x2) - else (None, None) - ) +Notes +----- +The fmax is equivalent to ``dpnp.where(x1 >= x2, x1, x2)`` when neither +`x1` nor `x2` are NaNs, but it is faster and does proper broadcasting. 
- x1_desc = dpnp.get_dpnp_descriptor( - x1, - copy_when_strides=False, - copy_when_nondefault_queue=False, - alloc_usm_type=usm_type, - alloc_queue=queue, - ) - x2_desc = dpnp.get_dpnp_descriptor( - x2, - copy_when_strides=False, - copy_when_nondefault_queue=False, - alloc_usm_type=usm_type, - alloc_queue=queue, - ) - if x1_desc and x2_desc: - if out is not None: - if not dpnp.is_supported_array_type(out): - raise TypeError( - "return array must be of supported array type" - ) - out_desc = ( - dpnp.get_dpnp_descriptor( - out, copy_when_nondefault_queue=False - ) - or None - ) - else: - out_desc = None +Examples +-------- +>>> import dpnp as np +>>> x1 = np.array([2, 3, 4]) +>>> x2 = np.array([1, 5, 2]) +>>> np.fmax(x1, x2) +array([2, 5, 4]) - return dpnp_fmax( - x1_desc, x2_desc, dtype=dtype, out=out_desc, where=where - ).get_pyobj() +>>> x1 = np.eye(2) +>>> x2 = np.array([0.5, 2]) +>>> np.fmax(x1, x2) +array([[1. , 2. ], + [0.5, 2. ]]) - return call_origin( - numpy.fmax, x1, x2, dtype=dtype, out=out, where=where, **kwargs - ) +>>> x1 = np.array([np.nan, 0, np.nan]) +>>> x2 = np.array([0, np.nan, np.nan]) +>>> np.fmax(x1, x2) +array([ 0., 0., nan]) +""" +fmax = DPNPBinaryFunc( + "fmax", + ufi._fmax_result_type, + ufi._fmax, + _FMAX_DOCSTRING, + mkl_fn_to_call=vmi._mkl_fmax_to_call, + mkl_impl_fn=vmi._fmax, +) -def fmin(x1, x2, /, out=None, *, where=True, dtype=None, subok=True, **kwargs): - """ - Element-wise minimum of array elements. - For full documentation refer to :obj:`numpy.fmin`. +_FMIN_DOCSTRING = """ +Compares two input arrays `x1` and `x2` and returns a new array containing the +element-wise minima. - Returns - ------- - out : dpnp.ndarray - The minimum of `x1` and `x2`, element-wise, ignoring NaNs. +If one of the elements being compared is a NaN, then the non-nan element is +returned. If both elements are NaNs then the first is returned. 
The latter +distinction is important for complex NaNs, which are defined as at least one of +the real or imaginary parts being a NaN. The net effect is that NaNs are +ignored when possible. - Limitations - ----------- - Parameters `x1` and `x2` are supported as either scalar, - :class:`dpnp.ndarray` or :class:`dpctl.tensor.usm_ndarray`, but both `x1` - and `x2` can not be scalars at the same time. - Parameters `where`, `dtype` and `subok` are supported with their default - values. - Keyword argument `kwargs` is currently unsupported. - Otherwise the function will be executed sequentially on CPU. - Input array data types are limited by real-valued data types. +For full documentation refer to :obj:`numpy.fmin`. - See Also - -------- - :obj:`dpnp.minimum` : Element-wise minimum of array elements, propagates - NaNs. - :obj:`dpnp.fmax` : Element-wise maximum of array elements, ignores NaNs. - :obj:`dpnp.min` : The minimum value of an array along a given axis, - propagates NaNs. - :obj:`dpnp.nanmin` : The minimum value of an array along a given axis, - ignores NaNs. - :obj:`dpnp.maximum` : Element-wise maximum of array elements, propagates - NaNs. - :obj:`dpnp.fmod` : Calculate the element-wise remainder of division. +Parameters +---------- +x1 : {dpnp.ndarray, usm_ndarray, scalar} + First input array, expected to have numeric data type. + Both inputs `x1` and `x2` can not be scalars at the same time. +x2 : {dpnp.ndarray, usm_ndarray, scalar} + Second input array, also expected to have numeric data type. + Both inputs `x1` and `x2` can not be scalars at the same time. +out : {None, dpnp.ndarray, usm_ndarray}, optional + Output array to populate. + Array must have the correct shape and the expected data type. + Default: ``None``. +order : {"C", "F", "A", "K"}, optional + Memory layout of the newly output array, if parameter `out` is ``None``. + Default: ``"K"``. 
- Examples - -------- - >>> import dpnp as np - >>> x1 = np.array([2, 3, 4]) - >>> x2 = np.array([1, 5, 2]) - >>> np.fmin(x1, x2) - array([1, 3, 2]) - - >>> x1 = np.eye(2) - >>> x2 = np.array([0.5, 2]) - >>> np.fmin(x1, x2) # broadcasting - array([[0.5, 0. ], - [0. , 1. ]] - - >>> x1 = np.array([np.nan, 0, np.nan]) - >>> x2 = np.array([0, np.nan, np.nan]) - >>> np.fmin(x1, x2) - array([ 0., 0., nan]) +Returns +------- +out : dpnp.ndarray + An array containing the element-wise minima. The data type of + the returned array is determined by the Type Promotion Rules. - """ +Limitations +----------- +Parameters `where` and `subok` are supported with their default values. +Keyword argument `kwargs` is currently unsupported. +Otherwise ``NotImplementedError`` exception will be raised. - if kwargs: - pass - elif where is not True: - pass - elif dtype is not None: - pass - elif subok is not True: - pass - elif dpnp.isscalar(x1) and dpnp.isscalar(x2): - # at least either x1 or x2 has to be an array - pass - else: - # get USM type and queue to copy scalar from the host memory into - # a USM allocation - usm_type, queue = ( - get_usm_allocations([x1, x2]) - if dpnp.isscalar(x1) or dpnp.isscalar(x2) - else (None, None) - ) +See Also +-------- +:obj:`dpnp.fmax` : Element-wise maximum of two arrays, ignores NaNs. +:obj:`dpnp.minimum` : Element-wise minimum of two arrays, propagates NaNs. +:obj:`dpnp.min` : The minimum value of an array along a given axis, propagates NaNs. +:obj:`dpnp.nanmin` : The minimum value of an array along a given axis, ignores NaNs. +:obj:`dpnp.maximum` : Element-wise maximum of two arrays, propagates NaNs. +:obj:`dpnp.max` : The maximum value of an array along a given axis, propagates NaNs. +:obj:`dpnp.nanmax` : The maximum value of an array along a given axis, ignores NaNs. 
- x1_desc = dpnp.get_dpnp_descriptor( - x1, - copy_when_strides=False, - copy_when_nondefault_queue=False, - alloc_usm_type=usm_type, - alloc_queue=queue, - ) - x2_desc = dpnp.get_dpnp_descriptor( - x2, - copy_when_strides=False, - copy_when_nondefault_queue=False, - alloc_usm_type=usm_type, - alloc_queue=queue, - ) - if x1_desc and x2_desc: - if out is not None: - if not dpnp.is_supported_array_type(out): - raise TypeError( - "return array must be of supported array type" - ) - out_desc = ( - dpnp.get_dpnp_descriptor( - out, copy_when_nondefault_queue=False - ) - or None - ) - else: - out_desc = None +Notes +----- +The fmin is equivalent to ``dpnp.where(x1 <= x2, x1, x2)`` when neither +`x1` nor `x2` are NaNs, but it is faster and does proper broadcasting. - return dpnp_fmin( - x1_desc, x2_desc, dtype=dtype, out=out_desc, where=where - ).get_pyobj() +Examples +-------- +>>> import dpnp as np +>>> x1 = np.array([2, 3, 4]) +>>> x2 = np.array([1, 5, 2]) +>>> np.fmin(x1, x2) +array([1, 3, 2]) - return call_origin( - numpy.fmin, x1, x2, dtype=dtype, out=out, where=where, **kwargs - ) +>>> x1 = np.eye(2) +>>> x2 = np.array([0.5, 2]) +>>> np.fmin(x1, x2) +array([[0.5, 0. ], + [0. , 1. ]]) + +>>> x1 = np.array([np.nan, 0, np.nan]) +>>> x2 = np.array([0, np.nan, np.nan]) +>>> np.fmin(x1, x2) +array([ 0., 0., nan]) +""" + +fmin = DPNPBinaryFunc( + "fmin", + ufi._fmin_result_type, + ufi._fmin, + _FMIN_DOCSTRING, + mkl_fn_to_call=vmi._mkl_fmin_to_call, + mkl_impl_fn=vmi._fmin, +) _FMOD_DOCSTRING = """ @@ -2100,6 +2040,11 @@ def gradient(f, *varargs, axis=None, edge_order=1): Compares two input arrays `x1` and `x2` and returns a new array containing the element-wise maxima. +If one of the elements being compared is a NaN, then that element is returned. +If both elements are NaNs then the first is returned. The latter distinction is +important for complex NaNs, which are defined as at least one of the real or +imaginary parts being a NaN. 
The net effect is that NaNs are propagated. + For full documentation refer to :obj:`numpy.maximum`. Parameters @@ -2175,6 +2120,11 @@ def gradient(f, *varargs, axis=None, edge_order=1): Compares two input arrays `x1` and `x2` and returns a new array containing the element-wise minima. +If one of the elements being compared is a NaN, then that element is returned. +If both elements are NaNs then the first is returned. The latter distinction is +important for complex NaNs, which are defined as at least one of the real or +imaginary parts being a NaN. The net effect is that NaNs are propagated. + For full documentation refer to :obj:`numpy.minimum`. Parameters diff --git a/dpnp/dpnp_utils/dpnp_algo_utils.pxd b/dpnp/dpnp_utils/dpnp_algo_utils.pxd index 4d4272ac9fb1..23714b5218cc 100644 --- a/dpnp/dpnp_utils/dpnp_algo_utils.pxd +++ b/dpnp/dpnp_utils/dpnp_algo_utils.pxd @@ -91,19 +91,11 @@ cdef class dpnp_descriptor: cdef public: # TODO remove "public" as python accessible attribute object origin_pyobj - dpnp_descriptor origin_desc dict descriptor Py_ssize_t dpnp_descriptor_data_size cpp_bool dpnp_descriptor_is_scalar cdef void * get_data(self) - cdef cpp_bool match_ctype(self, DPNPFuncType ctype) - - -cdef shape_type_c get_common_shape(shape_type_c input1_shape, shape_type_c input2_shape) except * -""" -Calculate common shape from input shapes -""" cdef dpnp_descriptor create_output_descriptor(shape_type_c output_shape, DPNPFuncType c_type, diff --git a/dpnp/dpnp_utils/dpnp_algo_utils.pyx b/dpnp/dpnp_utils/dpnp_algo_utils.pyx index 1e3a793d868f..ad9a2f10ff43 100644 --- a/dpnp/dpnp_utils/dpnp_algo_utils.pyx +++ b/dpnp/dpnp_utils/dpnp_algo_utils.pyx @@ -33,8 +33,6 @@ This module contains different helpers and utilities """ import dpctl -import dpctl.tensor._copy_utils as dpt_cu -import dpctl.tensor._tensor_impl as dpt_ti import dpctl.utils as dpu import numpy @@ -381,32 +379,6 @@ cpdef long _get_linear_index(key, tuple shape, int ndim): return li -cdef shape_type_c 
get_common_shape(shape_type_c input1_shape, shape_type_c input2_shape) except *: - cdef shape_type_c input1_shape_orig = input1_shape - cdef shape_type_c input2_shape_orig = input2_shape - cdef shape_type_c result_shape - - # ex (8, 1, 6, 1) and (7, 1, 5) -> (8, 1, 6, 1) and (1, 7, 1, 5) - cdef size_t max_shape_size = max(input1_shape.size(), input2_shape.size()) - input1_shape.insert(input1_shape.begin(), max_shape_size - input1_shape.size(), 1) - input2_shape.insert(input2_shape.begin(), max_shape_size - input2_shape.size(), 1) - - # ex result (8, 7, 6, 5) - for it in range(max_shape_size): - if input1_shape[it] == input2_shape[it]: - result_shape.push_back(input1_shape[it]) - elif input1_shape[it] == 1: - result_shape.push_back(input2_shape[it]) - elif input2_shape[it] == 1: - result_shape.push_back(input1_shape[it]) - else: - err_msg = f"{ERROR_PREFIX} in function get_common_shape(): " - err_msg += f"operands could not be broadcast together with shapes {input1_shape_orig} {input2_shape_orig}" - raise ValueError(err_msg) - - return result_shape - - cdef dpnp_descriptor create_output_descriptor(shape_type_c output_shape, DPNPFuncType c_type, dpnp_descriptor requested_out, @@ -572,10 +544,9 @@ cdef (DPNPFuncType, void *) get_ret_type_and_func(DPNPFuncData kernel_data, cdef class dpnp_descriptor: - def __init__(self, obj, dpnp_descriptor orig_desc=None): + def __init__(self, obj): """ Initialize variables """ self.origin_pyobj = None - self.origin_desc = None self.descriptor = None self.dpnp_descriptor_data_size = 0 self.dpnp_descriptor_is_scalar = True @@ -594,10 +565,6 @@ cdef class dpnp_descriptor: self.origin_pyobj = obj - """ Keep track of a descriptor with original data """ - if orig_desc is not None and orig_desc.is_valid: - self.origin_desc = orig_desc - """ array size calculation """ cdef Py_ssize_t shape_it = 0 self.dpnp_descriptor_data_size = 1 @@ -657,14 +624,6 @@ cdef class dpnp_descriptor: def is_scalar(self): return self.dpnp_descriptor_is_scalar - 
@property - def is_temporary(self): - """ - Non-none descriptor of original data means the current descriptor - holds a temporary allocated data. - """ - return self.origin_desc is not None - @property def data(self): if self.is_valid: @@ -696,15 +655,6 @@ cdef class dpnp_descriptor: return interface_dict - def _copy_array_from(self, other_desc): - """ - Fill array data with usm_ndarray of the same shape from other DPNP descriptor - """ - if not isinstance(other_desc, dpnp_descriptor): - raise TypeError("expected dpnp_descriptor, got {}".format(type(other_desc))) - - dpt_cu._copy_same_shape(self.get_array(), other_desc.get_array()) - def get_pyobj(self): return self.origin_pyobj @@ -718,29 +668,6 @@ cdef class dpnp_descriptor: "expected either dpctl.tensor.usm_ndarray or dpnp.dpnp_array.dpnp_array, got {}" "".format(type(self.origin_pyobj))) - def get_result_desc(self, result_desc=None): - """ - Copy the result data into an original array - """ - if self.is_temporary: - # Original descriptor is not None, so copy the array data into it and return - from_desc = self if result_desc is None else result_desc - self.origin_desc._copy_array_from(from_desc) - return self.origin_desc - elif result_desc is not None: - # A temporary result descriptor was allocated, needs to copy data back into 'out' descriptor - self._copy_array_from(result_desc) - return self - - def is_array_overlapped(self, other_desc): - """ - Check if usm_ndarray overlaps an array from other DPNP descriptor - """ - if not isinstance(other_desc, dpnp_descriptor): - raise TypeError("expected dpnp_descriptor, got {}".format(type(other_desc))) - - return dpt_ti._array_overlap(self.get_array(), other_desc.get_array()) - cdef void * get_data(self): cdef Py_ssize_t item_size = 0 cdef Py_ssize_t elem_offset = 0 @@ -755,9 +682,6 @@ cdef class dpnp_descriptor: return < void * > val - cdef cpp_bool match_ctype(self, DPNPFuncType ctype): - return self.dtype == dpnp_DPNPFuncType_to_dtype(< size_t > ctype) - def 
__bool__(self): return self.is_valid diff --git a/tests/skipped_tests.tbl b/tests/skipped_tests.tbl index 199566295a34..944a4bd122d4 100644 --- a/tests/skipped_tests.tbl +++ b/tests/skipped_tests.tbl @@ -222,8 +222,6 @@ tests/third_party/cupy/math_tests/test_floating.py::TestFloating::test_ldexp tests/third_party/cupy/math_tests/test_floating.py::TestFloating::test_nextafter_combination tests/third_party/cupy/math_tests/test_floating.py::TestFloating::test_nextafter_float -tests/third_party/cupy/math_tests/test_misc.py::TestMisc::test_fmax_nan -tests/third_party/cupy/math_tests/test_misc.py::TestMisc::test_fmin_nan tests/third_party/cupy/math_tests/test_misc.py::TestMisc::test_nan_to_num tests/third_party/cupy/math_tests/test_misc.py::TestMisc::test_nan_to_num_negative tests/third_party/cupy/math_tests/test_misc.py::TestMisc::test_nan_to_num_for_old_numpy diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl index 26b521905396..61f981c2b9cc 100644 --- a/tests/skipped_tests_gpu.tbl +++ b/tests/skipped_tests_gpu.tbl @@ -273,8 +273,6 @@ tests/third_party/cupy/math_tests/test_floating.py::TestFloating::test_ldexp tests/third_party/cupy/math_tests/test_floating.py::TestFloating::test_nextafter_combination tests/third_party/cupy/math_tests/test_floating.py::TestFloating::test_nextafter_float -tests/third_party/cupy/math_tests/test_misc.py::TestMisc::test_fmax_nan -tests/third_party/cupy/math_tests/test_misc.py::TestMisc::test_fmin_nan tests/third_party/cupy/math_tests/test_misc.py::TestMisc::test_nan_to_num tests/third_party/cupy/math_tests/test_misc.py::TestMisc::test_nan_to_num_negative tests/third_party/cupy/math_tests/test_misc.py::TestMisc::test_nan_to_num_for_old_numpy diff --git a/tests/test_mathematical.py b/tests/test_mathematical.py index ae2c73748b56..54bc03d0a3f2 100644 --- a/tests/test_mathematical.py +++ b/tests/test_mathematical.py @@ -24,6 +24,7 @@ get_float_complex_dtypes, get_float_dtypes, get_integer_dtypes, + has_support_aspect16, 
has_support_aspect64, ) from .test_umath import ( @@ -1953,6 +1954,80 @@ def test_invalid_out(self, out): assert_raises(TypeError, numpy.divide, a.asnumpy(), 2, out) +class TestFmaxFmin: + @pytest.mark.skipif(not has_support_aspect16(), reason="no fp16 support") + @pytest.mark.parametrize("func", ["fmax", "fmin"]) + def test_half(self, func): + a = numpy.array([0, 1, 2, 4, 2], dtype=numpy.float16) + b = numpy.array([-2, 5, 1, 4, 3], dtype=numpy.float16) + c = numpy.array([0, -1, -numpy.inf, numpy.nan, 6], dtype=numpy.float16) + ia, ib, ic = dpnp.array(a), dpnp.array(b), dpnp.array(c) + + result = getattr(dpnp, func)(ia, ib) + expected = getattr(numpy, func)(a, b) + assert_equal(result, expected) + + result = getattr(dpnp, func)(ib, ic) + expected = getattr(numpy, func)(b, c) + assert_equal(result, expected) + + @pytest.mark.parametrize("func", ["fmax", "fmin"]) + @pytest.mark.parametrize("dtype", get_float_dtypes()) + def test_float_nans(self, func, dtype): + a = numpy.array([0, numpy.nan, numpy.nan], dtype=dtype) + b = numpy.array([numpy.nan, 0, numpy.nan], dtype=dtype) + ia, ib = dpnp.array(a), dpnp.array(b) + + result = getattr(dpnp, func)(ia, ib) + expected = getattr(numpy, func)(a, b) + assert_equal(result, expected) + + @pytest.mark.parametrize("func", ["fmax", "fmin"]) + @pytest.mark.parametrize("dtype", get_complex_dtypes()) + @pytest.mark.parametrize( + "nan_val", + [ + complex(numpy.nan, 0), + complex(0, numpy.nan), + complex(numpy.nan, numpy.nan), + ], + ids=["nan+0j", "nanj", "nan+nanj"], + ) + def test_complex_nans(self, func, dtype, nan_val): + a = numpy.array([0, nan_val, nan_val], dtype=dtype) + b = numpy.array([nan_val, 0, nan_val], dtype=dtype) + ia, ib = dpnp.array(a), dpnp.array(b) + + result = getattr(dpnp, func)(ia, ib) + expected = getattr(numpy, func)(a, b) + assert_equal(result, expected) + + @pytest.mark.parametrize("func", ["fmax", "fmin"]) + @pytest.mark.parametrize("dtype", get_float_dtypes(no_float16=False)) + def test_precision(self, 
func, dtype): + dtmin = numpy.finfo(dtype).min + dtmax = numpy.finfo(dtype).max + d1 = dtype(0.1) + d1_next = numpy.nextafter(d1, numpy.inf) + + test_cases = [ + # v1 v2 + (dtmin, -numpy.inf), + (dtmax, -numpy.inf), + (d1, d1_next), + (dtmax, numpy.nan), + ] + + for v1, v2 in test_cases: + a = numpy.array([v1]) + b = numpy.array([v2]) + ia, ib = dpnp.array(a), dpnp.array(b) + + result = getattr(dpnp, func)(ia, ib) + expected = getattr(numpy, func)(a, b) + assert_allclose(result, expected) + + class TestFloorDivide: @pytest.mark.usefixtures("suppress_divide_numpy_warnings") @pytest.mark.parametrize( diff --git a/tests/test_usm_type.py b/tests/test_usm_type.py index d38acc4a6570..44311813b185 100644 --- a/tests/test_usm_type.py +++ b/tests/test_usm_type.py @@ -639,8 +639,8 @@ def test_1in_1out(func, data, usm_type): pytest.param("dot", [3 + 2j, 4 + 1j, 5], [1, 2 + 3j, 3]), # TODO: uncomment once resolved in gh-1723 by dpctl # pytest.param("extract", [False, True, True, False], [0, 1, 2, 3]), - pytest.param("fmax", [[0.0, 1.0, 2.0]], [[3.0, 4.0, 5.0]]), - pytest.param("fmin", [[0.0, 1.0, 2.0]], [[3.0, 4.0, 5.0]]), + pytest.param("fmax", [0.0, 1.0, 2.0], [3.0, 4.0, 5.0]), + pytest.param("fmin", [0.0, 1.0, 2.0], [3.0, 4.0, 5.0]), pytest.param("fmod", [5, 3], [2, 2.0]), pytest.param( "gradient", [1, 2, 4, 7, 11, 16], [0.0, 1.0, 1.5, 3.5, 4.0, 6.0] @@ -651,8 +651,8 @@ def test_1in_1out(func, data, usm_type): pytest.param("inner", [1.0, 2.0, 3.0], [4.0, 5.0, 6.0]), pytest.param("kron", [3.0, 4.0, 5.0], [1.0, 2.0]), pytest.param("logaddexp", [[-1, 2, 5, 9]], [[4, -3, 2, -8]]), - pytest.param("maximum", [[0.0, 1.0, 2.0]], [[3.0, 4.0, 5.0]]), - pytest.param("minimum", [[0.0, 1.0, 2.0]], [[3.0, 4.0, 5.0]]), + pytest.param("maximum", [0.0, 1.0, 2.0], [3.0, 4.0, 5.0]), + pytest.param("minimum", [0.0, 1.0, 2.0], [3.0, 4.0, 5.0]), pytest.param("searchsorted", [11, 12, 13, 14, 15], [-10, 20, 12, 13]), pytest.param( "tensordot",