From b20deda1e2c2bb3781fd81b8e75160f6edce88cc Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 16 Aug 2024 15:16:55 +0200 Subject: [PATCH 1/6] Align handling of NaN values for axis not None with numpy --- dpnp/dpnp_iface_manipulation.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/dpnp/dpnp_iface_manipulation.py b/dpnp/dpnp_iface_manipulation.py index 228b17c69eb2..249ff4f42ba5 100644 --- a/dpnp/dpnp_iface_manipulation.py +++ b/dpnp/dpnp_iface_manipulation.py @@ -188,7 +188,7 @@ def _unique_build_sort_indices(a, index_sh): """ - is_complex = dpnp.iscomplexobj(a) + is_inexact = dpnp.issubdtype(a, dpnp.inexact) if dpnp.issubdtype(a.dtype, numpy.unsignedinteger): ar_cmp = a.astype(dpnp.intp) elif dpnp.issubdtype(a.dtype, dpnp.bool): @@ -200,8 +200,24 @@ def compare_axis_elems(idx1, idx2): comp = dpnp.trim_zeros(ar_cmp[idx1] - ar_cmp[idx2], "f") if comp.shape[0] > 0: diff = comp[0] - if is_complex and dpnp.isnan(diff): - return True + if is_inexact and dpnp.isnan(diff): + isnan1 = dpnp.isnan(ar_cmp[idx1]) + if not isnan1.any(): # no NaN in ar_cmp[idx1] + return True # ar_cmp[idx1] goes to left + + isnan2 = dpnp.isnan(ar_cmp[idx2]) + if not isnan2.any(): # no NaN in ar_cmp[idx2] + return False # ar_cmp[idx1] goes to right + + # for complex all NaNs are considered equivalent + if (isnan1 & isnan2).all(): # NaNs at the same places + return False # ar_cmp[idx1] goes to right + + xor_nan_idx = dpnp.where(isnan1 ^ isnan2)[0] + if dpnp.isnan(ar_cmp[idx2][xor_nan_idx[0]]): + # first NaN in XOR mask is from ar_cmp[idx2] + return True # ar_cmp[idx1] goes to left + return False return diff < 0 return False From 15032b9e8565cf3c38a1589f62007dcc6a7bf555 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 16 Aug 2024 15:17:24 +0200 Subject: [PATCH 2/6] Add missing parametrize for test_sycl_queue.py::test_unique --- tests/test_sycl_queue.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/test_sycl_queue.py 
b/tests/test_sycl_queue.py index 4c13416606c7..6ef1d60bc886 100644 --- a/tests/test_sycl_queue.py +++ b/tests/test_sycl_queue.py @@ -2393,6 +2393,11 @@ def test_astype(device_x, device_y): @pytest.mark.parametrize("axis", [None, 0, -1]) +@pytest.mark.parametrize( + "device", + valid_devices, + ids=[device.filter_string for device in valid_devices], +) def test_unique(axis, device): a = numpy.array([[1, 1], [2, 3]]) ia = dpnp.array(a, device=device) From 659a156ee7a074225ba9efe401e7cd7a0e1ca6f1 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 16 Aug 2024 15:18:23 +0200 Subject: [PATCH 3/6] Remove obsolete test --- dpnp/dpnp_iface_manipulation.py | 2 +- tests/test_manipulation.py | 15 --------------- 2 files changed, 1 insertion(+), 16 deletions(-) diff --git a/dpnp/dpnp_iface_manipulation.py b/dpnp/dpnp_iface_manipulation.py index 249ff4f42ba5..f088575e9e59 100644 --- a/dpnp/dpnp_iface_manipulation.py +++ b/dpnp/dpnp_iface_manipulation.py @@ -212,7 +212,7 @@ def compare_axis_elems(idx1, idx2): # for complex all NaNs are considered equivalent if (isnan1 & isnan2).all(): # NaNs at the same places return False # ar_cmp[idx1] goes to right - + xor_nan_idx = dpnp.where(isnan1 ^ isnan2)[0] if dpnp.isnan(ar_cmp[idx2][xor_nan_idx[0]]): # first NaN in XOR mask is from ar_cmp[idx2] diff --git a/tests/test_manipulation.py b/tests/test_manipulation.py index abea70f61806..e9eb711d83ca 100644 --- a/tests/test_manipulation.py +++ b/tests/test_manipulation.py @@ -88,21 +88,6 @@ def test_result_type_only_arrays(): assert dpnp.result_type(*X) == numpy.result_type(*X_np) -@pytest.mark.usefixtures("allow_fall_back_on_numpy") -@pytest.mark.parametrize( - "array", - [[1, 2, 3], [1, 2, 2, 1, 2, 4], [2, 2, 2, 2], []], - ids=["[1, 2, 3]", "[1, 2, 2, 1, 2, 4]", "[2, 2, 2, 2]", "[]"], -) -def test_unique(array): - np_a = numpy.array(array) - dpnp_a = dpnp.array(array) - - expected = numpy.unique(np_a) - result = dpnp.unique(dpnp_a) - assert_array_equal(result, expected) - - class 
TestRepeat: @pytest.mark.parametrize( "data", From 3850d2dabfc0478037bc25afe371a8e1400930e5 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 16 Aug 2024 15:19:03 +0200 Subject: [PATCH 4/6] Applied black formatting --- dpnp/dpnp_iface_manipulation.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/dpnp/dpnp_iface_manipulation.py b/dpnp/dpnp_iface_manipulation.py index f088575e9e59..4f692aa66a79 100644 --- a/dpnp/dpnp_iface_manipulation.py +++ b/dpnp/dpnp_iface_manipulation.py @@ -202,21 +202,21 @@ def compare_axis_elems(idx1, idx2): diff = comp[0] if is_inexact and dpnp.isnan(diff): isnan1 = dpnp.isnan(ar_cmp[idx1]) - if not isnan1.any(): # no NaN in ar_cmp[idx1] - return True # ar_cmp[idx1] goes to left + if not isnan1.any(): # no NaN in ar_cmp[idx1] + return True # ar_cmp[idx1] goes to left isnan2 = dpnp.isnan(ar_cmp[idx2]) - if not isnan2.any(): # no NaN in ar_cmp[idx2] - return False # ar_cmp[idx1] goes to right + if not isnan2.any(): # no NaN in ar_cmp[idx2] + return False # ar_cmp[idx1] goes to right # for complex all NaNs are considered equivalent - if (isnan1 & isnan2).all(): # NaNs at the same places - return False # ar_cmp[idx1] goes to right + if (isnan1 & isnan2).all(): # NaNs at the same places + return False # ar_cmp[idx1] goes to right xor_nan_idx = dpnp.where(isnan1 ^ isnan2)[0] if dpnp.isnan(ar_cmp[idx2][xor_nan_idx[0]]): # first NaN in XOR mask is from ar_cmp[idx2] - return True # ar_cmp[idx1] goes to left + return True # ar_cmp[idx1] goes to left return False return diff < 0 return False From 1c2975824e89d8d0d2cda27e9564e5ae6daf49c7 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 16 Aug 2024 15:30:33 +0200 Subject: [PATCH 5/6] Add a test with NaNs and axis not None --- dpnp/dpnp_iface_manipulation.py | 3 +++ pyproject.toml | 2 +- tests/test_manipulation.py | 45 +++++++++++++++++++++++++++++++++ 3 files changed, 49 insertions(+), 1 deletion(-) diff --git a/dpnp/dpnp_iface_manipulation.py 
b/dpnp/dpnp_iface_manipulation.py index 4f692aa66a79..a6e305979077 100644 --- a/dpnp/dpnp_iface_manipulation.py +++ b/dpnp/dpnp_iface_manipulation.py @@ -214,6 +214,9 @@ def compare_axis_elems(idx1, idx2): return False # ar_cmp[idx1] goes to right xor_nan_idx = dpnp.where(isnan1 ^ isnan2)[0] + if xor_nan_idx.size == 0: + return False + if dpnp.isnan(ar_cmp[idx2][xor_nan_idx[0]]): # first NaN in XOR mask is from ar_cmp[idx2] return True # ar_cmp[idx1] goes to left diff --git a/pyproject.toml b/pyproject.toml index 528ba40a4119..5b8c944b2c98 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,7 +18,7 @@ exclude-protected = ["_create_from_usm_ndarray"] max-args = 11 max-locals = 30 max-branches = 15 -max-returns = 7 +max-returns = 8 [tool.pylint.format] max-line-length = 80 diff --git a/tests/test_manipulation.py b/tests/test_manipulation.py index e9eb711d83ca..991c5106af74 100644 --- a/tests/test_manipulation.py +++ b/tests/test_manipulation.py @@ -9,6 +9,7 @@ from .helper import ( get_all_dtypes, get_complex_dtypes, + get_float_complex_dtypes, get_float_dtypes, get_integer_dtypes, has_support_aspect64, @@ -733,3 +734,47 @@ def test_equal_nan(self, eq_nan_kwd): result = dpnp.unique(ia, **eq_nan_kwd) expected = numpy.unique(a, **eq_nan_kwd) assert_array_equal(result, expected) + + @pytest.mark.parametrize("dt", get_float_complex_dtypes()) + @pytest.mark.parametrize( + "axis_kwd", + [ + {}, + {"axis": 0}, + {"axis": 1}, + ], + ) + @pytest.mark.parametrize( + "return_kwds", + [ + {}, + { + "return_index": True, + "return_inverse": True, + "return_counts": True, + }, + ], + ) + @pytest.mark.parametrize( + "row", [[2, 3, 4], [2, numpy.nan, 4], [numpy.nan, 3, 4]] + ) + def test_2d_axis_nans(self, dt, axis_kwd, return_kwds, row): + a = numpy.array( + [ + [1, 0, 0], + [1, 0, 0], + [numpy.nan, numpy.nan, numpy.nan], + row, + [1, 0, 1], + [numpy.nan, numpy.nan, numpy.nan], + ] + ).astype(dt) + ia = dpnp.array(a) + + result = dpnp.unique(ia, **axis_kwd, **return_kwds) + 
expected = numpy.unique(a, **axis_kwd, **return_kwds) + if len(return_kwds) == 0: + assert_array_equal(result, expected) + else: + for iv, v in zip(result, expected): + assert_array_equal(iv, v) From bdb59a4891299b7757eda04d6b55fce31a3977e3 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 16 Aug 2024 15:40:32 +0200 Subject: [PATCH 6/6] For complex dtype the result may vary --- tests/third_party/cupy/manipulation_tests/test_add_remove.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/third_party/cupy/manipulation_tests/test_add_remove.py b/tests/third_party/cupy/manipulation_tests/test_add_remove.py index 4037b22cda30..36ed6f74f366 100644 --- a/tests/third_party/cupy/manipulation_tests/test_add_remove.py +++ b/tests/third_party/cupy/manipulation_tests/test_add_remove.py @@ -300,7 +300,7 @@ def test_unique_equal_nan(self, xp, dtype, equal_nan): [[2, xp.nan, 2], [xp.nan, 1, xp.nan], [xp.nan, 1, xp.nan]], dtype=dtype, ) - return xp.unique(a, axis=0, equal_nan=equal_nan) + return xp.unique(a, axis=1, equal_nan=equal_nan) @testing.parameterize(*testing.product({"trim": ["fb", "f", "b"]}))