diff --git a/Changelog.rst b/Changelog.rst index dc1af6e6ce..2b236bb907 100644 --- a/Changelog.rst +++ b/Changelog.rst @@ -3,6 +3,8 @@ version 3.17.0 **2024-??-??** +* New methods: `cf.Field.pad_missing` and `cf.Data.pad_missing` + (https://github.com/NCAS-CMS/cf-python/issues/717) * Fix occasional bug when calculating UGRID cell areas when non-spatial coordinates span the discrete axis (https://github.com/NCAS-CMS/cf-python/issues/721) diff --git a/cf/aggregate.py b/cf/aggregate.py index 6fe53c2c9f..f95814da38 100644 --- a/cf/aggregate.py +++ b/cf/aggregate.py @@ -3920,7 +3920,22 @@ def _sort_indices(m, canonical_axes): """ canonical_axes = [m.id_to_axis[identity] for identity in canonical_axes] sort_indices = tuple([m.sort_indices[axis] for axis in canonical_axes]) - needs_sorting = sort_indices != (slice(None),) * len(sort_indices) + + # Whether or not one or more of the axes needs sorting + needs_sorting = False + for sort_index in sort_indices: + # Note: sort_index can only be a slice object or a numpy array + # (see `_create_hash_and_first_values`) + if isinstance(sort_index, slice): + if sort_index != slice(None): + # sort_index is a slice other than slice(None) + needs_sorting = True + break + elif sort_index.size > 1: + # sort_index is an array of 2 or more integers + needs_sorting = True + break + return sort_indices, needs_sorting diff --git a/cf/data/data.py b/cf/data/data.py index a881eefe22..9c730b72d7 100644 --- a/cf/data/data.py +++ b/cf/data/data.py @@ -2180,6 +2180,109 @@ def mean_of_upper_decile( return d + @_inplace_enabled(default=False) + def pad_missing(self, axis, pad_width=None, to_size=None, inplace=False): + """Pad an axis with missing data. + + :Parameters: + + axis: `int` + Select the axis for which the padding is to be + applied. + + *Parameter example:* + Pad the second axis: ``axis=1``. + + *Parameter example:* + Pad the last axis: ``axis=d.ndim - 1``. 
+ + {{pad_width: sequence of `int`, optional}} + + {{to_size: `int`, optional}} + + {{inplace: `bool`, optional}} + + :Returns: + + `Data` or `None` + The padded data, or `None` if the operation was + in-place. + + **Examples** + + >>> d = cf.Data(np.arange(6).reshape(2, 3)) + >>> print(d.array) + [[0 1 2] + [3 4 5]] + >>> e = d.pad_missing(1, (1, 2)) + >>> print(e.array) + [[-- 0 1 2 -- --] + [-- 3 4 5 -- --]] + >>> f = e.pad_missing(0, (0, 1)) + >>> print(f.array) + [[-- 0 1 2 -- --] + [-- 3 4 5 -- --] + [-- -- -- -- -- --]] + + >>> g = d.pad_missing(1, to_size=5) + >>> print(g.array) + [[0 1 2 -- --] + [3 4 5 -- --]] + + """ + if not 0 <= axis < self.ndim: + raise ValueError( + f"'axis' must be a valid dimension position. Got {axis}" + ) + + if to_size is not None: + # Set pad_width from to_size + if pad_width is not None: + raise ValueError("Can't set both 'pad_width' and 'to_size'") + + pad_width = (0, to_size - self.shape[axis]) + elif pad_width is None: + raise ValueError("Must set either 'pad_width' or 'to_size'") + + pad_width = np.asarray(pad_width) + if pad_width.shape != (2,) or not pad_width.dtype.kind == "i": + raise ValueError( + "'pad_width' must be a sequence of two integers. " + f"Got: {pad_width}" + ) + + pad_width = tuple(pad_width) + if any(n < 0 for n in pad_width): + if to_size is not None: + raise ValueError( + f"'to_size' ({to_size}) must not be smaller than the " + f"original axis size ({self.shape[axis]})" + ) + + raise ValueError( + f"Can't set a negative number of pad values. Got: {pad_width}" + ) + + d = _inplace_enabled_define_and_cleanup(self) + + dx = d.to_dask_array() + mask0 = da.ma.getmaskarray(dx) + + pad = [(0, 0)] * dx.ndim + pad[axis] = pad_width + + # Pad the data with zero. This will lose the original mask. 
+ dx = da.pad(dx, pad, mode="constant", constant_values=0) + + # Pad the mask with True + mask = da.pad(mask0, pad, mode="constant", constant_values=True) + + # Set the mask + dx = da.ma.masked_where(mask, dx) + + d._set_dask(dx) + return d + @_inplace_enabled(default=False) def percentile( self, diff --git a/cf/docstring/docstring.py b/cf/docstring/docstring.py index 172ed469e4..28a7360df4 100644 --- a/cf/docstring/docstring.py +++ b/cf/docstring/docstring.py @@ -587,6 +587,14 @@ "{{weights auto: `bool`, optional}}": """auto: `bool`, optional If True then return `False` if weights can't be found, rather than raising an exception.""", + # pad_width + "{{pad_width: sequence of `int`, optional}}": """pad_width: sequence of `int`, optional + Number of values to pad before and after the edges of + the axis.""", + # to_size + "{{to_size: `int`, optional}}": """to_size: `int`, optional + Pad the end of the axis so that the new axis has the + given size.""", # ---------------------------------------------------------------- # Method description substitutions (4 levels of indentation) # ---------------------------------------------------------------- diff --git a/cf/field.py b/cf/field.py index cd5dcc3729..9ce8d88c6d 100644 --- a/cf/field.py +++ b/cf/field.py @@ -11953,6 +11953,136 @@ def halo( return f + @_inplace_enabled(default=False) + def pad_missing(self, axis, pad_width=None, to_size=None, inplace=False): + """Pad an axis with missing data. + + The field's data and all metadata constructs that span the + axis are padded. + + .. versionadded:: 3.17.0 + + :Parameters: + + axis: `str` or `int` + Select the domain axis which is to be padded, defined + by that which would be selected by passing the given + axis description to a call of the field construct's + `domain_axis` method. For example, for a value of + ``'X'``, the domain axis construct returned by + ``f.domain_axis('X')`` is selected. 
+ + {{pad_width: sequence of `int`, optional}} + + {{to_size: `int`, optional}} + + {{inplace: `bool`, optional}} + + :Returns: + + `Field` or `None` + The padded field construct, or `None` if the operation + was in-place. + + **Examples** + + >>> f = cf.example_field(6) + >>> print(f) + Field: precipitation_amount (ncvar%pr) + -------------------------------------- + Data : precipitation_amount(cf_role=timeseries_id(2), time(4)) + Dimension coords: time(4) = [2000-01-16 12:00:00, ..., 2000-04-15 00:00:00] gregorian + Auxiliary coords: latitude(cf_role=timeseries_id(2)) = [25.0, 7.0] degrees_north + : longitude(cf_role=timeseries_id(2)) = [10.0, 40.0] degrees_east + : cf_role=timeseries_id(cf_role=timeseries_id(2)) = [x1, y2] + : altitude(cf_role=timeseries_id(2), 3, 4) = [[[1.0, ..., --]]] m + Coord references: grid_mapping_name:latitude_longitude + >>> print(f.array) + [[1. 2. 3. 4.] + [5. 6. 7. 8.]] + >>> g = f.pad_missing('T', (0, 5)) + >>> print(g) + Field: precipitation_amount (ncvar%pr) + -------------------------------------- + Data : precipitation_amount(cf_role=timeseries_id(2), time(9)) + Dimension coords: time(9) = [2000-01-16 12:00:00, ..., --] gregorian + Auxiliary coords: latitude(cf_role=timeseries_id(2)) = [25.0, 7.0] degrees_north + : longitude(cf_role=timeseries_id(2)) = [10.0, 40.0] degrees_east + : cf_role=timeseries_id(cf_role=timeseries_id(2)) = [x1, y2] + : altitude(cf_role=timeseries_id(2), 3, 4) = [[[1.0, ..., --]]] m + Coord references: grid_mapping_name:latitude_longitude + >>> print(g.array) + [[1.0 2.0 3.0 4.0 -- -- -- -- --] + [5.0 6.0 7.0 8.0 -- -- -- -- --]] + >>> h = g.pad_missing('cf_role=timeseries_id', (0, 1)) + >>> print(h) + Field: precipitation_amount (ncvar%pr) + -------------------------------------- + Data : precipitation_amount(cf_role=timeseries_id(3), time(9)) + Dimension coords: time(9) = [2000-01-16 12:00:00, ..., --] gregorian + Auxiliary coords: latitude(cf_role=timeseries_id(3)) = [25.0, 7.0, --] degrees_north + : 
longitude(cf_role=timeseries_id(3)) = [10.0, 40.0, --] degrees_east + : cf_role=timeseries_id(cf_role=timeseries_id(3)) = [x1, y2, --] + : altitude(cf_role=timeseries_id(3), 3, 4) = [[[1.0, ..., --]]] m + Coord references: grid_mapping_name:latitude_longitude + >>> print(h.array) + [[1.0 2.0 3.0 4.0 -- -- -- -- --] + [5.0 6.0 7.0 8.0 -- -- -- -- --] + [ -- -- -- -- -- -- -- -- --]] + + >>> print(f.pad_missing('time', to_size=6)) + Field: precipitation_amount (ncvar%pr) + -------------------------------------- + Data : precipitation_amount(cf_role=timeseries_id(2), time(6)) + Dimension coords: time(6) = [2000-01-16 12:00:00, ..., --] gregorian + Auxiliary coords: latitude(cf_role=timeseries_id(2)) = [25.0, 7.0] degrees_north + : longitude(cf_role=timeseries_id(2)) = [10.0, 40.0] degrees_east + : cf_role=timeseries_id(cf_role=timeseries_id(2)) = [x1, y2] + : altitude(cf_role=timeseries_id(2), 3, 4) = [[[1.0, ..., --]]] m + Coord references: grid_mapping_name:latitude_longitude + + """ + f = _inplace_enabled_define_and_cleanup(self) + + try: + axis1 = f._parse_axes(axis) + except ValueError: + raise ValueError( + f"Can't pad_missing: Bad axis specification: {axis!r}" + ) + + if len(axis1) != 1: + raise ValueError( + f"Can't pad_missing: Bad axis specification: {axis!r}" + ) + + data_axes = f.get_data_axes() + axis = axis1[0] + iaxis = data_axes.index(axis) + + # Pad the field + super(Field, f).pad_missing( + iaxis, pad_width=pad_width, to_size=to_size, inplace=True + ) + + # Set new domain axis size + domain_axis = f.domain_axis(axis) + domain_axis.set_size(f.shape[iaxis]) + + data_axes = f.constructs.data_axes() + for key, construct in f.constructs.filter_by_data(todict=True).items(): + construct_axes = data_axes[key] + if axis not in construct_axes: + continue + + # Pad the construct + iaxis = construct_axes.index(axis) + construct.pad_missing( + iaxis, pad_width=pad_width, to_size=to_size, inplace=True + ) + + return f + def percentile( self, ranks, diff --git 
a/cf/mixin/propertiesdata.py b/cf/mixin/propertiesdata.py index de0706251b..80269e11e5 100644 --- a/cf/mixin/propertiesdata.py +++ b/cf/mixin/propertiesdata.py @@ -1888,6 +1888,38 @@ def minimum(self): "ERROR: Can't get the minimum when there is no data array" ) + @_inplace_enabled(default=False) + def pad_missing(self, axis, pad_width=None, to_size=None, inplace=False): + """Pad an axis with missing data. + + :Parameters: + + axis: `int` + Select the axis for which the padding is to be + applied. + + {{pad_width: sequence of `int`, optional}} + + {{to_size: `int`, optional}} + + {{inplace: `bool`, optional}} + + :Returns: + + `{{class}}` or `None` + The {{class}} with padded data, or `None` if the + operation was in-place. + + """ + return self._apply_data_oper( + _inplace_enabled_define_and_cleanup(self), + "pad_missing", + axis=axis, + pad_width=pad_width, + to_size=to_size, + inplace=inplace, + ) + def period(self, *value, **config): """Return or set the period of the data. diff --git a/cf/mixin/propertiesdatabounds.py b/cf/mixin/propertiesdatabounds.py index 4f854a4b6d..15434785ce 100644 --- a/cf/mixin/propertiesdatabounds.py +++ b/cf/mixin/propertiesdatabounds.py @@ -3874,6 +3874,40 @@ def inspect(self): """ print(cf_inspect(self)) # pragma: no cover + @_inplace_enabled(default=False) + def pad_missing(self, axis, pad_width=None, to_size=None, inplace=False): + """Pad an axis with missing data. + + :Parameters: + + axis: `int` + Select the axis for which the padding is to be + applied. + + {{pad_width: sequence of `int`, optional}} + + {{to_size: `int`, optional}} + + {{inplace: `bool`, optional}} + + :Returns: + + `{{class}}` or `None` + The {{class}} with padded data, or `None` if the + operation was in-place. 
+ + """ + return self._apply_superclass_data_oper( + _inplace_enabled_define_and_cleanup(self), + "pad_missing", + bounds=True, + interior_ring=True, + axis=axis, + pad_width=pad_width, + to_size=to_size, + inplace=inplace, + ) + def period(self, *value, **config): """Return or set the period for cyclic values. diff --git a/cf/test/test_Data.py b/cf/test/test_Data.py index 2d439ba695..53656208ef 100644 --- a/cf/test/test_Data.py +++ b/cf/test/test_Data.py @@ -4731,6 +4731,31 @@ def test_Data_sparse_array(self): with self.assertRaises(ValueError): cf.Data(s, mask=mask) + def test_Data_pad_missing(self): + """Test Data.pad_missing.""" + d = cf.Data(np.arange(6).reshape(2, 3)) + + g = d.pad_missing(1, to_size=5) + self.assertEqual(g.shape, (2, 5)) + self.assertTrue(g[:, 3:].mask.all()) + + self.assertIsNone(d.pad_missing(1, pad_width=(1, 2), inplace=True)) + self.assertEqual(d.shape, (2, 6)) + self.assertTrue(d[:, 0].mask.all()) + self.assertTrue(d[:, 4:].mask.all()) + + e = d.pad_missing(0, pad_width=(0, 1)) + self.assertEqual(e.shape, (3, 6)) + self.assertTrue(e[2, :].mask.all()) + + # Can't set both pad_width and to_size + with self.assertRaises(ValueError): + d.pad_missing(0, pad_width=(0, 1), to_size=99) + + # Axis out of bounds + with self.assertRaises(ValueError): + d.pad_missing(99, to_size=99) + if __name__ == "__main__": print("Run date:", datetime.datetime.now()) diff --git a/cf/test/test_Field.py b/cf/test/test_Field.py index 0bdece56c7..6f528642fa 100644 --- a/cf/test/test_Field.py +++ b/cf/test/test_Field.py @@ -2669,6 +2669,23 @@ def test_Field_file_location(self): f.del_file_location("/invalid") self.assertEqual(f.file_locations(), set((location,))) + def test_Field_pad_missing(self): + """Test Field.pad_missing.""" + f = cf.example_field(0) + + g = f.pad_missing("X", to_size=10) + self.assertEqual(g.shape, (5, 10)) + self.assertTrue(g[:, 8:].mask.all()) + + self.assertIsNone(f.pad_missing("X", pad_width=(1, 2), inplace=True)) + 
self.assertEqual(f.shape, (5, 11)) + self.assertTrue(f[:, 0].mask.all()) + self.assertTrue(f[:, 9:].mask.all()) + + g = f.pad_missing("Y", pad_width=(0, 1)) + self.assertEqual(g.shape, (6, 11)) + self.assertTrue(g[5, :].mask.all()) + if __name__ == "__main__": print("Run date:", datetime.datetime.now())