diff --git a/Changelog.rst b/Changelog.rst index c0f56d65e5..92eaf67731 100644 --- a/Changelog.rst +++ b/Changelog.rst @@ -3,6 +3,9 @@ version 3.15.4 **2023-??-??** +* Record dimension coordinate cell characteristics + (https://github.com/NCAS-CMS/cf-python/issues/692) +* New set of methods to query, set, del, and `get_cell_characteristics` * Fix bug in `cf.Field.match_by_construct` that always returned True for 1-d constructs whose axis is not in the data, even when the criterion was not matched diff --git a/cf/aggregate.py b/cf/aggregate.py index 2a0df913b6..2771fbd7cb 100644 --- a/cf/aggregate.py +++ b/cf/aggregate.py @@ -2311,7 +2311,7 @@ def aggregate( **Units** Units must be provided on the conditions where applicable, - since conditions without defined units do not match + since conditions without defined units will not match dimension coordinate constructs with defined units. **Multiple conditions** @@ -2358,18 +2358,26 @@ def aggregate( >>> x = cf.aggregate(fl, cells=cf.climatology_cells()) + **Storage of conditions** + + All returned field or domain constructs that have passed + dimension coordinate cell conditions will have those + conditions stored on the appropriate dimension coordinate + constructs, retrievable via their + `DimensionCoordinate.get_cell_characteristics` methods. + **Performance** The testing of the conditions has a computational overhead, as well as an I/O overhead if the dimension - coordinate data are on disk. Try to avoid setting redundant - conditions. For instance, if the inputs comprise monthly mean air - temperature and daily mean precipitation fields, then the - different field identities alone will ensure a correct - aggregation. In this case, adding cell conditions of - ``{'T': [{'cellsize': cf.D()}, {'cellsize': cf.M()}]}`` - will not change the result, but tests will still be - carried out. + coordinate data are on disk. Try to avoid setting + redundant conditions. 
For instance, if the inputs comprise + monthly mean air temperature and daily mean precipitation + fields, then the different field identities alone will + ensure a correct aggregation. In this case, adding cell + conditions of ``{'T': [{'cellsize': cf.D()}, {'cellsize': + cf.M()}]}`` will not change the result, but tests will + still be carried out. When setting a sequence of conditions, performance will be improved if the conditions towards the beginning of the @@ -2489,9 +2497,9 @@ def aggregate( # Initialise the cache of canonical metadata attributes canonical = _Canonical() - output_constructs = [] - - output_constructs_append = output_constructs.append + output_meta = [] + output_meta_append = output_meta.append + output_meta_extend = output_meta.extend if exclude: exclude = " NOT" @@ -2655,10 +2663,10 @@ def aggregate( # This field does not have a structural signature, so # it can't be aggregated. Put it straight into the # output list and move on to the next input construct. - if not copy: - output_constructs_append(f) - else: - output_constructs_append(f.copy()) + if copy: + meta = meta.copy() + + output_meta_append(meta) continue @@ -2724,11 +2732,10 @@ def aggregate( # add it straight to the output list and move on to the # next signature. 
# -------------------------------------------------------- - if not copy: - output_constructs_append(meta[0].field) - else: - output_constructs_append(meta[0].field.copy()) + if copy: + meta[0] = meta[0].copy() + output_meta_append(meta[0]) continue if not relaxed_units and not meta[0].units.isvalid: @@ -2741,9 +2748,9 @@ def aggregate( if not exclude: if copy: - output_constructs.extend(m.field.copy() for m in meta) + output_meta_extend(m.copy() for m in meta) else: - output_constructs.extend(m.field for m in meta) + output_meta_extend(meta) continue @@ -3035,11 +3042,16 @@ def aggregate( status = 1 if not exclude: if copy: - output_constructs.extend((m.field.copy() for m in meta0)) + output_meta_extend(m.copy() for m in meta0) else: - output_constructs.extend((m.field for m in meta0)) + output_meta_extend(meta0) else: - output_constructs.extend((m.field for m in meta)) + output_meta_extend(meta) + + if cells: + _set_cell_conditions(output_meta) + + output_constructs = [m.field for m in output_meta] aggregate.status = status @@ -3058,6 +3070,52 @@ def aggregate( return output_constructs +def _set_cell_conditions(output_meta): + """Store the cell characteristics from any cell conditions. + + The cell size and cell spacing characteristics are stored on the + appropriate dimension coordinate constructs. + + .. versionadded:: 3.15.4 + + :Parameters: + + output_meta: `list` + The list of `_Meta` objects, each of which contains an + output field or domain construct. The field or domain + constructs are updated in-place. 
+ + :Returns: + + `None` + + """ + for m in output_meta: + for value in m.axis.values(): + dim_index = value["dim_coord_index"] + if dim_index is None: + # There is no dimension coordinate construct for this + # axis + continue + + cellsize = value["cellsize"][dim_index] + spacing = value["spacing"][dim_index] + if cellsize is None and spacing is None: + # There is neither a cell size nor a cell spacing + # condition for this axis, so there is nothing to + # store. If only one of them is missing then the + # other one must still be recorded (the setter + # accepts `None` for either characteristic). + continue + + # Set the cell conditions on the dimension coordinate + # construct + dim_coord = m.field.dimension_coordinate(value["keys"][dim_index]) + dim_coord.set_cell_characteristics( + cellsize=cellsize, spacing=spacing + ) + + # -------------------------------------------------------------------- # Initialise the status # -------------------------------------------------------------------- @@ -3179,7 +3237,7 @@ def climatology_cells( {'cellsize': }, {'cellsize': }]} - Add a condition that separately aggregates decadal data: + Add a condition for decadal data: >>> cells['T'].append({'cellsize': cf.wi(3600, 3660, 'day')}) >>> cells @@ -4169,6 +4227,7 @@ def _aggregate_2_fields( verbose=None, concatenate=True, data_concatenation=None, + cell_conditions=None, relaxed_units=False, copy=True, ): @@ -4192,7 +4251,8 @@ def _aggregate_2_fields( data_concatenation: `dict` The dictionary that contains the data arrays for each - construct type that will need concatenating. + construct type that will need concatenating. Will be + updated in-place. .. 
versionadded:: 3.15.1 @@ -4248,10 +4308,6 @@ def _aggregate_2_fields( hash_values1 = m1.hash_values[a_identity] for i, (hash0, hash1) in enumerate(zip(hash_values0, hash_values1)): - # try: - # hash_values0[i].append(hash_values1[i]) - # except AttributeError: - # hash_values0[i] = [hash_values0[i], hash_values1[i]] hash_values0[i] = hash_values0[i] + hash_values1[i] # N-d auxiliary coordinates diff --git a/cf/dimensioncoordinate.py b/cf/dimensioncoordinate.py index b572129587..fd4229a1b4 100644 --- a/cf/dimensioncoordinate.py +++ b/cf/dimensioncoordinate.py @@ -55,6 +55,61 @@ def __new__(cls, *args, **kwargs): instance._Bounds = Bounds return instance + def __init__( + self, + properties=None, + data=None, + bounds=None, + geometry=None, + interior_ring=None, + source=None, + copy=True, + _use_data=True, + ): + """**Initialisation** + + :Parameters: + + {{init properties: `dict`, optional}} + + *Parameter example:* + ``properties={'standard_name': 'time'}`` + + {{init data: data_like, optional}} + + {{init bounds: `Bounds`, optional}} + + {{init geometry: `str`, optional}} + + {{init interior_ring: `InteriorRing`, optional}} + + {{init source: optional}} + + {{init copy: `bool`, optional}} + + """ + super().__init__( + properties=properties, + data=data, + bounds=bounds, + geometry=geometry, + interior_ring=interior_ring, + source=source, + copy=copy, + _use_data=_use_data, + ) + + if source is not None: + # Reset cell characteristics after set_data/set_bounds has + # removed them (any non-None source is checked) + try: + chars = source._get_component("cell_characteristics", None) + except AttributeError: + chars = None + + if chars is not None: + self._set_component("cell_characteristics", chars, copy=False) + def __repr__(self): """Called by the `repr` built-in function. @@ -710,6 +765,41 @@ def create_bounds( return bounds + def del_cell_characteristics(self, default=ValueError()): + """Remove the cell characteristics. + + A cell characteristic is assumed to be valid for each cell. 
Cell + characteristics are not inferred from the coordinate or bounds + data, but may be defined with the `set_cell_characteristics` + method. Cell characteristics are automatically removed + whenever the new data or bounds are set with `set_data` or + `set_bounds` respectively. + + .. versionadded:: 3.15.4 + + .. seealso:: `get_cell_characteristics`, + `has_cell_characteristics`, + `set_cell_characteristics` + + :Parameters: + + default: optional + Return the value of the *default* parameter if cell + characteristics have not been set. + + {{default Exception}} + + :Returns: + + `dict` + The removed cell size characteristics, as would have + been returned by `get_cell_characteristics`. + + """ + out = self.get_cell_characteristics(default=default) + self._del_component("cell_characteristics", default=None) + return out + @_deprecated_kwarg_check("i", version="3.0.0", removed_at="4.0.0") @_inplace_enabled(default=False) def flip(self, axes=None, inplace=False, i=False): @@ -776,66 +866,80 @@ def get_bounds(self, default=ValueError(), **kwargs): return super().get_bounds(default=default) - # def autoperiod(self, verbose=False): - # '''TODO Set dimensions to be cyclic. - # - # TODO A dimension is set to be cyclic if it has a unique longitude (or - # grid longitude) dimension coordinate construct with bounds and the - # first and last bounds values differ by 360 degrees (or an - # equivalent amount in other units). - # - # .. versionadded:: 3.0.0 - # - # .. 
seealso:: `isperiodic`, `period` - # - # :Parameters: - # - # TODO - # - # :Returns: - # - # `bool` - # - # **Examples** - # - # >>> f.autocyclic() - # - # ''' - # if not self.Units.islongitude: - # if verbose: - # print(0) - # if (self.get_property('standard_name', None) not in - # ('longitude', 'grid_longitude')): - # if verbose: - # print(1) - # return False - # - # bounds = self.get_bounds(None) - # if bounds is None: - # if verbose: - # print(2) - # return False - # - # bounds_data = bounds.get_data(None) - # if bounds_data is None: - # if verbose: - # print(3) - # return False - # - # bounds = bounds_data.array - # - # period = Data(360.0, units='degrees') - # - # period.Units = bounds_data.Units - # - # if abs(bounds[-1, -1] - bounds[0, 0]) != period.array: - # if verbose: - # print(4) - # return False - # - # self.period(period) - # - # return True + def get_cell_characteristics(self, default=ValueError()): + """Return cell characteristics. + + A cell characteristic is assumed to be valid for each cell. Cell + characteristics are not inferred from the coordinate or bounds + data, but may be defined with the `set_cell_characteristics` + method. Cell characteristics are automatically removed + whenever the new data or bounds are set with `set_data` or + `set_bounds` respectively. + + .. versionadded:: 3.15.4 + + .. seealso:: `del_cell_characteristics`, + `has_cell_characteristics`, + `set_cell_characteristics` + + :Parameters: + + default: optional + Return the value of the *default* parameter if cell + characteristics have not been set. + + {{default Exception}} + + :Returns: + + `dict` + The cell size characteristic (i.e. the absolute + difference between the cell bounds) and cell spacing + characteristic (i.e. the absolute difference between + two neighbouring coordinate values), with keys + ``'cellsize'`` and ``'spacing'`` respectively. If + either has a value of `None` then no characteristic + has been stored for that type. 
+ + """ + out = self._get_component("cell_characteristics", default=None) + if out is None: + if default is None: + return + + return self._default( + default, + f"{self.__class__.__name__} has no 'cell_characteristics' " + "component", + ) + + from copy import deepcopy + + return deepcopy(out) + + def has_cell_characteristics(self): + """Whether or not there are any cell characteristics. + + A cell characteristic is assumed to be valid for each cell. Cell + characteristics are not inferred from the coordinate or bounds + data, but may be defined with the `set_cell_characteristics` + method. Cell characteristics are automatically removed + whenever the new data or bounds are set with `set_data` or + `set_bounds` respectively. + + .. versionadded:: 3.15.4 + + .. seealso:: `del_cell_characteristics`, + `get_cell_characteristics`, + `set_cell_characteristics` + + :Returns: + + `bool` + Whether or not there are any cell characteristics. + + """ + return self._has_component("cell_characteristics") @_deprecated_kwarg_check("i", version="3.0.0", removed_at="4.0.0") @_inplace_enabled(default=False) @@ -852,7 +956,6 @@ def roll(self, axis, shift, inplace=False, i=False): # Null roll return c - # period = self._custom.get('period') period = c.period() if period is None: @@ -908,6 +1011,169 @@ def roll(self, axis, shift, inplace=False, i=False): return c + def set_bounds(self, bounds, copy=True): + """Set the bounds. + + .. versionadded:: 3.15.4 + + .. seealso:: `del_bounds`, `get_bounds`, `has_bounds`, `set_data` + + :Parameters: + + bounds: `Bounds` + The bounds to be inserted. + + copy: `bool`, optional + If True then copy the bounds prior to + insertion. By default the bounds are copied. 
+ + :Returns: + + `None` + + **Examples** + + >>> import numpy + >>> b = {{package}}.Bounds(data=numpy.arange(10).reshape(5, 2)) + >>> c.set_bounds(b) + >>> c.has_bounds() + True + >>> c.get_bounds() + + >>> b = c.del_bounds() + >>> b + + >>> c.has_bounds() + False + >>> print(c.get_bounds(None)) + None + >>> print(c.del_bounds(None)) + None + + """ + self._del_component("cell_characteristics", default=None) + super().set_bounds(bounds, copy=copy) + + def set_cell_characteristics(self, cellsize, spacing): + """Set cell characteristics. + + A cell characteristic is assumed to be valid for each cell. Cell + characteristics are not inferred from the coordinate or bounds + data, but may be set with this method. Cell characteristics + are automatically removed whenever the new data or bounds are + set with `set_data` or `set_bounds` respectively. + + .. versionadded:: 3.15.4 + + .. seealso:: `del_cell_characteristics`, + `get_cell_characteristics`, + `has_cell_characteristics` + + :Parameters: + + cellsize: + The cell size (i.e. the absolute difference between + the cell bounds) characteristic. May be a `Query`, + `TimeDuration`, scalar `Data`, scalar data_like + object, or `None`. A value of `None` means no + characteristic has been set. + + spacing: + The cell spacing (i.e. the absolute difference between + two neighbouring coordinate values) characteristic. + May be a `Query`, `TimeDuration`, scalar `Data`, + scalar data_like object, or `None`. A value of `None` + means no characteristic has been set. 
+ + :Returns: + + `None` + + **Examples** + + >>> d.set_cell_characteristics(cellsize=cf.D(5), spacing=cf.D(1)) + + >>> d.set_cell_characteristics(cf.Data(10, 'degree_E'), None) + + >>> d.set_cell_characteristics(cf.wi(100, 200), 150) + + """ + chars = {} + if cellsize is not None: + chars["cellsize"] = cellsize + + if spacing is not None: + chars["spacing"] = spacing + + if not chars: + self.del_cell_characteristics(None) + return + + self._set_component("cell_characteristics", chars, copy=False) + + def set_data(self, data, copy=True, inplace=True): + """Set the data. + + The units, calendar and fill value of the incoming `Data` + instance are removed prior to insertion. + + .. versionadded:: 3.15.4 + + .. seealso:: `data`, `del_data`, `get_data`, `has_data` + + :Parameters: + + data: `Data` + The data to be inserted. + + {{data_like}} + + copy: `bool`, optional + If True then copy the data prior to + insertion. By default the data are copied. + + {{inplace: `bool`, optional (default True)}} + + .. versionadded:: 3.7.0 + + :Returns: + + `None` or `{{class}}` + If the operation was in-place then `None` is returned, + otherwise return a new `{{class}}` instance containing + the new data. 
+ + **Examples** + + >>> f = cf.{{class}}() + >>> f.set_data([1, 2, 3]) + >>> f.has_data() + True + >>> f.get_data() + + >>> f.data + + >>> f.del_data() + + >>> g = f.set_data([4, 5, 6], inplace=False) + >>> g.data + + >>> f.has_data() + False + >>> print(f.get_data(None)) + None + >>> print(f.del_data(None)) + None + + """ + # Only the construct that receives the new data loses its + # cell characteristics, so that `self` is left unchanged + # when inplace is False + out = super().set_data(data, copy=copy, inplace=inplace) + target = self if inplace else out + target._del_component("cell_characteristics", default=None) + return out + # ---------------------------------------------------------------- # Deprecated attributes and methods # ---------------------------------------------------------------- diff --git a/cf/test/test_DimensionCoordinate.py b/cf/test/test_DimensionCoordinate.py index 44d6055b7d..9244a202b1 100644 --- a/cf/test/test_DimensionCoordinate.py +++ b/cf/test/test_DimensionCoordinate.py @@ -696,6 +696,47 @@ def test_DimensionCoordinate_create_regular(self): ) self.assertEqual(longitude_decreasing_no_bounds.units, "degrees_east") + def test_DimensionCoordinate_cell_characteristics(self): + """Test the `cell_characteristic` DimensionCoordinate methods.""" + d = self.dim.copy() + self.assertFalse(d.has_cell_characteristics()) + self.assertIsNone(d.get_cell_characteristics(None)) + self.assertIsNone(d.set_cell_characteristics(cellsize=5, spacing=None)) + self.assertTrue(d.has_cell_characteristics()) + self.assertEqual( + d.get_cell_characteristics(), + {"cellsize": 5}, + ) + self.assertEqual(d.del_cell_characteristics(), {"cellsize": 5}) + self.assertIsNone(d.del_cell_characteristics(None)) + + # Copy preserves cell characteristics + d.set_cell_characteristics(1, 2) + e = d.copy() + self.assertEqual( + d.get_cell_characteristics(), e.get_cell_characteristics() + ) + d.set_cell_characteristics(3, 4) + self.assertNotEqual( + d.get_cell_characteristics(), 
e.get_cell_characteristics() + ) + + # set_data clears cell characteristics + d.set_data(d.data) + self.assertIsNone( + d.get_cell_characteristics(None), + ) + + # set_bounds clears cell 
characteristics + self.assertIsNone(d.set_cell_characteristics(spacing=2, cellsize=1)) + self.assertEqual( + d.get_cell_characteristics(), + {"cellsize": 1, "spacing": 2}, + ) + d.set_bounds(d.bounds) + with self.assertRaises(ValueError): + d.get_cell_characteristics() + if __name__ == "__main__": print("Run date:", datetime.datetime.now()) diff --git a/cf/test/test_aggregate.py b/cf/test/test_aggregate.py index 56104e4bcf..cd95ad72eb 100644 --- a/cf/test/test_aggregate.py +++ b/cf/test/test_aggregate.py @@ -363,7 +363,19 @@ def test_aggregate_cells(self): } }, ): - self.assertEqual(len(cf.aggregate(fl, cells=cells)), 1) + x = cf.aggregate(fl, cells=cells) + self.assertEqual(len(x), 1) + + # Test storage of cell conditions + x = x[0] + lat = x.dimension_coordinate("latitude") + chars = lat.get_cell_characteristics() + self.assertTrue(chars["cellsize"].equals(cf.wi(30, 60, "degrees_N"))) + self.assertTrue(chars["spacing"].equals(cf.set([30, 45], "degrees_N"))) + for identity in ("longitude", "time"): + self.assertIsNone( + x.dimension_coordinate(identity).get_cell_characteristics(None) + ) for cells in ( {"Y": {"cellsize": cf.wi(39, 60, "km")}}, diff --git a/docs/source/class/cf.DimensionCoordinate.rst b/docs/source/class/cf.DimensionCoordinate.rst index d7910611a4..4033d7d950 100644 --- a/docs/source/class/cf.DimensionCoordinate.rst +++ b/docs/source/class/cf.DimensionCoordinate.rst @@ -338,7 +338,6 @@ Cyclicity :toctree: ../method/ :template: method.rst - ~cf.DimensionCoordinate.rechunk ~cf.DimensionCoordinate.close ~cf.DimensionCoordinate.convert_reference_time @@ -365,7 +364,11 @@ Miscellaneous ~cf.DimensionCoordinate.inspect ~cf.DimensionCoordinate.to_memory ~cf.DimensionCoordinate.uncompress - + ~cf.DimensionCoordinate.del_cell_characteristics + ~cf.DimensionCoordinate.get_cell_characteristics + ~cf.DimensionCoordinate.has_cell_characteristics + ~cf.DimensionCoordinate.set_cell_characteristics + Mathematical operations -----------------------