diff --git a/Changelog.rst b/Changelog.rst index 40899cedf4..53e8886d84 100644 --- a/Changelog.rst +++ b/Changelog.rst @@ -1,10 +1,16 @@ version 3.17.0 -------------- -**2025-02-??** +**2025-??-??** +* Replace dataset aggregation functionality (CFA) with that imported + from `cfdm` (https://github.com/NCAS-CMS/cf-python/issues/841) * New keyword parameter to `cf.Field.compute_vertical_coordinates`: ``key`` (https://github.com/NCAS-CMS/cf-python/issues/802) +* Changed dependency: ``1.12.0.0<=cfdm<1.12.1.0`` +* Changed dependency: ``h5py>=3.12.0`` + +---- version 3.16.3 diff --git a/cf/__init__.py b/cf/__init__.py index 8fae51223c..05066c691d 100644 --- a/cf/__init__.py +++ b/cf/__init__.py @@ -105,7 +105,6 @@ raise ImportError(_error0 + str(error1)) __cf_version__ = cfdm.core.__cf_version__ -__cfa_version__ = "0.6.2" from packaging.version import Version import importlib.util @@ -207,13 +206,10 @@ ) # Check the version of cfdm -_minimum_vn = "1.11.2.0" -_maximum_vn = "1.11.3.0" -if ( - not Version(_minimum_vn) - <= Version(cfdm.__version__) - < Version(_maximum_vn) -): +_minimum_vn = "1.12.0.0" +_maximum_vn = "1.12.1.0" +_cfdm_version = Version(cfdm.__version__) +if not Version(_minimum_vn) <= _cfdm_version < Version(_maximum_vn): raise RuntimeError( f"Bad cfdm version: cf requires {_minimum_vn}<=cfdm<{_maximum_vn}. 
" f"Got {cfdm.__version__} at {cfdm.__file__}" @@ -291,10 +287,9 @@ from .field import Field from .data import Data from .data.array import ( + AggregatedArray, BoundsFromNodesArray, CellConnectivityArray, - CFAH5netcdfArray, - CFANetCDF4Array, FullArray, GatheredArray, H5netcdfArray, @@ -308,12 +303,6 @@ UMArray, ) -from .data.fragment import ( - FullFragmentArray, - NetCDFFragmentArray, - UMFragmentArray, -) - from .aggregate import aggregate, climatology_cells from .query import ( Query, diff --git a/cf/aggregate.py b/cf/aggregate.py index c5207e5427..952861b210 100644 --- a/cf/aggregate.py +++ b/cf/aggregate.py @@ -426,6 +426,7 @@ def __init__( # Promote selected properties to field ancillaries that span # the same domain axes as the field # ------------------------------------------------------------ + self.promoted_field_ancillaries = [] if field_ancillaries: f = self.promote_to_field_ancillary(field_ancillaries) @@ -2088,13 +2089,6 @@ def promote_to_field_ancillary(self, properties): ancillary construct that spans the entire domain, with the constant value of the property. - The `Data` of any new field ancillary construct is marked - as a CFA term, meaning that it will only be written to disk if - the parent field construct is written as a CFA aggregation - variable, and in that case the field ancillary is written as a - non-standard CFA aggregation instruction variable, rather than - a CF-netCDF ancillary variable. - If a domain construct is being aggregated then it is always returned unchanged. 
@@ -2125,7 +2119,6 @@ def promote_to_field_ancillary(self, properties): data = Data( FullArray(value, shape=f.shape, dtype=np.array(value).dtype) ) - data._cfa_set_term(True) field_anc = FieldAncillary( data=data, properties={"long_name": prop}, copy=False @@ -2137,9 +2130,15 @@ def promote_to_field_ancillary(self, properties): f = f.copy() copy = False - f.set_construct(field_anc, axes=f.get_data_axes(), copy=False) + key = f.set_construct( + field_anc, axes=f.get_data_axes(), copy=False + ) f.del_property(prop) + # Record that this field ancillary is derived from a + # promotion + self.promoted_field_ancillaries.append(key) + self.field = f return f @@ -2434,9 +2433,9 @@ def aggregate( Create new field ancillary constructs for each input field which has one or more of the given properties. For each input field, each property is converted to a field - ancillary construct that spans the entire domain, with the - constant value of the property, and the property itself is - deleted. + ancillary construct that spans the aggregation axes with + the constant value of the property, and the property + itself is deleted. .. versionadded:: 3.15.0 @@ -3039,6 +3038,9 @@ def aggregate( unaggregatable = False + # Record the names of the axes that are actually aggregated + axes_aggregated = [] + for axis in aggregating_axes: number_of_fields = len(meta) if number_of_fields == 1: @@ -3251,6 +3253,7 @@ def aggregate( # the aggregated fields as a single list ready for # aggregation along the next axis. 
# -------------------------------------------------------- + axes_aggregated.append(axis) meta = [m for gm in grouped_meta for m in gm] # Add fields to the output list @@ -3267,6 +3270,10 @@ def aggregate( if cells: _set_cell_conditions(output_meta) + # Remove non-aggregated axes from promoted field ancillaries + if field_ancillaries: + _fix_promoted_field_ancillaries(output_meta, axes_aggregated) + output_constructs = [m.field for m in output_meta] aggregate.status = status @@ -4724,6 +4731,14 @@ def _aggregate_2_fields( hash_value1 = anc1["hash_value"] anc0["hash_value"] = hash_value0 + hash_value1 + # The result of aggregating a promoted and non-promoted + # field ancillary is a non-promoted field ancillary + if ( + key0 in m0.promoted_field_ancillaries + and key1 not in m1.promoted_field_ancillaries + ): + m0.promoted_field_ancillaries.remove(key0) + # Domain ancillaries for identity in m0.domain_anc: anc0 = m0.domain_anc[identity] @@ -4745,9 +4760,9 @@ def _aggregate_2_fields( anc0["hash_value"] = hash_value0 + hash_value1 # ---------------------------------------------------------------- - # For each matching pair of coordinates, cell measures, field and - # domain ancillaries which span the aggregating axis, insert the - # one from parent1 into the one from parent0 + # For each matching pair of coordinates, cell measures, and field + # and domain ancillaries which span the aggregating axis, insert + # the one from parent1 into the one from parent0 # ---------------------------------------------------------------- for key0, key1, construct0, construct1 in spanning_variables: construct_axes0 = parent0.get_data_axes(key0) @@ -4909,7 +4924,7 @@ def _aggregate_2_fields( actual_range = parent0.del_property("actual_range", None) if actual_range is not None and is_log_level_info(logger): logger.info( - "Deleted 'actual_range' attribute due to being " + "Deleted 'actual_range' attribute due to it being " "outside of 'valid_range' attribute limits." 
) @@ -4919,7 +4934,6 @@ def _aggregate_2_fields( # Make a note that the parent construct in this _Meta object has # already been aggregated - m0.aggregated_field = True # ---------------------------------------------------------------- @@ -4986,3 +5000,53 @@ def dsg_feature_type_axis(meta, axis): # cf_role property cf_role = coords["cf_role"] return cf_role.count(None) != len(cf_role) + + +def _fix_promoted_field_ancillaries(output_meta, axes_aggregated): + """Remove non-aggregated axes from promoted field ancillaries. + + .. versionadded:: NEXTVERSION + + :Parameters: + + output_meta: `list` + The list of `_Meta` objects. If any include promoted field + ancillaries then these will be updated in-place. + + :Returns: + + `None` + + """ + for m in output_meta: + for value in m.field_anc.values(): + index = [] + squeeze = [] + + key = value["key"] + if key not in m.promoted_field_ancillaries: + continue + + # Remove the non-aggregated axes from the promoted field + # ancillary + for i, axis in enumerate(value["axes"]): + if axis in axes_aggregated: + index.append(slice(None)) + else: + index.append(0) + squeeze.append(i) + + if not squeeze: + continue + + fa_axes = m.field.get_data_axes(key) + fa = m.field.del_construct(key) + fa = fa[tuple(index)] + fa.squeeze(squeeze, inplace=True) + fa_axes = [a for i, a in enumerate(fa_axes) if i not in squeeze] + + # Record the field ancillary as being able to be written + # as a CF-netCDF aggregation 'value' variable + fa.data._nc_set_aggregation_fragment_type("value") + + m.field.set_construct(fa, axes=fa_axes, copy=False) diff --git a/cf/cfimplementation.py b/cf/cfimplementation.py index 468f2e10db..3de62364df 100644 --- a/cf/cfimplementation.py +++ b/cf/cfimplementation.py @@ -27,10 +27,9 @@ ) from .data import Data from .data.array import ( + AggregatedArray, BoundsFromNodesArray, CellConnectivityArray, - CFAH5netcdfArray, - CFANetCDF4Array, GatheredArray, H5netcdfArray, NetCDF4Array, @@ -114,49 +113,14 @@ def 
set_construct(self, parent, construct, axes=None, copy=True, **kwargs): parent, construct, axes=axes, copy=copy, **kwargs ) - def initialise_CFANetCDF4Array(self, **kwargs): - """Return a `CFANetCDF4Array` instance. - - :Parameters: - - kwargs: optional - Initialisation parameters to pass to the new instance. - - :Returns: - - `CFANetCDF4Array` - - """ - cls = self.get_class("CFANetCDF4Array") - return cls(**kwargs) - - def initialise_CFAH5netcdfArray(self, **kwargs): - """Return a `CFAH5netcdfArray` instance. - - .. versionadded:: 1.11.2.0 - - :Parameters: - - kwargs: optional - Initialisation parameters to pass to the new instance. - - :Returns: - - `CFAH5netcdfArray` - - """ - cls = self.get_class("CFAH5netcdfArray") - return cls(**kwargs) - _implementation = CFImplementation( cf_version=CF(), + AggregatedArray=AggregatedArray, AuxiliaryCoordinate=AuxiliaryCoordinate, CellConnectivity=CellConnectivity, CellMeasure=CellMeasure, CellMethod=CellMethod, - CFAH5netcdfArray=CFAH5netcdfArray, - CFANetCDF4Array=CFANetCDF4Array, CoordinateReference=CoordinateReference, DimensionCoordinate=DimensionCoordinate, Domain=Domain, @@ -214,8 +178,6 @@ def implementation(): 'CellConnectivityArray': cf.data.array.cellconnectivityarray.CellConnectivityArray, 'CellMeasure': cf.cellmeasure.CellMeasure, 'CellMethod': cf.cellmethod.CellMethod, - 'CFAH5netcdfArray': cf.data.array.cfah5netcdfarray.CFAH5netcdfArray, - 'CFANetCDF4Array': cf.data.array.cfanetcdf4array.CFANetCDF4Array, 'CoordinateReference': cf.coordinatereference.CoordinateReference, 'DimensionCoordinate': cf.dimensioncoordinate.DimensionCoordinate, 'Domain': cf.domain.Domain, diff --git a/cf/data/array/__init__.py b/cf/data/array/__init__.py index cd2c53766b..693fec0fb4 100644 --- a/cf/data/array/__init__.py +++ b/cf/data/array/__init__.py @@ -1,7 +1,6 @@ +from .aggregatedarray import AggregatedArray from .boundsfromnodesarray import BoundsFromNodesArray from .cellconnectivityarray import CellConnectivityArray -from 
.cfah5netcdfarray import CFAH5netcdfArray -from .cfanetcdf4array import CFANetCDF4Array from .fullarray import FullArray from .gatheredarray import GatheredArray from .h5netcdfarray import H5netcdfArray diff --git a/cf/data/array/abstract/__init__.py b/cf/data/array/abstract/__init__.py index ed8033a8e0..1dd4744403 100644 --- a/cf/data/array/abstract/__init__.py +++ b/cf/data/array/abstract/__init__.py @@ -1,2 +1 @@ from .array import Array -from .filearray import FileArray diff --git a/cf/data/array/abstract/array.py b/cf/data/array/abstract/array.py index c70931d797..307fa4a421 100644 --- a/cf/data/array/abstract/array.py +++ b/cf/data/array/abstract/array.py @@ -1,10 +1,9 @@ import cfdm from ....mixin_container import Container -from ..mixin import ArrayMixin -class Array(ArrayMixin, Container, cfdm.Array): +class Array(Container, cfdm.Array): """Abstract base class for a container of an underlying array. The form of the array is defined by the initialisation parameters diff --git a/cf/data/array/abstract/filearray.py b/cf/data/array/abstract/filearray.py deleted file mode 100644 index 750a7f8f31..0000000000 --- a/cf/data/array/abstract/filearray.py +++ /dev/null @@ -1,80 +0,0 @@ -from ....functions import _DEPRECATION_ERROR_ATTRIBUTE -from ..mixin import FileArrayMixin -from .array import Array - - -class FileArray(FileArrayMixin, Array): - """Abstract base class for an array stored in a file.""" - - def __getitem__(self, indices): - """Return a subspace of the array. - - x.__getitem__(indices) <==> x[indices] - - Returns a subspace of the array as an independent numpy array. 
- - """ - raise NotImplementedError( - f"Must implement {self.__class__.__name__}.__getitem__" - ) # pragma: no cover - - def __repr__(self): - """x.__repr__() <==> repr(x)""" - return f"" - - def __str__(self): - """x.__str__() <==> str(x)""" - return f"{self.get_filename()}, {self.get_address()}" - - @property - def dtype(self): - """Data-type of the array.""" - return self._get_component("dtype") - - @property - def filename(self): - """The name of the file containing the array. - - Deprecated at version 3.14.0. Use method `get_filename` instead. - - """ - _DEPRECATION_ERROR_ATTRIBUTE( - self, - "filename", - message="Use method 'get_filename' instead.", - version="3.14.0", - removed_at="5.0.0", - ) # pragma: no cover - - @property - def shape(self): - """Shape of the array.""" - return self._get_component("shape") - - def close(self): - """Close the dataset containing the data.""" - raise NotImplementedError( - f"Must implement {self.__class__.__name__}.close" - ) # pragma: no cover - - def get_address(self): - """The address in the file of the variable. - - .. versionadded:: 3.14.0 - - :Returns: - - `str` or `None` - The address, or `None` if there isn't one. - - """ - raise NotImplementedError( - f"Must implement {self.__class__.__name__}.get_address " - "in subclasses" - ) # pragma: no cover - - def open(self): - """Returns an open dataset containing the data array.""" - raise NotImplementedError( - f"Must implement {self.__class__.__name__}.open" - ) # pragma: no cover diff --git a/cf/data/array/aggregatedarray.py b/cf/data/array/aggregatedarray.py new file mode 100644 index 0000000000..3fb41d46e4 --- /dev/null +++ b/cf/data/array/aggregatedarray.py @@ -0,0 +1,23 @@ +import cfdm + +from ...mixin_container import Container +from ..fragment import FragmentFileArray + + +class AggregatedArray(Container, cfdm.AggregatedArray): + """An array stored in a CF aggregation variable. + + .. 
versionadded:: NEXTVERSION + + """ + + def __new__(cls, *args, **kwargs): + """Store fragment array classes. + + .. versionadded:: NEXTVERSION + + """ + # Override the inherited FragmentFileArray class + instance = super().__new__(cls) + instance._FragmentArray["location"] = FragmentFileArray + return instance diff --git a/cf/data/array/boundsfromnodesarray.py b/cf/data/array/boundsfromnodesarray.py index e65177fcd1..6eb0952fa3 100644 --- a/cf/data/array/boundsfromnodesarray.py +++ b/cf/data/array/boundsfromnodesarray.py @@ -1,23 +1,10 @@ import cfdm from ...mixin_container import Container -from .mixin import ArrayMixin, CompressedArrayMixin class BoundsFromNodesArray( - CompressedArrayMixin, - ArrayMixin, Container, cfdm.BoundsFromNodesArray, ): - """An array of cell bounds defined by UGRID node coordinates. - - The UGRID node coordinates contain the locations of the nodes of - the domain topology. In UGRID, the bounds of edge, face and volume - cells may be defined by these locations in conjunction with a - mapping from each cell boundary vertex to its corresponding - coordinate value. - - .. versionadded:: 3.16.0 - - """ + pass diff --git a/cf/data/array/cellconnectivityarray.py b/cf/data/array/cellconnectivityarray.py index 6f631176d8..f7585aed9a 100644 --- a/cf/data/array/cellconnectivityarray.py +++ b/cf/data/array/cellconnectivityarray.py @@ -1,25 +1,10 @@ import cfdm from ...mixin_container import Container -from .mixin import ArrayMixin, CompressedArrayMixin class CellConnectivityArray( - CompressedArrayMixin, - ArrayMixin, Container, cfdm.CellConnectivityArray, ): - """A connectivity array derived from a UGRID connectivity variable. - - A UGRID connectivity variable contains indices which map each cell - to its neighbours, as found in a UGRID "face_face_connectivity" or - "volume_volume_connectivity" variable. - - The connectivity array has one more column than the corresponding - UGRID variable. 
The extra column, in the first position, contains - the identifier for each cell. - - .. versionadded:: 3.16.0 - - """ + pass diff --git a/cf/data/array/cfah5netcdfarray.py b/cf/data/array/cfah5netcdfarray.py deleted file mode 100644 index 6e73d84bd9..0000000000 --- a/cf/data/array/cfah5netcdfarray.py +++ /dev/null @@ -1,10 +0,0 @@ -from .h5netcdfarray import H5netcdfArray -from .mixin import CFAMixin - - -class CFAH5netcdfArray(CFAMixin, H5netcdfArray): - """A CFA-netCDF array accessed with `h5netcdf` - - .. versionadded:: 1.11.2.0 - - """ diff --git a/cf/data/array/cfanetcdf4array.py b/cf/data/array/cfanetcdf4array.py deleted file mode 100644 index 475bb5fb28..0000000000 --- a/cf/data/array/cfanetcdf4array.py +++ /dev/null @@ -1,10 +0,0 @@ -from .mixin import CFAMixin -from .netcdf4array import NetCDF4Array - - -class CFANetCDF4Array(CFAMixin, NetCDF4Array): - """A CFA-netCDF array accessed with `netCDF4`. - - .. versionadded:: 1.11.2.0 - - """ diff --git a/cf/data/array/fullarray.py b/cf/data/array/fullarray.py index 81cca452a1..b7b4ff19ee 100644 --- a/cf/data/array/fullarray.py +++ b/cf/data/array/fullarray.py @@ -1,13 +1,9 @@ -import numpy as np -from cfdm.data.mixin import IndexMixin +import cfdm -from ...functions import indices_shape, parse_indices -from .abstract import Array +from ...mixin_container import Container -_FULLARRAY_HANDLED_FUNCTIONS = {} - -class FullArray(IndexMixin, Array): +class FullArray(Container, cfdm.FullArray): """A array filled with a given value. The array may be empty or all missing values. @@ -15,248 +11,3 @@ class FullArray(IndexMixin, Array): .. versionadded:: 3.14.0 """ - - def __init__( - self, - fill_value=None, - dtype=None, - shape=None, - attributes=None, - source=None, - copy=True, - ): - """**Initialisation** - - :Parameters: - - fill_value : scalar, optional - The fill value for the array. May be set to - `cf.masked` or `np.ma.masked`. - - dtype: `numpy.dtype` - The data type of the array. 
- - shape: `tuple` - The array dimension sizes. - - {{init attributes: `dict` or `None`, optional}} - - .. versionadded:: 1.11.2.0 - - {{init source: optional}} - - {{init copy: `bool`, optional}} - - units: `str` or `None`, optional - Deprecated at version 1.11.2.0. Use the - *attributes* parameter instead. - - calendar: `str` or `None`, optional - Deprecated at version 1.11.2.0. Use the - *attributes* parameter instead. - - """ - super().__init__(source=source, copy=copy) - - if source is not None: - try: - fill_value = source._get_component("full_value", None) - except AttributeError: - fill_value = None - - try: - dtype = source._get_component("dtype", None) - except AttributeError: - dtype = None - - try: - shape = source._get_component("shape", None) - except AttributeError: - shape = None - - try: - attributes = source._get_component("attributes", False) - except AttributeError: - attributes = None - - self._set_component("full_value", fill_value, copy=False) - self._set_component("dtype", dtype, copy=False) - self._set_component("shape", shape, copy=False) - self._set_component("attributes", attributes, copy=False) - - def __array_function__(self, func, types, args, kwargs): - """The `numpy` `__array_function__` protocol. - - .. versionadded:: 3.15.0 - - """ - if func not in _FULLARRAY_HANDLED_FUNCTIONS: - return NotImplemented - - # Note: This allows subclasses that don't override - # __array_function__ to handle FullArray objects - if not all(issubclass(t, self.__class__) for t in types): - return NotImplemented - - return _FULLARRAY_HANDLED_FUNCTIONS[func](*args, **kwargs) - - def __repr__(self): - """Called by the `repr` built-in function. - - x.__repr__() <==> repr(x) - - """ - return f"" - - def __str__(self): - """Called by the `str` built-in function. 
- - x.__str__() <==> str(x) - - """ - fill_value = self.get_full_value() - if fill_value is None: - return "Uninitialised" - - return f"Filled with {fill_value!r}" - - def _get_array(self, index=None): - """Returns the full array. - - .. versionadded:: 1.11.2.0 - - .. seealso:: `__array__`, `index` - - :Parameters: - - {{index: `tuple` or `None`, optional}} - - :Returns: - - `numpy.ndarray` - The subspace. - - """ - if index is None: - shape = self.shape - else: - original_shape = self.original_shape - index = parse_indices(original_shape, index, keepdims=False) - shape = indices_shape(index, original_shape, keepdims=False) - - fill_value = self.get_full_value() - if fill_value is np.ma.masked: - array = np.ma.masked_all(shape, dtype=self.dtype) - elif fill_value is not None: - array = np.full(shape, fill_value=fill_value, dtype=self.dtype) - else: - array = np.empty(shape, dtype=self.dtype) - - return array - - @property - def array(self): - """Return an independent numpy array containing the data. - - .. versionadded:: 1.11.2.0 - - :Returns: - - `numpy.ndarray` - An independent numpy array of the data. - """ - return np.asanyarray(self) - - @property - def dtype(self): - """Data-type of the data elements.""" - return self._get_component("dtype") - - @property - def shape(self): - """Tuple of array dimension sizes.""" - return self._get_component("shape") - - def get_full_value(self, default=AttributeError()): - """Return the data array fill value. - - .. versionadded:: 3.14.0 - - .. seealso:: `set_full_value` - - :Parameters: - - default: optional - Return the value of the *default* parameter if the - fill value has not been set. If set to an `Exception` - instance then it will be raised instead. - - :Returns: - - The fill value. - - """ - return self._get_component("full_value", default=default) - - def set_full_value(self, fill_value): - """Set the data array fill value. - - .. versionadded:: 3.14.0 - - .. 
seealso:: `get_full_value` - - :Parameters: - - fill_value : scalar, optional - The fill value for the array. May be set to - `cf.masked` or `np.ma.masked`. - - :Returns: - - `None` - - """ - self._set_component("full_value", fill_value, copy=False) - - -def fullarray_implements(numpy_function): - """Register an __array_function__ implementation for FullArray objects. - - .. versionadded:: 3.15.0 - - """ - - def decorator(func): - _FULLARRAY_HANDLED_FUNCTIONS[numpy_function] = func - return func - - return decorator - - -@fullarray_implements(np.unique) -def unique( - a, return_index=False, return_inverse=False, return_counts=False, axis=None -): - """Version of `np.unique` that is optimised for `FullArray` objects. - - .. versionadded:: 3.15.0 - - """ - if return_index or return_inverse or return_counts or axis is not None: - # Fall back to the slow unique. (I'm sure we could probably do - # something more clever here, but there is no use case at - # present.) - return np.unique( - a[...], - return_index=return_index, - return_inverse=return_inverse, - return_counts=return_counts, - axis=axis, - ) - - # Fast unique based on the full value - x = a.get_full_value() - if x is np.ma.masked: - return np.ma.masked_all((1,), dtype=a.dtype) - - return np.array([x], dtype=a.dtype) diff --git a/cf/data/array/gatheredarray.py b/cf/data/array/gatheredarray.py index 607b1fb0b7..159e4da991 100644 --- a/cf/data/array/gatheredarray.py +++ b/cf/data/array/gatheredarray.py @@ -1,23 +1,7 @@ import cfdm from ...mixin_container import Container -from .mixin import ArrayMixin, CompressedArrayMixin -class GatheredArray( - CompressedArrayMixin, ArrayMixin, Container, cfdm.GatheredArray -): - """An underlying gathered array. - - Compression by gathering combines axes of a multidimensional array - into a new, discrete axis whilst omitting the missing values and - thus reducing the number of values that need to be stored. 
- - The information needed to uncompress the data is stored in a "list - variable" that gives the indices of the required points. - - See CF section 8.2. "Lossless Compression by Gathering". - - .. versionadded:: 3.0.0 - - """ +class GatheredArray(Container, cfdm.GatheredArray): + pass diff --git a/cf/data/array/h5netcdfarray.py b/cf/data/array/h5netcdfarray.py index 14151436e3..8b7d7e8685 100644 --- a/cf/data/array/h5netcdfarray.py +++ b/cf/data/array/h5netcdfarray.py @@ -1,23 +1,16 @@ import cfdm from ...mixin_container import Container -from .mixin import ActiveStorageMixin, ArrayMixin, FileArrayMixin +from .mixin import ActiveStorageMixin class H5netcdfArray( ActiveStorageMixin, - FileArrayMixin, - ArrayMixin, Container, cfdm.H5netcdfArray, ): """A netCDF array accessed with `h5netcdf`. - **Active storage reductions** - - An active storage reduction may be enabled with the `actify` - method. See `cf.data.collapse.Collapse` for details. - - .. versionadded:: 1.11.2.0 + .. versionadded:: 3.16.3 """ diff --git a/cf/data/array/mixin/__init__.py b/cf/data/array/mixin/__init__.py index af036620cf..309087cfeb 100644 --- a/cf/data/array/mixin/__init__.py +++ b/cf/data/array/mixin/__init__.py @@ -1,5 +1 @@ from .activestoragemixin import ActiveStorageMixin -from .arraymixin import ArrayMixin -from .cfamixin import CFAMixin -from .compressedarraymixin import CompressedArrayMixin -from .filearraymixin import FileArrayMixin diff --git a/cf/data/array/mixin/activestoragemixin.py b/cf/data/array/mixin/activestoragemixin.py index 5666338871..eaf8e5bb99 100644 --- a/cf/data/array/mixin/activestoragemixin.py +++ b/cf/data/array/mixin/activestoragemixin.py @@ -1,7 +1,7 @@ class ActiveStorageMixin: """Mixin class for enabling active storage operations. - .. versionadded:: 1.11.2.0 + .. versionadded:: 3.16.3 """ @@ -12,7 +12,7 @@ def active_storage(self): Currently, active storage operations are allowed unless the data are numerically packed. - .. versionadded:: 1.11.2.0 + .. 
versionadded:: 3.16.3 :Returns: diff --git a/cf/data/array/mixin/arraymixin.py b/cf/data/array/mixin/arraymixin.py deleted file mode 100644 index 4f83fa0d98..0000000000 --- a/cf/data/array/mixin/arraymixin.py +++ /dev/null @@ -1,44 +0,0 @@ -import numpy as np - -from ....units import Units - - -class ArrayMixin: - """Mixin class for a container of an array. - - .. versionadded:: 3.14.0 - - """ - - def __array_function__(self, func, types, args, kwargs): - """Implement the `numpy` ``__array_function__`` protocol. - - .. versionadded:: 3.14.0 - - """ - return NotImplemented - - @property - def _meta(self): - """Normalise the array to an appropriate Dask meta object. - - The Dask meta can be thought of as a suggestion to Dask. Dask - uses this meta to generate the task graph until it can infer - the actual metadata from the values. It does not force the - output to have the structure or dtype of the specified meta. - - .. versionadded:: 1.11.2.0 - - .. seealso:: `dask.utils.meta_from_array` - - """ - return np.array((), dtype=self.dtype) - - @property - def Units(self): - """The `cf.Units` object containing the units of the array. - - .. versionadded:: 3.14.0 - - """ - return Units(self.get_units(None), self.get_calendar(None)) diff --git a/cf/data/array/mixin/cfamixin.py b/cf/data/array/mixin/cfamixin.py deleted file mode 100644 index ecb1f82c2e..0000000000 --- a/cf/data/array/mixin/cfamixin.py +++ /dev/null @@ -1,858 +0,0 @@ -from copy import deepcopy -from functools import partial -from itertools import accumulate, product - -import numpy as np -from cfdm.data.utils import chunk_locations, chunk_positions - - -class CFAMixin: - """Mixin class for a CFA array. - - .. versionadded:: 1.11.2.0 - - """ - - def __new__(cls, *args, **kwargs): - """Store fragment array classes. - - .. versionadded:: 1.11.2.0 - - """ - # Import fragment array classes. Do this here (as opposed to - # outside the class) to avoid a circular import. 
- from ...fragment import ( - FullFragmentArray, - NetCDFFragmentArray, - UMFragmentArray, - ) - - instance = super().__new__(cls) - instance._FragmentArray = { - "nc": NetCDFFragmentArray, - "um": UMFragmentArray, - "full": FullFragmentArray, - } - return instance - - def __init__( - self, - filename=None, - address=None, - dtype=None, - mask=True, - unpack=True, - instructions=None, - substitutions=None, - term=None, - attributes=None, - storage_options=None, - source=None, - copy=True, - x=None, - ): - """**Initialisation** - - :Parameters: - - filename: (sequence of) `str`, optional - The name of the CFA file containing the array. If a - sequence then it must contain one element. - - address: (sequence of) `str`, optional - The name of the CFA aggregation variable for the - array. If a sequence then it must contain one element. - - dtype: `numpy.dtype` - The data type of the aggregated data array. May be - `None` if the numpy data-type is not known (which can - be the case for some string types, for example). - - mask: `bool` - If True (the default) then mask by convention when - reading data from disk. - - A array is masked depending on the values of any of - the variable attributes ``valid_min``, ``valid_max``, - ``valid_range``, ``_FillValue`` and ``missing_value``. - - {{init unpack: `bool`, optional}} - - .. versionadded:: 1.11.2.0 - - instructions: `str`, optional - The ``aggregated_data`` attribute value as found on - the CFA variable. If set then this will be used to - improve the performance of `__dask_tokenize__`. - - substitutions: `dict`, optional - A dictionary whose key/value pairs define text - substitutions to be applied to the fragment file - names. Each key must be specified with the ``${...}`` - syntax, for instance ``{'${base}': 'sub'}``. - - .. 
versionadded:: 3.15.0 - - term: `str`, optional - The name of a non-standard aggregation instruction - term from which the array is to be created, instead of - creating the aggregated data in the standard terms. If - set then *address* must be the name of the term's - aggregation instruction variable, which must be - defined on the fragment dimensions and no others. Each - value of the aggregation instruction variable will be - broadcast across the shape of the corresponding - fragment. - - *Parameter example:* - ``address='cfa_tracking_id', term='tracking_id'`` - - .. versionadded:: 3.15.0 - - storage_options: `dict` or `None`, optional - Key/value pairs to be passed on to the creation of - `s3fs.S3FileSystem` file systems to control the - opening of fragment files in S3 object stores. Ignored - for files not in an S3 object store, i.e. those whose - names do not start with ``s3:``. - - By default, or if `None`, then *storage_options* is - taken as ``{}``. - - If the ``'endpoint_url'`` key is not in - *storage_options* or is not in a dictionary defined by - the ``'client_kwargs`` key (which is always the case - when *storage_options* is `None`), then one will be - automatically inserted for accessing a fragment S3 - file. For example, for a file name of - ``'s3://store/data/file.nc'``, an ``'endpoint_url'`` - key with value ``'https://store'`` would be created. - - *Parameter example:* - ``{'key: 'scaleway-api-key...', 'secret': - 'scaleway-secretkey...', 'endpoint_url': - 'https://s3.fr-par.scw.cloud', 'client_kwargs': - {'region_name': 'fr-par'}}`` - - .. versionadded:: 1.11.2.0 - - {{init attributes: `dict` or `None`, optional}} - - If *attributes* is `None`, the default, then the - attributes will be set from the netCDF variable during - the first `__getitem__` call. - - .. versionaddedd:: 1.11.2.0 - - {{init source: optional}} - - {{init copy: `bool`, optional}} - - units: `str` or `None`, optional - Deprecated at version 1.11.2.0. 
Use the - *attributes* parameter instead. - - calendar: `str` or `None`, optional - Deprecated at version 1.11.2.0. Use the - *attributes* parameter instead. - - """ - if source is not None: - super().__init__(source=source, copy=copy) - - try: - fragment_shape = source.get_fragment_shape() - except AttributeError: - fragment_shape = None - - try: - instructions = source._get_component("instructions") - except AttributeError: - instructions = None - - try: - aggregated_data = source.get_aggregated_data(copy=False) - except AttributeError: - aggregated_data = {} - - try: - substitutions = source.get_substitutions() - except AttributeError: - substitutions = None - - try: - term = source.get_term() - except AttributeError: - term = None - - elif filename is not None: - shape, fragment_shape, aggregated_data = self._parse_cfa( - x, term, substitutions - ) - super().__init__( - filename=filename, - address=address, - shape=shape, - dtype=dtype, - mask=mask, - attributes=attributes, - copy=copy, - ) - else: - super().__init__( - filename=filename, - address=address, - dtype=dtype, - mask=mask, - attributes=attributes, - copy=copy, - ) - - fragment_shape = None - aggregated_data = None - instructions = None - term = None - - self._set_component("fragment_shape", fragment_shape, copy=False) - self._set_component("aggregated_data", aggregated_data, copy=False) - self._set_component("instructions", instructions, copy=False) - self._set_component("term", term, copy=False) - - if substitutions is not None: - self._set_component( - "substitutions", substitutions.copy(), copy=False - ) - - def _parse_cfa(self, x, term, substitutions): - """Parse the CFA aggregation instructions. - - .. versionadded:: 1.11.2.0 - - :Parameters: - - x: `dict` - - term: `str` or `None` - The name of a non-standard aggregation instruction - term from which the array is to be created, instead of - creating the aggregated data in the standard - terms. 
Each value of the aggregation instruction - variable will be broadcast across the shape of the - corresponding fragment. - - substitutions: `dict` or `None` - A dictionary whose key/value pairs define text - substitutions to be applied to the fragment file - names. Each key must be specified with the ``${...}`` - syntax, for instance ``{'${base}': 'sub'}``. - - :Returns: - - 3-`tuple` - 1. The shape of the aggregated data. - 2. The shape of the array of fragments. - 3. The parsed aggregation instructions. - - """ - aggregated_data = {} - - location = x["location"] - ndim = location.shape[0] - compressed = np.ma.compressed - chunks = [compressed(i).tolist() for i in location] - shape = [sum(c) for c in chunks] - positions = chunk_positions(chunks) - locations = chunk_locations(chunks) - - if term is not None: - # -------------------------------------------------------- - # Each fragment contains a constant value, not file - # locations. - # -------------------------------------------------------- - term = x[term] - fragment_shape = term.shape - aggregated_data = { - frag_loc: { - "location": loc, - "fill_value": term[frag_loc].item(), - "format": "full", - } - for frag_loc, loc in zip(positions, locations) - } - else: - # -------------------------------------------------------- - # Each fragment contains file locations - # -------------------------------------------------------- - a = x["address"] - f = x["file"] - file_fmt = x["format"] - - extra_dimension = f.ndim > ndim - if extra_dimension: - # There is an extra non-fragment dimension - fragment_shape = f.shape[:-1] - else: - fragment_shape = f.shape - - if not a.ndim: - a = (a.item(),) - scalar_address = True - else: - scalar_address = False - - if not file_fmt.ndim: - file_fmt = file_fmt.item() - scalar_fmt = True - else: - scalar_fmt = False - - for frag_loc, location in zip(positions, locations): - if extra_dimension: - filename = compressed(f[frag_loc]).tolist() - if scalar_address: - address = a * 
len(filename) - else: - address = compressed(a[frag_loc].tolist()) - - if scalar_fmt: - fmt = file_fmt - else: - fmt = compressed(file_fmt[frag_loc]).tolist() - else: - filename = (f[frag_loc].item(),) - if scalar_address: - address = a - else: - address = (a[frag_loc].item(),) - - if scalar_fmt: - fmt = file_fmt - else: - fmt = file_fmt[frag_loc].item() - - aggregated_data[frag_loc] = { - "location": location, - "filename": filename, - "address": address, - "format": fmt, - } - - # Apply string substitutions to the fragment filenames - if substitutions: - for value in aggregated_data.values(): - filenames2 = [] - for filename in value["filename"]: - for base, sub in substitutions.items(): - filename = filename.replace(base, sub) - - filenames2.append(filename) - - value["filename"] = filenames2 - - return shape, fragment_shape, aggregated_data - - def __dask_tokenize__(self): - """Used by `dask.base.tokenize`. - - .. versionadded:: 3.14.0 - - """ - out = super().__dask_tokenize__() - aggregated_data = self._get_component("instructions", None) - if aggregated_data is None: - aggregated_data = self.get_aggregated_data(copy=False) - - return out + (aggregated_data,) - - def __getitem__(self, indices): - """x.__getitem__(indices) <==> x[indices]""" - return NotImplemented # pragma: no cover - - def get_aggregated_data(self, copy=True): - """Get the aggregation data dictionary. - - The aggregation data dictionary contains the definitions of - the fragments and the instructions on how to aggregate them. - The keys are indices of the CFA fragment dimensions, - e.g. ``(1, 0, 0 ,0)``. - - .. versionadded:: 3.14.0 - - :Parameters: - - copy: `bool`, optional - Whether or not to return a copy of the aggregation - dictionary. By default a deep copy is returned. - - .. warning:: If False then changing the returned - dictionary in-place will change the - aggregation dictionary stored in the - {{class}} instance, **as well as in any - copies of it**. 
- - :Returns: - - `dict` - The aggregation data dictionary. - - **Examples** - - >>> a.shape - (12, 1, 73, 144) - >>> a.get_fragment_shape() - (2, 1, 1, 1) - >>> a.get_aggregated_data() - {(0, 0, 0, 0): { - 'file': ('January-June.nc',), - 'address': ('temp',), - 'format': 'nc', - 'location': [(0, 6), (0, 1), (0, 73), (0, 144)]}, - (1, 0, 0, 0): { - 'file': ('July-December.nc',), - 'address': ('temp',), - 'format': 'nc', - 'location': [(6, 12), (0, 1), (0, 73), (0, 144)]}} - - """ - aggregated_data = self._get_component("aggregated_data") - if copy: - aggregated_data = deepcopy(aggregated_data) - - return aggregated_data - - def get_fragmented_dimensions(self): - """Get the positions of dimensions that have two or more fragments. - - .. versionadded:: 3.14.0 - - :Returns: - - `list` - The dimension positions. - - **Examples** - - >>> a.get_fragment_shape() - (20, 1, 40, 1) - >>> a.get_fragmented_dimensions() - [0, 2] - - >>> a.get_fragment_shape() - (1, 1, 1) - >>> a.get_fragmented_dimensions() - [] - - """ - return [ - i for i, size in enumerate(self.get_fragment_shape()) if size > 1 - ] - - def get_fragment_shape(self): - """Get the sizes of the fragment dimensions. - - The fragment dimension sizes are given in the same order as - the aggregated dimension sizes given by `shape`. - - .. versionadded:: 3.14.0 - - :Returns: - - `tuple` - The shape of the fragment dimensions. - - """ - return self._get_component("fragment_shape") - - def get_storage_options(self): - """Return `s3fs.S3FileSystem` options for accessing S3 fragment files. - - .. versionadded:: 1.11.2.0 - - :Returns: - - `dict` or `None` - The `s3fs.S3FileSystem` options. 
- - **Examples** - - >>> f.get_storage_options() - {} - - >>> f.get_storage_options() - {'anon': True} - - >>> f.get_storage_options() - {'key: 'scaleway-api-key...', - 'secret': 'scaleway-secretkey...', - 'endpoint_url': 'https://s3.fr-par.scw.cloud', - 'client_kwargs': {'region_name': 'fr-par'}} - - """ - return super().get_storage_options(create_endpoint_url=False) - - def get_term(self, default=ValueError()): - """The CFA aggregation instruction term for the data, if set. - - .. versionadded:: 3.15.0 - - :Parameters: - - default: optional - Return the value of the *default* parameter if the - term has not been set. If set to an `Exception` - instance then it will be raised instead. - - :Returns: - - `str` - The CFA aggregation instruction term name. - - """ - return self._get_component("term", default=default) - - def subarray_shapes(self, shapes): - """Create the subarray shapes. - - A fragmented dimension (i.e. one spanned by two or more - fragments) will always have a subarray size equal to the - size of each of its fragments, overriding any other size - implied by the *shapes* parameter. - - .. versionadded:: 3.14.0 - - .. seealso:: `subarrays` - - :Parameters: - - shapes: `int`, sequence, `dict` or `str`, optional - Define the subarray shapes. - - Any value accepted by the *chunks* parameter of the - `dask.array.from_array` function is allowed. - - The subarray sizes implied by *chunks* for a dimension - that has been fragmented are ignored, so their - specification is arbitrary. - - :Returns: - - `tuple` - The subarray sizes along each dimension. 
- - **Examples** - - >>> a.shape - (12, 1, 73, 144) - >>> a.get_fragment_shape() - (2, 1, 1, 1) - >>> a.fragmented_dimensions() - [0] - >>> a.subarray_shapes(-1) - ((6, 6), (1,), (73,), (144,)) - >>> a.subarray_shapes(None) - ((6, 6), (1,), (73,), (144,)) - >>> a.subarray_shapes("auto") - ((6, 6), (1,), (73,), (144,)) - >>> a.subarray_shapes((None, 1, 40, 50)) - ((6, 6), (1,), (40, 33), (50, 50, 44)) - >>> a.subarray_shapes((None, None, "auto", 50)) - ((6, 6), (1,), (73,), (50, 50, 44)) - >>> a.subarray_shapes({2: 40}) - ((6, 6), (1,), (40, 33), (144,)) - - """ - from numbers import Number - - from dask.array.core import normalize_chunks - - # Positions of fragmented dimensions (i.e. those spanned by - # two or more fragments) - f_dims = self.get_fragmented_dimensions() - - shape = self.shape - aggregated_data = self.get_aggregated_data(copy=False) - - # Create the base chunks. - chunks = [] - ndim = self.ndim - for dim, (n_fragments, size) in enumerate( - zip(self.get_fragment_shape(), self.shape) - ): - if dim in f_dims: - # This aggregated dimension is spanned by two or more - # fragments => set the chunks to be the same size as - # each fragment. - c = [] - index = [0] * ndim - for j in range(n_fragments): - index[dim] = j - loc = aggregated_data[tuple(index)]["location"][dim] - chunk_size = loc[1] - loc[0] - c.append(chunk_size) - - chunks.append(tuple(c)) - else: - # This aggregated dimension is spanned by exactly one - # fragment => store `None` for now. This will get - # overwritten from 'shapes'. 
- chunks.append(None) - - if isinstance(shapes, (str, Number)) or shapes is None: - chunks = [ - c if i in f_dims else shapes for i, c in enumerate(chunks) - ] - elif isinstance(shapes, dict): - chunks = [ - chunks[i] if i in f_dims else shapes.get(i, "auto") - for i, c in enumerate(chunks) - ] - else: - # chunks is a sequence - if len(shapes) != ndim: - raise ValueError( - f"Wrong number of 'shapes' elements in {shapes}: " - f"Got {len(shapes)}, expected {self.ndim}" - ) - - chunks = [ - c if i in f_dims else shapes[i] for i, c in enumerate(chunks) - ] - - return normalize_chunks(chunks, shape=shape, dtype=self.dtype) - - def subarrays(self, subarray_shapes): - """Return descriptors for every subarray. - - .. versionadded:: 3.14.0 - - .. seealso:: `subarray_shapes` - - :Parameters: - - subarray_shapes: `tuple` - The subarray sizes along each dimension, as returned - by a prior call to `subarray_shapes`. - - :Returns: - - 6-`tuple` of iterators - Each iterator iterates over a particular descriptor - from each subarray. - - 1. The indices of the aggregated array that correspond - to each subarray. - - 2. The shape of each subarray. - - 3. The indices of the fragment that corresponds to each - subarray (some subarrays may be represented by a - part of a fragment). - - 4. The location of each subarray. - - 5. The location on the fragment dimensions of the - fragment that corresponds to each subarray. - - 6. The shape of each fragment that overlaps each chunk. - - **Examples** - - An aggregated array with shape (12, 73, 144) has two - fragments, both with with shape (6, 73, 144). - - >>> a.shape - (12, 73, 144) - >>> a.get_fragment_shape() - (2, 1, 1) - >>> a.fragmented_dimensions() - [0] - >>> subarray_shapes = a.subarray_shapes({1: 40}) - >>> print(subarray_shapes) - ((6, 6), (40, 33), (144,)) - >>> ( - ... u_indices, - ... u_shapes, - ... f_indices, - ... s_locations, - ... f_locations, - ... f_shapes, - ... 
) = a.subarrays(subarray_shapes) - >>> for i in u_indices: - ... print(i) - ... - (slice(0, 6, None), slice(0, 40, None), slice(0, 144, None)) - (slice(0, 6, None), slice(40, 73, None), slice(0, 144, None)) - (slice(6, 12, None), slice(0, 40, None), slice(0, 144, None)) - (slice(6, 12, None), slice(40, 73, None), slice(0, 144, None)) - - >>> for i in u_shapes - ... print(i) - ... - (6, 40, 144) - (6, 33, 144) - (6, 40, 144) - (6, 33, 144) - >>> for i in f_indices: - ... print(i) - ... - (slice(None, None, None), slice(0, 40, None), slice(0, 144, None)) - (slice(None, None, None), slice(40, 73, None), slice(0, 144, None)) - (slice(None, None, None), slice(0, 40, None), slice(0, 144, None)) - (slice(None, None, None), slice(40, 73, None), slice(0, 144, None)) - >>> for i in s_locations: - ... print(i) - ... - (0, 0, 0) - (0, 1, 0) - (1, 0, 0) - (1, 1, 0) - >>> for i in f_locations: - ... print(i) - ... - (0, 0, 0) - (0, 0, 0) - (1, 0, 0) - (1, 0, 0) - >>> for i in f_shapes: - ... print(i) - ... - (6, 73, 144) - (6, 73, 144) - (6, 73, 144) - (6, 73, 144) - - """ - f_dims = self.get_fragmented_dimensions() - - # The indices of the uncompressed array that correspond to - # each subarray, the shape of each uncompressed subarray, and - # the location of each subarray - s_locations = [] - u_shapes = [] - u_indices = [] - f_locations = [] - for dim, c in enumerate(subarray_shapes): - nc = len(c) - s_locations.append(tuple(range(nc))) - u_shapes.append(c) - - if dim in f_dims: - f_locations.append(tuple(range(nc))) - else: - # No fragmentation along this dimension - f_locations.append((0,) * nc) - - c = tuple(accumulate((0,) + c)) - u_indices.append([slice(i, j) for i, j in zip(c[:-1], c[1:])]) - - # For each subarray, the part of the fragment that corresponds - # to it. - f_indices = [ - (slice(None),) * len(u) if dim in f_dims else u - for dim, u in enumerate(u_indices) - ] - - # For each subarray, the shape of the fragment that - # corresponds to it. 
- f_shapes = [ - u_shape if dim in f_dims else (size,) * len(u_shape) - for dim, (u_shape, size) in enumerate(zip(u_shapes, self.shape)) - ] - - return ( - product(*u_indices), - product(*u_shapes), - product(*f_indices), - product(*s_locations), - product(*f_locations), - product(*f_shapes), - ) - - def to_dask_array(self, chunks="auto"): - """Create a dask array with `FragmentArray` chunks. - - .. versionadded:: 3.14.0 - - :Parameters: - - chunks: `int`, `tuple`, `dict` or `str`, optional - Specify the chunking of the returned dask array. - - Any value accepted by the *chunks* parameter of the - `dask.array.from_array` function is allowed. - - The chunk sizes implied by *chunks* for a dimension that - has been fragmented are ignored and replaced with values - that are implied by that dimensions fragment sizes. - - :Returns: - - `dask.array.Array` - - """ - import dask.array as da - from dask.array.core import getter - from dask.base import tokenize - - name = (f"{self.__class__.__name__}-{tokenize(self)}",) - - dtype = self.dtype - units = self.get_units(None) - calendar = self.get_calendar(None) - aggregated_data = self.get_aggregated_data(copy=False) - - # Set the chunk sizes for the dask array - chunks = self.subarray_shapes(chunks) - - fragment_arrays = self._FragmentArray - if not self.get_mask(): - fragment_arrays = fragment_arrays.copy() - fragment_arrays["nc"] = partial(fragment_arrays["nc"], mask=False) - - storage_options = self.get_storage_options() - - dsk = {} - for ( - u_indices, - u_shape, - f_indices, - chunk_location, - fragment_location, - fragment_shape, - ) in zip(*self.subarrays(chunks)): - kwargs = aggregated_data[fragment_location].copy() - kwargs.pop("location", None) - - fragment_format = kwargs.pop("format", None) - try: - FragmentArray = fragment_arrays[fragment_format] - except KeyError: - raise ValueError( - "Can't get FragmentArray class for unknown " - f"fragment dataset format: {fragment_format!r}" - ) - - if storage_options and 
kwargs["address"] == "nc": - # Pass on any file system options - kwargs["storage_options"] = storage_options - - fragment = FragmentArray( - dtype=dtype, - shape=fragment_shape, - aggregated_units=units, - aggregated_calendar=calendar, - **kwargs, - ) - - key = f"{fragment.__class__.__name__}-{tokenize(fragment)}" - dsk[key] = fragment - dsk[name + chunk_location] = ( - getter, - key, - f_indices, - False, - False, - ) - - # Return the dask array - return da.Array(dsk, name[0], chunks=chunks, dtype=dtype) diff --git a/cf/data/array/mixin/compressedarraymixin.py b/cf/data/array/mixin/compressedarraymixin.py deleted file mode 100644 index 8a1d5dfbe1..0000000000 --- a/cf/data/array/mixin/compressedarraymixin.py +++ /dev/null @@ -1,131 +0,0 @@ -import dask.array as da - - -class CompressedArrayMixin: - """Mixin class for compressed arrays. - - .. versionadded:: 3.14.0 - - """ - - def _lock_file_read(self, array): - """Try to return a dask array that does not support concurrent - reads. - - .. versionadded:: 3.14.0 - - :Parameters: - - array: array_like - The array to process. - - :Returns" - - `dask.array.Array` or array_like - The new `dask` array, or the orginal array if it - couldn't be ascertained how to form the `dask` array. - - """ - try: - return array.to_dask_array() - except AttributeError: - pass - - try: - chunks = array.chunks - except AttributeError: - chunks = "auto" - - try: - array = array.source() - except (ValueError, AttributeError): - pass - - try: - array.get_filenames() - except AttributeError: - pass - else: - array = da.from_array(array, chunks=chunks, lock=True) - - return array - - def to_dask_array(self, chunks="auto"): - """Convert the data to a `dask` array. - - .. versionadded:: 3.14.0 - - :Parameters: - - chunks: `int`, `tuple`, `dict` or `str`, optional - Specify the chunking of the returned dask array. - - Any value accepted by the *chunks* parameter of the - `dask.array.from_array` function is allowed. 
- - The chunk sizes implied by *chunks* for a dimension that - has been fragmented are ignored and replaced with values - that are implied by that dimensions fragment sizes. - - :Returns: - - `dask.array.Array` - The `dask` array representation. - - """ - from functools import partial - - import dask.array as da - from cfdm.data.utils import normalize_chunks - from dask import config - from dask.array.core import getter - from dask.base import tokenize - - name = (f"{self.__class__.__name__}-{tokenize(self)}",) - - dtype = self.dtype - - context = partial(config.set, scheduler="synchronous") - - # If possible, convert the compressed data to a dask array - # that doesn't support concurrent reads. This prevents - # "compute called by compute" failures problems at compute - # time. - # - # TODO: This won't be necessary if this is refactored so that - # the compressed data is part of the same dask graph as - # the compressed subarrays. - conformed_data = self.conformed_data() - conformed_data = { - k: self._lock_file_read(v) for k, v in conformed_data.items() - } - subarray_kwargs = {**conformed_data, **self.subarray_parameters()} - - # Get the (cfdm) subarray class - Subarray = self.get_Subarray() - subarray_name = Subarray().__class__.__name__ - - # Set the chunk sizes for the dask array - chunks = normalize_chunks( - self.subarray_shapes(chunks), - shape=self.shape, - dtype=dtype, - ) - - dsk = {} - for u_indices, u_shape, c_indices, chunk_location in zip( - *self.subarrays(chunks) - ): - subarray = Subarray( - indices=c_indices, - shape=u_shape, - context_manager=context, - **subarray_kwargs, - ) - - key = f"{subarray_name}-{tokenize(subarray)}" - dsk[key] = subarray - dsk[name + chunk_location] = (getter, key, Ellipsis, False, False) - - # Return the dask array - return da.Array(dsk, name[0], chunks=chunks, dtype=dtype) diff --git a/cf/data/array/mixin/filearraymixin.py b/cf/data/array/mixin/filearraymixin.py deleted file mode 100644 index b5b314b9e2..0000000000 
--- a/cf/data/array/mixin/filearraymixin.py +++ /dev/null @@ -1,218 +0,0 @@ -from os import sep -from os.path import basename, dirname, join - -from ....functions import _DEPRECATION_ERROR_ATTRIBUTE, abspath - - -class FileArrayMixin: - """Mixin class for an array stored in a file. - - .. versionadded:: 3.14.0 - - """ - - def __dask_tokenize__(self): - """Return a value fully representative of the object. - - .. versionadded:: 3.15.0 - - """ - return ( - self.__class__, - self.shape, - self.get_filenames(), - self.get_addresses(), - ) - - @property - def filename(self): - """The name of the file containing the array. - - Deprecated at version 3.14.0. Use method `get_filename` instead. - - """ - _DEPRECATION_ERROR_ATTRIBUTE( - self, - "filename", - message="Use method 'get_filename' instead.", - version="3.14.0", - removed_at="5.0.0", - ) # pragma: no cover - - def del_file_location(self, location): - """Remove reference to files in the given location. - - .. versionadded:: 3.15.0 - - :Parameters: - - location: `str` - The file location to remove. - - :Returns: - - `{{class}}` - A new {{class}} with reference to files in *location* - removed. 
- - **Examples** - - >>> a.get_filenames() - ('/data1/file1', '/data2/file2') - >>> a.get_addresses() - ('tas1', 'tas2') - >>> b = a.del_file_location('/data1') - >>> b = get_filenames() - ('/data2/file2',) - >>> b.get_addresses() - ('tas2',) - - >>> a.get_filenames() - ('/data1/file1', '/data2/file1', '/data2/file2') - >>> a.get_addresses() - ('tas1', 'tas1', 'tas2') - >>> b = a.del_file_location('/data2') - >>> b.get_filenames() - ('/data1/file1',) - >>> b.get_addresses() - ('tas1',) - - """ - location = abspath(location).rstrip(sep) - - new_filenames = [] - new_addresses = [] - for filename, address in zip( - self.get_filenames(), self.get_addresses() - ): - if dirname(filename) != location: - new_filenames.append(filename) - new_addresses.append(address) - - if not new_filenames: - raise ValueError( - "Can't delete a file location when it results in there " - "being no files" - ) - - a = self.copy() - a._set_component("filename", tuple(new_filenames), copy=False) - a._set_component("address", tuple(new_addresses), copy=False) - return a - - def file_locations(self): - """The locations of the files, any of which may contain the data. - - .. versionadded:: 3.15.0 - - :Returns: - - `tuple` - The file locations, one for each file, as absolute - paths with no trailing path name component separator. - - **Examples** - - >>> a.get_filenames() - ('/data1/file1',) - >>> a.file_locations() - ('/data1,) - - >>> a.get_filenames() - ('/data1/file1', '/data2/file2') - >>> a.file_locations() - ('/data1', '/data2') - - >>> a.get_filenames() - ('/data1/file1', '/data2/file2', '/data1/file2') - >>> a.file_locations() - ('/data1', '/data2', '/data1') - - """ - return tuple(map(dirname, self.get_filenames())) - - def add_file_location(self, location): - """Add a new file location. - - All existing files are additionally referenced from the given - location. - - .. versionadded:: 3.15.0 - - :Parameters: - - location: `str` - The new location. 
- - :Returns: - - `{{class}}` - A new {{class}} with all previous files additionally - referenced from *location*. - - **Examples** - - >>> a.get_filenames() - ('/data1/file1',) - >>> a.get_addresses() - ('tas',) - >>> b = a.add_file_location('/home') - >>> b.get_filenames() - ('/data1/file1', '/home/file1') - >>> b.get_addresses() - ('tas', 'tas') - - >>> a.get_filenames() - ('/data1/file1', '/data2/file2',) - >>> a.get_addresses() - ('tas', 'tas') - >>> b = a.add_file_location('/home/') - >>> b = get_filenames() - ('/data1/file1', '/data2/file2', '/home/file1', '/home/file2') - >>> b.get_addresses() - ('tas', 'tas', 'tas', 'tas') - - >>> a.get_filenames() - ('/data1/file1', '/data2/file1',) - >>> a.get_addresses() - ('tas1', 'tas2') - >>> b = a.add_file_location('/home/') - >>> b.get_filenames() - ('/data1/file1', '/data2/file1', '/home/file1') - >>> b.get_addresses() - ('tas1', 'tas2', 'tas1') - - >>> a.get_filenames() - ('/data1/file1', '/data2/file1',) - >>> a.get_addresses() - ('tas1', 'tas2') - >>> b = a.add_file_location('/data1') - >>> b.get_filenames() - ('/data1/file1', '/data2/file1') - >>> b.get_addresses() - ('tas1', 'tas2') - - """ - location = abspath(location).rstrip(sep) - - filenames = self.get_filenames() - addresses = self.get_addresses() - - # Note: It is assumed that each existing file name is either - # an absolute path or a fully qualified URI. 
- new_filenames = list(filenames) - new_addresses = list(addresses) - for filename, address in zip(filenames, addresses): - new_filename = join(location, basename(filename)) - if new_filename not in new_filenames: - new_filenames.append(new_filename) - new_addresses.append(address) - - a = self.copy() - a._set_component("filename", tuple(new_filenames), copy=False) - a._set_component( - "address", - tuple(new_addresses), - copy=False, - ) - return a diff --git a/cf/data/array/netcdf4array.py b/cf/data/array/netcdf4array.py index 5255109006..49c2c05c50 100644 --- a/cf/data/array/netcdf4array.py +++ b/cf/data/array/netcdf4array.py @@ -1,21 +1,12 @@ import cfdm from ...mixin_container import Container -from .mixin import ActiveStorageMixin, ArrayMixin, FileArrayMixin +from .mixin import ActiveStorageMixin class NetCDF4Array( ActiveStorageMixin, - FileArrayMixin, - ArrayMixin, Container, cfdm.NetCDF4Array, ): - """A netCDF array accessed with `netCDF4`. - - **Active storage reductions** - - An active storage reduction may be enabled with the `actify` - method. See `cf.data.collapse.Collapse` for details. - - """ + """A netCDF array accessed with `netCDF4`.""" diff --git a/cf/data/array/netcdfarray.py b/cf/data/array/netcdfarray.py index 6f56e2d930..67b497a78b 100644 --- a/cf/data/array/netcdfarray.py +++ b/cf/data/array/netcdfarray.py @@ -1,7 +1,7 @@ class NetCDFArray: """A netCDF array accessed with `netCDF4`. - Deprecated at version 1.11.2.0 and is no longer available. Use + Deprecated at version 3.16.3 and is no longer available. Use `cf.NetCDF4Array` instead. """ @@ -11,6 +11,6 @@ def __init__(self, *args, **kwargs): from ...functions import DeprecationError raise DeprecationError( - f"{self.__class__.__name__} was deprecated at version 1.11.2.0 " + f"{self.__class__.__name__} was deprecated at version 3.16.3 " "and is no longer available. Use cf.NetCDF4Array instead." 
) diff --git a/cf/data/array/pointtopologyarray.py b/cf/data/array/pointtopologyarray.py index d5c00a2ae4..ce8c2107a2 100644 --- a/cf/data/array/pointtopologyarray.py +++ b/cf/data/array/pointtopologyarray.py @@ -1,21 +1,10 @@ import cfdm from ...mixin_container import Container -from .mixin import ArrayMixin, CompressedArrayMixin class PointTopologyArray( - CompressedArrayMixin, - ArrayMixin, Container, cfdm.PointTopologyArray, ): - """A point cell domain topology array derived from a UGRID variable. - - A point cell domain topology array derived from an underlying - UGRID "edge_node_connectivity" or UGRID "face_node_connectivity" - array. - - .. versionadded:: 3.16.0 - - """ + pass diff --git a/cf/data/array/raggedcontiguousarray.py b/cf/data/array/raggedcontiguousarray.py index 145e8c22db..1b33c48dea 100644 --- a/cf/data/array/raggedcontiguousarray.py +++ b/cf/data/array/raggedcontiguousarray.py @@ -1,27 +1,7 @@ import cfdm from ...mixin_container import Container -from .mixin import ArrayMixin, CompressedArrayMixin -class RaggedContiguousArray( - CompressedArrayMixin, ArrayMixin, Container, cfdm.RaggedContiguousArray -): - """An underlying contiguous ragged array. - - A collection of features stored using a contiguous ragged array - combines all features along a single dimension (the "sample - dimension") such that each feature in the collection occupies a - contiguous block. - - The information needed to uncompress the data is stored in a - "count variable" that gives the size of each block. - - It is assumed that the compressed dimension is the left-most - dimension in the compressed array. - - See CF section 9 "Discrete Sampling Geometries". - - .. 
versionadded:: 3.0.0 - - """ +class RaggedContiguousArray(Container, cfdm.RaggedContiguousArray): + pass diff --git a/cf/data/array/raggedindexedarray.py b/cf/data/array/raggedindexedarray.py index 974327ed26..69e2ee7a9a 100644 --- a/cf/data/array/raggedindexedarray.py +++ b/cf/data/array/raggedindexedarray.py @@ -1,28 +1,7 @@ import cfdm from ...mixin_container import Container -from .mixin import ArrayMixin, CompressedArrayMixin -class RaggedIndexedArray( - CompressedArrayMixin, ArrayMixin, Container, cfdm.RaggedIndexedArray -): - """An underlying indexed ragged array. - - A collection of features stored using an indexed ragged array - combines all features along a single dimension (the "sample - dimension") such that the values of each feature in the collection - are interleaved. - - The information needed to uncompress the data is stored in an - "index variable" that specifies the feature that each element of - the sample dimension belongs to. - - It is assumed that the compressed dimension is the left-most - dimension in the compressed array. - - See CF section 9 "Discrete Sampling Geometries". - - .. versionadded:: 3.0.0 - - """ +class RaggedIndexedArray(Container, cfdm.RaggedIndexedArray): + pass diff --git a/cf/data/array/raggedindexedcontiguousarray.py b/cf/data/array/raggedindexedcontiguousarray.py index 13f65737be..96870fd59a 100644 --- a/cf/data/array/raggedindexedcontiguousarray.py +++ b/cf/data/array/raggedindexedcontiguousarray.py @@ -1,34 +1,10 @@ import cfdm from ...mixin_container import Container -from .mixin import ArrayMixin, CompressedArrayMixin class RaggedIndexedContiguousArray( - CompressedArrayMixin, - ArrayMixin, Container, cfdm.RaggedIndexedContiguousArray, ): - """An underlying indexed contiguous ragged array. 
- - A collection of features, each of which is sequence of (vertical) - profiles, stored using an indexed contiguous ragged array combines - all feature elements along a single dimension (the "sample - dimension") such that a contiguous ragged array representation is - used for each profile and the indexed ragged array representation - to organise the profiles into timeseries. - - The information needed to uncompress the data is stored in a - "count variable" that gives the size of each profile; and in a - "index variable" that specifies the feature that each profile - belongs to. - - It is assumed that the compressed dimensions are the two left-most - dimensions in the compressed array. - - See CF section 9 "Discrete Sampling Geometries". - - .. versionadded:: 3.0.0 - - """ + pass diff --git a/cf/data/array/subsampledarray.py b/cf/data/array/subsampledarray.py index 7d8e24f5c5..15b7e0c6ad 100644 --- a/cf/data/array/subsampledarray.py +++ b/cf/data/array/subsampledarray.py @@ -1,207 +1,7 @@ import cfdm from ...mixin_container import Container -from .mixin import ArrayMixin, CompressedArrayMixin -class SubsampledArray( - CompressedArrayMixin, ArrayMixin, Container, cfdm.SubsampledArray -): - """An underlying subsampled array. - - For some structured coordinate data (e.g. coordinates describing - remote sensing products) space may be saved by storing a subsample - of the data, called tie points. The uncompressed data can be - reconstituted by interpolation, from the subsampled values. This - process will likely result in a loss in accuracy (as opposed to - precision) in the uncompressed variables, due to rounding and - approximation errors in the interpolation calculations, but it is - assumed that these errors will be small enough to not be of - concern to users of the uncompressed dataset. The creator of the - compressed dataset can control the accuracy of the reconstituted - data through the degree of subsampling and the choice of - interpolation method. 
- - See CF section 8.3 "Lossy Compression by Coordinate Subsampling" - and Appendix J "Coordinate Interpolation Methods". - - >>> tie_point_indices={{package}}.TiePointIndex(data=[0, 4, 7, 8, 11]) - >>> w = {{package}}.InterpolationParameter(data=[5, 10, 5]) - >>> coords = {{package}}.SubsampledArray( - ... interpolation_name='quadratic', - ... compressed_array={{package}}.Data([15, 135, 225, 255, 345]), - ... shape=(12,), - ... tie_point_indices={0: tie_point_indices}, - ... parameters={"w": w}, - ... parameter_dimensions={"w": (0,)}, - ... ) - >>> print(coords[...]) - [ 15. 48.75 80. 108.75 135. - 173.88888889 203.88888889 225. 255. 289.44444444 - 319.44444444 345. ] - - **Cell boundaries** - - When the tie points array represents bounds tie points then the - *shape* parameter describes the uncompressed bounds shape. See CF - section 8.3.9 "Interpolation of Cell Boundaries". - - >>> bounds = {{package}}.SubsampledArray( - ... interpolation_name='quadratic', - ... compressed_array={{package}}.Data([0, 150, 240, 240, 360]), - ... shape=(12, 2), - ... tie_point_indices={0: tie_point_indices}, - ... parameters={"w": w}, - ... parameter_dimensions={"w": (0,)}, - ... ) - >>> print(bounds[...]) - [[0.0 33.2] - [33.2 64.8] - [64.8 94.80000000000001] - [94.80000000000001 123.2] - [123.2 150.0] - [150.0 188.88888888888889] - [188.88888888888889 218.88888888888889] - [218.88888888888889 240.0] - [240.0 273.75] - [273.75 305.0] - [305.0 333.75] - [333.75 360.0]] - - .. versionadded:: 3.14.0 - - """ - - def to_dask_array(self, chunks="auto"): - """Convert the data to a `dask` array. - - .. versionadded:: 3.14.0 - - :Parameters: - - chunks: `int`, `tuple`, `dict` or `str`, optional - Specify the chunking of the returned dask array. - - Any value accepted by the *chunks* parameter of the - `dask.array.from_array` function is allowed. 
- - The chunk sizes implied by *chunks* for a dimension that - has been fragmented are ignored and replaced with values - that are implied by that dimensions fragment sizes. - - :Returns: - - `dask.array.Array` - The `dask` array representation. - - """ - from functools import partial - - import dask.array as da - from dask import config - from dask.array.core import getter, normalize_chunks - from dask.base import tokenize - - name = (f"{self.__class__.__name__}-{tokenize(self)}",) - - dtype = self.dtype - - context = partial(config.set, scheduler="synchronous") - - compressed_dimensions = self.compressed_dimensions() - conformed_data = self.conformed_data() - compressed_data = conformed_data["data"] - parameters = conformed_data["parameters"] - dependent_tie_points = conformed_data["dependent_tie_points"] - - # If possible, convert the compressed data, parameters and - # dependent tie points to dask arrays that don't support - # concurrent reads. This prevents "compute called by compute" - # failures problems at compute time. - # - # TODO: This won't be necessary if this is refactored so that - # arrays are part of the same dask graph as the - # compressed subarrays. - compressed_data = self._lock_file_read(compressed_data) - parameters = { - k: self._lock_file_read(v) for k, v in parameters.items() - } - dependent_tie_points = { - k: self._lock_file_read(v) for k, v in dependent_tie_points.items() - } - - # Get the (cfdm) subarray class - Subarray = self.get_Subarray() - subarray_name = Subarray().__class__.__name__ - - # Set the chunk sizes for the dask array - # - # Note: The chunks created here are incorrect for the - # compressed dimensions, since these chunk sizes are a - # function of the tie point indices which haven't yet - # been accessed. Therefore, the chunks for the - # compressed dimensons need to be redefined later. 
- chunks = normalize_chunks( - self.subarray_shapes(chunks), - shape=self.shape, - dtype=dtype, - ) - - # Re-initialise the chunks - u_dims = list(compressed_dimensions) - chunks = [[] if i in u_dims else c for i, c in enumerate(chunks)] - - # For each dimension, initialise the index of the chunk - # previously created (prior to the chunk currently being - # created). The value -1 is an arbitrary negative value that is - # always less than any chunk index, which is always a natural - # number. - previous_chunk_location = [-1] * len(chunks) - - dsk = {} - for ( - u_indices, - u_shape, - c_indices, - subarea_indices, - first, - chunk_location, - ) in zip(*self.subarrays(shapes=chunks)): - subarray = Subarray( - data=compressed_data, - indices=c_indices, - shape=u_shape, - compressed_dimensions=compressed_dimensions, - first=first, - subarea_indices=subarea_indices, - parameters=parameters, - dependent_tie_points=dependent_tie_points, - context_manager=context, - ) - - key = f"{subarray_name}-{tokenize(subarray)}" - dsk[key] = subarray - dsk[name + chunk_location] = ( - getter, - key, - Ellipsis, - False, - False, - ) - - # Add correct chunk sizes for compressed dimensions - for d in u_dims[:]: - previous = previous_chunk_location[d] - new = chunk_location[d] - if new > previous: - chunks[d].append(u_shape[d]) - previous_chunk_location[d] = new - elif new < previous: - # No more chunk sizes required for this compressed - # dimension - u_dims.remove(d) - - chunks = [tuple(c) for c in chunks] - - # Return the dask array - return da.Array(dsk, name[0], chunks=chunks, dtype=dtype) +class SubsampledArray(Container, cfdm.SubsampledArray): + pass diff --git a/cf/data/array/umarray.py b/cf/data/array/umarray.py index 051674a9b3..b71a920f89 100644 --- a/cf/data/array/umarray.py +++ b/cf/data/array/umarray.py @@ -4,13 +4,11 @@ from ...functions import _DEPRECATION_ERROR_ATTRIBUTE, load_stash2standard_name from ...umread_lib.umfile import File, Rec from .abstract import Array 
-from .mixin import FileArrayMixin class UMArray( - FileArrayMixin, cfdm.data.mixin.IndexMixin, - cfdm.data.mixin.FileArrayMixin, + cfdm.data.abstract.FileArray, Array, ): """A sub-array stored in a PP or UM fields file.""" @@ -24,7 +22,10 @@ def __init__( fmt=None, word_size=None, byte_ordering=None, + mask=True, + unpack=True, attributes=None, + storage_options=None, source=None, copy=True, ): @@ -67,7 +68,7 @@ def __init__( already been set will be inferred from the lookup header and cached for future use. - .. versionadded:: 1.11.2.0 + .. versionadded:: 3.16.3 {{init source: optional}} @@ -90,42 +91,33 @@ def __init__( Deprecated at version 3.15.0. units: `str` or `None`, optional - Deprecated at version 1.11.2.0. Use the + Deprecated at version 3.16.3. Use the *attributes* parameter instead. calendar: `str` or `None`, optional - Deprecated at version 1.11.2.0. Use the + Deprecated at version 3.16.3. Use the *attributes* parameter instead. """ - super().__init__(source=source, copy=copy) + super().__init__( + filename=filename, + address=address, + dtype=dtype, + shape=shape, + mask=mask, + unpack=unpack, + attributes=attributes, + storage_options=storage_options, + source=source, + copy=copy, + ) if source is not None: - try: - shape = source._get_component("shape", None) - except AttributeError: - shape = None - - try: - filename = source._get_component("filename", None) - except AttributeError: - filename = None - - try: - address = source._get_component("address", None) - except AttributeError: - address = None - try: fmt = source._get_component("fmt", None) except AttributeError: fmt = None - try: - dtype = source._get_component("dtype", None) - except AttributeError: - dtype = None - try: word_size = source._get_component("word_size", None) except AttributeError: @@ -136,31 +128,6 @@ def __init__( except AttributeError: byte_ordering = None - try: - attributes = source._get_component("attributes", None) - except AttributeError: - attributes = None - - if 
filename is not None: - if isinstance(filename, str): - filename = (filename,) - else: - filename = tuple(filename) - - self._set_component("filename", filename, copy=False) - - if address is not None: - if isinstance(address, int): - address = (address,) - else: - address = tuple(address) - - self._set_component("address", address, copy=False) - - self._set_component("shape", shape, copy=False) - self._set_component("dtype", dtype, copy=False) - self._set_component("attributes", attributes, copy=False) - if fmt is not None: self._set_component("fmt", fmt, copy=False) @@ -176,7 +143,7 @@ def __init__( def _get_array(self, index=None): """Returns a subspace of the dataset variable. - .. versionadded:: 1.11.2.0 + .. versionadded:: 3.16.3 .. seealso:: `__array__`, `index` @@ -215,8 +182,8 @@ def _get_array(self, index=None): # Get the data subspace, applying any masking and unpacking array = cfdm.netcdf_indexer( array, - mask=True, - unpack=True, + mask=self.get_mask(), + unpack=self.get_unpack(), always_masked_array=False, orthogonal_indexing=True, attributes=attributes, @@ -276,7 +243,7 @@ def _get_rec(self, f, header_offset): def _set_FillValue(self, int_hdr, real_hdr, attributes): """Set the ``_FillValue`` attribute. - .. versionadded:: 1.11.2.0 + .. versionadded:: 3.16.3 :Parameters: @@ -314,8 +281,6 @@ def _set_units(self, int_hdr, attributes): .. versionadded:: 3.14.0 - .. versionadded:: 1.11.2.0 - :Parameters: int_hdr: `numpy.ndarray` @@ -372,7 +337,7 @@ def _set_units(self, int_hdr, attributes): def _set_unpack(self, int_hdr, real_hdr, attributes): """Set the ``add_offset`` and ``scale_factor`` attributes. - .. versionadded:: 1.11.2.0 + .. 
versionadded:: 3.16.3 :Parameters: diff --git a/cf/data/collapse/collapse_active.py b/cf/data/collapse/collapse_active.py index acf5e60b6e..51ac197076 100644 --- a/cf/data/collapse/collapse_active.py +++ b/cf/data/collapse/collapse_active.py @@ -40,7 +40,7 @@ def actify(method): can be done in active storage, or the active storage reduction failed) then the computations will be done locally "as usual". - .. versionadded:: 1.11.2.0 + .. versionadded:: 3.16.3 .. seealso:: `active_chunk_function` @@ -92,7 +92,7 @@ def active_chunk_function(method, *args, **kwargs): reduction components, similar to that returned by a ``cf_*_chunk`` method, is returned. - .. versionadded:: 1.11.2.0 + .. versionadded:: 3.16.3 .. seealso:: `actify` diff --git a/cf/data/collapse/dask_collapse.py b/cf/data/collapse/dask_collapse.py index 412c91935f..a9245ff10e 100644 --- a/cf/data/collapse/dask_collapse.py +++ b/cf/data/collapse/dask_collapse.py @@ -1133,7 +1133,7 @@ def cf_sum_of_weights2_chunk( This function is passed to `dask.array.reduction` as its *chunk* parameter. - .. versionadded:: 1.11.2.0 + .. versionadded:: 3.16.3 :Parameters: diff --git a/cf/data/dask_utils.py b/cf/data/dask_utils.py index 4903d2ac2e..4c6923541d 100644 --- a/cf/data/dask_utils.py +++ b/cf/data/dask_utils.py @@ -413,7 +413,7 @@ def cf_units(a, from_units, to_units): def cf_is_masked(a): """Determine whether an array has masked values. - .. versionadded:: 1.11.2.0 + .. versionadded:: 3.16.3 :Parameters: @@ -436,7 +436,7 @@ def cf_is_masked(a): def cf_filled(a, fill_value=None): """Replace masked elements with a fill value. - .. versionadded:: 1.11.2.0 + .. 
versionadded:: 3.16.3 :Parameters: diff --git a/cf/data/data.py b/cf/data/data.py index d26e833af8..21d0bb8602 100644 --- a/cf/data/data.py +++ b/cf/data/data.py @@ -3,7 +3,6 @@ from functools import partial, reduce from itertools import product from operator import mul -from os import sep import cfdm import cftime @@ -28,11 +27,10 @@ from ..functions import ( _DEPRECATION_ERROR_KWARGS, _section, - abspath, free_memory, parse_indices, ) -from ..mixin2 import CFANetCDF, Container +from ..mixin2 import Container from ..units import Units from .collapse import Collapse from .dask_utils import ( @@ -68,7 +66,7 @@ _dtype_bool = np.dtype(bool) -class Data(DataClassDeprecationsMixin, CFANetCDF, Container, cfdm.Data): +class Data(DataClassDeprecationsMixin, Container, cfdm.Data): """An N-dimensional data array with units and masked values. * Contains an N-dimensional, indexable and broadcastable array with @@ -132,20 +130,6 @@ class Data(DataClassDeprecationsMixin, CFANetCDF, Container, cfdm.Data): """ - # Constants used to specify which components should be cleared - # when a new dask array is set. See `_clear_after_dask_update` for - # details. - # - # These constants must have values 2**N (N>=1), except for - # `_NONE` which must be 0, and `_ALL` which must be the sum of - # other constants. It is therefore convenient to define these - # constants in binary. - _NONE = 0b000 - _ARRAY = 0b001 - _CACHE = 0b010 - _CFA = 0b100 - _ALL = 0b111 - def __new__(cls, *args, **kwargs): """Store component classes.""" instance = super().__new__(cls) @@ -486,81 +470,6 @@ def __setitem__(self, indices, value): return - def _cfa_del_write(self): - """Set the CFA write status of the data to `False`. - - .. versionadded:: 3.15.0 - - .. seealso:: `cfa_get_write`, `_cfa_set_write` - - :Returns: - - `bool` - The CFA status prior to deletion. - - """ - return self._custom.pop("cfa_write", False) - - def _cfa_set_term(self, value): - """Set the CFA aggregation instruction term status. - - .. 
versionadded:: 3.15.0 - - .. seealso:: `cfa_get_term`, `cfa_set_term` - - :Parameters: - - status: `bool` - The new CFA aggregation instruction term status. - - :Returns: - - `None` - - """ - if not value: - self._custom.pop("cfa_term", None) - - self._custom["cfa_term"] = bool(value) - - def _is_abstract_Array_subclass(self, array): - """Whether or not an array is a type of Array. - - :Parameters: - - array: - - :Returns: - - `bool` - - """ - return isinstance(array, cfdm.Array) - - def _cfa_set_write(self, status): - """Set the CFA write status of the data. - - If and only if the CFA write status is True then it may be - possible to write the data as an aggregation variable to a - CFA-netCDF file. - - .. versionadded:: 3.15.0 - - .. seealso:: `cfa_get_write`, `cfa_set_write`, - `_cfa_del_write`, `cf.read`, `cf.write`, - - :Parameters: - - status: `bool` - The new CFA write status. - - :Returns: - - `None` - - """ - self._custom["cfa_write"] = bool(status) - @_inplace_enabled(default=False) def diff(self, axis=-1, n=1, inplace=False): """Calculate the n-th discrete difference along the given axis. @@ -1390,7 +1299,7 @@ def percentile( axes = d._axes d._axes = (new_axis_identifier(axes),) + axes - d._update_deterministic(not is_dask_collection(q)) + d._update_deterministic(q) return d @@ -1433,110 +1342,6 @@ def ceil(self, inplace=False, i=False): d._set_dask(dx) return d - def cfa_get_term(self): - """The CFA aggregation instruction term status. - - If True then the data represents that of a non-standard CFA - aggregation instruction variable. - - .. versionadded:: 3.15.0 - - .. seealso:: `cfa_set_term` - - :Returns: - - `bool` - - **Examples** - - >>> d = cf.Data([1, 2]) - >>> d.cfa_get_term() - False - - """ - return bool(self._custom.get("cfa_term", False)) - - def cfa_get_write(self): - """The CFA write status of the data. 
- - If and only if the CFA write status is True then it may be - possible to write the data as an aggregation variable to a - CFA-netCDF file. - - .. versionadded:: 3.15.0 - - .. seealso:: `cfa_set_write`, `cf.read`, `cf.write` - - :Returns: - - `bool` - - **Examples** - - >>> d = cf.Data([1, 2]) - >>> d.cfa_get_write() - False - - """ - return bool(self._custom.get("cfa_write", False)) - - def cfa_set_term(self, status): - """Set the CFA aggregation instruction term status. - - If True then the data represents that of a non-standard CFA - aggregation instruction variable. - - .. versionadded:: 3.15.0 - - .. seealso:: `cfa_get_term` - - :Parameters: - - status: `bool` - The new CFA aggregation instruction term status. - - :Returns: - - `None` - - """ - if status: - raise ValueError( - "'cfa_set_term' only allows the CFA aggregation instruction " - "term write status to be set to False" - ) - - self._custom.pop("cfa_term", False) - - def cfa_set_write(self, status): - """Set the CFA write status of the data. - - If and only if the CFA write status is True then it may be - possible to write the data as an aggregation variable to a - CFA-netCDF file. - - .. versionadded:: 3.15.0 - - .. seealso:: `cfa_get_write`, `cf.read`, `cf.write` - - :Parameters: - - status: `bool` - The new CFA write status. - - :Returns: - - `None` - - """ - if status: - raise ValueError( - "'cfa_set_write' only allows the CFA write status to be " - "set to False" - ) - - self._cfa_del_write() - @_inplace_enabled(default=False) def convolution_filter( self, @@ -1941,47 +1746,6 @@ def _asreftime(self, inplace=False): return d - def _clear_after_dask_update(self, clear=None): - """Remove components invalidated by updating the `dask` array. - - Removes or modifies components that can't be guaranteed to be - consistent with an updated `dask` array. See the *clear* - parameter for details. - - .. versionadded:: 1.11.2.0 - - .. 
seealso:: `_del_Array`, `_del_cached_elements`, - `_set_dask`, `_cfa_del_write` - - :Parameters: - - clear: `int` or `None`, optional - Specify which components to remove, determined by - sequentially combining an integer value of *clear* - with the relevant class-level constants (such as - ``Data._ARRAY``), using the bitwise AND (&) - operator. If ``clear & `` is - True then the corresponding component is cleared. The - default value of `None` is equivalent to *clear* being - set to ``Data._ALL``. - - The bitwise OR (^) operator can be used to retain a - component (or components) but remove all others. For - instance, if *clear* is ``Data._ALL ^ - Data._CACHE`` then all components except the - cached array values will be removed. - - :Returns: - - `None` - - """ - clear = super()._clear_after_dask_update(clear) - - if clear & self._CFA: - # Set the CFA write status to False - self._cfa_del_write() - def _combined_units(self, data1, method, inplace): """Combines by given method the data's units with other units. @@ -2467,7 +2231,7 @@ def _parse_indices(self, *args, **kwargs): """ raise NotImplementedError( - "'cf.Data._parse_indices' is not available. " + "'cf.Data._parse_indices' is no longer available. " "Use function 'cf.parse_indices' instead." ) @@ -2637,243 +2401,6 @@ def _regrid( return d - @classmethod - def concatenate( - cls, data, axis=0, cull_graph=False, relaxed_units=False, copy=True - ): - """Join a sequence of data arrays together. - - .. seealso:: `cull_graph` - - :Parameters: - - data: sequence of `Data` - The data arrays to be concatenated. Concatenation is - carried out in the order given. Each data array must have - equivalent units and the same shape, except in the - concatenation axis. Note that scalar arrays are treated as - if they were one dimensional. - - axis: `int`, optional - The axis along which the arrays will be joined. The - default is 0. Note that scalar arrays are treated as if - they were one dimensional. - - .. 
note:: If the axis specified is cyclic, it will become - non-cyclic in the output. - - {{cull_graph: `bool`, optional}} - - .. versionadded:: 3.14.0 - - {{relaxed_units: `bool`, optional}} - - .. versionadded:: 3.14.1 - - copy: `bool`, optional - If True (the default) then make copies of the data, if - required, prior to the concatenation, thereby ensuring - that the input data arrays are not changed by the - concatenation process. If False then some or all input - data arrays might be changed in-place, but the - concatenation process will be faster. - - .. versionadded:: 3.15.1 - - :Returns: - - `Data` - The concatenated data. - - **Examples** - - >>> d = cf.Data([[1, 2], [3, 4]], 'km') - >>> e = cf.Data([[5.0, 6.0]], 'metre') - >>> f = cf.Data.concatenate((d, e)) - >>> print(f.array) - [[ 1. 2. ] - [ 3. 4. ] - [ 0.005 0.006]] - >>> f.equals(cf.Data.concatenate((d, e), axis=-2)) - True - - >>> e = cf.Data([[5.0], [6.0]], 'metre') - >>> f = cf.Data.concatenate((d, e), axis=1) - >>> print(f.array) - [[ 1. 2. 0.005] - [ 3. 4. 0.006]] - - >>> d = cf.Data(1, 'km') - >>> e = cf.Data(50.0, 'metre') - >>> f = cf.Data.concatenate((d, e)) - >>> print(f.array) - [ 1. 0.05] - - >>> e = cf.Data([50.0, 75.0], 'metre') - >>> f = cf.Data.concatenate((d, e)) - >>> print(f.array) - [ 1. 0.05 0.075] - - """ - data = tuple(data) - if len(data) < 2: - raise ValueError( - "Can't concatenate: Must provide at least two data arrays" - ) - - if cull_graph: - # Remove unnecessary components from the graph, which may - # improve performance, and because complicated task graphs - # can sometimes confuse da.concatenate. 
- for d in data: - d.cull_graph() - - data0 = data[0] - units0 = data0.Units - - if copy: - data0 = data0.copy() - copied = True - else: - copied = False - - processed_data = [] - for index, data1 in enumerate(data): - # Turn any scalar array into a 1-d array - if not data1.ndim: - if not copied: - data1 = data1.copy() - copied = True - - data1.insert_dimension(inplace=True) - - # Check and conform, if necessary, the units of all inputs - units1 = data1.Units - if ( - relaxed_units - and not units0.isvalid - and not units1.isvalid - and units0.__dict__ == units1.__dict__ - ): - # Allow identical invalid units to be equal - pass - elif units0.equals(units1): - pass - elif units0.equivalent(units1): - if not copied: - data1 = data1.copy() - copied = True - - data1.Units = units0 - else: - raise ValueError( - "Can't concatenate: All the input arrays must have " - "equivalent units" - ) - - processed_data.append(data1) - copied = not copy # to avoid making two copies in a given case - - # Get data as dask arrays and apply concatenation - # operation. We can set '_force_to_memory=False' because at compute - # time the concatenation operation does not need to access the - # actual data. - dxs = [d.to_dask_array(_force_to_memory=False) for d in processed_data] - dx = da.concatenate(dxs, axis=axis) - - # Set the CFA write status - # - # Assume at first that all input data instances have True - # status, but ... - cfa = cls._CFA - for d in processed_data: - if not d.cfa_get_write(): - # ... the CFA write status is False when any input - # data instance has False status ... - cfa = cls._NONE - break - - if cfa != cls._NONE: - non_concat_axis_chunks0 = list(processed_data[0].chunks) - non_concat_axis_chunks0.pop(axis) - for d in processed_data[1:]: - non_concat_axis_chunks = list(d.chunks) - non_concat_axis_chunks.pop(axis) - if non_concat_axis_chunks != non_concat_axis_chunks0: - # ... 
the CFA write status is False when any two - # input data instances have different chunk - # patterns for the non-concatenated axes. - cfa = cls._NONE - break - - # Define the __in_memory__ status - in_memory = processed_data[0].__in_memory__ - for d in processed_data[1:]: - if d.__in_memory__ != in_memory: - # If and only if any two input Data objects have - # different __in_memory__ values, then set - # in_memory=False on the concatenation. - in_memory = False - break - - # Set the new dask array - data0._set_dask(dx, clear=cls._ALL ^ cfa, in_memory=in_memory) - - # Set appropriate cached elements - cached_elements = {} - for i in (0, -1): - element = processed_data[i]._get_cached_elements().get(i) - if element is not None: - cached_elements[i] = element - - if cached_elements: - data0._set_cached_elements(cached_elements) - - # Set whether or not the concatenated name is deterministic - deterministic = True - for d in processed_data: - if not d.has_deterministic_name(): - deterministic = False - break - - data0._update_deterministic(deterministic) - - # Set the CFA-netCDF aggregated data instructions and file - # name substitutions by combining them from all of the input - # data instances, giving precedence to those towards the left - # hand side of the input list. - if data0.cfa_get_write(): - aggregated_data = {} - substitutions = {} - for d in processed_data[::-1]: - aggregated_data.update(d.cfa_get_aggregated_data()) - substitutions.update(d.cfa_file_substitutions()) - - if aggregated_data: - data0.cfa_set_aggregated_data(aggregated_data) - - if substitutions: - data0.cfa_update_file_substitutions(substitutions) - - # Set the CFA aggregation instruction term status - if data0.cfa_get_term(): - for d in processed_data[1:]: - if not d.cfa_get_term(): - data0.cfa_set_term(False) - break - - # Manage cyclicity of axes: if join axis was cyclic, it is no - # longer. 
- axis = data0._parse_axes(axis)[0] - if axis in data0.cyclic(): - logger.warning( - f"Concatenating along a cyclic axis ({axis}) therefore the " - "axis has been set as non-cyclic in the output." - ) - data0.cyclic(axes=axis, iscyclic=False) - - return data0 - def __add__(self, other): """The binary arithmetic operation ``+`` @@ -3272,6 +2799,114 @@ def is_masked(self): return bool(dx.any()) + @classmethod + def _concatenate_conform_units(cls, data1, units0, relaxed_units, copy): + """Check and conform the units of data prior to concatenation. + + This is a helper function for `concatenate` that may be easily + overridden in subclasses, to allow for customisation of the + concatenation process. + + .. versionadded:: NEXTVERSION + + .. seealso:: `concatenate` + + :Parameters: + + data1: `Data` + Data with units. + + units0: `Units` + The units to conform *data1* to. + + {{relaxed_units: `bool`, optional}} + + copy: `bool` + If False then modify *data1* in-place. Otherwise a + copy of it is modified. + + :Returns: + + `Data` + Returns *data1*, possibly modified so that it conforms + to *units0*. If *copy* is False and *data1* is + modified, then it is done so in-place. + + """ + # Check and conform, if necessary, the units of all inputs + units1 = data1.Units + if ( + relaxed_units + and not units0.isvalid + and not units1.isvalid + and units0.__dict__ == units1.__dict__ + ): + # Allow identical invalid units to be equal + pass + elif units0.equals(units1): + pass + elif units0.equivalent(units1): + if copy: + data1 = data1.copy() + + data1.Units = units0 + else: + raise ValueError( + "Can't concatenate: All the input arrays must have " + f"equivalent units. Got {units0!r} and {units1!r}" + ) + + return data1 + + @classmethod + def _concatenate_post_process( + cls, concatenated_data, axis, conformed_data + ): + """Post-process concatenated data. 
+ + This is a helper function for `concatenate` that may be easily + overridden in subclasses, to allow for customisation of the + concatenation process. + + .. versionadded:: NEXTVERSION + + .. seealso:: `concatenate` + + :Parameters: + + concatenated_data: `Data` + The concatenated data array. + + axis: `int` + The axis of concatenation. + + conformed_data: sequence of `Data` + The ordered sequence of data arrays that were + concatenated. + + :Returns: + + `Data` + Returns *concatenated_data*, possibly modified + in-place. + + """ + concatenated_data = super()._concatenate_post_process( + concatenated_data, axis, conformed_data + ) + + # Manage cyclicity of axes: if join axis was cyclic, it is no + # longer. + axis = concatenated_data._parse_axes(axis)[0] + if axis in concatenated_data.cyclic(): + logger.warning( + f"Concatenating along a cyclic axis ({axis}) therefore the " + "axis has been set as non-cyclic in the output." + ) + concatenated_data.cyclic(axes=axis, iscyclic=False) + + return concatenated_data + @_inplace_enabled(default=False) def arctan(self, inplace=False): """Take the trigonometric inverse tangent of the data element- @@ -3976,58 +3611,6 @@ def convert_reference_time( return d - def add_file_location(self, location): - """Add a new file location in-place. - - All data definitions that reference files are additionally - referenced from the given location. - - .. versionadded:: 3.15.0 - - .. seealso:: `del_file_location`, `file_locations` - - :Parameters: - - location: `str` - The new location. - - :Returns: - - `str` - The new location as an absolute path with no trailing - path name component separator. - - **Examples** - - >>> d.add_file_location('/data/model/') - '/data/model' - - """ - location = abspath(location).rstrip(sep) - - updated = False - - # The dask graph is never going to be computed, so we can set - # '_force_to_memory=False'. 
- dsk = self.todict(_force_to_memory=False) - for key, a in dsk.items(): - try: - dsk[key] = a.add_file_location(location) - except AttributeError: - # This chunk doesn't contain a file array - continue - - # This chunk contains a file array and the dask graph has - # been updated - updated = True - - if updated: - dx = self.to_dask_array(_force_to_memory=False) - dx = da.Array(dsk, dx.name, dx.chunks, dx.dtype, dx._meta) - self._set_dask(dx, clear=self._NONE, in_memory=None) - - return location - def set_units(self, value): """Set the units. @@ -4068,7 +3651,7 @@ def masked_where(self, condition, inplace=False): `masked_where` causes all delayed operations to be executed. - .. versionadded:: 1.11.2.0 + .. versionadded:: 3.16.3 .. seealso:: `mask`, `masked_values`, `where` @@ -5809,41 +5392,6 @@ def halo( return d - def file_locations(self): - """The locations of files containing parts of the data. - - Returns the locations of any files that may be required to - deliver the computed data array. - - .. versionadded:: 3.15.0 - - .. seealso:: `add_file_location`, `del_file_location` - - :Returns: - - `set` - The unique file locations as absolute paths with no - trailing path name component separator. - - **Examples** - - >>> d.file_locations() - {'/home/data1', 'file:///data2'} - - """ - out = set() - - # The dask graph is never going to be computed, so we can set - # '_force_to_memory=False'. - for key, a in self.todict(_force_to_memory=False).items(): - try: - out.update(a.file_locations()) - except AttributeError: - # This chunk doesn't contain a file array - pass - - return out - def flat(self, ignore_masked=True): """Return a flat iterator over elements of the data array. @@ -6363,58 +5911,6 @@ def masked_invalid(self, inplace=False): d._set_dask(dx) return d - def del_file_location(self, location): - """Remove a file location in-place. - - All data definitions that reference files will have references - to files in the given location removed from them. - - .. 
versionadded:: 3.15.0 - - .. seealso:: `add_file_location`, `file_locations` - - :Parameters: - - location: `str` - The file location to remove. - - :Returns: - - `str` - The removed location as an absolute path with no - trailing path name component separator. - - **Examples** - - >>> d.del_file_location('/data/model/') - '/data/model' - - """ - location = abspath(location).rstrip(sep) - - updated = False - - # The dask graph is never going to be computed, so we can set - # '_force_to_memory=False'. - dsk = self.todict(_force_to_memory=False) - for key, a in dsk.items(): - try: - dsk[key] = a.del_file_location(location) - except AttributeError: - # This chunk doesn't contain a file array - continue - - # This chunk contains a file array and the dask graph has - # been updated - updated = True - - if updated: - dx = self.to_dask_array(_force_to_memory=False) - dx = da.Array(dsk, dx.name, dx.chunks, dx.dtype, dx._meta) - self._set_dask(dx, clear=self._NONE, in_memory=None) - - return location - @classmethod def masked_all( cls, shape, dtype=None, units=None, calendar=None, chunks="auto" diff --git a/cf/data/fragment/__init__.py b/cf/data/fragment/__init__.py index b7315107d4..8a93ba7a1d 100644 --- a/cf/data/fragment/__init__.py +++ b/cf/data/fragment/__init__.py @@ -1,5 +1,2 @@ -from .fullfragmentarray import FullFragmentArray -from .h5netcdffragmentarray import H5netcdfFragmentArray -from .netcdffragmentarray import NetCDFFragmentArray -from .netcdf4fragmentarray import NetCDF4FragmentArray -from .umfragmentarray import UMFragmentArray +from .fragmentfilearray import FragmentFileArray +from .fragmentumarray import FragmentUMArray diff --git a/cf/data/fragment/fragmentfilearray.py b/cf/data/fragment/fragmentfilearray.py new file mode 100644 index 0000000000..b71aeb2460 --- /dev/null +++ b/cf/data/fragment/fragmentfilearray.py @@ -0,0 +1,30 @@ +import cfdm + +from ...mixin_container import Container +from ..array.mixin import ActiveStorageMixin + + +class 
FragmentFileArray( + ActiveStorageMixin, Container, cfdm.data.fragment.FragmentFileArray +): + """Fragment of aggregated data in a file. + + .. versionadded:: NEXTVERSION + + """ + + def __new__(cls, *args, **kwargs): + """Store fragment classes. + + .. versionadded:: NEXTVERSION + + """ + # Import fragment classes. Do this here (as opposed to outside + # the class) to aid subclassing. + from .fragmentumarray import FragmentUMArray + + instance = super().__new__(cls) + instance._FragmentArrays = instance._FragmentArrays + ( + FragmentUMArray, + ) + return instance diff --git a/cf/data/fragment/fragmentumarray.py b/cf/data/fragment/fragmentumarray.py new file mode 100644 index 0000000000..6cfa8bbde2 --- /dev/null +++ b/cf/data/fragment/fragmentumarray.py @@ -0,0 +1,13 @@ +import cfdm + +from ..array.umarray import UMArray + + +class FragmentUMArray( + cfdm.data.fragment.mixin.FragmentFileArrayMixin, UMArray +): + """A fragment of aggregated data in a PP or UM file. + + .. versionadded:: 3.14.0 + + """ diff --git a/cf/data/fragment/fullfragmentarray.py b/cf/data/fragment/fullfragmentarray.py deleted file mode 100644 index 52760f24f7..0000000000 --- a/cf/data/fragment/fullfragmentarray.py +++ /dev/null @@ -1,91 +0,0 @@ -from ..array.fullarray import FullArray -from .mixin import FragmentArrayMixin - - -class FullFragmentArray(FragmentArrayMixin, FullArray): - """A CFA fragment array that is filled with a value. - - .. versionadded:: 3.15.0 - - """ - - def __init__( - self, - fill_value=None, - dtype=None, - shape=None, - aggregated_units=False, - aggregated_calendar=False, - attributes=None, - source=None, - copy=True, - ): - """**Initialisation** - - :Parameters: - - fill_value: scalar - The fill value. - - dtype: `numpy.dtype` - The data type of the aggregated array. May be `None` - if the numpy data-type is not known (which can be the - case for netCDF string types, for example). This may - differ from the data type of the netCDF fragment - variable. 
- - shape: `tuple` - The shape of the fragment within the aggregated - array. This may differ from the shape of the netCDF - fragment variable in that the latter may have fewer - size 1 dimensions. - - {{init attributes: `dict` or `None`, optional}} - - .. versionadded:: 1.11.2.0 - - {{aggregated_units: `str` or `None`, optional}} - - {{aggregated_calendar: `str` or `None`, optional}} - - {{init source: optional}} - - {{init copy: `bool`, optional}} - - units: `str` or `None`, optional - Deprecated at version 1.11.2.0. Use the - *attributes* parameter instead. - - calendar: `str` or `None`, optional - Deprecated at version 1.11.2.0. Use the - *attributes* parameter instead. - - """ - super().__init__( - fill_value=fill_value, - dtype=dtype, - shape=shape, - attributes=attributes, - source=source, - copy=False, - ) - - if source is not None: - try: - aggregated_units = source._get_component( - "aggregated_units", False - ) - except AttributeError: - aggregated_units = False - - try: - aggregated_calendar = source._get_component( - "aggregated_calendar", False - ) - except AttributeError: - aggregated_calendar = False - - self._set_component("aggregated_units", aggregated_units, copy=False) - self._set_component( - "aggregated_calendar", aggregated_calendar, copy=False - ) diff --git a/cf/data/fragment/h5netcdffragmentarray.py b/cf/data/fragment/h5netcdffragmentarray.py deleted file mode 100644 index 99ac398b09..0000000000 --- a/cf/data/fragment/h5netcdffragmentarray.py +++ /dev/null @@ -1,97 +0,0 @@ -from ..array.h5netcdfarray import H5netcdfArray -from .mixin import FragmentArrayMixin - - -class H5netcdfFragmentArray(FragmentArrayMixin, H5netcdfArray): - """A netCDF fragment array accessed with `h5netcdf`. - - .. 
versionadded:: 1.11.2.0 - - """ - - def __init__( - self, - filename=None, - address=None, - dtype=None, - shape=None, - aggregated_units=False, - aggregated_calendar=False, - attributes=None, - storage_options=None, - source=None, - copy=True, - ): - """**Initialisation** - - :Parameters: - - filename: (sequence of `str`), optional - The names of the netCDF fragment files containing the - array. - - address: (sequence of `str`), optional - The name of the netCDF variable containing the - fragment array. Required unless *varid* is set. - - dtype: `numpy.dtype`, optional - The data type of the aggregated array. May be `None` - if the numpy data-type is not known (which can be the - case for netCDF string types, for example). This may - differ from the data type of the netCDF fragment - variable. - - shape: `tuple`, optional - The shape of the fragment within the aggregated - array. This may differ from the shape of the netCDF - fragment variable in that the latter may have fewer - size 1 dimensions. - - {{init attributes: `dict` or `None`, optional}} - - If *attributes* is `None`, the default, then the - attributes will be set from the netCDF variable during - the first `__getitem__` call. 
- - {{aggregated_units: `str` or `None`, optional}} - - {{aggregated_calendar: `str` or `None`, optional}} - - {{init storage_options: `dict` or `None`, optional}} - - {{init source: optional}} - - {{init copy: `bool`, optional}} - - """ - super().__init__( - filename=filename, - address=address, - dtype=dtype, - shape=shape, - mask=True, - attributes=attributes, - storage_options=storage_options, - source=source, - copy=copy, - ) - - if source is not None: - try: - aggregated_units = source._get_component( - "aggregated_units", False - ) - except AttributeError: - aggregated_units = False - - try: - aggregated_calendar = source._get_component( - "aggregated_calendar", False - ) - except AttributeError: - aggregated_calendar = False - - self._set_component("aggregated_units", aggregated_units, copy=False) - self._set_component( - "aggregated_calendar", aggregated_calendar, copy=False - ) diff --git a/cf/data/fragment/mixin/__init__.py b/cf/data/fragment/mixin/__init__.py deleted file mode 100644 index a4a35a1129..0000000000 --- a/cf/data/fragment/mixin/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .fragmentarraymixin import FragmentArrayMixin diff --git a/cf/data/fragment/mixin/fragmentarraymixin.py b/cf/data/fragment/mixin/fragmentarraymixin.py deleted file mode 100644 index e43caec626..0000000000 --- a/cf/data/fragment/mixin/fragmentarraymixin.py +++ /dev/null @@ -1,258 +0,0 @@ -from math import prod - -import numpy as np - -from ....units import Units - - -class FragmentArrayMixin: - """Mixin class for a CFA fragment array. - - .. versionadded:: 3.15.0 - - """ - - def _get_array(self, index=None): - """Returns a subspace of the dataset variable. - - .. versionadded:: 1.11.2.0 - - .. seealso:: `__array__`, `index` - - :Parameters: - - {{index: `tuple` or `None`, optional}} - - It is important that there is a distinct value for each - fragment dimension, which is guaranteed when the - default of the `index` attribute is being used. 
- - :Returns: - - `numpy.ndarray` - The subspace. - - """ - if index is None: - index = self.index() - - try: - array = super()._get_array(index) - except ValueError: - # A ValueError is expected to be raised when the fragment - # variable has fewer than 'self.ndim' dimensions (we know - # that this is the case because 'index' has 'self.ndim' - # elements). - axis = self._size_1_axis(index) - if axis is not None: - # There is a unique size 1 index that must correspond - # to the missing dimension => Remove it from the - # indices, get the fragment array with the new - # indices; and then insert the missing size one - # dimension. - index = list(index) - index.pop(axis) - array = super()._get_array(tuple(index)) - array = np.expand_dims(array, axis) - else: - # There are multiple size 1 indices so we don't know - # how many missing dimensions the fragment has, nor - # their positions => Get the full fragment array and - # then reshape it to the shape of the dask compute - # chunk; and then apply the index. - array = super()._get_array(Ellipsis) - if array.size > prod(self.original_shape): - raise ValueError( - f"Can't get CFA fragment data from ({self}) when " - "the fragment has two or more missing size 1 " - "dimensions, whilst also spanning two or more " - "Dask compute chunks." - "\n\n" - "Consider re-creating the data with exactly one " - "Dask compute chunk per fragment (e.g. by setting " - "'chunks=None' as a keyword to cf.read)." - ) - - array = array.reshape(self.original_shape) - array = array[index] - - array = self._conform_to_aggregated_units(array) - return array - - def _conform_to_aggregated_units(self, array): - """Conform the array to have the aggregated units. - - .. versionadded:: 3.15.0 - - :Parameters: - - array: `numpy.ndarray` or `dict` - The array to be conformed. If *array* is a `dict` with - `numpy` array values then selected values are - conformed. - - :Returns: - - `numpy.ndarray` or `dict` - The conformed array. 
The returned array may or may not - be the input array updated in-place, depending on its - data type and the nature of its units and the - aggregated units. - - If *array* is a `dict` then a dictionary of conformed - arrays is returned. - - """ - units = self.Units - if units: - aggregated_units = self.aggregated_Units - if not units.equivalent(aggregated_units): - raise ValueError( - f"Can't convert fragment data with units {units!r} to " - f"have aggregated units {aggregated_units!r}" - ) - - if units != aggregated_units: - if isinstance(array, dict): - # 'array' is a dictionary. - raise ValueError( - "TODOACTIVE. Placeholder notification that " - "we can't yet deal with active " - "storage reductions on CFA fragments." - ) - else: - # 'array' is a numpy array - array = Units.conform( - array, units, aggregated_units, inplace=True - ) - - return array - - def _size_1_axis(self, indices): - """Find the position of a unique size 1 index. - - .. versionadded:: 3.15.0 - - .. seealso:: `_parse_indices`, `__getitem__` - - :Paramealso:: `_parse_indices`, `__getitem__` - - :Parameters: - - indices: sequence of index - The array indices to be parsed, as returned by - `_parse_indices`. - - :Returns: - - `int` or `None` - The position of the unique size 1 index, or `None` if - there are zero or at least two of them. - - **Examples** - - >>> a._size_1_axis(([2, 4, 5], slice(0, 1), slice(0, 73))) - 1 - >>> a._size_1_axis(([2, 4, 5], slice(3, 4), slice(0, 73))) - 1 - >>> a._size_1_axis(([2, 4, 5], [0], slice(0, 73))) - 1 - >>> a._size_1_axis(([2, 4, 5], slice(0, 144), slice(0, 73))) - None - >>> a._size_1_axis(([2, 4, 5], slice(3, 7), [0, 1])) - None - >>> a._size_1_axis(([2, 4, 5], slice(0, 1), [0])) - None - - """ - original_shape = self.original_shape - if original_shape.count(1): - return original_shape.index(1) - - return - - @property - def aggregated_Units(self): - """The units of the aggregated data. - - .. 
versionadded:: 3.15.0 - - :Returns: - - `Units` - The units of the aggregated data. - - """ - return Units( - self.get_aggregated_units(), self.get_aggregated_calendar(None) - ) - - def get_aggregated_calendar(self, default=ValueError()): - """The calendar of the aggregated array. - - If the calendar is `None` then the CF default calendar is - assumed, if applicable. - - .. versionadded:: 3.15.0 - - :Parameters: - - default: optional - Return the value of the *default* parameter if the - aggregated calendar has not been set. If set to an - `Exception` instance then it will be raised instead. - - :Returns: - - `str` or `None` - The calendar value. - - """ - calendar = self._get_component("aggregated_calendar", False) - if calendar is False: - if default is None: - return - - return self._default( - default, - f"{self.__class__.__name__} 'aggregated_calendar' has not " - "been set", - ) - - return calendar - - def get_aggregated_units(self, default=ValueError()): - """The units of the aggregated array. - - If the units are `None` then the aggregated array has no - defined units. - - .. versionadded:: 3.15.0 - - .. seealso:: `get_aggregated_calendar` - - :Parameters: - - default: optional - Return the value of the *default* parameter if the - aggregated units have not been set. If set to an - `Exception` instance then it will be raised instead. - - :Returns: - - `str` or `None` - The units value. 
- - """ - units = self._get_component("aggregated_units", False) - if units is False: - if default is None: - return - - return self._default( - default, - f"{self.__class__.__name__} 'aggregated_units' have not " - "been set", - ) - - return units diff --git a/cf/data/fragment/netcdf4fragmentarray.py b/cf/data/fragment/netcdf4fragmentarray.py deleted file mode 100644 index f93a13dc18..0000000000 --- a/cf/data/fragment/netcdf4fragmentarray.py +++ /dev/null @@ -1,108 +0,0 @@ -from ..array.netcdf4array import NetCDF4Array -from .mixin import FragmentArrayMixin - - -class NetCDF4FragmentArray(FragmentArrayMixin, NetCDF4Array): - """A netCDF fragment array accessed with `netCDF4`. - - .. versionadded:: 1.11.2.0 - - """ - - def __init__( - self, - filename=None, - address=None, - dtype=None, - shape=None, - aggregated_units=False, - aggregated_calendar=False, - attributes=None, - storage_options=None, - source=None, - copy=True, - ): - """**Initialisation** - - :Parameters: - - filename: (sequence of `str`), optional - The names of the netCDF fragment files containing the - array. - - address: (sequence of `str`), optional - The name of the netCDF variable containing the - fragment array. Required unless *varid* is set. - - dtype: `numpy.dtype`, optional - The data type of the aggregated array. May be `None` - if the numpy data-type is not known (which can be the - case for netCDF string types, for example). This may - differ from the data type of the netCDF fragment - variable. - - shape: `tuple`, optional - The shape of the fragment within the aggregated - array. This may differ from the shape of the netCDF - fragment variable in that the latter may have fewer - size 1 dimensions. - - units: `str` or `None`, optional - The units of the fragment data. Set to `None` to - indicate that there are no units. If unset then the - units will be set during the first `__getitem__` call. - - calendar: `str` or `None`, optional - The calendar of the fragment data. 
Set to `None` to - indicate the CF default calendar, if applicable. If - unset then the calendar will be set during the first - `__getitem__` call. - - {{init attributes: `dict` or `None`, optional}} - - If *attributes* is `None`, the default, then the - attributes will be set from the netCDF variable during - the first `__getitem__` call. - - {{aggregated_units: `str` or `None`, optional}} - - {{aggregated_calendar: `str` or `None`, optional}} - - {{init storage_options: `dict` or `None`, optional}} - - {{init source: optional}} - - {{init copy: `bool`, optional}} - - """ - super().__init__( - filename=filename, - address=address, - dtype=dtype, - shape=shape, - mask=True, - attributes=attributes, - storage_options=storage_options, - source=source, - copy=copy, - ) - - if source is not None: - try: - aggregated_units = source._get_component( - "aggregated_units", False - ) - except AttributeError: - aggregated_units = False - - try: - aggregated_calendar = source._get_component( - "aggregated_calendar", False - ) - except AttributeError: - aggregated_calendar = False - - self._set_component("aggregated_units", aggregated_units, copy=False) - self._set_component( - "aggregated_calendar", aggregated_calendar, copy=False - ) diff --git a/cf/data/fragment/netcdffragmentarray.py b/cf/data/fragment/netcdffragmentarray.py deleted file mode 100644 index cd24b07c9d..0000000000 --- a/cf/data/fragment/netcdffragmentarray.py +++ /dev/null @@ -1,239 +0,0 @@ -import cfdm - -from ..array.abstract import Array -from ..array.mixin import FileArrayMixin -from .h5netcdffragmentarray import H5netcdfFragmentArray -from .mixin import FragmentArrayMixin -from .netcdf4fragmentarray import NetCDF4FragmentArray - - -class NetCDFFragmentArray( - FragmentArrayMixin, - cfdm.data.mixin.NetCDFFileMixin, - FileArrayMixin, - cfdm.data.mixin.IndexMixin, - cfdm.data.mixin.FileArrayMixin, - Array, -): - """A netCDF fragment array. - - Access will be with either `netCDF4` or `h5netcdf`. - - .. 
versionadded:: 3.15.0 - - """ - - def __init__( - self, - filename=None, - address=None, - dtype=None, - shape=None, - aggregated_units=False, - aggregated_calendar=False, - attributes=None, - storage_options=None, - source=None, - copy=True, - ): - """**Initialisation** - - :Parameters: - - filename: (sequence of `str`), optional - The names of the netCDF fragment files containing the - array. - - address: (sequence of `str`), optional - The name of the netCDF variable containing the - fragment array. Required unless *varid* is set. - - dtype: `numpy.dtype`, optional - The data type of the aggregated array. May be `None` - if the numpy data-type is not known (which can be the - case for netCDF string types, for example). This may - differ from the data type of the netCDF fragment - variable. - - shape: `tuple`, optional - The shape of the fragment within the aggregated - array. This may differ from the shape of the netCDF - fragment variable in that the latter may have fewer - size 1 dimensions. - - {{init attributes: `dict` or `None`, optional}} - - If *attributes* is `None`, the default, then the - attributes will be set from the netCDF variable during - the first `__getitem__` call. - - .. versionadded:: 1.11.2.0 - - {{aggregated_units: `str` or `None`, optional}} - - {{aggregated_calendar: `str` or `None`, optional}} - - {{init storage_options: `dict` or `None`, optional}} - - .. versionadded:: 1.11.2.0 - - {{init source: optional}} - - {{init copy: `bool`, optional}} - - units: `str` or `None`, optional - Deprecated at version 1.11.2.0. Use the - *attributes* parameter instead. - - calendar: `str` or `None`, optional - Deprecated at version 1.11.2.0. Use the - *attributes* parameter instead. 
- - """ - super().__init__( - source=source, - copy=copy, - ) - - if source is not None: - try: - shape = source._get_component("shape", None) - except AttributeError: - shape = None - - try: - filename = source._get_component("filename", None) - except AttributeError: - filename = None - - try: - address = source._get_component("address", None) - except AttributeError: - address = None - - try: - dtype = source._get_component("dtype", None) - except AttributeError: - dtype = None - - try: - attributes = source._get_component("attributes", None) - except AttributeError: - attributes = None - - try: - aggregated_units = source._get_component( - "aggregated_units", False - ) - except AttributeError: - aggregated_units = False - - try: - aggregated_calendar = source._get_component( - "aggregated_calendar", False - ) - except AttributeError: - aggregated_calendar = False - - try: - storage_options = source._get_component( - "storage_options", None - ) - except AttributeError: - storage_options = None - - if filename is not None: - if isinstance(filename, str): - filename = (filename,) - else: - filename = tuple(filename) - - self._set_component("filename", filename, copy=False) - - if address is not None: - if isinstance(address, int): - address = (address,) - else: - address = tuple(address) - - self._set_component("address", address, copy=False) - - if storage_options is not None: - self._set_component("storage_options", storage_options, copy=False) - - self._set_component("shape", shape, copy=False) - self._set_component("dtype", dtype, copy=False) - self._set_component("attributes", attributes, copy=False) - self._set_component("mask", True, copy=False) - - self._set_component("aggregated_units", aggregated_units, copy=False) - self._set_component( - "aggregated_calendar", aggregated_calendar, copy=False - ) - - # By default, close the file after data array access - self._set_component("close", True, copy=False) - - def _get_array(self, index=None): - """Returns a 
subspace of the dataset variable. - - The method acts as a factory for either a - `NetCDF4FragmentArray` or a `H5netcdfFragmentArray` class, and - it is the result of calling `!_get_array` on the newly created - instance that is returned. - - `H5netcdfFragmentArray` will only be used if - `NetCDF4FragmentArray` returns a `FileNotFoundError` exception. - - .. versionadded:: 1.11.2.0 - - .. seealso:: `__array__`, `index` - - :Parameters: - - {{index: `tuple` or `None`, optional}} - - It is important that there is a distinct value for each - fragment dimension, which is guaranteed when the - default of the `index` attribute is being used. - - :Returns: - - `numpy.ndarray` - The subspace. - - """ - kwargs = { - "dtype": self.dtype, - "shape": self.shape, - "aggregated_units": self.get_aggregated_units(None), - "aggregated_calendar": self.get_aggregated_calendar(None), - "attributes": self.get_attributes(None), - "copy": False, - } - - # Loop round the files, returning as soon as we find one that - # is accessible. - filenames = self.get_filenames() - for filename, address in zip(filenames, self.get_addresses()): - kwargs["filename"] = filename - kwargs["address"] = address - kwargs["storage_options"] = self.get_storage_options( - create_endpoint_url=False - ) - - try: - return NetCDF4FragmentArray(**kwargs)._get_array(index) - except FileNotFoundError: - pass - except Exception: - return H5netcdfFragmentArray(**kwargs)._get_array(index) - - # Still here? 
- if not filenames: - raise FileNotFoundError("No fragment files") - - if len(filenames) == 1: - raise FileNotFoundError(f"No such fragment file: {filenames[0]}") - - raise FileNotFoundError(f"No such fragment files: {filenames}") diff --git a/cf/data/fragment/umfragmentarray.py b/cf/data/fragment/umfragmentarray.py deleted file mode 100644 index 7eed4fd0a4..0000000000 --- a/cf/data/fragment/umfragmentarray.py +++ /dev/null @@ -1,105 +0,0 @@ -from ..array.umarray import UMArray -from .mixin import FragmentArrayMixin - - -class UMFragmentArray(FragmentArrayMixin, UMArray): - """A CFA fragment array stored in a UM or PP file. - - .. versionadded:: 3.14.0 - - """ - - def __init__( - self, - filename=None, - address=None, - dtype=None, - shape=None, - aggregated_units=False, - aggregated_calendar=False, - attributes=None, - storage_options=None, - source=None, - copy=True, - ): - """**Initialisation** - - :Parameters: - - filename: (sequence of `str`), optional - The names of the UM or PP files containing the fragment. - - address: (sequence of `str`), optional - The start words in the files of the header. - - dtype: `numpy.dtype` - The data type of the aggregated array. May be `None` - if the numpy data-type is not known (which can be the - case for netCDF string types, for example). This may - differ from the data type of the netCDF fragment - variable. - - shape: `tuple` - The shape of the fragment within the aggregated - array. This may differ from the shape of the netCDF - fragment variable in that the latter may have fewer - size 1 dimensions. - - {{init attributes: `dict` or `None`, optional}} - - During the first `__getitem__` call, any of the - ``_FillValue``, ``add_offset``, ``scale_factor``, - ``units``, and ``calendar`` attributes which haven't - already been set will be inferred from the lookup - header and cached for future use. - - .. 
versionadded:: 1.11.2.0 - - {{aggregated_units: `str` or `None`, optional}} - - {{aggregated_calendar: `str` or `None`, optional}} - - {{init storage_options: `dict` or `None`, optional}} - - {{init source: optional}} - - {{init copy: `bool`, optional}} - - units: `str` or `None`, optional - Deprecated at version 1.11.2.0. Use the - *attributes* parameter instead. - - calendar: `str` or `None`, optional - Deprecated at version 1.11.2.0. Use the - *attributes* parameter instead. - - """ - super().__init__( - filename=filename, - address=address, - dtype=dtype, - shape=shape, - attributes=attributes, - source=source, - copy=False, - ) - - if source is not None: - try: - aggregated_units = source._get_component( - "aggregated_units", False - ) - except AttributeError: - aggregated_units = False - - try: - aggregated_calendar = source._get_component( - "aggregated_calendar", False - ) - except AttributeError: - aggregated_calendar = False - - self._set_component("aggregated_units", aggregated_units, copy=False) - self._set_component( - "aggregated_calendar", aggregated_calendar, copy=False - ) diff --git a/cf/docstring/docstring.py b/cf/docstring/docstring.py index f3a12ff306..9006299d75 100644 --- a/cf/docstring/docstring.py +++ b/cf/docstring/docstring.py @@ -470,38 +470,6 @@ # bounds "{{bounds: `bool`, optional}}": """bounds: `bool`, optional If True (the default) then alter any bounds.""", - # cull - "{{cull_graph: `bool`, optional}}": """cull_graph: `bool`, optional - If True then unnecessary tasks are removed (culled) - from each array's dask graph before - concatenation. This process can have a considerable - overhead but can sometimes improve the overall - performance of a workflow. If False (the default) then - dask graphs are not culled. 
See - `dask.optimization.cull` for details.""", - # relaxed_units - "{{relaxed_units: `bool`, optional}}": """relaxed_units: `bool`, optional - If True then allow the concatenation of data with - invalid but otherwise equal units. By default, if any - data array has invalid units then the concatenation - will fail. A `Units` object is considered to be - invalid if its `!isvalid` attribute is `False`.""", - # cfa substitutions - "{{cfa substitutions: `dict`}}": """substitutions: `dict` - The substitution definitions in a dictionary whose - key/value pairs are the file name parts to be - substituted and their corresponding substitution text. - - Each substitution definition may be specified with or - without the ``${...}`` syntax. For instance, the - following are equivalent: ``{'base': 'sub'}``, - ``{'${base}': 'sub'}``.""", - # cfa base - "{{cfa base: `str`}}": """base: `str` - The substitution definition to be removed. May be - specified with or without the ``${...}`` syntax. For - instance, the following are equivalent: ``'base'`` and - ``'${base}'``.""", # regular args "{{regular args}}": """A sequence of three numeric values. 
The first two values in the sequence represent the coordinate range (see the bounds @@ -631,21 +599,6 @@ coordinates check will be carried out, however, if the *check_coordinates* parameter is True.""", - # Returns cfa_file_substitutions - "{{Returns cfa_file_substitutions}}": """The CFA-netCDF file name substitutions in a dictionary - whose key/value pairs are the file name parts to be - substituted and their corresponding substitution - text.""", - # Returns cfa_clear_file_substitutions - "{{Returns cfa_clear_file_substitutions}}": """The removed CFA-netCDF file name substitutions in a - dictionary whose key/value pairs are the file name - parts to be substituted and their corresponding - substitution text.""", - # Returns cfa_clear_file_substitutions - "{{Returns cfa_del_file_substitution}}": """ - The removed CFA-netCDF file name substitution. If the - substitution was not defined then an empty dictionary - is returned.""", # subspace valid modes Field "{{subspace valid modes Field}}": """Valid modes are: diff --git a/cf/domain.py b/cf/domain.py index 8889fdf97a..5efa6fc541 100644 --- a/cf/domain.py +++ b/cf/domain.py @@ -162,104 +162,6 @@ def add_file_location( return location - def cfa_clear_file_substitutions( - self, - ): - """Remove all of the CFA-netCDF file name substitutions. - - .. versionadded:: 3.15.0 - - :Returns: - - `dict` - {{Returns cfa_clear_file_substitutions}} - - **Examples** - - >>> d.cfa_clear_file_substitutions() - {} - - """ - out = {} - for c in self.constructs.filter_by_data(todict=True).values(): - out.update(c.cfa_clear_file_substitutions()) - - return out - - def cfa_file_substitutions(self): - """Return the CFA-netCDF file name substitutions. - - .. 
versionadded:: 3.15.0 - - :Returns: - - `dict` - {{Returns cfa_file_substitutions}} - - **Examples** - - >>> d.cfa_file_substitutions() - {} - - """ - out = {} - for c in self.constructs.filter_by_data(todict=True).values(): - out.update(c.cfa_file_substitutions()) - - return out - - def cfa_del_file_substitution( - self, - base, - ): - """Remove a CFA-netCDF file name substitution. - - .. versionadded:: 3.15.0 - - :Parameters: - - base: `str` - {{cfa base: `str`}} - - :Returns: - - `dict` - {{Returns cfa_del_file_substitution}} - - **Examples** - - >>> f.cfa_del_file_substitution('base') - - """ - for c in self.constructs.filter_by_data(todict=True).values(): - c.cfa_del_file_substitution( - base, - ) - - def cfa_update_file_substitutions( - self, - substitutions, - ): - """Set CFA-netCDF file name substitutions. - - .. versionadded:: 3.15.0 - - :Parameters: - - {{cfa substitutions: `dict`}} - - :Returns: - - `None` - - **Examples** - - >>> d.cfa_update_file_substitutions({'base': '/data/model'}) - - """ - for c in self.constructs.filter_by_data(todict=True).values(): - c.cfa_update_file_substitutions(substitutions) - def close(self): """Close all files referenced by the domain construct. diff --git a/cf/field.py b/cf/field.py index 0d328d312a..879688714f 100644 --- a/cf/field.py +++ b/cf/field.py @@ -2,7 +2,6 @@ from dataclasses import dataclass from functools import reduce from operator import mul as operator_mul -from os import sep import cfdm import numpy as np @@ -49,7 +48,6 @@ _DEPRECATION_ERROR_METHOD, DeprecationError, _section, - abspath, flat, parse_indices, ) @@ -2604,164 +2602,6 @@ def cell_area( return w - def cfa_clear_file_substitutions( - self, - ): - """Remove all of the CFA-netCDF file name substitutions. - - .. 
versionadded:: 3.15.0 - - :Returns: - - `dict` - {{Returns cfa_clear_file_substitutions}} - - **Examples** - - >>> f.cfa_clear_file_substitutions() - {} - - """ - out = super().cfa_clear_file_substitution() - - for c in self.constructs.filter_by_data(todict=True).values(): - out.update(c.cfa_clear_file_substitutions()) - - return out - - def cfa_del_file_substitution( - self, - base, - constructs=True, - ): - """Remove a CFA-netCDF file name substitution. - - .. versionadded:: 3.15.0 - - :Parameters: - - {{cfa base: `str`}} - - constructs: `bool`, optional - If True (the default) then metadata constructs also - have the file substitutions removed from them. - - :Returns: - - `dict` - {{Returns cfa_del_file_substitution}} - - **Examples** - - >>> f.cfa_del_file_substitution('base') - - """ - super().cfa_del_file_substitution(base) - - if constructs: - for c in self.constructs.filter_by_data(todict=True).values(): - c.cfa_del_file_substitution(base) - - def cfa_file_substitutions(self, constructs=True): - """Return the CFA-netCDF file name substitutions. - - .. versionadded:: 3.15.0 - - :Returns: - - `dict` - {{Returns cfa_file_substitutions}} - - **Examples** - - >>> f.cfa_file_substitutions() - {} - - """ - out = super().cfa_file_substitutions() - - if constructs: - for c in self.constructs.filter_by_data(todict=True).values(): - out.update(c.cfa_file_substitutions()) - - return out - - def del_file_location( - self, - location, - constructs=True, - ): - """Remove a file location in-place. - - All data definitions that reference files will have references - to files in the given location removed from them. - - .. versionadded:: 3.15.0 - - .. seealso:: `add_file_location`, `file_locations` - - :Parameters: - - location: `str` - The file location to remove. - - constructs: `bool`, optional - If True (the default) then metadata constructs also - have the new file location removed from them. 
- - :Returns: - - `str` - The removed location as an absolute path with no - trailing path name component separator. - - **Examples** - - >>> d.del_file_location('/data/model/') - '/data/model' - - """ - location = abspath(location).rstrip(sep) - super().del_file_location(location) - - if constructs: - for c in self.constructs.filter_by_data(todict=True).values(): - c.del_file_location(location) - - return location - - def cfa_update_file_substitutions( - self, - substitutions, - constructs=True, - ): - """Set CFA-netCDF file name substitutions. - - .. versionadded:: 3.15.0 - - :Parameters: - - {{cfa substitutions: `dict`}} - - constructs: `bool`, optional - If True (the default) then metadata constructs also - have the file substitutions set on them. - - :Returns: - - `None` - - **Examples** - - >>> f.cfa_update_file_substitutions({'base': '/data/model'}) - - """ - super().cfa_update_file_substitutions(substitutions) - - if constructs: - for c in self.constructs.filter_by_data(todict=True).values(): - c.cfa_update_file_substitutions(substitutions) - def get_domain(self): """Return the domain. @@ -3208,134 +3048,6 @@ def iscyclic(self, *identity, **filter_kwargs): return axis in self.cyclic() - @classmethod - def concatenate( - cls, fields, axis=0, cull_graph=False, relaxed_units=False, copy=True - ): - """Join a sequence of fields together. - - This is different to `cf.aggregate` because it does not account - for all metadata. For example, it assumes that the axis order is - the same in each field. - - .. versionadded:: 1.0 - - .. seealso:: `cf.aggregate`, `Data.concatenate`, - `Data.cull_graph` - - :Parameters: - - fields: (sequence of) `Field` - The fields to concatenate. - - axis: `int`, optional - The axis along which the arrays will be joined. The - default is 0. Note that scalar arrays are treated as - if they were one dimensional. - - {{cull_graph: `bool`, optional}} - - .. versionadded:: 3.14.0 - - {{relaxed_units: `bool`, optional}} - - .. 
versionadded:: 3.15.1 - - copy: `bool`, optional - If True (the default) then make copies of the - {{class}} constructs, prior to the concatenation, - thereby ensuring that the input constructs are not - changed by the concatenation process. If False then - some or all input constructs might be changed - in-place, but the concatenation process will be - faster. - - .. versionadded:: 3.15.1 - - :Returns: - - `Field` - The field generated from the concatenation of input - fields. - - """ - if isinstance(fields, cls): - return fields.copy() - - field0 = fields[0] - if copy: - out = field0.copy() - - if len(fields) == 1: - return out - - new_data = Data.concatenate( - [f.get_data(_fill_value=False) for f in fields], - axis=axis, - cull_graph=cull_graph, - relaxed_units=relaxed_units, - copy=copy, - ) - - # Change the domain axis size - dim = out.get_data_axes()[axis] - out.set_construct(DomainAxis(size=new_data.shape[axis]), key=dim) - - # Insert the concatenated data - out.set_data(new_data, set_axes=False, copy=False) - - # ------------------------------------------------------------ - # Concatenate constructs with data - # ------------------------------------------------------------ - for key, construct in field0.constructs.filter_by_data( - todict=True - ).items(): - construct_axes = field0.get_data_axes(key) - - if dim not in construct_axes: - # This construct does not span the concatenating axis - # in the first field - continue - - constructs = [construct] - for f in fields[1:]: - c = f.constructs.get(key) - if c is None: - # This field does not have this construct - constructs = None - break - - constructs.append(c) - - if not constructs: - # Not every field has this construct, so remove it - # from the output field. - out.del_construct(key) - continue - - # Still here? Then try concatenating the constructs from - # each field. 
- try: - construct = construct.concatenate( - constructs, - axis=construct_axes.index(dim), - cull_graph=cull_graph, - relaxed_units=relaxed_units, - copy=copy, - ) - except ValueError: - # Couldn't concatenate this construct, so remove it from - # the output field. - out.del_construct(key) - else: - # Successfully concatenated this construct, so insert - # it into the output field. - out.set_construct( - construct, key=key, axes=construct_axes, copy=False - ) - - return out - def weights( self, weights=True, @@ -8877,91 +8589,6 @@ def _update_cell_methods( f" Modified cell methods = {self.cell_methods()}" ) # pragma: no cover - @_inplace_enabled(default=False) - def insert_dimension( - self, axis, position=0, constructs=False, inplace=False - ): - """Insert a size 1 axis into the data array. - - .. versionadded:: 3.0.0 - - .. seealso:: `domain_axis`, `flatten`, `flip`, `squeeze`, - `transpose`, `unsqueeze` - - :Parameters: - - axis: - Select the domain axis to insert, generally defined by that - which would be selected by passing the given axis description - to a call of the field construct's `domain_axis` method. For - example, for a value of ``'X'``, the domain axis construct - returned by ``f.domain_axis('X')`` is selected. - - If *axis* is `None` then a new domain axis construct will - created for the inserted dimension. - - position: `int`, optional - Specify the position that the new axis will have in the - data array. By default the new axis has position 0, the - slowest varying position. - - constructs: `bool`, optional - If True then also insert the new axis into all - metadata constructs that don't already include it. By - default, metadata constructs are not changed. - - .. versionadded:: 3.16.1 - - {{inplace: `bool`, optional}} - - :Returns: - - `Field` or `None` - The field construct with expanded data, or `None` if the - operation was in-place. 
- - **Examples** - - >>> f = cf.example_field(0) - >>> print(f) - Field: specific_humidity (ncvar%q) - ---------------------------------- - Data : specific_humidity(latitude(5), longitude(8)) 1 - Cell methods : area: mean - Dimension coords: latitude(5) = [-75.0, ..., 75.0] degrees_north - : longitude(8) = [22.5, ..., 337.5] degrees_east - : time(1) = [2019-01-01 00:00:00] - >>> g = f.insert_dimension('T', 0) - >>> print(g) - Field: specific_humidity (ncvar%q) - ---------------------------------- - Data : specific_humidity(time(1), latitude(5), longitude(8)) 1 - Cell methods : area: mean - Dimension coords: latitude(5) = [-75.0, ..., 75.0] degrees_north - : longitude(8) = [22.5, ..., 337.5] degrees_east - : time(1) = [2019-01-01 00:00:00] - - A previously non-existent size 1 axis must be created prior to - insertion: - - >>> f.insert_dimension(None, 1, inplace=True) - >>> print(f) - Field: specific_humidity (ncvar%q) - ---------------------------------- - Data : specific_humidity(time(1), key%domainaxis3(1), latitude(5), longitude(8)) 1 - Cell methods : area: mean - Dimension coords: latitude(5) = [-75.0, ..., 75.0] degrees_north - : longitude(8) = [22.5, ..., 337.5] degrees_east - : time(1) = [2019-01-01 00:00:00] - - """ - return super().insert_dimension( - axis=axis, - position=position, - constructs=constructs, - inplace=inplace, - ) - def indices(self, *config, **kwargs): """Create indices that define a subspace of the field construct. @@ -10903,41 +10530,6 @@ def cumsum( return f - def file_locations(self, constructs=True): - """The locations of files containing parts of the data. - - Returns the locations of any files that may be required to - deliver the computed data array. - - .. versionadded:: 3.15.0 - - .. seealso:: `add_file_location`, `del_file_location` - - :Parameters: - - constructs: `bool`, optional - If True (the default) then the file locations from - metadata constructs are also returned. 
- - :Returns: - - `set` - The unique file locations as absolute paths with no - trailing path name component separator. - - **Examples** - - >>> f.file_locations() - {'/home/data1', 'file:///data2'} - - """ - out = super().file_locations() - if constructs: - for c in self.constructs.filter_by_data(todict=True).values(): - out.update(c.file_locations()) - - return out - @_deprecated_kwarg_check("i", version="3.0.0", removed_at="4.0.0") @_inplace_enabled(default=False) def flip(self, axes=None, inplace=False, i=False, **kwargs): @@ -11198,78 +10790,6 @@ def argmin(self, axis=None, unravel=False): return self.data.argmin(axis=axis, unravel=unravel) - @_deprecated_kwarg_check("i", version="3.0.0", removed_at="4.0.0") - def squeeze(self, axes=None, inplace=False, i=False, **kwargs): - """Remove size 1 axes from the data. - - By default all size 1 axes are removed, but particular size 1 axes - may be selected for removal. - - Squeezed domain axis constructs are not removed from the metadata - constructs, nor from the domain of the field construct. - - .. seealso:: `domain_axis`, `flatten`, `insert_dimension`, `flip`, - `remove_axes`, `transpose`, `unsqueeze` - - :Parameters: - - axes: (sequence of) `str` or `int`, optional - Select the domain axes to squeeze, defined by the domain - axes that would be selected by passing each given axis - description to a call of the field construct's - `domain_axis` method. For example, for a value of ``'X'``, - the domain axis construct returned by - ``f.domain_axis('X')`` is selected. - - If no axes are provided then all size 1 axes are squeezed. - - {{inplace: `bool`, optional}} - - {{i: deprecated at version 3.0.0}} - - kwargs: deprecated at version 3.0.0 - - :Returns: - - `Field` or `None` - The field construct with squeezed data, or `None` if the - operation was in-place. 
- - **Examples** - - >>> g = f.squeeze() - >>> g = f.squeeze('time') - >>> g = f.squeeze(1) - >>> g = f.squeeze(['time', 1, 'dim2']) - >>> f.squeeze(['dim2'], inplace=True) - - """ - if kwargs: - _DEPRECATION_ERROR_KWARGS( - self, "squeeze", kwargs, version="3.0.0", removed_at="4.0.0" - ) # pragma: no cover - - data_axes = self.get_data_axes() - - if axes is None: - domain_axes = self.domain_axes(todict=True) - axes = [ - axis - for axis in data_axes - if domain_axes[axis].get_size(None) == 1 - ] - else: - if isinstance(axes, (str, int)): - axes = (axes,) - - axes = [self.domain_axis(x, key=True) for x in axes] - axes = set(axes).intersection(data_axes) - - iaxes = [data_axes.index(axis) for axis in axes] - - # Squeeze the field's data array - return super().squeeze(iaxes, inplace=inplace) - @_deprecated_kwarg_check("i", version="3.0.0", removed_at="4.0.0") @_inplace_enabled(default=False) def swapaxes(self, axis0, axis1, inplace=False, i=False): @@ -11338,167 +10858,6 @@ def swapaxes(self, axis0, axis1, inplace=False, i=False): return f - @_deprecated_kwarg_check("i", version="3.0.0", removed_at="4.0.0") - def transpose( - self, - axes=None, - constructs=False, - inplace=False, - items=True, - i=False, - **kwargs, - ): - """Permute the axes of the data array. - - By default the order of the axes is reversed, but any ordering may - be specified by selecting the axes of the output in the required - order. - - By default metadata constructs are not transposed, but they may be - if the *constructs* parameter is set. - - .. seealso:: `domain_axis`, `flatten`, `insert_dimension`, `flip`, - `squeeze`, `unsqueeze` - - :Parameters: - - axes: (sequence of) `str` or `int`, optional - Select the domain axis order, defined by the domain axes - that would be selected by passing each given axis - description to a call of the field construct's - `domain_axis` method. For example, for a value of ``'X'``, - the domain axis construct returned by - ``f.domain_axis('X')`` is selected. 
- - Each dimension of the field construct's data must be - provided, or if no axes are specified then the axis order - is reversed. - - constructs: `bool`, optional - If True then metadata constructs are also transposed so - that their axes are in the same relative order as in the - transposed data array of the field. By default metadata - constructs are not altered. - - {{inplace: `bool`, optional}} - - items: deprecated at version 3.0.0 - Use the *constructs* parameter instead. - - {{i: deprecated at version 3.0.0}} - - kwargs: deprecated at version 3.0.0 - - :Returns: - - `Field` or `None` - The field construct with transposed data, or `None` if the - operation was in-place. - - **Examples** - - >>> f.ndim - 3 - >>> g = f.transpose() - >>> g = f.transpose(['time', 1, 'dim2']) - >>> f.transpose(['time', -2, 'dim2'], inplace=True) - - """ - if not items: - _DEPRECATION_ERROR_KWARGS( - self, - "transpose", - {"items": items}, - "Use keyword 'constructs' instead.", - version="3.0.0", - removed_at="4.0.0", - ) # pragma: no cover - - if kwargs: - _DEPRECATION_ERROR_KWARGS( - self, "transpose", kwargs, version="3.0.0", removed_at="4.0.0" - ) # pragma: no cover - - if axes is None: - iaxes = list(range(self.ndim - 1, -1, -1)) - else: - data_axes = self.get_data_axes(default=()) - if isinstance(axes, (str, int)): - axes = (axes,) - - axes2 = [self.domain_axis(x, key=True) for x in axes] - - if sorted(axes2) != sorted(data_axes): - raise ValueError( - f"Can't transpose {self.__class__.__name__}: " - f"Bad axis specification: {axes!r}" - ) - - iaxes = [data_axes.index(axis) for axis in axes2] - - # Transpose the field's data array - return super().transpose(iaxes, constructs=constructs, inplace=inplace) - - @_deprecated_kwarg_check("i", version="3.0.0", removed_at="4.0.0") - @_inplace_enabled(default=False) - def unsqueeze(self, inplace=False, i=False, axes=None, **kwargs): - """Insert size 1 axes into the data array. 
- - All size 1 domain axes which are not spanned by the field - construct's data are inserted. - - The axes are inserted into the slowest varying data array positions. - - .. seealso:: `flatten`, `flip`, `insert_dimension`, `squeeze`, - `transpose` - - :Parameters: - - {{inplace: `bool`, optional}} - - {{i: deprecated at version 3.0.0}} - - axes: deprecated at version 3.0.0 - - kwargs: deprecated at version 3.0.0 - - :Returns: - - `Field` or `None` - The field construct with size-1 axes inserted in its data, - or `None` if the operation was in-place. - - **Examples** - - >>> g = f.unsqueeze() - >>> f.unsqueeze(['dim2'], inplace=True) - - """ - if kwargs: - _DEPRECATION_ERROR_KWARGS( - self, "unsqueeze", kwargs, version="3.0.0", removed_at="4.0.0" - ) # pragma: no cover - - if axes is not None: - _DEPRECATION_ERROR_KWARGS( - self, - "unsqueeze", - {"axes": axes}, - "All size one domain axes missing from the data are " - "inserted. Use method 'insert_dimension' to insert an " - "individual size one domain axis.", - version="3.0.0", - removed_at="4.0.0", - ) # pragma: no cover - - f = _inplace_enabled_define_and_cleanup(self) - - size_1_axes = self.domain_axes(filter_by_size=(1,), todict=True) - for axis in set(size_1_axes).difference(self.get_data_axes()): - f.insert_dimension(axis, position=0, inplace=True) - - return f - def domain_axis_position(self, *identity, **filter_kwargs): """Return the position in the data of a domain axis construct. @@ -13553,48 +12912,6 @@ def subspace(self): """ return SubspaceField(self) - def add_file_location( - self, - location, - constructs=True, - ): - """Add a new file location in-place. - - All data definitions that reference files are additionally - referenced from the given location. - - .. versionadded:: 3.15.0 - - .. seealso:: `del_file_location`, `file_locations` - - :Parameters: - - location: `str` - The new location. 
- - constructs: `bool`, optional - If True (the default) then metadata constructs also - have the new file location added to them. - - :Returns: - - `str` - The new location as an absolute path with no trailing - path name component separator. - - **Examples** - - >>> f.add_file_location('/data/model/') - '/data/model' - - """ - location = super().add_file_location(location) - if constructs: - for c in self.constructs.filter_by_data(todict=True).values(): - c.add_file_location(location) - - return location - def section(self, axes=None, stop=None, min_step=1, **kwargs): """Return a FieldList of m dimensional sections of a Field of n dimensions, where M <= N. diff --git a/cf/functions.py b/cf/functions.py index f2e8f3f173..bc86d41bfd 100644 --- a/cf/functions.py +++ b/cf/functions.py @@ -13,7 +13,6 @@ from math import isnan from os import mkdir from os.path import abspath as _os_path_abspath -from os.path import dirname as _os_path_dirname from os.path import expanduser as _os_path_expanduser from os.path import expandvars as _os_path_expandvars from os.path import join as _os_path_join @@ -26,7 +25,7 @@ from dask.base import is_dask_collection from psutil import virtual_memory -from . import __cfa_version__, __file__, __version__ +from . import __file__, __version__ from .constants import ( CONSTANTS, OperandBoundsCombination, @@ -1359,6 +1358,8 @@ def CF(): def CFA(): """The version of the CFA conventions. + Deprecated at version NEXTVERSION and is no longer available. + This indicates which version of the CFA conventions are represented by this release of the cf package, and therefore the version can not be changed. 
@@ -1379,7 +1380,9 @@ def CFA(): '0.6.2' """ - return __cfa_version__ + _DEPRECATION_ERROR_FUNCTION( + "CFA", version="NEXTVERSION", removed_at="5.0.0" + ) # pragma: no cover # Module-level alias to avoid name clashes with function keyword @@ -2692,48 +2695,11 @@ def flat(x): yield a -def abspath(filename): - """Return a normalized absolute version of a file name. - - If `None` or a string containing URL is provided then it is - returned unchanged. - - .. seealso:: `cf.dirname`, `cf.pathjoin`, `cf.relpath` - - :Parameters: - - filename: `str` or `None` - The name of the file, or `None` - - :Returns: - - `str` - - The normalized absolutised version of *filename*, or - `None`. - - **Examples** - - >>> import os - >>> os.getcwd() - '/data/archive' - >>> cf.abspath('file.nc') - '/data/archive/file.nc' - >>> cf.abspath('..//archive///file.nc') - '/data/archive/file.nc' - >>> cf.abspath('http://data/archive/file.nc') - 'http://data/archive/file.nc' +def abspath(path, uri=None): + return cfdm.abspath(path, uri=uri) - """ - u = urlparse(filename) - scheme = u.scheme - if not scheme: - return _os_path_abspath(filename) - if scheme == "file": - return u.path - - return filename +abspath.__doc__ = cfdm.abspath.__doc__.replace("cfdm.", "cf.") def relpath(filename, start=None): @@ -2780,39 +2746,17 @@ def relpath(filename, start=None): return _os_path_relpath(filename) -def dirname(filename): - """Return the directory name of a file. - - If a string containing URL is provided then everything up to, but - not including, the last slash (/) is returned. - - .. seealso:: `cf.abspath`, `cf.pathjoin`, `cf.relpath` - - :Parameters: - - filename: `str` - The name of the file. - - :Returns: +def dirname(path, normalise=False, uri=None, isdir=False, sep=False): + return cfdm.dirname( + path, normalise=normalise, uri=uri, isdir=isdir, sep=sep + ) - `str` - The directory name. 
- **Examples** - - >>> cf.dirname('/data/archive/file.nc') - '/data/archive' - >>> cf.dirname('..//file.nc') - '..' - >>> cf.dirname('http://data/archive/file.nc') - 'http://data/archive' - - """ - u = urlparse(filename) - if u.scheme != "": - return filename.rpartition("/")[0] +dirname.__doc__ = cfdm.dirname.__doc__.replace("cfdm.", "cf.") - return _os_path_dirname(filename) +from functools import partial +dirname2 = partial(cfdm.dirname) +dirname2.__doc__ = cfdm.dirname.__doc__.replace("cfdm.", "cf.") def pathjoin(path1, path2): diff --git a/cf/mixin/fielddomain.py b/cf/mixin/fielddomain.py index 4c23d8efeb..6d78ac2027 100644 --- a/cf/mixin/fielddomain.py +++ b/cf/mixin/fielddomain.py @@ -387,6 +387,7 @@ def _indices(self, config, data_axes, ancillary_mask, kwargs): if debug: logger.debug( + f" constructs = {constructs!r}\n" f" item_axes = {item_axes!r}\n" f" keys = {keys!r}" ) # pragma: no cover @@ -405,6 +406,7 @@ def _indices(self, config, data_axes, ancillary_mask, kwargs): if debug: logger.debug( f" {n_items} 1-d constructs: {constructs!r}\n" + f" item = {item!r}\n" f" axis = {axis!r}\n" f" value = {value!r}\n" f" identity = {identity!r}" @@ -530,8 +532,8 @@ def _indices(self, config, data_axes, ancillary_mask, kwargs): index = normalize_index(index, (size,))[0] else: raise ValueError( - "Must specify a domain axis construct or a " - "construct with data for which to create indices" + "Could not find a unique construct with identity " + f"{identity!r} from which to infer the indices." ) if debug: diff --git a/cf/mixin/properties.py b/cf/mixin/properties.py index a6c4e5a13e..dd7d80fa86 100644 --- a/cf/mixin/properties.py +++ b/cf/mixin/properties.py @@ -26,9 +26,8 @@ class Properties(Container): def __new__(cls, *args, **kwargs): """Store component classes. - .. note:: If a child class requires a different component - classes than the ones defined here, then they must be redefined - in the child class. 
+ Child classes should consider redefining these component + classes. """ instance = super().__new__(cls) diff --git a/cf/mixin/propertiesdata.py b/cf/mixin/propertiesdata.py index e0ce8acd8d..d6086d0471 100644 --- a/cf/mixin/propertiesdata.py +++ b/cf/mixin/propertiesdata.py @@ -1,6 +1,5 @@ import logging from itertools import chain -from os import sep import numpy as np from cfdm import is_log_level_info @@ -17,7 +16,6 @@ _DEPRECATION_ERROR_ATTRIBUTE, _DEPRECATION_ERROR_KWARGS, _DEPRECATION_ERROR_METHOD, - abspath, default_netCDF_fillvals, ) from ..functions import equivalent as cf_equivalent @@ -1192,14 +1190,6 @@ def Units(self, value): self._custom["direction"] = None - # units = getattr(value, 'units', None) - # if units is not None: - # self.set_property('units', units) - # - # calendar = getattr(value, 'calendar', None) - # if calendar is not None: - # self.set_property('calendar', calendar) - @Units.deleter def Units(self): raise AttributeError( @@ -1604,14 +1594,6 @@ def units(self): f"{self.__class__.__name__} doesn't have CF property 'units'" ) - # value = getattr(self.Units, "units", None) - # if value is None: - # raise AttributeError( - # f"{self.__class__.__name__} doesn't have CF property 'units'" - # ) - # - # return value - @units.setter def units(self, value): self.Units = Units(value, getattr(self, "calendar", None)) @@ -1626,39 +1608,6 @@ def units(self): self.Units = Units(None, getattr(self, "calendar", None)) - def add_file_location(self, location): - """Add a new file location in-place. - - All data definitions that reference files are additionally - referenced from the given location. - - .. versionadded:: 3.15.0 - - .. seealso:: `del_file_location`, `file_locations` - - :Parameters: - - location: `str` - The new location. - - :Returns: - - `str` - The new location as an absolute path with no trailing - path name component separator. 
- - **Examples** - - >>> d.add_file_location('/data/model/') - '/data/model' - - """ - data = self.get_data(None, _fill_value=False, _units=False) - if data is not None: - return data.add_file_location(location) - - return abspath(location).rstrip(sep) - @_deprecated_kwarg_check("i", version="3.0.0", removed_at="4.0.0") @_inplace_enabled(default=False) def mask_invalid(self, inplace=False, i=False): @@ -2012,48 +1961,6 @@ def period(self, *value, **config): return old - @_inplace_enabled(default=False) - def persist(self, inplace=False): - """Persist the underlying dask array into memory. - - This turns an underlying lazy dask array into a equivalent - chunked dask array, but now with the results fully computed. - - `persist` is particularly useful when using distributed - systems, because the results will be kept in distributed - memory, rather than returned to the local process. - - **Performance** - - `persist` causes all delayed operations to be computed. - - .. versionadded:: 3.14.0 - - .. seealso:: `array`, `datetime_array`, - `dask.array.Array.persist` - - :Parameters: - - {{inplace: `bool`, optional}} - - :Returns: - - `{{class}}` or `None` - The construct with persisted data. If the operation - was in-place then `None` is returned. - - **Examples** - - >>> g = f.persist() - - """ - return self._apply_data_oper( - _inplace_enabled_define_and_cleanup(self), - "persist", - inplace=inplace, - delete_props=False, - ) - def range(self): """The absolute difference between the maximum and minimum of the data array. @@ -2577,100 +2484,6 @@ def ceil(self, inplace=False, i=False): delete_props=True, ) - def cfa_update_file_substitutions(self, substitutions): - """Set CFA-netCDF file name substitutions. - - .. 
versionadded:: 3.15.0 - - :Parameters: - - {{cfa substitutions: `dict`}} - - :Returns: - - `None` - - **Examples** - - >>> f.cfa_update_file_substitutions({'base', '/data/model'}) - - """ - data = self.get_data(None, _fill_value=False, _units=False) - if data is not None: - data.cfa_update_file_substitutions(substitutions) - - @_inplace_enabled(default=False) - def cfa_clear_file_substitutions(self, inplace=False): - """Remove all of the CFA-netCDF file name substitutions. - - .. versionadded:: 3.15.0 - - :Parameters: - - {{inplace: `bool`, optional}} - - :Returns: - - `dict` - {{Returns cfa_clear_file_substitutions}} - - **Examples** - - >>> f.cfa_clear_file_substitutions() - {} - - """ - data = self.get_data(None) - if data is None: - return {} - - return data.cfa_clear_file_substitutions({}) - - def cfa_del_file_substitution( - self, - base, - ): - """Remove a CFA-netCDF file name substitution. - - .. versionadded:: 3.15.0 - - :Parameters: - - `dict` - {{Returns cfa_del_file_substitution}} - - **Examples** - - >>> f.cfa_del_file_substitution('base') - - """ - data = self.get_data(None, _fill_value=False, _units=False) - if data is not None: - data.cfa_del_file_substitution(base) - - def cfa_file_substitutions( - self, - ): - """Return the CFA-netCDF file name substitutions. - - .. versionadded:: 3.15.0 - - :Returns: - - `dict` - {{Returns cfa_file_substitutions}} - - **Examples** - - >>> g = f.cfa_file_substitutions() - - """ - data = self.get_data(None) - if data is None: - return {} - - return data.cfa_file_substitutions({}) - def chunk(self, chunksize=None): """Partition the data array. @@ -2773,67 +2586,6 @@ def close(self): removed_at="5.0.0", ) # pragma: no cover - @classmethod - def concatenate( - cls, - variables, - axis=0, - cull_graph=False, - relaxed_units=False, - copy=True, - ): - """Join a sequence of variables together. - - .. seealso:: `Data.cull_graph` - - :Parameters: - - variables: sequence of constructs. 
- - axis: `int`, optional - - {{cull_graph: `bool`, optional}} - - .. versionadded:: 3.14.0 - - {{relaxed_units: `bool`, optional}} - - .. versionadded:: 3.15.1 - - copy: `bool`, optional - If True (the default) then make copies of the - {{class}} constructs, prior to the concatenation, - thereby ensuring that the input constructs are not - changed by the concatenation process. If False then - some or all input constructs might be changed - in-place, but the concatenation process will be - faster. - - .. versionadded:: 3.15.1 - - :Returns: - - TODO - - """ - out = variables[0] - if copy: - out = out.copy() - - if len(variables) == 1: - return out - - data = Data.concatenate( - [v.get_data(_fill_value=False) for v in variables], - axis=axis, - cull_graph=cull_graph, - relaxed_units=relaxed_units, - copy=copy, - ) - out.set_data(data, copy=False) - - return out - @_deprecated_kwarg_check("i", version="3.0.0", removed_at="4.0.0") @_inplace_enabled(default=False) def cos(self, inplace=False, i=False): @@ -3066,39 +2818,6 @@ def datum(self, *index): return data.datum(*index) - def del_file_location(self, location): - """Remove a file location in-place. - - All data definitions that reference files will have references - to files in the given location removed from them. - - .. versionadded:: 3.15.0 - - .. seealso:: `add_file_location`, `file_locations` - - :Parameters: - - location: `str` - The file location to remove. - - :Returns: - - `str` - The removed location as an absolute path with no - trailing path name component separator. 
- - **Examples** - - >>> f.del_file_location('/data/model/') - '/data/model' - - """ - data = self.get_data(None, _fill_value=False, _units=False) - if data is not None: - return data.del_file_location(location) - - return abspath(location).rstrip(sep) - @_manage_log_level_via_verbosity def equals( self, @@ -3428,34 +3147,6 @@ def convert_reference_time( calendar_years=calendar_years, ) - def file_locations(self): - """The locations of files containing parts of the data. - - Returns the locations of any files that may be required to - deliver the computed data array. - - .. versionadded:: 3.15.0 - - .. seealso:: `add_file_location`, `del_file_location` - - :Returns: - - `set` - The unique file locations as absolute paths with no - trailing path name component separator. - - **Examples** - - >>> d.file_locations() - {'/home/data1', 'file:///data2'} - - """ - data = self.get_data(None, _fill_value=False, _units=False) - if data is not None: - return data.file_locations() - - return set() - @_inplace_enabled(default=False) def filled(self, fill_value=None, inplace=False): """Replace masked elements with a fill value. @@ -3938,7 +3629,7 @@ def flip(self, axes=None, inplace=False, i=False): """Flip (reverse the direction of) data dimensions. .. seealso:: `flatten`, `insert_dimension`, `squeeze`, - `transpose`, `unsqueeze` + `transpose` :Parameters: diff --git a/cf/mixin/propertiesdatabounds.py b/cf/mixin/propertiesdatabounds.py index 6066133561..21e8b9f803 100644 --- a/cf/mixin/propertiesdatabounds.py +++ b/cf/mixin/propertiesdatabounds.py @@ -1150,45 +1150,6 @@ def dtype(self): if data is not None: del data.dtype - def add_file_location(self, location): - """Add a new file location in-place. - - All data definitions that reference files are additionally - referenced from the given location. - - .. versionadded:: 3.15.0 - - .. seealso:: `del_file_location`, `file_locations` - - :Parameters: - - location: `str` - The new location. 
- - :Returns: - - `str` - The new location as an absolute path with no trailing - path name component separator. - - **Examples** - - >>> d.add_file_location('/data/model/') - '/data/model' - - """ - location = super().add_file_location(location) - - bounds = self.get_bounds(None) - if bounds is not None: - bounds.add_file_location(location) - - interior_ring = self.get_interior_ring(None) - if interior_ring is not None: - interior_ring.add_file_location(location) - - return location - @_deprecated_kwarg_check("i", version="3.0.0", removed_at="4.0.0") @_inplace_enabled(default=False) def ceil(self, bounds=True, inplace=False, i=False): @@ -1236,121 +1197,6 @@ def ceil(self, bounds=True, inplace=False, i=False): i=i, ) - def cfa_clear_file_substitutions( - self, - ): - """Remove all of the CFA-netCDF file name substitutions. - - .. versionadded:: 3.15.0 - - :Returns: - - `dict` - {{Returns cfa_clear_file_substitutions}} - - **Examples** - - >>> f.cfa_clear_file_substitutions() - {} - - """ - out = super().cfa_clear_file_substitutions() - - bounds = self.get_bounds(None) - if bounds is not None: - out.update(bounds.cfa_clear_file_substitutions()) - - interior_ring = self.get_interior_ring(None) - if interior_ring is not None: - out.update(interior_ring.cfa_clear_file_substitutions()) - - return out - - def cfa_del_file_substitution(self, base): - """Remove a CFA-netCDF file name substitution. - - .. versionadded:: 3.15.0 - - :Parameters: - - {{cfa base: `str`}} - - :Returns: - - `dict` - {{Returns cfa_del_file_substitution}} - - **Examples** - - >>> c.cfa_del_file_substitution('base') - - """ - super().cfa_del_file_substitution(base) - - bounds = self.get_bounds(None) - if bounds is not None: - bounds.cfa_del_file_substitution(base) - - interior_ring = self.get_interior_ring(None) - if interior_ring is not None: - interior_ring.cfa_del_file_substitution(base) - - def cfa_file_substitutions(self): - """Return the CFA-netCDF file name substitutions. - - .. 
versionadded:: 3.15.0 - - :Returns: - - `dict` - {{Returns cfa_file_substitutions}} - - **Examples** - - >>> c.cfa_file_substitutions() - {} - - """ - out = super().cfa_file_substitutions() - - bounds = self.get_bounds(None) - if bounds is not None: - out.update(bounds.cfa_file_substitutions({})) - - interior_ring = self.get_interior_ring(None) - if interior_ring is not None: - out.update(interior_ring.cfa_file_substitutions({})) - - return out - - def cfa_update_file_substitutions(self, substitutions): - """Set CFA-netCDF file name substitutions. - - .. versionadded:: 3.15.0 - - :Parameters: - - {{cfa substitutions: `dict`}} - - :Returns: - - `None` - - **Examples** - - >>> c.cfa_add_file_substitutions({'base', '/data/model'}) - - """ - super().cfa_update_file_substitutions(substitutions) - - bounds = self.get_bounds(None) - if bounds is not None: - bounds.cfa_update_file_substitutions(substitutions) - - interior_ring = self.get_interior_ring(None) - if interior_ring is not None: - interior_ring.cfa_update_file_substitutions(substitutions) - def chunk(self, chunksize=None): """Partition the data array. @@ -1467,87 +1313,6 @@ def close(self): removed_at="5.0.0", ) # pragma: no cover - @classmethod - def concatenate( - cls, - variables, - axis=0, - cull_graph=False, - relaxed_units=False, - copy=True, - ): - """Join a sequence of variables together. - - .. seealso:: `Data.cull_graph` - - :Parameters: - - variables: sequence of constructs - - axis: `int`, optional - - {{cull_graph: `bool`, optional}} - - .. versionadded:: 3.14.0 - - {{relaxed_units: `bool`, optional}} - - .. versionadded:: 3.15.1 - - copy: `bool`, optional - If True (the default) then make copies of the - {{class}} objects, prior to the concatenation, thereby - ensuring that the input constructs are not changed by - the concatenation process. If False then some or all - input constructs might be changed in-place, but the - concatenation process will be faster. - - .. 
versionadded:: 3.15.1 - - :Returns: - - TODO - - """ - variable0 = variables[0] - if copy: - variable0 = variable0.copy() - - if len(variables) == 1: - return variable0 - - out = super().concatenate( - variables, - axis=axis, - cull_graph=cull_graph, - relaxed_units=relaxed_units, - copy=copy, - ) - - bounds = variable0.get_bounds(None) - if bounds is not None: - bounds = bounds.concatenate( - [v.get_bounds() for v in variables], - axis=axis, - cull_graph=cull_graph, - relaxed_units=relaxed_units, - copy=copy, - ) - out.set_bounds(bounds, copy=False) - - interior_ring = variable0.get_interior_ring(None) - if interior_ring is not None: - interior_ring = interior_ring.concatenate( - [v.get_interior_ring() for v in variables], - axis=axis, - cull_graph=cull_graph, - relaxed_units=relaxed_units, - copy=copy, - ) - out.set_interior_ring(interior_ring, copy=False) - - return out - @_deprecated_kwarg_check("i", version="3.0.0", removed_at="4.0.0") @_inplace_enabled(default=False) def cos(self, bounds=True, inplace=False, i=False): @@ -2044,40 +1809,6 @@ def get_property(self, prop, default=ValueError(), bounds=False): return super().get_property(prop, default) - def file_locations(self): - """The locations of files containing parts of the data. - - Returns the locations of any files that may be required to - deliver the computed data array. - - .. versionadded:: 3.15.0 - - .. seealso:: `add_file_location`, `del_file_location` - - :Returns: - - `set` - The unique file locations as absolute paths with no - trailing path name component separator. 
- - **Examples** - - >>> d.file_locations() - {'/home/data1', 'file:///data2'} - - """ - out = super().file_locations() - - bounds = self.get_bounds(None) - if bounds is not None: - out.update(bounds.file_locations()) - - interior_ring = self.get_interior_ring(None) - if interior_ring is not None: - out.update(interior_ring.file_locations()) - - return out - @_inplace_enabled(default=False) def filled(self, fill_value=None, bounds=True, inplace=False): """Replace masked elements with a fill value. @@ -2184,45 +1915,6 @@ def flatten(self, axes=None, inplace=False): return v - def del_file_location(self, location): - """Remove a file location in-place. - - All data definitions that reference files will have references - to files in the given location removed from them. - - .. versionadded:: 3.15.0 - - .. seealso:: `add_file_location`, `file_locations` - - :Parameters: - - location: `str` - The file location to remove. - - :Returns: - - `str` - The removed location as an absolute path with no - trailing path name component separator. - - **Examples** - - >>> c.del_file_location('/data/model/') - '/data/model' - - """ - location = super().del_file_location(location) - - bounds = self.get_bounds(None) - if bounds is not None: - bounds.del_file_location(location) - - interior_ring = self.get_interior_ring(None) - if interior_ring is not None: - interior_ring.del_file_location(location) - - return location - @_deprecated_kwarg_check("i", version="3.0.0", removed_at="4.0.0") @_inplace_enabled(default=False) def floor(self, bounds=True, inplace=False, i=False): @@ -2735,8 +2427,7 @@ def halo( def flip(self, axes=None, inplace=False, i=False): """Flip (reverse the direction of) data dimensions. - .. seealso:: `insert_dimension`, `squeeze`, `transpose`, - `unsqueeze` + .. 
seealso:: `insert_dimension`, `squeeze`, `transpose` :Parameters: @@ -3615,62 +3306,6 @@ def log(self, base=None, bounds=True, inplace=False, i=False): i=i, ) - @_deprecated_kwarg_check("i", version="3.0.0", removed_at="4.0.0") - def squeeze(self, axes=None, inplace=False, i=False): - """Remove size one axes from the data array. - - By default all size one axes are removed, but particular size one - axes may be selected for removal. Corresponding axes are also - removed from the bounds data array, if present. - - .. seealso:: `flip`, `insert_dimension`, `transpose` - - :Parameters: - - axes: (sequence of) `int` - The positions of the size one axes to be removed. By - default all size one axes are removed. Each axis is - identified by its original integer position. Negative - integers counting from the last position are allowed. - - *Parameter example:* - ``axes=0`` - - *Parameter example:* - ``axes=-2`` - - *Parameter example:* - ``axes=[2, 0]`` - - {{inplace: `bool`, optional}} - - {{i: deprecated at version 3.0.0}} - - :Returns: - - `{{class}}` or `None` - The new construct with removed data axes. If the operation - was in-place then `None` is returned. - - **Examples** - - >>> f.shape - (1, 73, 1, 96) - >>> f.squeeze().shape - (73, 96) - >>> f.squeeze(0).shape - (73, 1, 96) - >>> g = f.squeeze([-3, 2]) - >>> g.shape - (73, 96) - >>> f.bounds.shape - (1, 73, 1, 96, 4) - >>> g.shape - (73, 96, 4) - - """ - return super().squeeze(axes=axes, inplace=inplace) - @_deprecated_kwarg_check("i", version="3.0.0", removed_at="4.0.0") @_inplace_enabled(default=False) def trunc(self, bounds=True, inplace=False, i=False): @@ -3719,68 +3354,6 @@ def trunc(self, bounds=True, inplace=False, i=False): i=i, ) - # def identities(self, generator=False): - # """Return all possible identities. - # - # The identities comprise: - # - # * The "standard_name" property. - # * The "id" attribute, preceded by ``'id%'``. - # * The "cf_role" property, preceded by ``'cf_role='``. 
- # * The "axis" property, preceded by ``'axis='``. - # * The "long_name" property, preceded by ``'long_name='``. - # * All other properties (including "standard_name"), preceded by - # the property name and an ``'='``. - # * The coordinate type (``'X'``, ``'Y'``, ``'Z'`` or ``'T'``). - # * The netCDF variable name, preceded by ``'ncvar%'``. - # - # The identities of the bounds, if present, are included (with the - # exception of the bounds netCDF variable name). - # - # .. versionadded:: 3.0.0 - # - # .. seealso:: `id`, `identity` - # ODO - # :Returns: - # - # `list` - # The identities. - # - # **Examples** - # - # >>> f.properties() - # {'foo': 'bar', - # 'long_name': 'Air Temperature', - # 'standard_name': 'air_temperature'} - # >>> f.nc_get_variable() - # 'tas' - # >>> f.identities() - # ['air_temperature', - # 'long_name=Air Temperature', - # 'foo=bar', - # 'standard_name=air_temperature', - # 'ncvar%tas'] - # - # >>> f.properties() - # {} - # >>> f.bounds.properties() - # {'axis': 'Z', - # 'units': 'm'} - # >>> f.identities() - # ['axis=Z', 'units=m', 'ncvar%z'] - # - # """ - # identities = super().identities() - # - # bounds = self.get_bounds(None) - # if bounds is not None: - # identities.extend( - # [i for i in bounds.identities() if i not in identities] - # ) - # # TODO ncvar AND? - # - # return identities - @_deprecated_kwarg_check( "relaxed_identity", version="3.0.0", removed_at="4.0.0" ) @@ -4001,53 +3574,6 @@ def period(self, *value, **config): return bounds.period(*value, **config) - @_inplace_enabled(default=False) - def persist(self, bounds=True, inplace=False): - """Persist the underlying dask array into memory. - - This turns an underlying lazy dask array into a equivalent - chunked dask array, but now with the results fully computed. - - `persist` is particularly useful when using distributed - systems, because the results will be kept in distributed - memory, rather than returned to the local process. 
- - **Performance** - - `persist` causes all delayed operations to be computed. - - .. versionadded:: 3.14.0 - - .. seealso:: `array`, `datetime_array`, - `dask.array.Array.persist` - - :Parameters: - - bounds: `bool`, optional - If False then do not persist any bounds data. By - default any bound data are also persisted. - - {{inplace: `bool`, optional}} - - :Returns: - - `{{class}}` or `None` - The construct with persisted data. If the operation - was in-place then `None` is returned. - - **Examples** - - >>> g = f.persist() - - """ - return self._apply_superclass_data_oper( - _inplace_enabled_define_and_cleanup(self), - "persist", - bounds=bounds, - interior_ring=True, - inplace=inplace, - ) - @_inplace_enabled(default=False) def rechunk( self, @@ -4221,7 +3747,8 @@ def round(self, decimals=0, bounds=True, inplace=False, i=False): def roll(self, iaxis, shift, inplace=False, i=False): """Roll the data along an axis. - .. seealso:: `insert_dimension`, `flip`, `squeeze`, `transpose` + .. seealso:: `insert_dimension`, `flip`, `squeeze`, + `transpose` :Parameters: diff --git a/cf/mixin2/__init__.py b/cf/mixin2/__init__.py index 3dc304f232..0aca8bbb13 100644 --- a/cf/mixin2/__init__.py +++ b/cf/mixin2/__init__.py @@ -1,2 +1 @@ -from .cfanetcdf import CFANetCDF from .container import Container diff --git a/cf/mixin2/cfanetcdf.py b/cf/mixin2/cfanetcdf.py deleted file mode 100644 index ad1396ce71..0000000000 --- a/cf/mixin2/cfanetcdf.py +++ /dev/null @@ -1,507 +0,0 @@ -"""This class is not in the cf.mixin package because it needs to be -imported by cf.Data, and some of the other mixin classes in cf.mixin -themsleves import cf.Data, which would lead to a circular import -situation. - -""" - -from re import split - -from cfdm.mixin import NetCDFMixin - - -class CFANetCDF(NetCDFMixin): - """Mixin class for CFA-netCDF. - - .. versionadded:: 3.15.0 - - """ - - def cfa_del_aggregated_data(self): - """Remove the CFA-netCDF aggregation instruction terms. 
- - The aggregation instructions are stored in the - ``aggregation_data`` attribute of a CFA-netCDF aggregation - variable. - - .. versionadded:: 3.15.0 - - .. seealso:: `cfa_get_aggregated_data`, - `cfa_has_aggregated_data`, - `cfa_set_aggregated_data` - - :Returns: - - `dict` - The removed CFA-netCDF aggregation instruction terms. - - **Examples** - - >>> f.cfa_set_aggregated_data( - ... {'location': 'cfa_location', - ... 'file': 'cfa_file', - ... 'address': 'cfa_address', - ... 'format': 'cfa_format', - ... 'tracking_id': 'tracking_id'} - ... ) - >>> f.cfa_has_aggregated_data() - True - >>> f.cfa_get_aggregated_data() - {'location': 'cfa_location', - 'file': 'cfa_file', - 'address': 'cfa_address', - 'format': 'c ', - 'tracking_id': 'tracking_id'} - >>> f.cfa_del_aggregated_data() - {'location': 'cfa_location', - 'file': 'cfa_file', - 'address': 'cfa_address', - 'format': 'cfa_format', - 'tracking_id': 'tracking_id'} - >>> f.cfa_has_aggregated_data() - False - >>> f.cfa_del_aggregated_data() - {} - >>> f.cfa_get_aggregated_data() - {} - - """ - return self._nc_del("cfa_aggregated_data", {}).copy() - - def cfa_get_aggregated_data(self): - """Return the CFA-netCDF aggregation instruction terms. - - The aggregation instructions are stored in the - ``aggregation_data`` attribute of a CFA-netCDF aggregation - variable. - - .. versionadded:: 3.15.0 - - .. seealso:: `cfa_del_aggregated_data`, - `cfa_has_aggregated_data`, - `cfa_set_aggregated_data` - - :Returns: - - `dict` - The aggregation instruction terms and their - corresponding netCDF variable names in a dictionary - whose key/value pairs are the aggregation instruction - terms and their corresponding variable names. - - **Examples** - - >>> f.cfa_set_aggregated_data( - ... {'location': 'cfa_location', - ... 'file': 'cfa_file', - ... 'address': 'cfa_address', - ... 'format': 'cfa_format', - ... 'tracking_id': 'tracking_id'} - ... 
) - >>> f.cfa_has_aggregated_data() - True - >>> f.cfa_get_aggregated_data() - {'location': 'cfa_location', - 'file': 'cfa_file', - 'address': 'cfa_address', - 'format': 'cfa_format', - 'tracking_id': 'tracking_id'} - >>> f.cfa_del_aggregated_data() - {'location': 'cfa_location', - 'file': 'cfa_file', - 'address': 'cfa_address', - 'format': 'cfa_format', - 'tracking_id': 'tracking_id'} - >>> f.cfa_has_aggregated_data() - False - >>> f.cfa_del_aggregated_data() - {} - >>> f.cfa_get_aggregated_data() - {} - - """ - out = self._nc_get("cfa_aggregated_data", default=None) - if out is not None: - return out.copy() - - return {} - - def cfa_has_aggregated_data(self): - """Whether any CFA-netCDF aggregation instruction terms have been set. - - The aggregation instructions are stored in the - ``aggregation_data`` attribute of a CFA-netCDF aggregation - variable. - - .. versionadded:: 3.15.0 - - .. seealso:: `cfa_del_aggregated_data`, - `cfa_get_aggregated_data`, - `cfa_set_aggregated_data` - - :Returns: - - `bool` - `True` if the CFA-netCDF aggregation instruction terms - have been set, otherwise `False`. - - **Examples** - - >>> f.cfa_set_aggregated_data( - ... {'location': 'cfa_location', - ... 'file': 'cfa_file', - ... 'address': 'cfa_address', - ... 'format': 'cfa_format', - ... 'tracking_id': 'tracking_id'} - ... ) - >>> f.cfa_has_aggregated_data() - True - >>> f.cfa_get_aggregated_data() - {'location': 'cfa_location', - 'file': 'cfa_file', - 'address': 'cfa_address', - 'format': 'cfa_format', - 'tracking_id': 'tracking_id'} - >>> f.cfa_del_aggregated_data() - {'location': 'cfa_location', - 'file': 'cfa_file', - 'address': 'cfa_address', - 'format': 'cfa_format', - 'tracking_id': 'tracking_id'} - >>> f.cfa_has_aggregated_data() - False - >>> f.cfa_del_aggregated_data() - {} - >>> f.cfa_get_aggregated_data() - {} - - """ - return self._nc_has("cfa_aggregated_data") - - def cfa_set_aggregated_data(self, value): - """Set the CFA-netCDF aggregation instruction terms. 
- - The aggregation instructions are stored in the - ``aggregation_data`` attribute of a CFA-netCDF aggregation - variable. - - If there are any ``/`` (slash) characters in the netCDF - variable names then these act as delimiters for a group - hierarchy. By default, or if the name starts with a ``/`` - character and contains no others, the name is assumed to be in - the root group. - - .. versionadded:: 3.15.0 - - .. seealso:: `cfa_del_aggregated_data`, - `cfa_get_aggregated_data`, - `cfa_has_aggregated_data` - - :Parameters: - - value: `str` or `dict` - The aggregation instruction terms and their - corresponding netCDF variable names. Either a - CFA-netCDF-compliant string value of an - ``aggregated_data`` attribute, or a dictionary whose - key/value pairs are the aggregation instruction terms - and their corresponding variable names. - - :Returns: - - `None` - - **Examples** - - >>> f.cfa_set_aggregated_data( - ... {'location': 'cfa_location', - ... 'file': 'cfa_file', - ... 'address': 'cfa_address', - ... 'format': 'cfa_format', - ... 'tracking_id': 'tracking_id'} - ... ) - >>> f.cfa_has_aggregated_data() - True - >>> f.cfa_get_aggregated_data() - {'location': 'cfa_location', - 'file': 'cfa_file', - 'address': 'cfa_address', - 'format': 'cfa_format', - 'tracking_id': 'tracking_id'} - >>> f.cfa_del_aggregated_data() - {'location': 'cfa_location', - 'file': 'cfa_file', - 'address': 'cfa_address', - 'format': 'cfa_format', - 'tracking_id': 'tracking_id'} - >>> f.cfa_has_aggregated_data() - False - >>> f.cfa_del_aggregated_data() - {} - >>> f.cfa_get_aggregated_data() - {} - - """ - if value: - if isinstance(value, str): - v = split("\s+", value) - value = {term[:-1]: var for term, var in zip(v[::2], v[1::2])} - else: - # 'value' is a dictionary - value = value.copy() - - self._nc_set("cfa_aggregated_data", value) - - def cfa_clear_file_substitutions(self): - """Remove all of the CFA-netCDF file name substitutions. - - .. versionadded:: 3.15.0 - - .. 
seealso:: `cfa_del_file_substitution`, - `cfa_file_substitutions`, - `cfa_has_file_substitutions`, - `cfa_update_file_substitutions` - - :Returns: - - `dict` - {{Returns cfa_clear_file_substitutions}} - - **Examples** - - >>> f.cfa_update_file_substitutions({'base': 'file:///data/'}) - >>> f.cfa_has_file_substitutions() - True - >>> f.cfa_file_substitutions() - {'${base}': 'file:///data/'} - >>> f.cfa_update_file_substitutions({'${base2}': '/home/data/'}) - >>> f.cfa_file_substitutions() - {'${base}': 'file:///data/', '${base2}': '/home/data/'} - >>> f.cfa_update_file_substitutions({'${base}': '/new/location/'}) - >>> f.cfa_file_substitutions() - {'${base}': '/new/location/', '${base2}': '/home/data/'} - >>> f.cfa_del_file_substitution('${base}') - {'${base}': '/new/location/'} - >>> f.cfa_clear_file_substitutions() - {'${base2}': '/home/data/'} - >>> f.cfa_has_file_substitutions() - False - >>> f.cfa_file_substitutions() - {} - >>> f.cfa_clear_file_substitutions() - {} - >>> print(f.cfa_del_file_substitution('base', None)) - None - - """ - return self._nc_del("cfa_file_substitutions", {}).copy() - - def cfa_del_file_substitution(self, base): - """Remove a CFA-netCDF file name substitution. - - .. versionadded:: 3.15.0 - - .. 
seealso:: `cfa_clear_file_substitutions`, - `cfa_file_substitutions`, - `cfa_has_file_substitutions`, - `cfa_update_file_substitutions` - - :Parameters: - - {{cfa base: `str`}} - - :Returns: - - `dict` - {{Returns cfa_del_file_substitution}} - - **Examples** - - >>> f.cfa_update_file_substitutions({'base': 'file:///data/'}) - >>> f.cfa_has_file_substitutions() - True - >>> f.cfa_file_substitutions() - {'${base}': 'file:///data/'} - >>> f.cfa_update_file_substitutions({'${base2}': '/home/data/'}) - >>> f.cfa_file_substitutions() - {'${base}': 'file:///data/', '${base2}': '/home/data/'} - >>> f.cfa_update_file_substitutions({'${base}': '/new/location/'}) - >>> f.cfa_file_substitutions() - {'${base}': '/new/location/', '${base2}': '/home/data/'} - >>> f.cfa_del_file_substitution('${base}') - {'${base}': '/new/location/'} - >>> f.cfa_clear_file_substitutions() - {'${base2}': '/home/data/'} - >>> f.cfa_has_file_substitutions() - False - >>> f.cfa_file_substitutions() - {} - >>> f.cfa_clear_file_substitutions() - {} - >>> print(f.cfa_del_file_substitution('base')) - {} - - """ - if not (base.startswith("${") and base.endswith("}")): - base = f"${{{base}}}" - - subs = self.cfa_file_substitutions() - if base not in subs: - return {} - - out = {base: subs.pop(base)} - if subs: - self._nc_set("cfa_file_substitutions", subs) - else: - self._nc_del("cfa_file_substitutions", None) - - return out - - def cfa_file_substitutions(self): - """Return the CFA-netCDF file name substitutions. - - .. versionadded:: 3.15.0 - - .. seealso:: `cfa_clear_file_substitutions`, - `cfa_del_file_substitution`, - `cfa_has_file_substitutions`, - `cfa_update_file_substitutions` - :Returns: - - `dict` - The CFA-netCDF file name substitutions. 
- - **Examples** - - >>> f.cfa_update_file_substitutions({'base': 'file:///data/'}) - >>> f.cfa_has_file_substitutions() - True - >>> f.cfa_file_substitutions() - {'${base}': 'file:///data/'} - >>> f.cfa_update_file_substitutions({'${base2}': '/home/data/'}) - >>> f.cfa_file_substitutions() - {'${base}': 'file:///data/', '${base2}': '/home/data/'} - >>> f.cfa_update_file_substitutions({'${base}': '/new/location/'}) - >>> f.cfa_file_substitutions() - {'${base}': '/new/location/', '${base2}': '/home/data/'} - >>> f.cfa_del_file_substitution('${base}') - {'${base}': '/new/location/'} - >>> f.cfa_clear_file_substitutions() - {'${base2}': '/home/data/'} - >>> f.cfa_has_file_substitutions() - False - >>> f.cfa_file_substitutions() - {} - >>> f.cfa_clear_file_substitutions() - {} - >>> print(f.cfa_del_file_substitution('base', None)) - None - - """ - out = self._nc_get("cfa_file_substitutions", default=None) - if out is not None: - return out.copy() - - return {} - - def cfa_has_file_substitutions(self): - """Whether any CFA-netCDF file name substitutions have been set. - - .. versionadded:: 3.15.0 - - .. seealso:: `cfa_clear_file_substitutions`, - `cfa_del_file_substitution`, - `cfa_file_substitutions`, - `cfa_update_file_substitutions` - - :Returns: - - `bool` - `True` if any CFA-netCDF file name substitutions have - been set, otherwise `False`. 
- - **Examples** - - >>> f.cfa_update_file_substitutions({'base': 'file:///data/'}) - >>> f.cfa_has_file_substitutions() - True - >>> f.cfa_file_substitutions() - {'${base}': 'file:///data/'} - >>> f.cfa_update_file_substitutions({'${base2}': '/home/data/'}) - >>> f.cfa_file_substitutions() - {'${base}': 'file:///data/', '${base2}': '/home/data/'} - >>> f.cfa_update_file_substitutions({'${base}': '/new/location/'}) - >>> f.cfa_file_substitutions() - {'${base}': '/new/location/', '${base2}': '/home/data/'} - >>> f.cfa_del_file_substitution('${base}') - {'${base}': '/new/location/'} - >>> f.cfa_clear_file_substitutions() - {'${base2}': '/home/data/'} - >>> f.cfa_has_file_substitutions() - False - >>> f.cfa_file_substitutions() - {} - >>> f.cfa_clear_file_substitutions() - {} - >>> print(f.cfa_del_file_substitution('base', None)) - None - - """ - return self._nc_has("cfa_file_substitutions") - - def cfa_update_file_substitutions(self, substitutions): - """Set CFA-netCDF file name substitutions. - - .. versionadded:: 3.15.0 - - .. 
seealso:: `cfa_clear_file_substitutions`, - `cfa_del_file_substitution`, - `cfa_file_substitutions`, - `cfa_has_file_substitutions` - - :Parameters: - - {{cfa substitutions: `dict`}} - - :Returns: - - `None` - - **Examples** - - >>> f.cfa_update_file_substitutions({'base': 'file:///data/'}) - >>> f.cfa_has_file_substitutions() - True - >>> f.cfa_file_substitutions() - {'${base}': 'file:///data/'} - >>> f.cfa_update_file_substitutions({'${base2}': '/home/data/'}) - >>> f.cfa_file_substitutions() - {'${base}': 'file:///data/', '${base2}': '/home/data/'} - >>> f.cfa_update_file_substitutions({'${base}': '/new/location/'}) - >>> f.cfa_file_substitutions() - {'${base}': '/new/location/', '${base2}': '/home/data/'} - >>> f.cfa_del_file_substitution('${base}') - {'${base}': '/new/location/'} - >>> f.cfa_clear_file_substitutions() - {'${base2}': '/home/data/'} - >>> f.cfa_has_file_substitutions() - False - >>> f.cfa_file_substitutions() - {} - >>> f.cfa_clear_file_substitutions() - {} - >>> print(f.cfa_del_file_substitution('base', None)) - None - - """ - if not substitutions: - return - - substitutions = substitutions.copy() - for base, sub in tuple(substitutions.items()): - if not (base.startswith("${") and base.endswith("}")): - substitutions[f"${{{base}}}"] = substitutions.pop(base) - - subs = self.cfa_file_substitutions() - subs.update(substitutions) - self._nc_set("cfa_file_substitutions", subs) diff --git a/cf/read_write/netcdf/__init__.py b/cf/read_write/netcdf/__init__.py deleted file mode 100644 index 6fcf9698b2..0000000000 --- a/cf/read_write/netcdf/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -from .netcdfread import NetCDFRead -from .netcdfwrite import NetCDFWrite diff --git a/cf/read_write/netcdf/netcdfread.py b/cf/read_write/netcdf/netcdfread.py deleted file mode 100644 index 4ed7e3462d..0000000000 --- a/cf/read_write/netcdf/netcdfread.py +++ /dev/null @@ -1,674 +0,0 @@ -import cfdm -from packaging.version import Version - - -class 
NetCDFRead(cfdm.read_write.netcdf.NetCDFRead): - """A container for instantiating Fields from a netCDF dataset. - - .. versionadded:: 3.0.0 - - """ - - def _ncdimensions(self, ncvar, ncdimensions=None, parent_ncvar=None): - """Return a list of the netCDF dimensions corresponding to a - netCDF variable. - - If the variable has been compressed then the *implied - uncompressed* dimensions are returned. - - For a CFA variable, the netCDF dimensions are taken from the - 'aggregated_dimensions' netCDF attribute. - - .. versionadded:: 3.0.0 - - :Parameters: - - ncvar: `str` - The netCDF variable name. - - ncdimensions: sequence of `str`, optional - Use these netCDF dimensions, rather than retrieving them - from the netCDF variable itself. This allows the - dimensions of a domain variable to be parsed. Note that - this only parameter only needs to be used once because the - parsed domain dimensions are automatically stored in - `self.read_var['domain_ncdimensions'][ncvar]`. - - .. versionadded:: 3.11.0 - - parent_ncvar: `str`, optional - TODO - - .. versionadded:: TODO - - :Returns: - - `list` - The netCDF dimension names spanned by the netCDF variable. - - **Examples** - - >>> n._ncdimensions('humidity') - ['time', 'lat', 'lon'] - - For a variable compressed by gathering: - - dimensions: - lat=73; - lon=96; - landpoint=2381; - depth=4; - variables: - int landpoint(landpoint); - landpoint:compress="lat lon"; - float landsoilt(depth,landpoint); - landsoilt:long_name="soil temperature"; - landsoilt:units="K"; - - we would have - - >>> n._ncdimensions('landsoilt') - ['depth', 'lat', 'lon'] - - """ - - if not self._is_cfa_variable(ncvar): - return super()._ncdimensions( - ncvar, ncdimensions=ncdimensions, parent_ncvar=parent_ncvar - ) - - # Still here? Then we have a CFA variable. 
- ncdimensions = self.read_vars["variable_attributes"][ncvar][ - "aggregated_dimensions" - ].split() - - return list(map(str, ncdimensions)) - - def _get_domain_axes(self, ncvar, allow_external=False, parent_ncvar=None): - """Return the domain axis identifiers that correspond to a - netCDF variable's netCDF dimensions. - - For a CFA variable, the netCDF dimensions are taken from the - 'aggregated_dimensions' netCDF attribute. - - :Parameter: - - ncvar: `str` - The netCDF variable name. - - allow_external: `bool` - If `True` and *ncvar* is an external variable then return an - empty list. - - parent_ncvar: `str`, optional - TODO - - .. versionadded:: TODO - - :Returns: - - `list` - - **Examples** - - >>> r._get_domain_axes('areacello') - ['domainaxis0', 'domainaxis1'] - - >>> r._get_domain_axes('areacello', allow_external=True) - [] - - """ - if not self._is_cfa_variable(ncvar): - return super()._get_domain_axes( - ncvar=ncvar, - allow_external=allow_external, - parent_ncvar=parent_ncvar, - ) - - # ------------------------------------------------------------ - # Still here? Then we have a CFA-netCDF variable. - # ------------------------------------------------------------ - g = self.read_vars - - ncdimensions = g["variable_attributes"][ncvar][ - "aggregated_dimensions" - ].split() - - ncdim_to_axis = g["ncdim_to_axis"] - axes = [ - ncdim_to_axis[ncdim] - for ncdim in ncdimensions - if ncdim in ncdim_to_axis - ] - - return axes - - def _create_data( - self, - ncvar, - construct=None, - unpacked_dtype=False, - uncompress_override=None, - parent_ncvar=None, - coord_ncvar=None, - cfa_term=None, - compression_index=False, - ): - """Create data for a netCDF or CFA-netCDF variable. - - .. versionadded:: 3.0.0 - - :Parameters: - - ncvar: `str` - The name of the netCDF variable that contains the - data. See the *cfa_term* parameter. 
- - construct: optional - - unpacked_dtype: `False` or `numpy.dtype`, optional - - uncompress_override: `bool`, optional - - coord_ncvar: `str`, optional - - cfa_term: `dict`, optional - The name of a non-standard aggregation instruction - term from which to create the data. If set then - *ncvar* must be the value of the term in the - ``aggregation_data`` attribute. - - .. versionadded:: 3.15.0 - - compression_index: `bool`, optional - True if the data being created are compression - indices. - - .. versionadded:: 3.15.2 - - :Returns: - - `Data` - - """ - if not cfa_term and not self._is_cfa_variable(ncvar): - # Create data for a normal netCDF variable - data = super()._create_data( - ncvar=ncvar, - construct=construct, - unpacked_dtype=unpacked_dtype, - uncompress_override=uncompress_override, - parent_ncvar=parent_ncvar, - coord_ncvar=coord_ncvar, - ) - - # Set the CFA write status to True when there is exactly - # one dask chunk - if data.npartitions == 1: - data._cfa_set_write(True) - - return data - - # ------------------------------------------------------------ - # Still here? 
Create data for a CFA variable - # ------------------------------------------------------------ - if construct is not None: - # Remove the aggregation attributes from the construct - self.implementation.del_property( - construct, "aggregated_dimensions", None - ) - aggregated_data = self.implementation.del_property( - construct, "aggregated_data", None - ) - else: - aggregated_data = None - - if cfa_term: - term, term_ncvar = tuple(cfa_term.items())[0] - cfa_array, kwargs = self._create_cfanetcdfarray_term( - ncvar, term, term_ncvar - ) - else: - cfa_array, kwargs = self._create_cfanetcdfarray( - ncvar, - unpacked_dtype=unpacked_dtype, - coord_ncvar=coord_ncvar, - ) - - attributes = kwargs["attributes"] - data = self._create_Data( - cfa_array, - ncvar, - units=attributes.get("units"), - calendar=attributes.get("calendar"), - ) - - # Note: We don't cache elements from CFA variables, because - # the data are in fragment files which have not been - # opened and may not not even be openable (such as could - # be the case if a fragment file was on tape storage). - - # Set the CFA write status to True iff each non-aggregated - # axis has exactly one dask storage chunk - if cfa_term: - data._cfa_set_term(True) - else: - cfa_write = True - for n, numblocks in zip( - cfa_array.get_fragment_shape(), data.numblocks - ): - if n == 1 and numblocks > 1: - # Note: 'n == 1' is True for non-aggregated axes - cfa_write = False - break - - data._cfa_set_write(cfa_write) - - # Store the 'aggregated_data' attribute - if aggregated_data: - data.cfa_set_aggregated_data(aggregated_data) - - # Store the file substitutions - data.cfa_update_file_substitutions(kwargs.get("substitutions")) - - return data - - def _is_cfa_variable(self, ncvar): - """Return True if *ncvar* is a CFA aggregated variable. - - .. versionadded:: 3.14.0 - - :Parameters: - - ncvar: `str` - The name of the netCDF variable. - - :Returns: - - `bool` - Whether or not *ncvar* is a CFA variable. 
- - """ - g = self.read_vars - return ( - g["cfa"] - and ncvar in g["cfa_aggregated_data"] - and ncvar not in g["external_variables"] - ) - - def _customise_read_vars(self): - """Customise the read parameters. - - Take the opportunity to apply CFA updates to - `read_vars['variable_dimensions']` and - `read_vars['do_not_create_field']`. - - .. versionadded:: 3.0.0 - - """ - super()._customise_read_vars() - g = self.read_vars - - if not g["cfa"]: - return - - g["cfa_aggregated_data"] = {} - g["cfa_aggregation_instructions"] = {} - g["cfa_file_substitutions"] = {} - - # ------------------------------------------------------------ - # Still here? Then this is a CFA-netCDF file - # ------------------------------------------------------------ - if g["CFA_version"] < Version("0.6.2"): - raise ValueError( - f"Can't read file {g['filename']} that uses obsolete " - f"CFA conventions version CFA-{g['CFA_version']}. " - "(Note that cf version 3.13.1 can be used to read and " - "write CFA-0.4 files.)" - ) - - # Get the directory of the CFA-netCDF file being read - from os.path import abspath - from pathlib import PurePath - - g["cfa_dir"] = PurePath(abspath(g["filename"])).parent - - # Process the aggregation instruction variables, and the - # aggregated dimensions. 
- dimensions = g["variable_dimensions"] - attributes = g["variable_attributes"] - - for ncvar, attributes in attributes.items(): - if "aggregated_dimensions" not in attributes: - # This is not an aggregated variable - continue - - # Set the aggregated variable's dimensions as its - # aggregated dimensions - ncdimensions = attributes["aggregated_dimensions"].split() - dimensions[ncvar] = tuple(map(str, ncdimensions)) - - # Do not create fields/domains from aggregation - # instruction variables - parsed_aggregated_data = self._cfa_parse_aggregated_data( - ncvar, attributes.get("aggregated_data") - ) - for term_ncvar in parsed_aggregated_data.values(): - g["do_not_create_field"].add(term_ncvar) - - def _create_cfanetcdfarray( - self, - ncvar, - unpacked_dtype=False, - coord_ncvar=None, - term=None, - ): - """Create a CFA-netCDF variable array. - - .. versionadded:: 3.14.0 - - :Parameters: - - ncvar: `str` - The name of the CFA-netCDF aggregated variable. See - the *term* parameter. - - unpacked_dtype: `False` or `numpy.dtype`, optional - - coord_ncvar: `str`, optional - - term: `str`, optional - The name of a non-standard aggregation instruction - term from which to create the array. If set then - *ncvar* must be the value of the non-standard term in - the ``aggregation_data`` attribute. - - .. versionadded:: 3.15.0 - - :Returns: - - (`CFANetCDFArray`, `dict`) - The new `CFANetCDFArray` instance and dictionary of - the kwargs used to create it. - - """ - g = self.read_vars - - # Get the kwargs needed to instantiate a general netCDF array - # instance - kwargs = self._create_netcdfarray( - ncvar, - unpacked_dtype=unpacked_dtype, - coord_ncvar=coord_ncvar, - return_kwargs_only=True, - ) - - # Get rid of the incorrect shape. This will end up getting set - # correctly by the CFANetCDFArray instance. 
- kwargs.pop("shape", None) - aggregated_data = g["cfa_aggregated_data"][ncvar] - - standardised_terms = ("location", "file", "address", "format") - - instructions = [] - aggregation_instructions = {} - for t, term_ncvar in aggregated_data.items(): - if t not in standardised_terms: - continue - - aggregation_instructions[t] = g["cfa_aggregation_instructions"][ - term_ncvar - ] - instructions.append(f"{t}: {term_ncvar}") - - if t == "file": - kwargs["substitutions"] = g["cfa_file_substitutions"].get( - term_ncvar - ) - - kwargs["x"] = aggregation_instructions - kwargs["instructions"] = " ".join(sorted(instructions)) - - # Use the kwargs to create a CFANetCDFArray instance - if g["original_netCDF4"]: - array = self.implementation.initialise_CFANetCDF4Array(**kwargs) - else: - # h5netcdf - array = self.implementation.initialise_CFAH5netcdfArray(**kwargs) - - return array, kwargs - - def _create_cfanetcdfarray_term( - self, - parent_ncvar, - term, - ncvar, - ): - """Create a CFA-netCDF variable array. - - .. versionadded:: 3.14.0 - - :Parameters: - - parent_ncvar: `str` - The name of the CFA-netCDF aggregated variable. See - the *term* parameter. - - term: `str`, optional - The name of a non-standard aggregation instruction - term from which to create the array. If set then - *ncvar* must be the value of the non-standard term in - the ``aggregation_data`` attribute. - - .. versionadded:: 3.15.0 - - ncvar: `str` - The name of the CFA-netCDF aggregated variable. See - the *term* parameter. - - :Returns: - - (`CFANetCDFArray`, `dict`) - The new `CFANetCDFArray` instance and dictionary of - the kwargs used to create it. - - """ - g = self.read_vars - - # Get the kwargs needed to instantiate a general netCDF array - # instance - kwargs = self._create_netcdfarray( - ncvar, - return_kwargs_only=True, - ) - - # Get rid of the incorrect shape. This will end up getting set - # correctly by the CFANetCDFArray instance. 
- kwargs.pop("shape", None) - - instructions = [] - aggregation_instructions = {} - for t, term_ncvar in g["cfa_aggregated_data"][parent_ncvar].items(): - if t in ("location", term): - aggregation_instructions[t] = g[ - "cfa_aggregation_instructions" - ][term_ncvar] - instructions.append(f"{t}: {ncvar}") - - kwargs["term"] = term - kwargs["dtype"] = aggregation_instructions[term].dtype - kwargs["x"] = aggregation_instructions - kwargs["instructions"] = " ".join(sorted(instructions)) - - if g["original_netCDF4"]: - array = self.implementation.initialise_CFANetCDF4Array(**kwargs) - else: - # h5netcdf - array = self.implementation.initialise_CFAH5netcdfArray(**kwargs) - - return array, kwargs - - def _customise_field_ancillaries(self, parent_ncvar, f): - """Create customised field ancillary constructs. - - This method currently creates: - - * Field ancillary constructs derived from non-standardised - terms in CFA aggregation instructions. Each construct spans - the same domain axes as the parent field construct. - Constructs are never created for `Domain` instances. - - .. versionadded:: 3.15.0 - - :Parameters: - - parent_ncvar: `str` - The netCDF variable name of the parent variable. - - f: `Field` - The parent field construct. - - :Returns: - - `dict` - A mapping of netCDF variable names to newly-created - construct identifiers. - - **Examples** - - >>> n._customise_field_ancillaries('tas', f) - {} - - >>> n._customise_field_ancillaries('pr', f) - {'tracking_id': 'fieldancillary1'} - - """ - if not self._is_cfa_variable(parent_ncvar): - return {} - - # ------------------------------------------------------------ - # Still here? Then we have a CFA-netCDF variable: Loop round - # the aggregation instruction terms and convert each - # non-standard term into a field ancillary construct that - # spans the same domain axes as the parent field. 
- # ------------------------------------------------------------ - g = self.read_vars - - standardised_terms = ("location", "file", "address", "format") - - out = {} - for term, term_ncvar in g["cfa_aggregated_data"][parent_ncvar].items(): - if term in standardised_terms: - continue - - if g["variables"][term_ncvar].ndim != f.ndim: - # Can only create field ancillaries with the same rank - # as the field - continue - - # Still here? Then we've got a non-standard aggregation - # term from which we can create a field - # ancillary construct. - anc = self.implementation.initialise_FieldAncillary() - - self.implementation.set_properties( - anc, g["variable_attributes"][term_ncvar] - ) - anc.set_property("long_name", term) - - # Store the term name as the 'id' attribute. This will be - # used as the term name if the field field ancillary is - # written to disk as a non-standard CFA term. - anc.id = term - - data = self._create_data( - parent_ncvar, anc, cfa_term={term: term_ncvar} - ) - - self.implementation.set_data(anc, data, copy=False) - self.implementation.nc_set_variable(anc, term_ncvar) - - key = self.implementation.set_field_ancillary( - f, - anc, - axes=self.implementation.get_field_data_axes(f), - copy=False, - ) - out[term_ncvar] = key - - return out - - def _cfa_parse_aggregated_data(self, ncvar, aggregated_data): - """Parse a CFA-netCDF ``aggregated_data`` attribute. - - .. versionadded:: 3.15.0 - - :Parameters: - - ncvar: `str` - The netCDF variable name. - - aggregated_data: `str` or `None` - The CFA-netCDF ``aggregated_data`` attribute. - - :Returns: - - `dict` - The parsed attribute. 
- - """ - if not aggregated_data: - return {} - - g = self.read_vars - aggregation_instructions = g["cfa_aggregation_instructions"] - variable_attributes = g["variable_attributes"] - - # Loop round aggregation instruction terms - out = {} - for x in self._parse_x( - ncvar, - aggregated_data, - keys_are_variables=True, - ): - term, term_ncvar = tuple(x.items())[0] - term_ncvar = term_ncvar[0] - out[term] = term_ncvar - - if term_ncvar in aggregation_instructions: - # Already processed this term - continue - - variable = g["variables"][term_ncvar] - array = cfdm.netcdf_indexer( - variable, - mask=True, - unpack=True, - always_masked_array=False, - orthogonal_indexing=False, - copy=False, - ) - aggregation_instructions[term_ncvar] = array[...] - - if term == "file": - # Find URI substitutions that may be stored in the - # CFA file instruction variable's "substitutions" - # attribute - subs = variable_attributes[term_ncvar].get( - "substitutions", - ) - if subs: - # Convert the string "${base}: value" to the - # dictionary {"${base}": "value"} - s = subs.split() - subs = { - base[:-1]: sub for base, sub in zip(s[::2], s[1::2]) - } - - # Apply user-defined substitutions, which take - # precedence over those defined in the file. - subs.update(g["cfa_options"].get("substitutions", {})) - g["cfa_file_substitutions"][term_ncvar] = subs - - g["cfa_aggregated_data"][ncvar] = out - return out diff --git a/cf/read_write/netcdf/netcdfwrite.py b/cf/read_write/netcdf/netcdfwrite.py deleted file mode 100644 index c8bc9e254e..0000000000 --- a/cf/read_write/netcdf/netcdfwrite.py +++ /dev/null @@ -1,1043 +0,0 @@ -from os import remove - -import cfdm -import dask.array as da -import numpy as np -from cfdm.data.dask_utils import cfdm_to_memory - -from .netcdfread import NetCDFRead - - -class NetCDFWrite(cfdm.read_write.netcdf.NetCDFWrite): - """A container for writing Fields to a netCDF dataset.""" - - def __new__(cls, *args, **kwargs): - """Store the NetCDFRead class. - - .. 
note:: If a child class requires a different NetCDFRead class - than the one defined here, then it must be redefined in the - child class. - - """ - instance = super().__new__(cls) - instance._NetCDFRead = NetCDFRead - return instance - - def _unlimited(self, field, axis): - """Whether an axis is unlimited. - - If a CFA-netCDF file is being written then no axis can be - unlimited, i.e. `False` is always returned. - - .. versionadded:: 3.15.3 - - :Parameters: - - field: `Field` or `Domain` - - axis: `str` - Domain axis construct identifier, - e.g. ``'domainaxis1'``. - - :Returns: - - `bool` - - """ - if self.write_vars["cfa"]: - return False - - return super()._unlimited(field, axis) - - def _write_as_cfa(self, cfvar, construct_type, domain_axes): - """Whether or not to write as a CFA variable. - - .. versionadded:: 3.0.0 - - :Parameters: - - cfvar: cf instance that contains data - - construct_type: `str` - The construct type of the *cfvar*, or its parent if - *cfvar* is not a construct. - - .. versionadded:: 3.15.0 - - domain_axes: `None`, or `tuple` of `str` - The domain axis construct identifiers for *cfvar*. - - .. versionadded:: 3.15.0 - - :Returns: - - `bool` - True if the variable is to be written as a CFA - variable. - - """ - if construct_type is None: - # This prevents recursion whilst writing CFA-netCDF term - # variables. - return False - - g = self.write_vars - if not g["cfa"]: - return False - - data = self.implementation.get_data(cfvar, None) - if data is None: - return False - - cfa_options = g["cfa_options"] - for ctype, ndim in cfa_options.get("constructs", {}).items(): - # Write as CFA if it has an appropriate construct type ... - if ctype in ("all", construct_type): - # ... and then only if it satisfies the - # number-of-dimenions criterion and the data is - # flagged as OK. 
- if ndim is None or ndim == len(domain_axes): - cfa_get_write = data.cfa_get_write() - if not cfa_get_write and cfa_options["strict"]: - if g["mode"] == "w": - remove(g["filename"]) - - raise ValueError( - f"Can't write {cfvar!r} as a CFA-netCDF " - "aggregation variable. Possible reasons for this " - "include 1) there is more than one Dask chunk " - "per fragment, and 2) data values have been " - "changed relative to those in the fragments." - ) - - return cfa_get_write - - break - - return False - - def _customise_createVariable( - self, cfvar, construct_type, domain_axes, kwargs - ): - """Customise keyword arguments for - `netCDF4.Dataset.createVariable`. - - .. versionadded:: 3.0.0 - - :Parameters: - - cfvar: cf instance that contains data - - construct_type: `str` - The construct type of the *cfvar*, or its parent if - *cfvar* is not a construct. - - .. versionadded:: 3.15.0 - - domain_axes: `None`, or `tuple` of `str` - The domain axis construct identifiers for *cfvar*. - - .. versionadded:: 3.15.0 - - kwargs: `dict` - - :Returns: - - `dict` - Dictionary of keyword arguments to be passed to - `netCDF4.Dataset.createVariable`. - - """ - kwargs = super()._customise_createVariable( - cfvar, construct_type, domain_axes, kwargs - ) - - if self._write_as_cfa(cfvar, construct_type, domain_axes): - kwargs["dimensions"] = () - kwargs["chunksizes"] = None - - return kwargs - - def _write_data( - self, - data, - cfvar, - ncvar, - ncdimensions, - domain_axes=None, - unset_values=(), - compressed=False, - attributes={}, - construct_type=None, - ): - """Write a Data object. - - .. versionadded:: 3.0.0 - - :Parameters: - - data: `Data` - - cfvar: cf instance - - ncvar: `str` - - ncdimensions: `tuple` of `str` - - domain_axes: `None`, or `tuple` of `str` - The domain axis construct identifiers for *cfvar*. - - .. 
versionadded:: 3.15.0 - - unset_values: sequence of numbers - - attributes: `dict`, optional - The netCDF attributes for the constructs that have been - written to the file. - - construct_type: `str`, optional - The construct type of the *cfvar*, or its parent if - *cfvar* is not a construct. - - .. versionadded:: 3.15.0 - - :Returns: - - `None` - - """ - g = self.write_vars - - if self._write_as_cfa(cfvar, construct_type, domain_axes): - # -------------------------------------------------------- - # Write the data as CFA aggregated data - # -------------------------------------------------------- - self._create_cfa_data( - ncvar, - ncdimensions, - data, - cfvar, - ) - return - - # ------------------------------------------------------------ - # Still here? The write a normal (non-CFA) variable - # ------------------------------------------------------------ - if compressed: - # Write data in its compressed form - data = data.source().source() - - # Get the dask array - dx = da.asanyarray(data) - - # Convert the data type - new_dtype = g["datatype"].get(dx.dtype) - if new_dtype is not None: - dx = dx.astype(new_dtype) - - # VLEN variables can not be assigned to by masked arrays - # (https://github.com/Unidata/netcdf4-python/pull/465), so - # fill missing data in string (as opposed to char) data types. - if g["fmt"] == "NETCDF4" and dx.dtype.kind in "SU": - dx = dx.map_blocks( - self._filled_string_array, - fill_value="", - meta=np.array((), dx.dtype), - ) - - # Check for out-of-range values - if g["warn_valid"]: - if construct_type: - var = cfvar - else: - var = None - - dx = dx.map_blocks( - self._check_valid, - cfvar=var, - attributes=attributes, - meta=np.array((), dx.dtype), - ) - - da.store(dx, g["nc"][ncvar], compute=True, return_stored=False) - - def _write_dimension_coordinate( - self, f, key, coord, ncdim=None, coordinates=None - ): - """Write a coordinate variable and its bound variable to the - file. 
- - This also writes a new netCDF dimension to the file and, if - required, a new netCDF dimension for the bounds. - - .. versionadded:: 3.0.0 - - :Parameters: - - f: Field construct - - key: `str` - - coord: Dimension coordinate construct - - ncdim: `str` or `None` - The name of the netCDF dimension for this dimension - coordinate construct, including any groups - structure. Note that the group structure may be - different to the coordinate variable, and the - basename. - - .. versionadded:: 3.6.0 - - coordinates: `list` - This list may get updated in-place. - - .. versionadded:: 3.7.0 - - :Returns: - - `str` - The netCDF name of the dimension coordinate. - - """ - coord = self._change_reference_datetime(coord) - - return super()._write_dimension_coordinate( - f, key, coord, ncdim=ncdim, coordinates=coordinates - ) - - def _write_scalar_coordinate( - self, f, key, coord_1d, axis, coordinates, extra=None - ): - """Write a scalar coordinate and its bounds to the netCDF file. - - It is assumed that the input coordinate has size 1, but this is - not checked. - - If an equal scalar coordinate has already been written to the file - then the input coordinate is not written. - - .. versionadded:: 3.0.0 - - :Parameters: - - f: Field construct - - key: `str` - The coordinate construct key - - coord_1d: Coordinate construct - - axis: `str` - The field's axis identifier for the scalar coordinate. - - coordinates: `list` - - :Returns: - - coordinates: `list` - The updated list of netCDF auxiliary coordinate names. - - """ - if extra is None: - extra = {} - - coord_1d = self._change_reference_datetime(coord_1d) - - return super()._write_scalar_coordinate( - f, key, coord_1d, axis, coordinates, extra=extra - ) - - def _write_auxiliary_coordinate(self, f, key, coord, coordinates): - """Write auxiliary coordinates and bounds to the netCDF file. - - If an equal auxiliary coordinate has already been written to the - file then the input coordinate is not written. - - .. 
versionadded:: 3.0.0 - - :Parameters: - - f: Field construct - - key: `str` - - coord: Coordinate construct - - coordinates: `list` - - :Returns: - - coordinates: `list` - The list of netCDF auxiliary coordinate names updated in - place. - - **Examples:** - - >>> coordinates = _write_auxiliary_coordinate(f, 'aux2', coordinates) - - """ - coord = self._change_reference_datetime(coord) - - return super()._write_auxiliary_coordinate(f, key, coord, coordinates) - - def _change_reference_datetime(self, coord): - """Change the units of a reference date-time value. - - .. versionadded:: 3.0.0 - - :Parameters: - - coord: Coordinate instance - - :Returns: - - The coordinate construct with changed units. - - """ - reference_datetime = self.write_vars.get("reference_datetime") - if not reference_datetime or not coord.Units.isreftime: - return coord - - coord2 = coord.copy() - try: - coord2.reference_datetime = reference_datetime - except ValueError: - raise ValueError( - "Can't override coordinate reference date-time " - f"{coord.reference_datetime!r} with {reference_datetime!r}" - ) - else: - return coord2 - - def _create_cfa_data(self, ncvar, ncdimensions, data, cfvar): - """Write a CFA variable to the netCDF file. - - Any CFA private variables required will be autmatically created - and written to the file. - - .. versionadded:: 3.0.0 - - :Parameters: - - ncvar: `str` - The netCDF name for the variable. - - ncdimensions: sequence of `str` - - netcdf_attrs: `dict` - - data: `Data` - - :Returns: - - `None` - - """ - g = self.write_vars - - ndim = data.ndim - - cfa = self._cfa_aggregation_instructions(data, cfvar) - - # ------------------------------------------------------------ - # Get the location netCDF dimensions. These always start with - # "f_{size}_loc". 
- # ------------------------------------------------------------ - location_ncdimensions = [] - for size in cfa["location"].shape: - l_ncdim = f"f_{size}_loc" - if l_ncdim not in g["dimensions"]: - # Create a new location dimension - self._write_dimension(l_ncdim, None, size=size) - - location_ncdimensions.append(l_ncdim) - - location_ncdimensions = tuple(location_ncdimensions) - - # ------------------------------------------------------------ - # Get the fragment netCDF dimensions. These always start with - # "f_". - # ------------------------------------------------------------ - aggregation_address = cfa["address"] - fragment_ncdimensions = [] - for ncdim, size in zip( - ncdimensions + ("extra",) * (aggregation_address.ndim - ndim), - aggregation_address.shape, - ): - f_ncdim = f"f_{ncdim}" - if f_ncdim not in g["dimensions"]: - # Create a new fragment dimension - self._write_dimension(f_ncdim, None, size=size) - - fragment_ncdimensions.append(f_ncdim) - - fragment_ncdimensions = tuple(fragment_ncdimensions) - - # ------------------------------------------------------------ - # Write the standardised aggregation instruction variables to - # the CFA-netCDF file - # ------------------------------------------------------------ - substitutions = data.cfa_file_substitutions() - substitutions.update(g["cfa_options"].get("substitutions", {})) - - aggregated_data = data.cfa_get_aggregated_data() - aggregated_data_attr = [] - - # Location - term = "location" - data = cfa[term] - self.implementation.nc_set_hdf5_chunksizes(data, data.shape) - term_ncvar = self._cfa_write_term_variable( - data, - aggregated_data.get(term, f"cfa_{term}"), - location_ncdimensions, - ) - aggregated_data_attr.append(f"{term}: {term_ncvar}") - - # File - term = "file" - if substitutions: - # Create the "substitutions" netCDF attribute - subs = [] - for base, sub in substitutions.items(): - subs.append(f"{base}: {sub}") - - attributes = {"substitutions": " ".join(sorted(subs))} - else: - 
attributes = None - - data = cfa[term] - self.implementation.nc_set_hdf5_chunksizes(data, data.shape) - term_ncvar = self._cfa_write_term_variable( - data, - aggregated_data.get(term, f"cfa_{term}"), - fragment_ncdimensions, - attributes=attributes, - ) - aggregated_data_attr.append(f"{term}: {term_ncvar}") - - # Address - term = "address" - - # Attempt to reduce addresses to a common scalar value - u = cfa[term].unique().compressed().persist() - if u.size == 1: - cfa[term] = u.squeeze() - dimensions = () - else: - dimensions = fragment_ncdimensions - - data = cfa[term] - self.implementation.nc_set_hdf5_chunksizes(data, data.shape) - term_ncvar = self._cfa_write_term_variable( - data, - aggregated_data.get(term, f"cfa_{term}"), - dimensions, - ) - aggregated_data_attr.append(f"{term}: {term_ncvar}") - - # Format - term = "format" - - # Attempt to reduce addresses to a common scalar value - u = cfa[term].unique().compressed().persist() - if u.size == 1: - cfa[term] = u.squeeze() - dimensions = () - else: - dimensions = fragment_ncdimensions - - data = cfa[term] - self.implementation.nc_set_hdf5_chunksizes(data, data.shape) - term_ncvar = self._cfa_write_term_variable( - data, - aggregated_data.get(term, f"cfa_{term}"), - dimensions, - ) - aggregated_data_attr.append(f"{term}: {term_ncvar}") - - # ------------------------------------------------------------ - # Look for non-standard CFA terms stored as field ancillaries - # on a field and write them to the CFA-netCDF file - # ------------------------------------------------------------ - if self.implementation.is_field(cfvar): - non_standard_terms = self._cfa_write_non_standard_terms( - cfvar, fragment_ncdimensions[:ndim], aggregated_data - ) - aggregated_data_attr.extend(non_standard_terms) - - # ------------------------------------------------------------ - # Add the CFA aggregation variable attributes - # ------------------------------------------------------------ - self._write_attributes( - None, - ncvar, - 
extra={ - "aggregated_dimensions": " ".join(ncdimensions), - "aggregated_data": " ".join(sorted(aggregated_data_attr)), - }, - ) - - def _check_valid(self, array, cfvar=None, attributes=None): - """Checks for array values outside of the valid range. - - Specifically, checks array for out-of-range values, as - defined by the valid_[min|max|range] attributes. - - .. versionadded:: 3.14.0 - - :Parameters: - - array: `numpy.ndarray` - The array to be checked. - - cfvar: construct - The CF construct containing the array. - - attributes: `dict` - The variable's CF properties. - - :Returns: - - `numpy.ndarray` - The input array, unchanged. - - """ - super()._check_valid(cfvar, array, attributes) - return array - - def _filled_string_array(self, array, fill_value=""): - """Fill a string array. - - .. versionadded:: 3.14.0 - - :Parameters: - - array: `numpy.ndarray` - The `numpy` array with string (byte or unicode) data - type. - - :Returns: - - `numpy.ndarray` - The string array array with any missing data replaced - by the fill value. - - """ - if np.ma.isMA(array): - return array.filled(fill_value) - - return array - - def _write_field_ancillary(self, f, key, anc): - """Write a field ancillary to the netCDF file. - - If an equal field ancillary has already been written to the file - then it is not re-written. - - .. versionadded:: 3.15.0 - - :Parameters: - - f: `Field` - - key: `str` - - anc: `FieldAncillary` - - :Returns: - - `str` - The netCDF variable name of the field ancillary - object. If no ancillary variable was written then an - empty string is returned. - - """ - if anc.data.cfa_get_term(): - # This field ancillary construct is to be written as a - # non-standard CFA term belonging to the parent field, or - # else not at all. - return "" - - return super()._write_field_ancillary(f, key, anc) - - def _cfa_write_term_variable( - self, data, ncvar, ncdimensions, attributes=None - ): - """Write a CFA aggregation instruction term variable - - .. 
versionadded:: 3.15.0 - - :Parameters: - - data `Data` - The data to write. - - ncvar: `str` - The netCDF variable name. - - ncdimensions: `tuple` of `str` - The variable's netCDF dimensions. - - attributes: `dict`, optional - Any attributes to attach to the variable. - - :Returns: - - `str` - The netCDF variable name of the CFA term variable. - - """ - create = not self._already_in_file(data, ncdimensions) - - if create: - # Create a new CFA term variable in the file - ncvar = self._netcdf_name(ncvar) - self._write_netcdf_variable( - ncvar, ncdimensions, data, None, extra=attributes - ) - else: - # This CFA term variable has already been written to the - # file - ncvar = self.write_vars["seen"][id(data)]["ncvar"] - - return ncvar - - def _cfa_write_non_standard_terms( - self, field, fragment_ncdimensions, aggregated_data - ): - """Write a non-standard CFA aggregation instruction term variable. - - Writes non-standard CFA terms stored as field ancillaries. - - .. versionadded:: 3.15.0 - - :Parameters: - - field: `Field` - - fragment_ncdimensions: `list` of `str` - - aggregated_data: `dict` - - """ - aggregated_data_attr = [] - terms = ["location", "file", "address", "format"] - for key, field_anc in self.implementation.get_field_ancillaries( - field - ).items(): - if not field_anc.data.cfa_get_term(): - continue - - data = self.implementation.get_data(field_anc, None) - if data is None: - continue - - # Check that the field ancillary has the same axes as its - # parent field, and in the same order. - if field.get_data_axes(key) != field.get_data_axes(): - continue - - # Still here? Then this field ancillary can be represented - # by a non-standard aggregation term. - - # Then transform the data so that it spans the fragment - # dimensions, with one value per fragment. If a chunk has - # more than one unique value then the fragment's value is - # missing data. - # - # '_cfa_unique' has its own call to 'cfdm_to_memory', so - # we can set '_force_to_memory=False'. 
- dx = data.to_dask_array(_force_to_memory=False) - dx_ind = tuple(range(dx.ndim)) - out_ind = dx_ind - dx = da.blockwise( - self._cfa_unique, - out_ind, - dx, - dx_ind, - adjust_chunks={i: 1 for i in out_ind}, - dtype=dx.dtype, - ) - - # Get the non-standard term name from the field - # ancillary's 'id' attribute - term = getattr(field_anc, "id", "term") - term = term.replace(" ", "_") - name = term - n = 0 - while term in terms: - n += 1 - term = f"{name}_{n}" - - terms.append(term) - - # Create the new CFA term variable - data = type(data)(dx) - self.implementation.nc_set_hdf5_chunksizes(data, data.shape) - term_ncvar = self._cfa_write_term_variable( - data=data, - ncvar=aggregated_data.get(term, f"cfa_{term}"), - ncdimensions=fragment_ncdimensions, - ) - - aggregated_data_attr.append(f"{term}: {term_ncvar}") - - return aggregated_data_attr - - @classmethod - def _cfa_unique(cls, a): - """Return the unique value of an array. - - If there are multiple unique vales then missing data is - returned. - - .. versionadded:: 3.15.0 - - :Parameters: - - a: `numpy.ndarray` - The array. - - :Returns: - - `numpy.ndarray` - A size 1 array containing the unique value, or missing - data if there is not a unique value. - - """ - a = cfdm_to_memory(a) - - out_shape = (1,) * a.ndim - a = np.unique(a) - if np.ma.isMA(a): - # Remove a masked element - a = a.compressed() - - if a.size == 1: - return a.reshape(out_shape) - - return np.ma.masked_all(out_shape, dtype=a.dtype) - - def _cfa_aggregation_instructions(self, data, cfvar): - """Convert data to standardised CFA aggregation instruction terms. - - .. versionadded:: 3.15.0 - - :Parameters: - - data: `Data` - The data to be converted to standardised CFA - aggregation instruction terms. - - cfvar: construct - The construct that contains the *data*. - - :Returns: - - `dict` - A dictionary whose keys are the standardised CFA - aggregation instruction terms, with values of `Data` - instances containing the corresponding variables. 
- - **Examples** - - >>> n._cfa_aggregation_instructions(data, cfvar) - {'location': , - 'file': , - 'format': , - 'address': } - - """ - from os.path import abspath, join, relpath - from pathlib import PurePath - from urllib.parse import urlparse - - g = self.write_vars - - # Define the CFA file susbstitutions, giving precedence over - # those set on the Data object to those provided by the CFA - # options. - substitutions = data.cfa_file_substitutions() - substitutions.update(g["cfa_options"].get("substitutions", {})) - - absolute_paths = g["cfa_options"].get("absolute_paths") - cfa_dir = g["cfa_dir"] - - # Size of the trailing dimension - n_trailing = 0 - - aggregation_file = [] - aggregation_address = [] - aggregation_format = [] - for indices in data.chunk_indices(): - file_details = self._cfa_get_file_details(data[indices]) - - if len(file_details) != 1: - if file_details: - raise ValueError( - f"Can't write {cfvar!r} as a CFA-netCDF " - "aggregation variable: Dask chunk defined by index " - f"{indices} spans two or more fragments. " - "A possible fix for this is to set chunks=None as " - "an argument of a prior call to cf.read" - ) - - raise ValueError( - f"Can't write {cfvar!r} as a CFA-netCDF " - "aggregation variable: Dask chunk defined by index " - f"{indices} spans zero fragments." 
- ) - - filenames, addresses, formats = file_details.pop() - - if len(filenames) > n_trailing: - n_trailing = len(filenames) - - filenames2 = [] - for filename in filenames: - uri = urlparse(filename) - uri_scheme = uri.scheme - if not uri_scheme: - filename = abspath(join(cfa_dir, filename)) - if absolute_paths: - filename = PurePath(filename).as_uri() - else: - filename = relpath(filename, start=cfa_dir) - elif not absolute_paths and uri_scheme == "file": - filename = relpath(uri.path, start=cfa_dir) - - if substitutions: - # Apply the CFA file susbstitutions - for base, sub in substitutions.items(): - filename = filename.replace(sub, base) - - filenames2.append(filename) - - aggregation_file.append(tuple(filenames2)) - aggregation_address.append(addresses) - aggregation_format.append(formats) - - # Pad each value of the aggregation instruction arrays so that - # it has 'n_trailing' elements - a_shape = data.numblocks - pad = None - if n_trailing > 1: - a_shape += (n_trailing,) - - # Pad the ... - for i, (filenames, addresses, formats) in enumerate( - zip(aggregation_file, aggregation_address, aggregation_format) - ): - n = n_trailing - len(filenames) - if n: - # This chunk has fewer fragment files than some - # others, so some padding is required. - pad = ("",) * n - aggregation_file[i] = filenames + pad - aggregation_format[i] = formats + pad - if isinstance(addresses[0], int): - pad = (-1,) * n - - aggregation_address[i] = addresses + pad - - # Reshape the 1-d aggregation instruction arrays to span the - # data dimensions, plus the extra trailing dimension if there - # is one. 
- aggregation_file = np.array(aggregation_file).reshape(a_shape) - aggregation_address = np.array(aggregation_address).reshape(a_shape) - aggregation_format = np.array(aggregation_format).reshape(a_shape) - - # Mask any padded elements - if pad: - aggregation_file = np.ma.where( - aggregation_file == "", np.ma.masked, aggregation_file - ) - mask = aggregation_file.mask - aggregation_address = np.ma.array(aggregation_address, mask=mask) - aggregation_format = np.ma.array(aggregation_format, mask=mask) - - # ------------------------------------------------------------ - # Create the location array - # ------------------------------------------------------------ - dtype = np.dtype(np.int32) - if ( - max(data.to_dask_array(_force_to_memory=False).chunksize) - > np.iinfo(dtype).max - ): - dtype = np.dtype(np.int64) - - ndim = data.ndim - aggregation_location = np.ma.masked_all( - (ndim, max(a_shape[:ndim])), dtype=dtype - ) - - for i, chunks in enumerate(data.chunks): - aggregation_location[i, : len(chunks)] = chunks - - # ------------------------------------------------------------ - # Return Data objects - # ------------------------------------------------------------ - data = type(data) - return { - "location": data(aggregation_location), - "file": data(aggregation_file), - "format": data(aggregation_format), - "address": data(aggregation_address), - } - - def _customise_write_vars(self): - """Customise the write parameters. - - .. versionadded:: 3.15.0 - - """ - g = self.write_vars - - if g.get("cfa"): - from os.path import abspath - from pathlib import PurePath - - # Find the absolute directory path of the output - # CFA-netCDF file URI - g["cfa_dir"] = PurePath(abspath(g["filename"])).parent - - def _cfa_get_file_details(self, data): - """Get the details of all files referenced by the data. - - .. 
versionadded:: 3.15.0 - - :Parameters: - - data: `Data` - The data - - :Returns: - - `set` of 3-tuples - A set containing 3-tuples giving the file names, - the addresses in the files, and the file formats. If - no files are required to compute the data then - an empty `set` is returned. - - **Examples** - - >>> n._cfa_get_file_details(data): - {(('/home/file.nc',), ('tas',), ('nc',))} - - >>> n._cfa_get_file_details(data): - {(('/home/file.pp',), (34556,), ('um',))} - - """ - out = [] - out_append = out.append - for a in data.todict().values(): - try: - out_append( - (a.get_filenames(), a.get_addresses(), a.get_formats()) - ) - except AttributeError: - pass - - return set(out) diff --git a/cf/read_write/read.py b/cf/read_write/read.py index 1cf5c0bc4d..4d268dab6d 100644 --- a/cf/read_write/read.py +++ b/cf/read_write/read.py @@ -1,13 +1,13 @@ import logging import os -import tempfile from glob import glob from os.path import isdir from re import Pattern from urllib.parse import urlparse -from cfdm import is_log_level_info -from numpy.ma.core import MaskError +import cfdm +from cfdm.read_write.exceptions import DatasetTypeError +from cfdm.read_write.netcdf import NetCDFRead from ..aggregate import aggregate as cf_aggregate from ..cfimplementation import implementation @@ -16,58 +16,12 @@ from ..fieldlist import FieldList from ..functions import _DEPRECATION_ERROR_FUNCTION_KWARGS, flat from ..query import Query -from .netcdf import NetCDFRead from .um import UMRead -_cached_temporary_files = {} - -# -------------------------------------------------------------------- -# Create an implementation container and initialise a read object for -# each format -# -------------------------------------------------------------------- -_implementation = implementation() -netcdf = NetCDFRead(_implementation) -UM = UMRead(_implementation) - - logger = logging.getLogger(__name__) -@_manage_log_level_via_verbosity -def read( - files, - external=None, - verbose=None, - warnings=False, 
- ignore_read_error=False, - aggregate=True, - nfields=None, - squeeze=False, - unsqueeze=False, - fmt=None, - cdl_string=False, - select=None, - extra=None, - recursive=False, - followlinks=False, - um=None, - chunk=True, - field=None, - height_at_top_of_model=None, - select_options=None, - follow_symlinks=False, - mask=True, - unpack=True, - warn_valid=False, - dask_chunks="storage-aligned", - store_hdf5_chunks=True, - domain=False, - cfa=None, - netcdf_backend=None, - storage_options=None, - cache=True, - chunks="auto", -): +class read(cfdm.read): """Read field or domain constructs from files. The following file formats are supported: netCDF, CFA-netCDF, CDL, @@ -229,7 +183,7 @@ def read( are read. Sub-directories are not read unless the *recursive* parameter is True. If any directories contain files that are not valid datasets then an exception will - be raised, unless the *ignore_read_error* parameter is + be raised, unless the *ignore_unknown_type* parameter is True. As a special case, if the `cdl_string` parameter is set to @@ -237,111 +191,27 @@ def read( value is assumed to be a string of CDL input rather than the above. - external: (sequence of) `str`, optional - Read external variables (i.e. variables which are named by - attributes, but are not present, in the parent file given - by the *filename* parameter) from the given external - files. Ignored if the parent file does not contain a - global "external_variables" attribute. Multiple external - files may be provided, which are searched in random order - for the required external variables. - - If an external variable is not found in any external - files, or is found in multiple external files, then the - relevant metadata construct is still created, but without - any metadata or data. In this case the construct's - `!is_external` method will return `True`. 
+ {{read external: (sequence of) `str`, optional}} - *Parameter example:* - ``external='cell_measure.nc'`` + {{read extra: (sequence of) `str`, optional}} - *Parameter example:* - ``external=['cell_measure.nc']`` + {{read verbose: `int` or `str` or `None`, optional}} - *Parameter example:* - ``external=('cell_measure_A.nc', 'cell_measure_O.nc')`` - - extra: (sequence of) `str`, optional - Create extra, independent field constructs from netCDF - variables that correspond to particular types metadata - constructs. The *extra* parameter may be one, or a - sequence, of: - - ========================== =============================== - *extra* Metadata constructs - ========================== =============================== - ``'field_ancillary'`` Field ancillary constructs - ``'domain_ancillary'`` Domain ancillary constructs - ``'dimension_coordinate'`` Dimension coordinate constructs - ``'auxiliary_coordinate'`` Auxiliary coordinate constructs - ``'cell_measure'`` Cell measure constructs - ========================== =============================== - - This parameter replaces the deprecated *field* parameter. + {{read warnings: `bool`, optional}} - *Parameter example:* - To create field constructs from auxiliary coordinate - constructs: ``extra='auxiliary_coordinate'`` or - ``extra=['auxiliary_coordinate']``. + {{read file_type: (sequence of) `str`, optional}} - *Parameter example:* - To create field constructs from domain ancillary and - cell measure constructs: ``extra=['domain_ancillary', - 'cell_measure']``. - - An extra field construct created via the *extra* parameter - will have a domain limited to that which can be inferred - from the corresponding netCDF variable, but without the - connections that are defined by the parent netCDF data - variable. 
It is possible to create independent fields from - metadata constructs that do incorporate as much of the - parent field construct's domain as possible by using the - `~cf.Field.convert` method of a returned field construct, - instead of setting the *extra* parameter. - - verbose: `int` or `str` or `None`, optional - If an integer from ``-1`` to ``3``, or an equivalent string - equal ignoring case to one of: - - * ``'DISABLE'`` (``0``) - * ``'WARNING'`` (``1``) - * ``'INFO'`` (``2``) - * ``'DETAIL'`` (``3``) - * ``'DEBUG'`` (``-1``) - - set for the duration of the method call only as the minimum - cut-off for the verboseness level of displayed output (log) - messages, regardless of the globally-configured `cf.log_level`. - Note that increasing numerical value corresponds to increasing - verbosity, with the exception of ``-1`` as a special case of - maximal and extreme verbosity. - - Otherwise, if `None` (the default value), output messages will - be shown according to the value of the `cf.log_level` setting. - - Overall, the higher a non-negative integer or equivalent string - that is set (up to a maximum of ``3``/``'DETAIL'``) for - increasing verbosity, the more description that is printed to - convey how the contents of the netCDF file were parsed and - mapped to CF data model constructs. - - warnings: `bool`, optional - If True then print warnings when an output field construct - is incomplete due to structural non-compliance of the - dataset. By default such warnings are not displayed. - - ignore_read_error: `bool`, optional - If True then ignore any file which raises an IOError - whilst being read, as would be the case for an empty file, - unknown file format, etc. By default the IOError is - raised. - - fmt: `str`, optional - Only read files of the given format, ignoring all other - files. Valid formats are ``'NETCDF'`` for CF-netCDF files, - ``'CFA'`` for CFA-netCDF files, ``'UM'`` for PP or UM - fields files, and ``'CDL'`` for CDL text files. 
By default - files of any of these formats are read. + Valid file types are: + + ============ ============================================ + file type Description + ============ ============================================ + ``'netCDF'`` Binary netCDF-3 or netCDF-4 files + ``'CDL'`` Text CDL representations of netCDF files + ``'UM'`` UM fields files or PP files + ============ ============================================ + + .. versionadded:: NEXTVERSION cdl_string: `bool`, optional If True and the format to read is CDL, read a string @@ -359,6 +229,79 @@ def read( ignored as the format is assumed to be CDL, so in that case it is not necessary to also specify ``fmt='CDL'``. + um: `dict`, optional + For Met Office (UK) PP files and Met Office (UK) fields + files only, provide extra decoding instructions. This + option is ignored for input files which are not PP or + fields files. In most cases, how to decode a file is + inferrable from the file's contents, but if not then each + key/value pair in the dictionary sets a decoding option as + follows: + + * ``'fmt'``: `str` + + The file format (``'PP'`` or ``'FF'``) + + * ``'word_size'``: `int` + + The word size in bytes (``4`` or ``8``). + + * ``'endian'``: `str` + + The byte order (``'big'`` or ``'little'``). + + * ``'version'``: `int` or `str` + + The UM version to be used when decoding the + header. Valid versions are, for example, ``4.2``, + ``'6.6.3'`` and ``'8.2'``. In general, a given version + is ignored if it can be inferred from the header (which + is usually the case for files created by the UM at + versions 5.3 and later). The exception to this is when + the given version has a third element (such as the 3 in + 6.6.3), in which case any version in the header is + ignored. The default version is ``4.5``. + + * ``'height_at_top_of_model'``: `float` + + The height in metres of the upper bound of the top model + level. 
By default the height at top of model is taken from + the top level's upper bound defined by BRSVD1 in the + lookup header. If the height can't be determined from + the header, or the given height is less than or equal to + 0, then a coordinate reference system will still be + created that contains the 'a' and 'b' formula term + values, but without an atmosphere hybrid height + dimension coordinate construct. + + .. note:: A current limitation is that if pseudolevels + and atmosphere hybrid height coordinates are + defined by the same lookup headers then the + height **can't be determined + automatically**. In this case the height may + be found after reading as the maximum value of + the bounds of the domain ancillary construct + containing the 'a' formula term. The file can + then be re-read with this height as a *um* + parameter. + + If format is specified as ``'PP'`` then the word size and + byte order default to ``4`` and ``'big'`` respectively. + + This parameter replaces the deprecated *umversion* and + *height_at_top_of_model* parameters. + + *Parameter example:* + To specify that the input files are 32-bit, big-endian + PP files: ``um={'fmt': 'PP'}`` + + *Parameter example:* + To specify that the input files are 32-bit, + little-endian PP files from version 5.1 of the UM: + ``um={'fmt': 'PP', 'endian': 'little', 'version': 5.1}`` + + .. versionadded:: 1.5 + aggregate: `bool` or `dict`, optional If True (the default) or a dictionary (possibly empty) then aggregate the field constructs read in from all input @@ -374,13 +317,9 @@ def read( If *aggregate* is False then the field constructs are not aggregated. - squeeze: `bool`, optional - If True then remove size 1 axes from each field construct's - data array. + {{read squeeze: `bool`, optional}} - unsqueeze: `bool`, optional - If True then insert size 1 axes from each field - construct's domain into its data array.
+ {{read unsqueeze: `bool`, optional}} select: (sequence of) `str` or `Query` or `re.Pattern`, optional Only return field constructs whose identities match the @@ -413,465 +352,53 @@ def read( This parameter replaces the deprecated *follow_symlinks* parameter. - mask: `bool`, optional - If True (the default) then mask by convention the data of - field and metadata constructs. - - A netCDF array is masked depending on the values of any of - the netCDF attributes ``_FillValue``, ``missing_value``, - ``_Unsigned``, ``valid_min``, ``valid_max``, and - ``valid_range``. - - The masking by convention of a PP or UM array depends on - the value of BMDI in the lookup header. A value other than - ``-1.0e30`` indicates the data value to be masked. - - See - https://ncas-cms.github.io/cf-python/tutorial.html#data-mask - for details. + {{read warn_valid: `bool`, optional}} .. versionadded:: 3.4.0 - unpack: `bool`, optional - If True, the default, then unpack arrays by convention - when the data is read from disk. - - Unpacking is determined by netCDF conventions for the - following variable attributes: ``add_offset``, - ``scale_factor``, and ``_Unsigned``. - - .. versionadded:: 1.11.2.0 - - warn_valid: `bool`, optional - If True then print a warning for the presence of - ``valid_min``, ``valid_max`` or ``valid_range`` properties - on field constructs and metadata constructs that have - data. By default no such warning is issued. - - "Out-of-range" data values in the file, as defined by any - of these properties, are automatically masked by default, - which may not be as intended. See the *mask* parameter for - turning off all automatic masking. - - See - https://ncas-cms.github.io/cf-python/tutorial.html#data-mask - for details. + {{read mask: `bool`, optional}} .. versionadded:: 3.4.0 - um: `dict`, optional - For Met Office (UK) PP files and Met Office (UK) fields - files only, provide extra decoding instructions. 
This - option is ignored for input files which are not PP or - fields files. In most cases, how to decode a file is - inferrable from the file's contents, but if not then each - key/value pair in the dictionary sets a decoding option as - follows: + {{read unpack: `bool`}} - ============================ ===================================== - Key Value - ============================ ===================================== - ``'fmt'`` The file format (``'PP'`` or - ``'FF'``) - - ``'word_size'`` The word size in bytes - (``4`` or ``8``). - - ``'endian'`` The byte order (``'big'`` or - ``'little'``). - - ``'version'`` The UM version to be used - when decoding the - header. Valid versions are, - for example, ``4.2``, - ``'6.6.3'`` and - ``'8.2'``. In general, a - given version is ignored if - it can be inferred from the - header (which is usually the - case for files created by - the UM at versions 5.3 and - later). The exception to - this is when the given - version has a third element - (such as the 3 in 6.6.3), in - which case any version in - the header is ignored. - - The default version is - ``4.5``. - - ``'height_at_top_of_model'`` The height (in metres) of - the upper bound of the top - model level. By default the - height at top model is taken - from the top level's upper - bound defined by BRSVD1 in - the lookup header. If the - height can't be determined - from the header, or the - given height is less than or - equal to 0, then a - coordinate reference system - will still be created that - contains the 'a' and 'b' - formula term values, but - without an atmosphere hybrid - height dimension coordinate - construct. - - .. note:: A current - limitation is that if - pseudolevels and - atmosphere hybrid height - coordinates are defined - by same the lookup - headers then the height - **can't be determined - automatically**. 
In this - case the height may be - found after reading as - the maximum value of the - bounds of the domain - ancillary construct - containing the 'a' - formula term. The file - can then be re-read with - this height as a *um* - parameter. - ============================ ===================================== + .. versionadded:: NEXTVERSION - If format is specified as ``'PP'`` then the word size and - byte order default to ``4`` and ``'big'`` respectively. - - This parameter replaces the deprecated *umversion* and - *height_at_top_of_model* parameters. - - *Parameter example:* - To specify that the input files are 32-bit, big-endian - PP files: ``um={'fmt': 'PP'}`` - - *Parameter example:* - To specify that the input files are 32-bit, - little-endian PP files from version 5.1 of the UM: - ``um={'fmt': 'PP', 'endian': 'little', 'version': 5.1}`` - - .. versionadded:: 1.5 - - dask_chunks: `str`, `int`, `None`, or `dict`, optional - Specify the Dask chunking for data. May be one of the - following: - - * ``'storage-aligned'`` - - This is the default. The Dask chunk size in bytes will - be as close as possible the size given by - `cf.chunksize`, favouring square-like chunk shapes, - with the added restriction that the entirety of each - storage chunk must also lie within exactly one Dask - chunk. - - When reading the data from disk, an entire storage chunk - will be read once per Dask storage chunk that contains - any part of it, so ensuring that a storage chunk lies - within only one Dask chunk can increase performance by - reducing the amount of disk access (particularly when - the data are stored remotely to the client). - - For instance, consider a file variable that has an array - of 64-bit floats with shape (400, 300, 60) and a storage - chunk shape of (100, 5, 60), giving 240 storage chunks - each of size 100*5*60*8 bytes = 0.23 MiB. Then: - - * If `cf.chunksize` returned 134217728 (i.e. 
128 MiB), - then the storage-aligned Dask chunks will have shape - (400, 300, 60), giving 1 Dask chunk with size of 54.93 - MiB (compare with a Dask chunk shape of (400, 300, 60) - and size 54.93 MiB, if *dask_chunks* were ``'auto'``.) - - * If `cf.chunksize` returned 33554432 (i.e. 32 MiB), - then the storage-aligned Dask chunks will have shape - (200, 260, 60), giving 4 Dask chunks with a maximum - size of 23.80 MiB (compare with a Dask chunk shape of - (264, 264, 60) and maximum size 31.90 MiB, if - *dask_chunks* were ``'auto'``.) - - * If `cf.chunksize` returned 4194304 (i.e. 4 MiB), - then the storage-aligned Dask chunks will have shape - (100, 85, 60), giving 16 Dask chunks with a maximum - size of 3.89 MiB (compare with a Dask chunk shape of - (93, 93, 60) and maximum size 3.96 MiB, if - *dask_chunks* were ``'auto'``.) - - There are, however, some occasions when, for particular - data arrays in the file, the ``'auto'`` option will - automatically be used instead of storage-aligned Dask - chunks. This occurs when: - - * The data array in the file is stored contiguously. - - * The data array in the file is compressed by convention - (e.g. ragged array representations, compression by - gathering, subsampled coordinates, etc.). In this case - the Dask chunks are for the uncompressed data, and so - cannot be aligned with the storage chunks of the - compressed array in the file. - - * ``'storage-exact'`` - - Each Dask chunk will contain exactly one storage chunk - and each storage chunk will lie within exactly one Dask - chunk. - - For instance, consider a file variable that has an array - of 64-bit floats with shape (400, 300, 60) and a storage - chunk shape of (100, 5, 60) (i.e. there are 240 storage - chunks, each of size 0.23 MiB). Then the storage-exact - Dask chunks will also have shape (100, 5, 60) giving 240 - Dask chunks with a maximum size of 0.23 MiB. 
- - There are, however, some occasions when, for particular - data arrays in the file, the ``'auto'`` option will - automatically be used instead of storage-exact Dask - chunks. This occurs when: - - * The data array in the file is stored contiguously. - - * The data array in the file is compressed by convention - (e.g. ragged array representations, compression by - gathering, subsampled coordinates, etc.). In this case - the Dask chunks are for the uncompressed data, and so - cannot be aligned with the storage chunks of the - compressed array in the file. - - * ``auto`` - - The Dask chunk size in bytes will be as close as - possible to the size given by `cf.chunksize`, - favouring square-like chunk shapes. This may give - similar Dask chunk shapes as the ``'storage-aligned'`` - option, but without the guarantee that each storage - chunk will lie within exactly one Dask chunk. - - * A byte-size given by a `str` - - The Dask chunk size in bytes will be as close as - possible to the given byte-size, favouring square-like - chunk shapes. Any string value, accepted by the *chunks* - parameter of the `dask.array.from_array` function is - permitted. - - *Example:* - A Dask chunksize of 2 MiB may be specified as - ``'2097152'`` or ``'2 MiB'``. - - * `-1` or `None` - - There is no Dask chunking, i.e. every data array has one - Dask chunk regardless of its size. - - * Positive `int` - - Every dimension of all Dask chunks has this number of - elements. - - *Example:* - For 3-dimensional data, *dask_chunks* of `10` will - give Dask chunks with shape (10, 10, 10). - - * `dict` - - Each of dictionary key identifies a file dimension, with - a value that defines the Dask chunking for that - dimension whenever it is spanned by a data array. A file - dimension is identified in one of three ways: - - 1. the netCDF dimension name, preceded by ``ncdim%`` - (e.g. ``'ncdim%lat'``); - - 2. 
the value of the "standard name" attribute of a - CF-netCDF coordinate variable that spans the - dimension (e.g. ``'latitude'``); - - 3. the value of the "axis" attribute of a CF-netCDF - coordinate variable that spans the dimension - (e.g. ``'Y'``). - - The dictionary values may be a byte-size string, - ``'auto'``, `int` or `None`, with the same meanings as - those types for the *dask_chunks* parameter itself, but - applying only to the specified dimension. In addition, a - dictionary value may be a `tuple` or `list` of integers - that sum to the dimension size. - - Not specifying a file dimension in the dictionary is - equivalent to it being defined with a value of - ``'auto'``. - - *Example:* - ``{'T': '0.5 MiB', 'Z': 'auto', 'Y': [36, 37], 'X': - None}`` - - *Example:* - If a netCDF file contains dimensions ``time``, ``z``, - ``lat`` and ``lon``, then ``{'ncdim%time': 12, - 'ncdim%lat', None, 'ncdim%lon': None}`` will ensure - that, for all applicable data arrays, all ``time`` - axes have a `dask` chunksize of 12; all ``lat`` and - ``lon`` axes are not `dask` chunked; and all ``z`` - axes are `dask` chunked to comply as closely as - possible with the default `dask` chunk size. - - If the netCDF file also contains a ``time`` coordinate - variable with a "standard_name" attribute of - ``'time'`` and an "axis" attribute of ``'T'``, then - the same `dask` chunking could be specified with - either ``{'time': 12, 'ncdim%lat', None, 'ncdim%lon': - None}`` or ``{'T': 12, 'ncdim%lat', None, 'ncdim%lon': - None}``. - - .. versionadded:: 1.11.2.0 - - store_hdf5_chunks: `bool`, optional - If True (the default) then store the HDF5 chunking - strategy for each returned data array. The HDF5 chunking - strategy is then accessible via an object's - `nc_hdf5_chunksizes` method. When the HDF5 chunking - strategy is stored, it will be used when the data is - written to a new netCDF4 file with `cf.write` (unless - the strategy was modified prior to writing). 
- - If False, or if the file being read is not in netCDF4 - format, then no HDF5 chunking strategy is stored. - (i.e. an `nc_hdf5_chunksizes` method will return `None` - for all `Data` objects). In this case, when the data is - written to a new netCDF4 file, the HDF5 chunking strategy - will be determined by `cf.write`. - - See the `cf.write` *hdf5_chunks* parameter for details - on how the HDF5 chunking strategy is determined at the - time of writing. - - .. versionadded:: 1.11.2.0 - - domain: `bool`, optional - If True then return only the domain constructs that are - explicitly defined by CF-netCDF domain variables, ignoring - all CF-netCDF data variables. By default only the field - constructs defined by CF-netCDF data variables are - returned. - - CF-netCDF domain variables are only defined from CF-1.9, - so older datasets automatically contain no CF-netCDF - domain variables. - - The unique domain constructs of the dataset are easily - found with the `cf.unique_constructs` function. For - example:: - - >>> d = cf.read('file.nc', domain=True) - >>> ud = cf.unique_constructs(d) - >>> f = cf.read('file.nc') - >>> ufd = cf.unique_constructs(x.domain for x in f) - - Domain constructs can not be read from UM or PP datasets. + {{read domain: `bool`, optional}} .. versionadded:: 3.11.0 - cfa: `dict`, optional - Configure the reading of CFA-netCDF files. The dictionary - may have any subset of the following key/value pairs to - override the information read from the file: - - * ``'substitutions'``: `dict` - - A dictionary whose key/value pairs define text - substitutions to be applied to the fragment file - names. Each key may be specified with or without the - ``${*}`` syntax (where `*` represents any amount of any - characters). For instance, ``{'substitution': - 'replacement'}`` and ``{'${substitution}': 'replacement'}``' - are equivalent. 
The substitutions are used in - conjunction with, and take precedence over, any that are - stored in the CFA-netCDF file by the ``substitutions`` - attribute of the ``file`` fragement array variable. - - *Example:* - ``{'replacement': 'file:///data/'}`` + {{read netcdf_backend: `None` or (sequence of) `str`, optional}} - .. versionadded:: 3.15.0 - - netcdf_backend: `None` or `str`, optional - Specify which library to use for reading netCDF files. By - default, or if `None`, then the first one of `netCDF4` and - `h5netcdf` to successfully open the file netCDF file is - used. Setting *netcdf_backend* to one of ``'netCDF4'`` and - ``'h5netcdf'`` will force the use of that library. - - .. note:: The *netcdf_backend* parameter does not affect - the opening of netCDF fragment files that define - the data of aggregation variables. For these, it - is always the case that the first one of - `netCDF4` and `h5netcdf` to successfully open - the file is used. + .. versionadded:: NEXTVERSION - .. versionadded:: 1.11.2.0 + {{read storage_options: `dict` or `None`, optional}} - storage_options: `dict` or `None`, optional - Pass parameters to the backend file system driver, such as - username, password, server, port, etc. How the storage - options are interpreted depends on the location of the - file: + .. versionadded:: NEXTVERSION - **Local File System** + {{read cache: `bool`, optional}} - Storage options are ignored for local files. + .. versionadded:: NEXTVERSION - **HTTP(S)** + {{read dask_chunks: `str`, `int`, `None`, or `dict`, optional}} - Storage options are ignored for files available across the - network via OPeNDAP. + .. versionadded:: NEXTVERSION - **S3-compatible services** + {{read store_dataset_chunks: `bool`, optional}} - The backend used is `s3fs`, and the storage options are - used to initialise an `s3fs.S3FileSystem` file system - object. By default, or if `None`, then *storage_options* - is taken as ``{}``. + .. 
versionadded:: NEXTVERSION - If the ``'endpoint_url'`` key is not in *storage_options*, - nor in a dictionary defined by the ``'client_kwargs'`` key - (both of which are the case when *storage_options* is - `None`), then one will be automatically inserted for - accessing an S3 file. For example, for a file name of - ``'s3://store/data/file.nc'``, an ``'endpoint_url'`` key - with value ``'https://store'`` would be created. To - disable this, set ``'endpoint_url'`` to `None`. + {{read cfa: `dict`, optional}} - *Parameter example:* - For a file name of ``'s3://store/data/file.nc'``, the - following are equivalent: ``None``, ``{}``, - ``{'endpoint_url': 'https://store'}``, and - ``{'client_kwargs': {'endpoint_url': 'https://store'}}`` + .. versionadded:: 3.15.0 - *Parameter example:* - ``{'key': 'scaleway-api-key...', 'secret': - 'scaleway-secretkey...', 'endpoint_url': - 'https://s3.fr-par.scw.cloud', 'client_kwargs': - {'region_name': 'fr-par'}}`` + {{read cfa_write: sequence of `str`, optional}} - .. versionadded:: 1.11.2.0 + .. versionadded:: NEXTVERSION - cache: `bool`, optional - If True, the default, then cache the first and last array - elements of metadata constructs (not field constructs) for - fast future access. In addition, the second and - penultimate array elements will be cached from coordinate - bounds when there are two bounds per cell. For remote - data, setting *cache* to False may speed up the parsing of - the file. + {{read to_memory: (sequence of) `str`, optional}} - .. versionadded:: 1.11.2.0 + .. versionadded:: NEXTVERSION umversion: deprecated at version 3.0.0 Use the *um* parameter instead. @@ -891,15 +418,20 @@ def read( chunk: deprecated at version 3.14.0 Use the *dask_chunks* parameter instead. - chunks: deprecated at version 1.11.2.0 + chunks: deprecated at version NEXTVERSION Use the *dask_chunks* parameter instead. + fmt: deprecated at version NEXTVERSION + Use the *file_type* parameter instead. 
+ + ignore_read_error: deprecated at version NEXTVERSION + Use the *file_type* parameter instead. + :Returns: `FieldList` or `DomainList` The field or domain constructs found in the input dataset(s). The list may be empty. - **Examples** >>> x = cf.read('file.nc') @@ -940,577 +472,389 @@ def read( """ - if field: - _DEPRECATION_ERROR_FUNCTION_KWARGS( - "cf.read", - {"field": field}, - "Use keyword 'extra' instead", - removed_at="4.0.0", - ) # pragma: no cover - - if select_options: - _DEPRECATION_ERROR_FUNCTION_KWARGS( - "cf.read", {"select_options": select_options}, removed_at="4.0.0" - ) # pragma: no cover - - if follow_symlinks: - _DEPRECATION_ERROR_FUNCTION_KWARGS( - "cf.read", - {"follow_symlinks": follow_symlinks}, - "Use keyword 'followlinks' instead.", - removed_at="4.0.0", - ) # pragma: no cover - - if height_at_top_of_model is not None: - _DEPRECATION_ERROR_FUNCTION_KWARGS( - "cf.read", - {"height_at_top_of_model": height_at_top_of_model}, - "Use keyword 'um' instead.", - removed_at="4.0.0", - ) # pragma: no cover - - if chunk is not True: - _DEPRECATION_ERROR_FUNCTION_KWARGS( - "cf.read", - {"chunk": chunk}, - "Use keyword 'dask_chunks' instead.", - version="3.14.0", - removed_at="5.0.0", - ) # pragma: no cover - - if chunks != "auto": - _DEPRECATION_ERROR_FUNCTION_KWARGS( - "cf.read", - {"chunk": chunk}, - "Use keyword 'dask_chunks' instead.", - version="3.14.0", - removed_at="5.0.0", - ) # pragma: no cover - - # Parse select - if isinstance(select, (str, Query, Pattern)): - select = (select,) - - # Manage input parameters where contradictions are possible: - if cdl_string and fmt: - if fmt == "CDL": - if is_log_level_info(logger): - logger.info( - "It is not necessary to set the cf.read fmt as 'CDL' when " - "cdl_string is True, since that implies CDL is the format." - ) # pragma: no cover - else: - raise ValueError( - "cdl_string can only be True when the format is CDL, though " - "fmt is ignored in that case so there is no need to set it." 
- ) - if squeeze and unsqueeze: - raise ValueError("squeeze and unsqueeze can not both be True") - if follow_symlinks and not recursive: - raise ValueError( - f"Can't set follow_symlinks={follow_symlinks!r} " - f"when recursive={recursive!r}" - ) - - info = is_log_level_info(logger) - - # Parse the 'cfa' parameter - if cfa is None: - cfa_options = {} - else: - cfa_options = cfa.copy() - keys = ("substitutions",) - if not set(cfa_options).issubset(keys): - raise ValueError( - "Invalid dictionary key to the 'cfa' parameter." - f"Valid keys are {keys}. Got: {cfa_options}" - ) - if "substitutions" in cfa_options: - substitutions = cfa_options["substitutions"].copy() - for base, sub in tuple(substitutions.items()): - if not (base.startswith("${") and base.endswith("}")): - # Add missing ${...} - substitutions[f"${{{base}}}"] = substitutions.pop(base) - else: - substitutions = {} - - cfa_options["substitutions"] = substitutions - - # Initialise the output list of fields/domains - if domain: - out = DomainList() - else: - out = FieldList() - - if isinstance(aggregate, dict): - aggregate_options = aggregate.copy() - aggregate = True - else: - aggregate_options = {} - - aggregate_options["copy"] = False - - # Parse the extra parameter - if extra is None: - extra = () - elif isinstance(extra, str): - extra = (extra,) - - ftypes = set() - - # Count the number of fields (in all files) and the number of - # files - field_counter = -1 - file_counter = 0 - - if cdl_string: - files2 = [] - - # 'files' input may be a single string or a sequence of them and to - # handle both cases it is easiest to convert former to a one-item seq. 
- if isinstance(files, str): - files = [files] - - for cdl_file in files: - c = tempfile.NamedTemporaryFile( - mode="w", - dir=tempfile.gettempdir(), - prefix="cf_", - suffix=".cdl", - ) + implementation = implementation() + + @_manage_log_level_via_verbosity + def __new__( + cls, + files, + external=None, + verbose=None, + warnings=False, + aggregate=True, + nfields=None, + squeeze=False, + unsqueeze=False, + file_type=None, + cdl_string=False, + select=None, + extra=None, + recursive=False, + followlinks=False, + um=None, + chunk=True, + field=None, + height_at_top_of_model=None, + select_options=None, + follow_symlinks=False, + mask=True, + unpack=True, + warn_valid=False, + dask_chunks="storage-aligned", + store_dataset_chunks=True, + domain=False, + cfa=None, + cfa_write=None, + to_memory=None, + netcdf_backend=None, + storage_options=None, + cache=True, + chunks="auto", + ignore_read_error=False, + fmt=None, + ): + """Read field or domain constructs from a dataset.""" + if field: + _DEPRECATION_ERROR_FUNCTION_KWARGS( + "cf.read", + {"field": field}, + "Use keyword 'extra' instead", + removed_at="4.0.0", + ) # pragma: no cover - c_name = c.name - with open(c_name, "w") as f: - f.write(cdl_file) + if select_options: + _DEPRECATION_ERROR_FUNCTION_KWARGS( + "cf.read", + {"select_options": select_options}, + removed_at="4.0.0", + ) # pragma: no cover - # ---------------------------------------------------------------- - # Need to cache the TemporaryFile object so that it doesn't get - # deleted too soon - # ---------------------------------------------------------------- - _cached_temporary_files[c_name] = c + if follow_symlinks: + _DEPRECATION_ERROR_FUNCTION_KWARGS( + "cf.read", + {"follow_symlinks": follow_symlinks}, + "Use keyword 'followlink' instead.", + removed_at="4.0.0", + ) # pragma: no cover - files2.append(c.name) + if height_at_top_of_model is not None: + _DEPRECATION_ERROR_FUNCTION_KWARGS( + "cf.read", + {"height_at_top_of_model": 
height_at_top_of_model}, + "Use keyword 'um' instead.", + removed_at="4.0.0", + ) # pragma: no cover - files = files2 + if chunk is not True: + _DEPRECATION_ERROR_FUNCTION_KWARGS( + "cf.read", + {"chunk": chunk}, + "Use keyword 'dask_chunks' instead.", + version="3.14.0", + removed_at="5.0.0", + ) # pragma: no cover - for file_glob in flat(files): - # Expand variables - file_glob = os.path.expanduser(os.path.expandvars(file_glob)) + if chunks != "auto": + _DEPRECATION_ERROR_FUNCTION_KWARGS( + "cf.read", + {"chunk": chunk}, + "Use keyword 'dask_chunks' instead.", + version="3.14.0", + removed_at="5.0.0", + ) # pragma: no cover - scheme = urlparse(file_glob).scheme - if scheme in ("https", "http", "s3"): - # Do not glob a remote URL - files2 = (file_glob,) - else: - # Glob files on disk - files2 = glob(file_glob) - - if not files2 and not ignore_read_error: - open(file_glob, "rb") - - files3 = [] - for x in files2: - if isdir(x): - # Walk through directories, possibly recursively - for path, subdirs, filenames in os.walk( - x, followlinks=followlinks - ): - files3.extend(os.path.join(path, f) for f in filenames) - if not recursive: - break - else: - files3.append(x) - - files2 = files3 - - for filename in files2: - if info: - logger.info(f"File: {filename}") # pragma: no cover + if fmt is not None: + _DEPRECATION_ERROR_FUNCTION_KWARGS( + "cf.read", + {"fmt": fmt}, + "Use keyword 'file_type' instead.", + version="NEXTVERSION", + removed_at="5.0.0", + ) # pragma: no cover - if um: - ftype = "UM" - else: - try: - ftype = file_type(filename) - except Exception as error: - if not ignore_read_error: - raise ValueError(error) - - logger.warning(f"WARNING: {error}") # pragma: no cover - continue - - if domain and ftype == "UM": - raise ValueError( - f"Can't read PP/UM file {filename} into domain constructs" - ) - - ftypes.add(ftype) - - # -------------------------------------------------------- - # Read the file - # -------------------------------------------------------- - 
file_contents = _read_a_file( - filename, - ftype=ftype, - external=external, - ignore_read_error=ignore_read_error, - verbose=verbose, - warnings=warnings, - aggregate=aggregate, - aggregate_options=aggregate_options, - selected_fmt=fmt, - um=um, - extra=extra, - height_at_top_of_model=height_at_top_of_model, - dask_chunks=dask_chunks, - store_hdf5_chunks=store_hdf5_chunks, - mask=mask, - unpack=unpack, - warn_valid=warn_valid, - select=select, - domain=domain, - cfa_options=cfa_options, - netcdf_backend=netcdf_backend, - storage_options=storage_options, - cache=cache, - ) + if ignore_read_error: + _DEPRECATION_ERROR_FUNCTION_KWARGS( + "cf.read", + {"ignore_read_error": ignore_read_error}, + "Use keyword 'file_type' instead.", + version="NEXTVERSION", + removed_at="5.0.0", + ) # pragma: no cover - # -------------------------------------------------------- - # Select matching fields (not from UM files, yet) - # -------------------------------------------------------- - if select and ftype != "UM": - file_contents = file_contents.select_by_identity(*select) + info = cfdm.is_log_level_info(logger) - # -------------------------------------------------------- - # Add this file's contents to that already read from other - # files - # -------------------------------------------------------- - out.extend(file_contents) + cls.netcdf = NetCDFRead(cls.implementation) + cls.um = UMRead(cls.implementation) - field_counter = len(out) - file_counter += 1 + # ------------------------------------------------------------ + # Parse the 'select' keyword parameter + # ------------------------------------------------------------ + if isinstance(select, (str, Query, Pattern)): + select = (select,) - if info: - logger.info( - f"Read {field_counter} field{_plural(field_counter)} from " - f"{file_counter} file{_plural(file_counter)}" - ) # pragma: no cover + # ------------------------------------------------------------ + # Parse the 'aggregate' keyword parameter + # 
------------------------------------------------------------ + if isinstance(aggregate, dict): + aggregate_options = aggregate.copy() + aggregate = True + else: + aggregate_options = {} + + aggregate_options["copy"] = False + + # ------------------------------------------------------------ + # Parse the 'file_type' keyword parameter + # ------------------------------------------------------------ + netCDF_file_types = set(("netCDF", "CDL")) + UM_file_types = set(("UM",)) + if file_type is not None: + if isinstance(file_type, str): + file_type = (file_type,) + + file_type = set(file_type) + + # ------------------------------------------------------------ + # Parse the 'um' keyword parameter + # ------------------------------------------------------------ + if not um: + um = {} + + # ------------------------------------------------------------ + # Parse the 'cdl_string' keyword parameter + # ------------------------------------------------------------ + if cdl_string and file_type is not None: + raise ValueError("Can't set file_type when cdl_string=True") + + # ------------------------------------------------------------ + # Parse the 'follow_symlinks' and 'recursive' keyword + # parameters + # ------------------------------------------------------------ + if follow_symlinks and not recursive: + raise ValueError( + f"Can't set follow_symlinks={follow_symlinks!r} " + f"when recursive={recursive!r}" + ) - # ---------------------------------------------------------------- - # Aggregate the output fields/domains - # ---------------------------------------------------------------- - if aggregate and len(out) > 1: - org_len = len(out) # pragma: no cover + # Initialise the output list of fields/domains + if domain: + out = DomainList() + else: + out = FieldList() + + # Count the number of fields (in all files) and the number of + # files + field_counter = -1 + file_counter = 0 + + if cdl_string: + if isinstance(files, str): + files = (files,) + + files = [ + 
NetCDFRead.string_to_cdl(cdl_string) for cdl_string in files + ] + file_type = set(("CDL",)) + + for file_glob in flat(files): + # Expand variables + file_glob = os.path.expanduser(os.path.expandvars(file_glob)) + + scheme = urlparse(file_glob).scheme + if scheme in ("https", "http", "s3"): + # Do not glob a remote URL + files2 = (file_glob,) + else: + # Glob files on disk + files2 = glob(file_glob) + + if not files2: + # Trigger a FileNotFoundError error + open(file_glob) + + files3 = [] + for x in files2: + if isdir(x): + # Walk through directories, possibly recursively + for path, subdirs, filenames in os.walk( + x, followlinks=followlinks + ): + files3.extend( + os.path.join(path, f) for f in filenames + ) + if not recursive: + break + else: + files3.append(x) + + files2 = files3 + + # The types of all of the input files + ftypes = set() + + for filename in files2: + if info: + logger.info(f"File: {filename}") # pragma: no cover + + # ---------------------------------------------------- + # Read the file + # ---------------------------------------------------- + file_contents = [] + + # The type of this file + ftype = None + + # Record file type errors + file_format_errors = [] + + if ftype is None and ( + file_type is None + or file_type.intersection(netCDF_file_types) + ): + # Try to read as netCDF + try: + file_contents = super().__new__( + cls, + filename=filename, + external=external, + extra=extra, + verbose=verbose, + warnings=warnings, + mask=mask, + unpack=unpack, + warn_valid=warn_valid, + domain=domain, + storage_options=storage_options, + netcdf_backend=netcdf_backend, + dask_chunks=dask_chunks, + store_dataset_chunks=store_dataset_chunks, + cache=cache, + cfa=cfa, + cfa_write=cfa_write, + to_memory=to_memory, + squeeze=squeeze, + unsqueeze=unsqueeze, + file_type=file_type, + ) + except DatasetTypeError as error: + if file_type is None: + file_format_errors.append(error) + else: + file_format_errors = [] + ftype = "netCDF" + + if ftype is None and ( 
+ file_type is None or file_type.intersection(UM_file_types) + ): + # Try to read as UM + try: + file_contents = cls.um.read( + filename, + um_version=um.get("version"), + verbose=verbose, + set_standard_name=False, + height_at_top_of_model=height_at_top_of_model, + fmt=um.get("fmt"), + word_size=um.get("word_size"), + endian=um.get("endian"), + select=select, + squeeze=squeeze, + unsqueeze=unsqueeze, + domain=domain, + file_type=file_type, + unpack=unpack, + ) + except DatasetTypeError as error: + if file_type is None: + file_format_errors.append(error) + else: + file_format_errors = [] + ftype = "UM" + + if file_format_errors: + error = "\n".join(map(str, file_format_errors)) + raise DatasetTypeError(f"\n{error}") + + if domain: + file_contents = DomainList(file_contents) + + file_contents = FieldList(file_contents) + + if ftype: + ftypes.add(ftype) + + # Select matching fields (only for netCDF files at + # this stage - we'll do it for other file types + # later) + if select and ftype == "netCDF": + file_contents = file_contents.select_by_identity(*select) + + # Add this file's contents to that already read from + # other files + out.extend(file_contents) + + field_counter = len(out) + file_counter += 1 + + # ---------------------------------------------------------------- + # Aggregate the output fields/domains + # ---------------------------------------------------------------- + if aggregate and len(out) > 1: + org_len = len(out) # pragma: no cover + + if "UM" in ftypes: + # Set defaults specific to UM fields + if "strict_units" not in aggregate_options: + aggregate_options["relaxed_units"] = True + + out = cf_aggregate(out, **aggregate_options) + + n = len(out) # pragma: no cover + if info: + logger.info( + f"{org_len} input field{cls._plural(org_len)} " + f"aggregated into {n} field{cls._plural(n)}" + ) # pragma: no cover - out = cf_aggregate(out, **aggregate_options) + # ---------------------------------------------------------------- + # Sort by netCDF 
variable name + # ---------------------------------------------------------------- + if len(out) > 1: + out.sort(key=lambda f: f.nc_get_variable("")) + + # ---------------------------------------------------------------- + # Add standard names to UM/PP fields (post aggregation) + # ---------------------------------------------------------------- + for f in out: + standard_name = f._custom.get("standard_name", None) + if standard_name is not None: + f.set_property("standard_name", standard_name, copy=False) + del f._custom["standard_name"] + + # ---------------------------------------------------------------- + # Select matching fields from UM files (post setting of their + # standard names) + # ---------------------------------------------------------------- + if select and "UM" in ftypes: + out = out.select_by_identity(*select) - n = len(out) # pragma: no cover if info: logger.info( - f"{org_len} input field{_plural(org_len)} aggregated into " - f"{n} field{_plural(n)}" + f"Read {field_counter} field{cls._plural(field_counter)} " + f"from {file_counter} file{cls._plural(file_counter)}" ) # pragma: no cover - # ---------------------------------------------------------------- - # Sort by netCDF variable name - # ---------------------------------------------------------------- - if len(out) > 1: - out.sort(key=lambda f: f.nc_get_variable("")) - - # ---------------------------------------------------------------- - # Add standard names to UM/PP fields (post aggregation) - # ---------------------------------------------------------------- - for f in out: - standard_name = f._custom.get("standard_name", None) - if standard_name is not None: - f.set_property("standard_name", standard_name, copy=False) - del f._custom["standard_name"] - - # ---------------------------------------------------------------- - # Select matching fields from UM/PP fields (post setting of - # standard names) - # ---------------------------------------------------------------- - if select and 
"UM" in ftypes: - out = out.select_by_identity(*select) - - # ---------------------------------------------------------------- - # Squeeze size one dimensions from the data arrays. Do one of: - # - # 1) Squeeze the fields, i.e. remove all size one dimensions from - # all field data arrays - # - # 2) Unsqueeze the fields, i.e. Include all size 1 domain - # dimensions in the data array. - # - # 3) Nothing - # ---------------------------------------------------------------- - if not domain: - if squeeze: - for f in out: - f.squeeze(inplace=True) - elif unsqueeze: - for f in out: - f.unsqueeze(inplace=True) - - if nfields is not None and len(out) != nfields: - raise ValueError( - f"{nfields} field{_plural(nfields)} requested but " - f"{len(out)} field/domain constucts found in " - f"file{_plural(file_counter)}" - ) - - return out - - -def _plural(n): # pragma: no cover - """Return a suffix which reflects a word's plural.""" - return "s" if n != 1 else "" # pragma: no cover - - -@_manage_log_level_via_verbosity -def _read_a_file( - filename, - ftype=None, - aggregate=True, - aggregate_options=None, - ignore_read_error=False, - verbose=None, - warnings=False, - external=None, - selected_fmt=None, - um=None, - extra=None, - height_at_top_of_model=None, - mask=True, - unpack=True, - warn_valid=False, - dask_chunks="storage-aligned", - store_hdf5_chunks=True, - select=None, - domain=False, - cfa_options=None, - netcdf_backend=None, - storage_options=None, - cache=True, -): - """Read the contents of a single file into a field list. - - :Parameters: - - filename: `str` - See `cf.read` for details. - - ftype: `str` - The file format to interpret the file. Recognised formats are - ``'netCDF'``, ``'CDL'``, ``'UM'`` and ``'PP'``. - - aggregate_options: `dict`, optional - See `cf.read` for details. - - ignore_read_error: `bool`, optional - See `cf.read` for details. - - mask: `bool`, optional - See `cf.read` for details. - - unpack: `bool`, optional - See `cf.read` for details. 
- - verbose: `int` or `str` or `None`, optional - See `cf.read` for details. - - select: optional - For `read. Ignored for a netCDF file. - - domain: `bool`, optional - See `cf.read` for details. - - cfa_options: `dict`, optional - See `cf.read` for details. - - .. versionadded:: 3.15.0 - - storage_options: `dict` or `None`, optional - See `cf.read` for details. - - .. versionadded:: 1.11.2.0 - - netcdf_backend: `str` or `None`, optional - See `cf.read` for details. - - .. versionadded:: 1.11.2.0 - - cache: `bool`, optional - See `cf.read` for details. - - .. versionadded:: 1.11.2.0 - - :Returns: - - `FieldList` or `DomainList` - The field or domain constructs in the dataset. - - """ - if aggregate_options is None: - aggregate_options = {} - - # Find this file's type - fmt = None - word_size = None - endian = None - height_at_top_of_model = None - umversion = 405 - - if um: - fmt = um.get("fmt") - word_size = um.get("word_size") - endian = um.get("endian") - umversion = um.get("version", umversion) - height_at_top_of_model = um.get("height_at_top_of_model") - - if fmt is not None: - fmt = fmt.upper() - - if umversion is not None: - umversion = float(str(umversion).replace(".", "0", 1)) - - extra_read_vars = { - "fmt": selected_fmt, - "ignore_read_error": ignore_read_error, - "cfa_options": cfa_options, - } - - # ---------------------------------------------------------------- - # Still here? Read the file into fields or domains. 
- # ---------------------------------------------------------------- - originally_cdl = ftype == "CDL" - if originally_cdl: - # Create a temporary netCDF file from input CDL - ftype = "netCDF" - cdl_filename = filename - filename = netcdf.cdl_to_netcdf(filename) - extra_read_vars["fmt"] = "NETCDF" - - if not netcdf.is_netcdf_file(filename): - error_msg = ( - f"Can't determine format of file {filename} generated " - f"from CDL file {cdl_filename}" - ) - if ignore_read_error: - logger.warning(error_msg) # pragma: no cover - return FieldList() - else: - raise IOError(error_msg) - - if ftype == "netCDF" and extra_read_vars["fmt"] in (None, "NETCDF", "CFA"): - # See https://github.com/NCAS-CMS/cfdm/issues/128 for context on the - # try/except here, which acts as a temporary fix pending decisions on - # the best way to handle CDL with only header or coordinate info. - try: - out = netcdf.read( - filename, - external=external, - extra=extra, - verbose=verbose, - warnings=warnings, - extra_read_vars=extra_read_vars, - mask=mask, - unpack=unpack, - warn_valid=warn_valid, - domain=domain, - storage_options=storage_options, - netcdf_backend=netcdf_backend, - dask_chunks=dask_chunks, - store_hdf5_chunks=store_hdf5_chunks, - cache=cache, - ) - except MaskError: - # Some data required for field interpretation is missing, - # manifesting downstream as a NumPy MaskError. - if originally_cdl: - raise ValueError( - "Unable to convert CDL without data to field construct(s) " - "because there is insufficient information provided by " - "the header and/or coordinates alone in this case." - ) - else: - raise ValueError( - "Unable to convert netCDF to field or domain construct " - "because there is missing data." 
- ) - - elif ftype == "UM" and extra_read_vars["fmt"] in (None, "UM"): - if domain: + if nfields is not None and len(out) != nfields: raise ValueError( - "Can't set domain=True when reading UM or PP datasets" + f"{nfields} field{cls._plural(nfields)} requested but " + f"{len(out)} field/domain constuct{cls._plural(len(out))}" + f" found in file{cls._plural(file_counter)}" ) - out = UM.read( - filename, - um_version=umversion, - verbose=verbose, - set_standard_name=False, - height_at_top_of_model=height_at_top_of_model, - fmt=fmt, - word_size=word_size, - endian=endian, - select=select, - ) - - # PP fields are aggregated intrafile prior to interfile - # aggregation - if aggregate: - # For PP fields, the default is strict_units=False - if "strict_units" not in aggregate_options: - aggregate_options["relaxed_units"] = True - - # ---------------------------------------------------------------- - # Return the fields - # ---------------------------------------------------------------- - if domain: - return DomainList(out) - - return FieldList(out) - - -def file_type(filename): - """Return the file format. + return out - :Parameters: - - filename: `str` - The file name. - - :Returns: - - `str` - The format type of the file. One of ``'netCDF'``, ``'UM'`` - or ``'CDL'``. - - **Examples** - - >>> file_type(filename) - 'netCDF' - - """ - # ---------------------------------------------------------------- - # NetCDF - # ---------------------------------------------------------------- - if netcdf.is_netcdf_file(filename): - return "netCDF" - - # ---------------------------------------------------------------- - # PP or FF - # ---------------------------------------------------------------- - if UM.is_um_file(filename): - return "UM" - - # ---------------------------------------------------------------- - # CDL - # ---------------------------------------------------------------- - if netcdf.is_cdl_file(filename): - return "CDL" - - # Still here? 
- raise IOError(f"Can't determine format of file {filename}") + @staticmethod + def _plural(n): # pragma: no cover + """Return a suffix which reflects a word's plural.""" + return "s" if n != 1 else "" # pragma: no cover diff --git a/cf/read_write/um/umread.py b/cf/read_write/um/umread.py index e73166eba1..112c0b857a 100644 --- a/cf/read_write/um/umread.py +++ b/cf/read_write/um/umread.py @@ -9,6 +9,7 @@ import dask.array as da import numpy as np from cfdm import Constructs, is_log_level_info +from cfdm.read_write.exceptions import DatasetTypeError from dask.array.core import getter, normalize_chunks from dask.base import tokenize from netCDF4 import date2num as netCDF4_date2num @@ -28,9 +29,6 @@ from ...umread_lib.umfile import File from ...units import Units -# import numpy as np - - logger = logging.getLogger(__name__) _cached_runid = {} @@ -491,6 +489,9 @@ def __init__( implementation=None, select=None, info=False, + squeeze=False, + unsqueeze=False, + unpack=True, **kwargs, ): """**Initialisation** @@ -540,11 +541,44 @@ def __init__( increasing verbosity, the more description that is printed about the read process. + squeeze: `bool`, optional + If True then remove all size 1 dimensions from field + construct data arrays, regardless of how the data are + stored in the dataset. If False (the default) then the + presence or not of size 1 dimensions is determined by + how the data are stored in its dataset. + + .. versionadded:: NEXTVERSION + + unsqueeze: `bool`, optional + If True then ensure that field construct data arrays + span all of the size 1 dimensions, regardless of how + the data are stored in the dataset. If False (the + default) then the presence or not of size 1 dimensions + is determined by how the data are stored in its + dataset. + + .. versionadded:: NEXTVERSION + + unpack: `bool`, optional + If True, the default, then unpack arrays by convention + when the data is read from disk. 
+ + Unpacking is determined by netCDF conventions for the + following variable attributes ``add_offset`` and + ``scale_factor``, as applied to lookup header entries + BDATUM and BMKS respectively. + + .. versionadded:: NEXTVERSION + kwargs: *optional* Keyword arguments providing extra CF properties for each return field construct. """ + if squeeze and unsqueeze: + raise ValueError("'squeeze' and 'unsqueeze' can not both be True") + self._bool = False self.info = info @@ -557,6 +591,7 @@ def __init__( self.height_at_top_of_model = height_at_top_of_model self.byte_ordering = byte_ordering self.word_size = word_size + self.unpack = unpack self.atol = cf_atol() @@ -1114,6 +1149,16 @@ def __init__( self.fields.append(field) + # ------------------------------------------------------------ + # Squeeze/unsqueeze size 1 axes in field constructs + # ------------------------------------------------------------ + if unsqueeze: + for f in self.fields: + f.unsqueeze(inplace=True) + elif squeeze: + for f in self.fields: + f.squeeze(inplace=True) + self._bool = True def __bool__(self): @@ -1993,6 +2038,7 @@ def create_data(self): klass_name = UMArray().__class__.__name__ fmt = self.fmt + unpack = self.unpack if len(recs) == 1: # -------------------------------------------------------- @@ -2018,6 +2064,7 @@ def create_data(self): word_size=self.word_size, byte_ordering=self.byte_ordering, attributes=attributes, + unpack=unpack, ) key = f"{klass_name}-{tokenize(subarray)}" @@ -2071,6 +2118,7 @@ def create_data(self): word_size=word_size, byte_ordering=byte_ordering, attributes=attributes, + unpack=unpack, ) key = f"{klass_name}-{tokenize(subarray)}" @@ -2121,6 +2169,7 @@ def create_data(self): word_size=word_size, byte_ordering=byte_ordering, attributes=attributes, + unpack=unpack, ) key = f"{klass_name}-{tokenize(subarray)}" @@ -2150,7 +2199,7 @@ def create_data(self): # Create the Data object data = Data(dx, units=um_Units, fill_value=fill_value) - data._cfa_set_write(True) + 
data._nc_set_aggregation_write_status(True) self.data = data self.data_axes = data_axes @@ -3345,7 +3394,7 @@ class UMRead(cfdm.read_write.IORead): def read( self, filename, - um_version=405, + um_version=None, aggregate=True, endian=None, word_size=None, @@ -3355,6 +3404,12 @@ def read( chunk=True, verbose=None, select=None, + squeeze=False, + unsqueeze=False, + domain=False, + file_type=None, + ignore_unknown_type=False, + unpack=True, ): """Read fields from a PP file or UM fields file. @@ -3404,18 +3459,49 @@ def read( set_standard_name: `bool`, optional - select: (sequence of) `str` or `Query` or `re.Pattern`, optional - Only return field constructs whose identities match the - given values(s), i.e. those fields ``f`` for which - ``f.match_by_identity(*select)`` is `True`. See - `cf.Field.match_by_identity` for details. - - This is equivalent to, but faster than, not using the - *select* parameter but applying its value to the returned - field list with its `cf.FieldList.select_by_identity` - method. For example, ``fl = cf.read(file, - select='stash_code=3236')`` is equivalent to ``fl = - cf.read(file).select_by_identity('stash_code=3236')``. + select: (sequence of) `str` or `Query` or `re.Pattern`, optional + Only return field constructs whose identities match + the given values(s), i.e. those fields ``f`` for which + ``f.match_by_identity(*select)`` is `True`. See + `cf.Field.match_by_identity` for details. + + This is equivalent to, but faster than, not using the + *select* parameter but applying its value to the + returned field list with its + `cf.FieldList.select_by_identity` method. For example, + ``fl = cf.read(file, select='stash_code=3236')`` is + equivalent to ``fl = + cf.read(file).select_by_identity('stash_code=3236')``. + + squeeze: `bool`, optional + If True then remove all size 1 dimensions from field + construct data arrays, regardless of how the data are + stored in the dataset. 
If False (the default) then the + presence or not of size 1 dimensions is determined by + how the data are stored in its dataset. + + .. versionadded:: NEXTVERSION + + unsqueeze: `bool`, optional + If True then ensure that field construct data arrays + span all of the size 1 dimensions, regardless of how + the data are stored in the dataset. If False (the + default) then the presence or not of size 1 dimensions + is determined by how the data are stored in its + dataset. + + .. versionadded:: NEXTVERSION + + unpack: `bool`, optional + If True, the default, then unpack arrays by convention + when the data is read from disk. + + Unpacking is determined by netCDF conventions for the + following variable attributes ``add_offset`` and + ``scale_factor``, as applied to lookup header entries + BDATUM and BMKS respectively. + + .. versionadded:: NEXTVERSION :Returns: @@ -3428,6 +3514,12 @@ def read( >>> f = read('*/file[0-9].pp', um_version=708) """ + if domain: + raise ValueError( + "Can't read Domain constructs from UM or PP datasets " + "(only Field constructs)" + ) + if not _stash2standard_name: # -------------------------------------------------------- # Create the STASH code to standard_name conversion @@ -3440,6 +3532,14 @@ def read( else: byte_ordering = None + if fmt is not None: + fmt = fmt.upper() + + if um_version is None: + um_version = 405 + else: + um_version = float(str(um_version).replace(".", "0", 1)) + self.read_vars = { "filename": filename, "byte_ordering": byte_ordering, @@ -3454,6 +3554,18 @@ def read( else: byte_ordering = None + # ------------------------------------------------------------ + # Parse the 'file_type' keyword parameter + # ------------------------------------------------------------ + if file_type is not None: + if isinstance(file_type, str): + file_type = (file_type,) + + file_type = set(file_type) + if not file_type.intersection(("UM",)): + # Return now if there are no valid file types + return [] + f = self.file_open(filename, 
parse=True) info = is_log_level_info(logger) @@ -3472,6 +3584,7 @@ def read( implementation=self.implementation, select=select, info=info, + unpack=unpack, ) for var in f.vars ] @@ -3519,13 +3632,15 @@ def _open_um_file( fmt=fmt, parse=parse, ) - except Exception as error: + except Exception: try: f.close_fd() except Exception: pass - raise Exception(error) + raise DatasetTypeError( + f"Can't interpret {filename} as a PP or UM dataset" + ) self._um_file = f return f diff --git a/cf/read_write/write.py b/cf/read_write/write.py index 23a8dda3cd..b6b3f55ae9 100644 --- a/cf/read_write/write.py +++ b/cf/read_write/write.py @@ -1,831 +1,7 @@ -import numpy +import cfdm from ..cfimplementation import implementation -from ..decorators import _manage_log_level_via_verbosity -from ..functions import ( - _DEPRECATION_ERROR_FUNCTION_KWARG, - _DEPRECATION_ERROR_FUNCTION_KWARG_VALUE, - CFA, - flat, -) -from .netcdf import NetCDFWrite -netcdf = NetCDFWrite(implementation()) - -@_manage_log_level_via_verbosity -def write( - fields, - filename, - fmt="NETCDF4", - mode="w", - overwrite=True, - global_attributes=None, - file_descriptors=None, - external=None, - Conventions=None, - datatype=None, - least_significant_digit=None, - endian="native", - compress=0, - fletcher32=False, - shuffle=True, - reference_datetime=None, - verbose=None, - cfa=False, - single=None, - double=None, - variable_attributes=None, - string=True, - warn_valid=True, - group=True, - coordinates=False, - omit_data=None, - cfa_options=None, -): - """Write field constructs to a netCDF file. - - **File format** - - See the *fmt* parameter for details on which output netCDF file - formats are supported. - - - **NetCDF variable and dimension names** - - These names are stored within constructs read a from dataset, or - may be set manually. They are used when writing a field construct - to the file. If a name has not been set then one will be - constructed (usually based on the standard name if it exists). 
The - names may be modified internally to prevent duplication in the - file. - - Each construct, or construct component, that corresponds to a - netCDF variable has the following methods to get, set and remove a - netCDF variable name: `!nc_get_variable`, `!nc_set_variable` and - `!nc_del_variable` method - - The domain axis construct has the following methods to get, set - and remove a netCDF dimension name: - `~cf.DomainAxis.nc_get_dimension`, - `~cf.DomainAxis.nc_set_dimension` and - `~cf.DomainAxis.nc_del_dimension`. - - - **NetCDF attributes** - - Field construct properties may be written as netCDF global - attributes and/or netCDF data variable attributes. See the - *file_descriptors*, *global_attributes* and *variable_attributes* - parameters for details. - - - **External variables** - - Metadata constructs marked as external are omitted from the file - and referred to via the netCDF "external_variables" global - attribute. However, omitted constructs may be written to an - external file (see the *external* parameter for details). - - - **NetCDF unlimited dimensions** - - Domain axis constructs that correspond to NetCDF unlimited - dimensions may be accessed with the - `~cf.DomainAxis.nc_is_unlimited` and - `~cf.DomainAxis.nc_set_unlimited` methods of a domain axis - construct. - - - **NetCDF-4 hierarchical groups** - - Hierarchical groups in CF provide a mechanism to structure - variables within netCDF-4 datasets with well defined rules for - resolving references to out-of-group netCDF variables and - dimensions. The group structure defined by a field construct's - netCDF interface will, by default, be recreated in the output - dataset. See the *group* parameter for details. - - - **NetCDF-4 HDF chunk sizes** - - HDF5 chunksizes may be set on contruct's data. See the - `~cf.Data.nc_hdf5_chunksizes`, `~cf.Data.nc_clear_hdf5_chunksizes` - and `~cf.Data.nc_set_hdf5_chunksizes` methods of a `Data` - instance. - - .. 
seealso:: `cf.read` - - :Parameters: - - fields: (arbitrarily nested sequence of) `Field` or `FieldList` - The field constructs to write to the file. - - filename: `str` - The output netCDF file name. Various type of expansion are - applied to the file names. - - Relative paths are allowed, and standard tilde and shell - parameter expansions are applied to the string. - - *Parameter example:* - The file file.nc in the user’s home directory could be - described by any of the following: '``$HOME/file.nc'``, - ``'${HOME}/file.nc'``, ``'~/file.nc'``, - ``'~/tmp/../file.nc'``. - - fmt: `str`, optional - The format of the output file. One of: - - ========================== ============================== - *fmt* Output file type - ========================== ============================== - ``'NETCDF4'`` NetCDF4 format file. This is - the default. - - ``'NETCDF4_CLASSIC'`` NetCDF4 classic format file - (see below) - - ``'NETCDF3_CLASSIC'`` NetCDF3 classic format file - (limited to file sizes less - than 2GB). - - ``'NETCDF3_64BIT_OFFSET'`` NetCDF3 64-bit offset format - file - - ``'NETCDF3_64BIT'`` An alias for - ``'NETCDF3_64BIT_OFFSET'`` - - ``'NETCDF3_64BIT_DATA'`` NetCDF3 64-bit offset format - file with extensions (see - below) - - ``'CFA'`` or ``'CFA4'`` Deprecated at version - 3.15.0. See the *cfa* - parameter. - - ``'CFA3'`` Deprecated at version - 3.15.0. See the *cfa* - parameter. - ========================== ============================== - - By default the format is ``'NETCDF4'``. - - ``'NETCDF3_64BIT_DATA'`` is a format that requires version - 4.4.0 or newer of the C library (use `cf.environment` to - see which version if the netCDF-C library is in use). It - extends the ``'NETCDF3_64BIT_OFFSET'`` binary format to - allow for unsigned/64 bit integer data types and 64-bit - dimension sizes. - - ``'NETCDF4_CLASSIC'`` files use the version 4 disk format - (HDF5), but omits features not found in the version 3 - API. They can be read by HDF5 clients. 
They can also be - read by netCDF3 clients only if they have been re-linked - against the netCDF4 library. - - ``'NETCDF4'`` files use the version 4 disk format (HDF5) - and use the new features of the version 4 API. - - mode: `str`, optional - Specify the mode of write access for the output file. One of: - - ======== ================================================= - *mode* Description - ======== ================================================= - ``'w'`` Open a new file for writing to. If it exists and - *overwrite* is True then the file is deleted - prior to being recreated. - - ``'a'`` Open an existing file for appending new - information to. The new information will be - incorporated whilst the original contents of the - file will be preserved. - - In practice this means that new fields will be - created, whilst the original fields will not be - edited at all. Coordinates on the fields, where - equal, will be shared as standard. - - For append mode, note the following: - - * Global attributes on the file - will remain the same as they were originally, - so will become inaccurate where appended fields - have incompatible attributes. To rectify this, - manually inspect and edit them as appropriate - after the append operation using methods such as - `nc_clear_global_attributes` and - `nc_set_global_attribute`. - - * Fields with incompatible ``featureType`` to - the original file cannot be appended. - - * At present fields with groups cannot be - appended, but this will be possible in a future - version. Groups can however be cleared, the - fields appended, and groups re-applied, via - methods such as `nc_clear_variable_groups` and - `nc_set_variable_groups`, to achieve the same - for now. - - * At present domain ancillary constructs of - appended fields may not be handled correctly - and can appear as extra fields. Set them on the - resultant fields using `set_domain_ancillary` - and similar methods if required. - - ``'r+'`` Alias for ``'a'``. 
- - ======== ================================================= - - By default the file is opened with write access mode - ``'w'``. - - overwrite: `bool`, optional - If False then raise an error if the output file - pre-exists. By default a pre-existing output file is - overwritten. - - Conventions: (sequence of) `str`, optional - Specify conventions to be recorded by the netCDF global - "Conventions" attribute. By default the current - conventions are always included, but if an older CF - conventions is defined then this is used instead. - - *Parameter example:* - ``Conventions='UGRID-1.0'`` - - *Parameter example:* - ``Conventions=['UGRID-1.0']`` - - *Parameter example:* - ``Conventions=['CMIP-6.2', 'UGRID-1.0']`` - - *Parameter example:* - ``Conventions='CF-1.7'`` - - *Parameter example:* - ``Conventions=['CF-1.7', 'UGRID-1.0']`` - - Note that if the "Conventions" property is set on a field - construct then it is ignored. - - file_descriptors: `dict`, optional - Create description of file contents netCDF global - attributes from the specified attributes and their - values. - - If any field construct has a property with the same name - then it will be written as a netCDF data variable - attribute, even if it has been specified by the - *global_attributes* parameter, or has been flagged as - global on any of the field constructs (see - `cf.Field.nc_global_attributes` for details). - - Identification of the conventions being adhered to by the - file are not specified as a file descriptor, but by the - *Conventions* parameter instead. - - *Parameter example:* - ``file_attributes={'title': 'my data'}`` - - *Parameter example:* - ``file_attributes={'history': 'created 2019-01-01', 'foo': 'bar'}`` - - global_attributes: (sequence of) `str`, optional - Create netCDF global attributes from the specified field - construct properties, rather than netCDF data variable - attributes. 
- - These attributes are in addition to the following field - construct properties, which are created as netCDF global - attributes by default: - - * the description of file contents properties (as defined - by the CF conventions), and - - * properties flagged as global on any of the field - constructs being written (see - `cf.Field.nc_global_attributes` for details). - - Note that it is not possible to create a netCDF global - attribute from a property that has different values for - different field constructs being written. In this case - the property will not be written as a netCDF global - attribute, even if it has been specified by the - *global_attributes* parameter or is one of the default - properties, but will appear as an attribute on the netCDF - data variable corresponding to each field construct that - contains the property. - - Any global attributes that are also specified as file - descriptors will not be written as netCDF global - variables, but as netCDF data variable attributes - instead. - - *Parameter example:* - ``global_attributes='project'`` - - *Parameter example:* - ``global_attributes=['project']`` - - *Parameter example:* - ``global_attributes=['project', 'experiment']`` - - variable_attributes: (sequence of) `str`, optional - Create netCDF data variable attributes from the specified - field construct properties. - - By default, all field construct properties that are not - created as netCDF global properties are created as - attributes netCDF data variables. See the - *global_attributes* parameter for details. 
- - Any field construct property named by the - *variable_attributes* parameter will always be created as - a netCDF data variable attribute - - *Parameter example:* - ``variable_attributes='project'`` - - *Parameter example:* - ``variable_attributes=['project']`` - - *Parameter example:* - ``variable_attributes=['project', 'doi']`` - - external: `str`, optional - Write metadata constructs that have data and are marked as - external to the named external file. Ignored if there are - no such constructs. - - endian: `str`, optional - The endian-ness of the output file. Valid values are - ``'little'``, ``'big'`` or ``'native'``. By default the - output is native endian. See the `netCDF4 package - `_ for more - details. - - *Parameter example:* - ``endian='big'`` - - compress: `int`, optional - Regulate the speed and efficiency of compression. Must be - an integer between ``0`` and ``9``. ``0`` means no - compression; ``1`` is the fastest, but has the lowest - compression ratio; ``9`` is the slowest but best - compression ratio. The default value is ``0``. An error is - raised if compression is requested for a netCDF3 output - file format. See the `netCDF4 package - `_ for more - details. - - *Parameter example:* - ``compress=4`` - - least_significant_digit: `int`, optional - Truncate the input field construct data arrays, but not - the data arrays of metadata constructs. For a given - positive integer, N the precision that is retained in the - compressed data is 10 to the power -N. For example, a - value of 2 will retain a precision of 0.01. In conjunction - with the *compress* parameter this produces 'lossy', but - significantly more efficient, compression. See the - `netCDF4 package - `_ for more - details. - - *Parameter example:* - ``least_significant_digit=3`` - - fletcher32: `bool`, optional - If True then the Fletcher-32 HDF5 checksum algorithm is - activated to detect compression errors. Ignored if - *compress* is ``0``. 
See the `netCDF4 package - `_ for details. - - shuffle: `bool`, optional - If True (the default) then the HDF5 shuffle filter (which - de-interlaces a block of data before compression by - reordering the bytes by storing the first byte of all of a - variable's values in the chunk contiguously, followed by - all the second bytes, and so on) is turned off. By default - the filter is applied because if the data array values are - not all wildly different, using the filter can make the - data more easily compressible. Ignored if the *compress* - parameter is ``0`` (which is its default value). See the - `netCDF4 package - `_ for more - details. - - datatype: `dict`, optional - Specify data type conversions to be applied prior to - writing data to disk. This may be useful as a means of - packing, or because the output format does not support a - particular data type (for example, netCDF3 classic files - do not support 64-bit integers). By default, input data - types are preserved. Any data type conversion is only - applied to the arrays on disk, and not to the input field - constructs themselves. - - Data types conversions are defined by `numpy.dtype` - objects in a dictionary whose keys are input data types - with values of output data types. - - *Parameter example:* - To convert 64-bit integers to 32-bit integers: - ``datatype={numpy.dtype('int64'): - numpy.dtype('int32')}``. - - single: `bool`, optional - If True then write 64-bit floats as 32-bit floats and - 64-bit integers as 32-bit integers. - - If False then write 32-bit floats as 64-bit floats and - 32-bit integers as 64-bit integers. - - By default, input data types are preserved. - - .. note:: ``single=True`` is exactly equivalent to - ``double=False``, as well as - ``datatype={numpy.dtype(float): - numpy.dtype('float32'), numpy.dtype(int): - numpy.dtype('int32')}``. - - ``single=False`` is exactly equivalent to - ``double=True``. 
- - double: `bool`, optional - If True then write 32-bit floats as 64-bit floats and - 32-bit integers as 64-bit integers. - - If False then write 64-bit floats as 32-bit floats and - 64-bit integers as 32-bit integers. - - By default, input data types are preserved. - - .. note:: ``double=True`` is exactly equivalent to - ``single=False``, as well as - ``datatype={numpy.dtype('float32'): - numpy.dtype(float), numpy.dtype('int32'): - numpy.dtype(int)}``. - - ``double=False`` is exactly equivalent to - ``single=True``. - - string: `bool`, optional - By default string-valued construct data are written as - netCDF arrays of type string if the output file format is - ``'NETCDF4'``, or of type char with an extra dimension - denoting the maximum string length for any other output - file format (see the *fmt* parameter). If *string* is False - then string-valued construct data are written as netCDF - arrays of type char with an extra dimension denoting the - maximum string length, regardless of the selected output - file format. - - verbose: `int` or `str` or `None`, optional - If an integer from ``-1`` to ``3``, or an equivalent string - equal ignoring case to one of: - - * ``'DISABLE'`` (``0``) - * ``'WARNING'`` (``1``) - * ``'INFO'`` (``2``) - * ``'DETAIL'`` (``3``) - * ``'DEBUG'`` (``-1``) - - set for the duration of the method call only as the minimum - cut-off for the verboseness level of displayed output (log) - messages, regardless of the globally-configured `cf.log_level`. - Note that increasing numerical value corresponds to increasing - verbosity, with the exception of ``-1`` as a special case of - maximal and extreme verbosity. - - Otherwise, if `None` (the default value), output messages will - be shown according to the value of the `cf.log_level` setting. 
- - Overall, the higher a non-negative integer or equivalent string - that is set (up to a maximum of ``3``/``'DETAIL'``) for - increasing verbosity, the more description that is printed to - convey how constructs map to output netCDF dimensions, variables - and attributes. - - warn_valid: `bool`, optional - If False then do not print a warning when writing - "out-of-range" data, as indicated by the values, if - present, of any of the ``valid_min``, ``valid_max`` or - ``valid_range`` properties on field and metadata - constructs that have data. By default a warning is printed - if any such construct has any of these properties in - combination with out-of-range data. - - The consequence of writing out-of-range data values is - that, by default, these values will be masked when the - file is subsequently read. - - *Parameter example:* - If a construct has ``valid_max`` property with value - ``100`` and data with maximum value ``999``, then the - resulting warning may be suppressed by setting - ``warn_valid=False``. - - .. versionadded:: 3.4.0 - - group: `bool`, optional - If False then create a "flat" netCDF file, i.e. one with - only the root group, regardless of any group structure - specified by the field constructs. By default any groups - defined by the netCDF interface of the field constructs - and its components will be created and populated. - - .. versionadded:: 3.6.0 - - coordinates: `bool`, optional - If True then include CF-netCDF coordinate variable names - in the 'coordinates' attribute of output data - variables. By default only auxiliary and scalar coordinate - variables are included. - - .. versionadded:: 3.7.0 - - omit_data: (sequence of) `str`, optional - Do not write the data of the named construct types. - - This does not affect the amount of netCDF variables and - dimensions that are written to the file, nor the netCDF - variables' attributes, but does not create data on disk - for the requested variables. 
The resulting file will be - smaller than it otherwise would have been, and when the - new file is read then the data of these variables will be - represented by an array of all missing data. - - The *omit_data* parameter may be one, or a sequence, of: - - ========================== =============================== - *omit_data* Construct types - ========================== =============================== - ``'field'`` Field constructs - ``'field_ancillary'`` Field ancillary constructs - ``'domain_ancillary'`` Domain ancillary constructs - ``'dimension_coordinate'`` Dimension coordinate constructs - ``'auxiliary_coordinate'`` Auxiliary coordinate constructs - ``'cell_measure'`` Cell measure constructs - ``'all'`` All of the above constructs - ========================== =============================== - - *Parameter example:* - To omit the data from only field constructs: - ``omit_data='field'`` or ``omit_data=['field']``. - - *Parameter example:* - To omit the data from domain ancillary and cell measure - constructs: ``omit_data=['domain_ancillary', - 'cell_measure']``. - - .. versionadded:: 3.14.0 - - cfa: `bool` or `dict`, optional - If True or a (possibly empty) dictionary then write the - constructs as CFA-netCDF aggregation variables, where - possible and where requested. - - The netCDF format of the CFA-netCDF file is determined by - the *fmt* parameter, as usual. - - If *cfa* is a dictionary then it is used to configure the - CFA write process. The default options when CFA writing is - enabled are ``{'constructs': 'field', 'absolute_paths': - True, 'strict': True, 'substitutions': {}}``, and the - dictionary may have any subset of the following key/value - pairs to override these defaults: - - * ``'constructs'``: `dict` or (sequence of) `str` - - The types of construct to be written as CFA-netCDF - aggregation variables. By default only field constructs - are written as CFA-netCDF aggregation variables. 
- - The types may be given as a (sequence of) `str`, which - may take any of the values allowed by the *omit_data* - parameter. Alternatively, the same types may be given as - keys to a `dict` whose values specify the number of - dimensions that a construct must also have if it is to - be written as a CFA-netCDF aggregation variable. A value - of `None` means no restriction on the number of - dimensions, which is equivalent to a value of - ``cf.ge(0)``. - - *Example:* - Equivalent ways to only write cell measure constructs - as CFA-netCDF aggregation variables: - ``'cell_measure``, ``['cell_measure']``, - ``{'cell_measure': None}``, ``{'cell_measure': - cf.ge(0)}`` - - *Example:* - Equivalent ways to only write field and auxiliary - coordinate constructs as CFA-netCDF aggregation - variables: ``('field', 'auxiliary_coordinate')`` and - ``{'field': None, 'auxiliary_coordinate': None}``. - - *Example:* - Equivalent ways to only write two-dimensional - auxiliary coordinate constructs as CFA-netCDF - aggregation variables: ``{'auxiliary_coordinate': - 2}`` and ``{'auxiliary_coordinate': cf.eq(2)}``. - - *Example:* - Only write auxiliary coordinate constructs with two or - more dimensions as CFA-netCDF variables, and also all - field constructs: ``{'field': None, - 'auxiliary_coordinate': cf.ge(2)}``. - - * ``'absolute_paths'``: `bool` - - How to write fragment file names. Set to True (the - default) for them to be written as fully qualified URIs, - or else set to False for them to be written as local - paths relative to the location of the CFA-netCDF file - being created. - - * ``'strict'``: `bool` - - If True (the default) then an exception is raised if it - is not possible to create a CFA aggregation variable - from data identified by the ``'constructs'`` option. If - False then a normal CF-netCDF variable will be written - in this case. 
- - * ``'substitutions'``: `dict` - - A dictionary whose key/value pairs define text - substitutions to be applied to the fragment file - names. Each key may be specified with or without the - ``${...}`` syntax. For instance, the following are - equivalent: ``{'base': 'sub'}``, ``{'${base}': 'sub'}``. - The substitutions are used in conjunction with, and take - precedence over, any that are also defined on individual - constructs (see `cf.Data.cfa_update_file_substitutions` - for details). - - Substitutions are stored in the output file by the - ``substitutions`` attribute of the ``file`` CFA - aggregation instruction variable. - - *Example:* - ``{'base': 'file:///data/'}`` - - .. versionadded:: 3.15.0 - - cfa_options: Deprecated at version 3.15.0 - Use the *cfa* parameter instead. - - :Returns: - - `None` - - **Examples** - - There are further worked examples - :ref:`in the tutorial `. - - >>> cf.write(f, 'file.nc') - - >>> cf.write(f, 'file.nc', fmt='NETCDF3_CLASSIC') - - >>> cf.write(f, 'file.nc', external='cell_measures.nc') - - >>> cf.write(f, 'file.nc', Conventions='CMIP-6.2') - - """ - if fmt in ("CFA", "CFA4", "CFA3"): - return _DEPRECATION_ERROR_FUNCTION_KWARG_VALUE( - "cf.write", - "fmt", - fmt, - "Use the 'cfa' keyword instead.", - version="3.15.0", - removed_at="5.0.0", - ) # pragma: no cover - - if cfa_options is not None: - return _DEPRECATION_ERROR_FUNCTION_KWARG( - "cf.write", - "cfa_options", - "Use keyword 'cfa' instead.", - version="3.15.0", - removed_at="5.0.0", - ) # pragma: no cover - - # Flatten the sequence of intput fields - fields = tuple(flat(fields)) - if fields: - # double and single - if datatype: - if single is not None: - raise ValueError("Can't set datatype and single") - if double is not None: - raise ValueError("Can't set datatype and double") - - if single is not None and double is not None: - raise ValueError("Can't set both the single and double parameters") - - if single is not None and not single: - double = True - - if 
double is not None and not double: - single = True - - if single: - datatype = { - numpy.dtype(float): numpy.dtype("float32"), - numpy.dtype(int): numpy.dtype("int32"), - } - - if double: - datatype = { - numpy.dtype("float32"): numpy.dtype(float), - numpy.dtype("int32"): numpy.dtype(int), - } - - # Extra write variables - extra_write_vars = {"reference_datetime": reference_datetime} - - # ------------------------------------------------------------ - # CFA - # ------------------------------------------------------------ - if isinstance(cfa, dict): - cfa_options = cfa.copy() - cfa = True - else: - cfa_options = {} - cfa = bool(cfa) - - if cfa: - # Add CFA to the Conventions - cfa_conventions = f"CFA-{CFA()}" - if not Conventions: - Conventions = cfa_conventions - elif isinstance(Conventions, str): - Conventions = (Conventions, cfa_conventions) - else: - Conventions = tuple(Conventions) + (cfa_conventions,) - - keys = ("constructs", "absolute_paths", "strict", "substitutions") - if not set(cfa_options).issubset(keys): - raise ValueError( - "Invalid dictionary key to the 'cfa_options' " - f"parameter. Valid keys are {keys}. 
Got: {cfa_options}" - ) - - cfa_options.setdefault("constructs", "field") - cfa_options.setdefault("absolute_paths", True) - cfa_options.setdefault("strict", True) - cfa_options.setdefault("substitutions", {}) - - constructs = cfa_options["constructs"] - if isinstance(constructs, dict): - cfa_options["constructs"] = constructs.copy() - else: - if isinstance(constructs, str): - constructs = (constructs,) - - cfa_options["constructs"] = {c: None for c in constructs} - - substitutions = cfa_options["substitutions"].copy() - for base, sub in tuple(substitutions.items()): - if not (base.startswith("${") and base.endswith("}")): - # Add missing ${...} - substitutions[f"${{{base}}}"] = substitutions.pop(base) - - cfa_options["substitutions"] = substitutions - - extra_write_vars["cfa"] = cfa - extra_write_vars["cfa_options"] = cfa_options - - netcdf.write( - fields, - filename, - fmt=fmt, - mode=mode, - overwrite=overwrite, - global_attributes=global_attributes, - variable_attributes=variable_attributes, - file_descriptors=file_descriptors, - external=external, - Conventions=Conventions, - datatype=datatype, - least_significant_digit=least_significant_digit, - endian=endian, - compress=compress, - shuffle=shuffle, - fletcher32=fletcher32, - verbose=verbose, - string=string, - warn_valid=warn_valid, - group=group, - coordinates=coordinates, - extra_write_vars=extra_write_vars, - omit_data=omit_data, - ) +class write(cfdm.write): + implementation = implementation() diff --git a/cf/test/create_test_files.py b/cf/test/create_test_files.py index 80f3be4a9f..2a0745294a 100644 --- a/cf/test/create_test_files.py +++ b/cf/test/create_test_files.py @@ -743,7 +743,7 @@ def _jj(shape, list_values): array[index] = i return array - n = netCDF4.Dataset(filename, "w", format="NETCDF3_CLASSIC") + n = netCDF4.Dataset(filename, "w") n.Conventions = f"CF-{VN}" @@ -855,7 +855,13 @@ def _jj(shape, list_values): temp2.coordinates = "aux7 aux8 aux9" temp2[...] 
= np.arange(2 * 3 * 9 * 6).reshape(2, 3, 9, 6) - temp3 = n.createVariable("temp3", "f8", ("time", "list3", "p")) + temp3 = n.createVariable( + "temp3", + "f8", + ("time", "list3", "p"), + complevel=1, + chunksizes=(2, 6, 4), + ) temp3.long_name = "temp3" temp3.units = "K" temp3.coordinates = "aux0 aux1 aux2 aux3 aux4 aux5 aux6 aux7 aux8 aux9" @@ -2222,6 +2228,90 @@ def _make_ugrid_2(filename): return filename +def _make_aggregation_value(filename): + """Create an aggregation variable with a 'value' fragment array.""" + n = netCDF4.Dataset(filename, "w") + + n.Conventions = f"CF-{VN}" + n.comment = "A netCDF file with a 'value' aggregation variable." + + n.createDimension("time", 12) + n.createDimension("level", 1) + n.createDimension("latitude", 73) + n.createDimension("longitude", 144) + n.createDimension("a_time", 2) + n.createDimension("a_level", 1) + n.createDimension("a_latitude", 1) + n.createDimension("a_longitude", 1) + n.createDimension("a_map_i2", 2) + n.createDimension("a_map_j4", 4) + n.createDimension("a_map_j1", 1) + + temperature = n.createVariable("temperature", "f8", ()) + temperature.standard_name = "air_temperature" + temperature.units = "K" + temperature.cell_methods = "time: mean" + temperature.ancillary_variables = "uid" + temperature.aggregated_dimensions = "time level latitude longitude" + temperature.aggregated_data = "location: fragment_location variable: fragment_variable map: fragment_map" + + uid = n.createVariable("uid", str, ()) + uid.long_name = "Fragment dataset unique identifiers" + uid.aggregated_dimensions = "time" + uid.aggregated_data = ( + "unique_value: fragment_value_uid map: fragment_map_uid" + ) + + time = n.createVariable("time", "f4", ("time",)) + time.standard_name = "time" + time.units = "days since 2001-01-01" + time[...] 
= [0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334] + + level = n.createVariable("level", "f4", ("level",)) + level.standard_name = "height_above_mean_sea_level" + level.units = "m" + + latitude = n.createVariable("latitude", "f4", ("latitude",)) + latitude.standard_name = "latitude" + latitude.units = "degrees_north" + + longitude = n.createVariable("longitude", "f4", ("longitude",)) + longitude.standard_name = "longitude" + longitude.units = "degrees_east" + + # Fragment array variables + fragment_location = n.createVariable( + "fragment_location", + str, + ("a_time", "a_level", "a_latitude", "a_longitude"), + ) + fragment_location[0, 0, 0, 0] = "January-March.nc" + fragment_location[1, 0, 0, 0] = "April-December.nc" + + fragment_variable = n.createVariable("fragment_variable", str, ()) + fragment_variable[...] = "temperature" + + fragment_map = n.createVariable( + "fragment_map", "i4", ("a_map_j4", "a_map_i2") + ) + fragment_map[...] = [[3, 9], [1, -1], [73, -1], [144, -1]] + fragment_map[1:, 1] = np.ma.masked + + fragment_value_uid = n.createVariable( + "fragment_value_uid", str, ("a_time",) + ) + fragment_value_uid[0] = "04b9-7eb5-4046-97b-0bf8" + fragment_value_uid[1] = "05ee0-a183-43b3-a67-1eca" + + fragment_map_uid = n.createVariable( + "fragment_map_uid", "i4", ("a_map_j1", "a_map_i2") + ) + fragment_map_uid[...] 
= [3, 9] + + n.close() + return filename + + contiguous_file = _make_contiguous_file("DSG_timeSeries_contiguous.nc") indexed_file = _make_indexed_file("DSG_timeSeries_indexed.nc") indexed_contiguous_file = _make_indexed_contiguous_file( @@ -2252,6 +2342,8 @@ def _make_ugrid_2(filename): ugrid_1 = _make_ugrid_1("ugrid_1.nc") ugrid_2 = _make_ugrid_2("ugrid_2.nc") +aggregation_value = _make_aggregation_value("aggregation_value.nc") + if __name__ == "__main__": print("Run date:", datetime.datetime.now()) cfdm.environment() diff --git a/cf/test/setup_create_field.py b/cf/test/setup_create_field.py index 34e4bdcd58..3e751fe47d 100644 --- a/cf/test/setup_create_field.py +++ b/cf/test/setup_create_field.py @@ -3,7 +3,7 @@ import os import unittest -import numpy +import numpy as np faulthandler.enable() # to debug seg faults and timeouts @@ -17,20 +17,18 @@ class create_fieldTest(unittest.TestCase): def test_create_field(self): # Dimension coordinates - dim1 = cf.DimensionCoordinate( - data=cf.Data(numpy.arange(10.0), "degrees") - ) + dim1 = cf.DimensionCoordinate(data=cf.Data(np.arange(10.0), "degrees")) dim1.standard_name = "grid_latitude" dim0 = cf.DimensionCoordinate( - data=cf.Data(numpy.arange(9.0) + 20, "degrees") + data=cf.Data(np.arange(9.0) + 20, "degrees") ) dim0.standard_name = "grid_longitude" dim0.data[-1] += 5 bounds = cf.Data( - numpy.array( - [dim0.data.array - 0.5, dim0.data.array + 0.5] - ).transpose((1, 0)) + np.array([dim0.data.array - 0.5, dim0.data.array + 0.5]).transpose( + (1, 0) + ) ) bounds[-2, 1] = 30 bounds[-1, :] = [30, 36] @@ -54,7 +52,7 @@ def test_create_field(self): aux2 = cf.AuxiliaryCoordinate( data=cf.Data( - numpy.arange(-45, 45, dtype="int32").reshape(10, 9), + np.arange(-45, 45, dtype="int32").reshape(10, 9), units="degree_N", ) ) @@ -62,7 +60,7 @@ def test_create_field(self): aux3 = cf.AuxiliaryCoordinate( data=cf.Data( - numpy.arange(60, 150, dtype="int32").reshape(9, 10), + np.arange(60, 150, dtype="int32").reshape(9, 10), 
units="degreesE", ) ) @@ -70,7 +68,7 @@ def test_create_field(self): aux4 = cf.AuxiliaryCoordinate( data=cf.Data( - numpy.array( + np.array( [ "alpha", "beta", @@ -97,12 +95,12 @@ def test_create_field(self): # Cell measures msr0 = cf.CellMeasure( - data=cf.Data(1 + numpy.arange(90.0).reshape(9, 10) * 1234, "km 2") + data=cf.Data(1 + np.arange(90.0).reshape(9, 10) * 1234, "km 2") ) msr0.measure = "area" # Data - data = cf.Data(numpy.arange(90.0).reshape(10, 9), "m s-1") + data = cf.Data(np.arange(90.0).reshape(10, 9), "m s-1") properties = {"standard_name": "eastward_wind"} @@ -133,8 +131,11 @@ def test_create_field(self): "grid_north_pole_longitude": 190.0, } ) + datum = cf.Datum(parameters={"earth_radius": 6371007}) + ref0 = cf.CoordinateReference( coordinate_conversion=coordinate_conversion, + datum=datum, coordinates=[x, y, lat, lon], ) @@ -156,10 +157,12 @@ def test_create_field(self): domain_ancillaries={"orog": orog_key, "a": ak, "b": bk}, ) ref1 = cf.CoordinateReference( - coordinate_conversion=coordinate_conversion, coordinates=[z] + coordinates=[z], + datum=datum, + coordinate_conversion=coordinate_conversion, ) - f.set_construct(ref1) + ref1 = f.set_construct(ref1) # Field ancillary variables g = cf.FieldAncillary() @@ -193,7 +196,7 @@ def test_create_field(self): f.flag_meanings = ["a", "bb", "ccc"] for cm in cf.CellMethod.create( - "grid_longitude: mean grid_latitude: max" + "grid_longitude: mean grid_latitude: maximum" ): f.set_construct(cm) diff --git a/cf/test/test_CFA.py b/cf/test/test_CFA.py index 6b005aef70..ee3cd11efa 100644 --- a/cf/test/test_CFA.py +++ b/cf/test/test_CFA.py @@ -10,6 +10,8 @@ faulthandler.enable() # to debug seg faults and timeouts +from cfdm.read_write.netcdf.netcdfwrite import AggregationError + import cf n_tmpfiles = 5 @@ -20,9 +22,9 @@ ( tmpfile1, tmpfile2, - tmpfile3, - tmpfile4, - tmpfile5, + nc_file, + cfa_file, + cfa_file2, ) = tmpfiles @@ -39,6 +41,8 @@ def _remove_tmpfiles(): class CFATest(unittest.TestCase): + 
"""Unit test for aggregation variables.""" + netcdf3_fmts = [ "NETCDF3_CLASSIC", "NETCDF3_64BIT", @@ -48,33 +52,35 @@ class CFATest(unittest.TestCase): netcdf4_fmts = ["NETCDF4", "NETCDF4_CLASSIC"] netcdf_fmts = netcdf3_fmts + netcdf4_fmts + aggregation_value = os.path.join( + os.path.dirname(os.path.abspath(__file__)), "aggregation_value.nc" + ) + def test_CFA_fmt(self): - """Test the cf.read 'fmt' and 'cfa' keywords.""" + """Test the cf.read 'fmt' keyword with cfa.""" f = cf.example_field(0) cf.write(f, tmpfile1) - f = cf.read(tmpfile1)[0] + f = cf.read(tmpfile1, cfa_write="field")[0] for fmt in self.netcdf_fmts: - cf.write(f, tmpfile2, fmt=fmt, cfa=True) - g = cf.read(tmpfile2) + cf.write(f, cfa_file, fmt=fmt, cfa="field") + g = cf.read(cfa_file) self.assertEqual(len(g), 1) self.assertTrue(f.equals(g[0])) def test_CFA_multiple_fragments(self): - """Test CFA with more than one fragment.""" + """Test aggregation variables with more than one fragment.""" f = cf.example_field(0) cf.write(f[:2], tmpfile1) cf.write(f[2:], tmpfile2) - a = cf.read([tmpfile1, tmpfile2]) - self.assertEqual(len(a), 1) - a = a[0] + a = cf.read(tmpfile1, cfa_write="field")[0] + b = cf.read(tmpfile2, cfa_write="field")[0] + a = cf.Field.concatenate([a, b], axis=0) - nc_file = tmpfile3 - cfa_file = tmpfile4 cf.write(a, nc_file) - cf.write(a, cfa_file, cfa=True) + cf.write(a, cfa_file, cfa="field") n = cf.read(nc_file) c = cf.read(cfa_file) @@ -84,249 +90,95 @@ def test_CFA_multiple_fragments(self): self.assertTrue(n[0].equals(c[0])) def test_CFA_strict(self): - """Test CFA 'strict' option to the cfa.write 'cfa' keyword.""" + """Test 'strict' option to the cf.write 'cfa' keyword.""" f = cf.example_field(0) - # By default, can't write as CF-netCDF those variables - # selected for CFA treatment, but which aren't suitable. 
- with self.assertRaises(ValueError): - cf.write(f, tmpfile1, cfa=True) + # By default, can't write in-memory arrays as aggregation + # variables + with self.assertRaises(AggregationError): + cf.write(f, cfa_file, cfa="field") # The previous line should have deleted the output file - self.assertFalse(os.path.exists(tmpfile1)) + self.assertFalse(os.path.exists(cfa_file)) - cf.write(f, tmpfile1, cfa={"strict": False}) - g = cf.read(tmpfile1) + cf.write(f, nc_file, cfa={"constructs": "field", "strict": False}) + g = cf.read(nc_file, cfa_write="field") self.assertEqual(len(g), 1) self.assertTrue(g[0].equals(f)) - cf.write(g, tmpfile2, cfa={"strict": True}) - g = cf.read(tmpfile2) + cf.write(g, cfa_file, cfa={"constructs": "field", "strict": True}) + g = cf.read(cfa_file) self.assertEqual(len(g), 1) self.assertTrue(g[0].equals(f)) - def test_CFA_field_ancillaries(self): - """Test creation of field ancillaries from non-standard CFA terms.""" - f = cf.example_field(0) - self.assertFalse(f.field_ancillaries()) - - a = f[:2] - b = f[2:] - a.set_property("foo", "bar_a") - b.set_property("foo", "bar_b") - cf.write(a, tmpfile1) - cf.write(b, tmpfile2) - - c = cf.read( - [tmpfile1, tmpfile2], aggregate={"field_ancillaries": "foo"} - ) - self.assertEqual(len(c), 1) - c = c[0] - self.assertEqual(len(c.field_ancillaries()), 1) - anc = c.field_ancillary() - self.assertTrue(anc.data.cfa_get_term()) - self.assertFalse(anc.data.cfa_get_write()) - - cf.write(c, tmpfile3, cfa=False) - c2 = cf.read(tmpfile3) - self.assertEqual(len(c2), 1) - self.assertFalse(c2[0].field_ancillaries()) - - cf.write(c, tmpfile4, cfa=True) - d = cf.read(tmpfile4) - self.assertEqual(len(d), 1) - d = d[0] - - self.assertEqual(len(d.field_ancillaries()), 1) - anc = d.field_ancillary() - self.assertTrue(anc.data.cfa_get_term()) - self.assertFalse(anc.data.cfa_get_write()) - self.assertTrue(d.equals(c)) - - cf.write(d, tmpfile5, cfa=False) - e = cf.read(tmpfile5) - self.assertEqual(len(e), 1) - 
self.assertFalse(e[0].field_ancillaries()) - - cf.write(d, tmpfile5, cfa=True) - e = cf.read(tmpfile5) - self.assertEqual(len(e), 1) - self.assertTrue(e[0].equals(d)) - - def test_CFA_substitutions_0(self): - """Test CFA substitution URI substitutions (0).""" + def test_CFA_uri_0(self): + """Test aggregation 'uri' option to cf.write.""" f = cf.example_field(0) cf.write(f, tmpfile1) - f = cf.read(tmpfile1)[0] - - cwd = os.getcwd() - - f.data.cfa_update_file_substitutions({"base": cwd}) - - cf.write( - f, - tmpfile2, - cfa={"absolute_paths": True}, - ) - - nc = netCDF4.Dataset(tmpfile2, "r") - cfa_file = nc.variables["cfa_file"] - self.assertEqual( - cfa_file.getncattr("substitutions"), - f"${{base}}: {cwd}", - ) - self.assertEqual( - cfa_file[...], f"file://${{base}}/{os.path.basename(tmpfile1)}" - ) - nc.close() - - g = cf.read(tmpfile2) - self.assertEqual(len(g), 1) - self.assertTrue(f.equals(g[0])) + f = cf.read(tmpfile1, cfa_write="field")[0] - def test_CFA_substitutions_1(self): - """Test CFA substitution URI substitutions (1).""" - f = cf.example_field(0) - cf.write(f, tmpfile1) - f = cf.read(tmpfile1)[0] + absuri_filename = PurePath(os.path.abspath(tmpfile1)).as_uri() + reluri_filename = os.path.basename(tmpfile1) - cwd = os.getcwd() - for base in ("base", "${base}"): + for uri, filename in zip( + ("absolute", "relative"), (absuri_filename, reluri_filename) + ): cf.write( f, - tmpfile2, - cfa={"absolute_paths": True, "substitutions": {base: cwd}}, + cfa_file, + cfa={"constructs": "field", "uri": uri}, ) - nc = netCDF4.Dataset(tmpfile2, "r") - cfa_file = nc.variables["cfa_file"] - self.assertEqual( - cfa_file.getncattr("substitutions"), - f"${{base}}: {cwd}", - ) - self.assertEqual( - cfa_file[...], f"file://${{base}}/{os.path.basename(tmpfile1)}" - ) + nc = netCDF4.Dataset(cfa_file, "r") + cfa_location = nc.variables["cfa_location"] + self.assertEqual(cfa_location[...], filename) nc.close() - g = cf.read(tmpfile2) - self.assertEqual(len(g), 1) - 
self.assertTrue(f.equals(g[0])) + g = cf.read(cfa_file) + self.assertEqual(len(g), 1) + g = g[0] + self.assertTrue(f.equals(g)) + self.assertEqual( + g.data.get_filenames(normalise=False), set([filename]) + ) - def test_CFA_substitutions_2(self): - """Test CFA substitution URI substitutions (2).""" + def test_CFA_uri_1(self): + """Test aggregation 'uri=default' option to cf.write.""" f = cf.example_field(0) cf.write(f, tmpfile1) - f = cf.read(tmpfile1)[0] - - cwd = os.getcwd() - - f.data.cfa_clear_file_substitutions() - f.data.cfa_update_file_substitutions({"base": cwd}) - - cf.write( - f, - tmpfile2, - cfa={ - "absolute_paths": True, - "substitutions": {"base2": "/bad/location"}, - }, - ) - - nc = netCDF4.Dataset(tmpfile2, "r") - cfa_file = nc.variables["cfa_file"] - self.assertEqual( - cfa_file.getncattr("substitutions"), - f"${{base2}}: /bad/location ${{base}}: {cwd}", - ) - self.assertEqual( - cfa_file[...], f"file://${{base}}/{os.path.basename(tmpfile1)}" - ) - nc.close() - - g = cf.read(tmpfile2) - self.assertEqual(len(g), 1) - self.assertTrue(f.equals(g[0])) - - f.data.cfa_clear_file_substitutions() - f.data.cfa_update_file_substitutions({"base": "/bad/location"}) - - cf.write( - f, - tmpfile2, - cfa={"absolute_paths": True, "substitutions": {"base": cwd}}, - ) - - nc = netCDF4.Dataset(tmpfile2, "r") - cfa_file = nc.variables["cfa_file"] - self.assertEqual( - cfa_file.getncattr("substitutions"), - f"${{base}}: {cwd}", - ) - self.assertEqual( - cfa_file[...], f"file://${{base}}/{os.path.basename(tmpfile1)}" - ) - nc.close() - - g = cf.read(tmpfile2) - self.assertEqual(len(g), 1) - self.assertTrue(f.equals(g[0])) - - f.data.cfa_clear_file_substitutions() - f.data.cfa_update_file_substitutions({"base2": "/bad/location"}) - - cf.write( - f, - tmpfile2, - cfa={"absolute_paths": True, "substitutions": {"base": cwd}}, - ) - - nc = netCDF4.Dataset(tmpfile2, "r") - cfa_file = nc.variables["cfa_file"] - self.assertEqual( - cfa_file.getncattr("substitutions"), - 
f"${{base2}}: /bad/location ${{base}}: {cwd}", - ) - self.assertEqual( - cfa_file[...], f"file://${{base}}/{os.path.basename(tmpfile1)}" - ) - nc.close() + f = cf.read(tmpfile1, cfa_write="field")[0] - g = cf.read(tmpfile2) - self.assertEqual(len(g), 1) - self.assertTrue(f.equals(g[0])) + absuri_filename = PurePath(os.path.abspath(tmpfile1)).as_uri() + reluri_filename = os.path.basename(tmpfile1) - def test_CFA_absolute_paths(self): - """Test CFA 'absolute_paths' option to the cfa.write 'cfa' keyword.""" - f = cf.example_field(0) - cf.write(f, tmpfile1) - f = cf.read(tmpfile1)[0] - - for absolute_paths, filename in zip( - (True, False), - ( - PurePath(os.path.abspath(tmpfile1)).as_uri(), - os.path.basename(tmpfile1), - ), + for uri, filename in zip( + ("absolute", "relative"), (absuri_filename, reluri_filename) ): - cf.write(f, tmpfile2, cfa={"absolute_paths": absolute_paths}) + cf.write( + f, + cfa_file, + cfa={"constructs": "field", "uri": uri}, + ) - nc = netCDF4.Dataset(tmpfile2, "r") - cfa_file = nc.variables["cfa_file"] - self.assertEqual(cfa_file[...], filename) - nc.close() + g = cf.read(cfa_file)[0] + cf.write( + g, + cfa_file2, + cfa="field", + ) - g = cf.read(tmpfile2) - self.assertEqual(len(g), 1) - self.assertTrue(f.equals(g[0])) + nc = netCDF4.Dataset(cfa_file2, "r") + cfa_location = nc.variables["cfa_location"] + self.assertEqual(cfa_location[...], filename) + nc.close() def test_CFA_constructs(self): - """Test choice of constructs to write as CFA-netCDF variables.""" + """Test aggregation 'constructs' option to cf.write.""" f = cf.example_field(1) - f.del_construct("T") + f.del_construct("time") f.del_construct("long_name=Grid latitude name") cf.write(f, tmpfile1) - f = cf.read(tmpfile1)[0] + f = cf.read(tmpfile1, cfa_write="all")[0] # No constructs cf.write(f, tmpfile2, cfa={"constructs": []}) @@ -359,7 +211,6 @@ def test_CFA_constructs(self): ["dimension_coordinate"], {"dimension_coordinate": None}, {"dimension_coordinate": 1}, - 
{"dimension_coordinate": cf.eq(1)}, ): cf.write(f, tmpfile2, cfa={"constructs": constructs}) nc = netCDF4.Dataset(tmpfile2, "r") @@ -385,7 +236,7 @@ def test_CFA_constructs(self): # Dimension and auxiliary constructs for constructs in ( ["dimension_coordinate", "auxiliary_coordinate"], - {"dimension_coordinate": None, "auxiliary_coordinate": cf.ge(2)}, + {"dimension_coordinate": None, "auxiliary_coordinate": 2}, ): cf.write(f, tmpfile2, cfa={"constructs": constructs}) nc = netCDF4.Dataset(tmpfile2, "r") @@ -410,60 +261,119 @@ def test_CFA_constructs(self): nc.close() - def test_CFA_PP(self): - """Test writing CFA-netCDF with PP format fragments.""" - f = cf.read("file1.pp")[0] - cf.write(f, tmpfile1, cfa=True) + def test_CFA_scalar(self): + """Test scalar aggregation variable.""" + f = cf.example_field(0) + f = f[0, 0].squeeze() + cf.write(f, tmpfile1) + g = cf.read(tmpfile1, cfa_write="field")[0] + cf.write(g, cfa_file, cfa="field") + h = cf.read(cfa_file)[0] + self.assertTrue(h.equals(f)) + + def test_CFA_value(self): + """Test the value fragment array variable.""" + write = True + for aggregation_value_file in (self.aggregation_value, cfa_file): + f = cf.read(aggregation_value_file, cfa_write="all") + self.assertEqual(len(f), 1) + f = f[0] + fa = f.field_ancillary() + self.assertEqual(fa.shape, (12,)) + self.assertEqual(fa.data.chunks, ((3, 9),)) + self.assertEqual( + fa.data.nc_get_aggregation_fragment_type(), "unique_value" + ) + self.assertEqual( + fa.data.nc_get_aggregated_data(), + { + "map": "fragment_map_uid", + "unique_value": "fragment_value_uid", + }, + ) - # Check that only the fields have been aggregated - nc = netCDF4.Dataset(tmpfile1, "r") - for ncvar, var in nc.variables.items(): - attrs = var.ncattrs() - if ncvar in ("UM_m01s15i201_vn405",): - self.assertFalse(var.ndim) - self.assertIn("aggregated_dimensions", attrs) - self.assertIn("aggregated_data", attrs) - else: - self.assertNotIn("aggregated_dimensions", attrs) - 
self.assertNotIn("aggregated_data", attrs) + nc = netCDF4.Dataset(aggregation_value_file, "r") + fragment_value_uid = nc.variables["fragment_value_uid"][...] + nc.close() - nc.close() + self.assertTrue((fa[:3].array == fragment_value_uid[0]).all()) + self.assertTrue((fa[3:].array == fragment_value_uid[1]).all()) - g = cf.read(tmpfile1) - self.assertEqual(len(g), 1) - self.assertTrue(f.equals(g[0])) + if write: + cf.write(f, cfa_file) + write = False - def test_CFA_multiple_files(self): - """Test storing multiple CFA frgament locations.""" + def test_CFA_cfa(self): + """Test the cf.write 'cfa' keyword.""" f = cf.example_field(0) cf.write(f, tmpfile1) - f = cf.read(tmpfile1)[0] - f.add_file_location("/new/location") + f = cf.read(tmpfile1, cfa_write="field")[0] + cf.write(f, tmpfile2, cfa="field") + g = cf.read(tmpfile2, cfa_write="field")[0] + + # Default of cfa="auto" - check that aggregation variable + # gets written + cf.write(g, cfa_file) + nc = netCDF4.Dataset(cfa_file, "r") + self.assertIsNotNone( + getattr(nc.variables["q"], "aggregated_data", None) + ) + nc.close() - cf.write(f, tmpfile2, cfa=True) - g = cf.read(tmpfile2) - self.assertEqual(len(g), 1) - g = g[0] - self.assertTrue(f.equals(g)) + cf.write(g, cfa_file, cfa={"constructs": {"auto": 2}}) + nc = netCDF4.Dataset(cfa_file, "r") - self.assertEqual(len(g.data.get_filenames()), 2) - self.assertEqual(len(g.get_filenames()), 3) + self.assertIsNotNone( + getattr(nc.variables["q"], "aggregated_data", None) + ) + nc.close() - def test_CFA_unlimited_dimension(self): - """Test CFA with unlimited dimensions""" - # Create a CFA file from a field that has an unlimited - # dimension and no metadata constructs spanning that dimension + cf.write( + g, + cfa_file, + cfa={ + "constructs": ["auto", "dimension_coordinate"], + "strict": False, + }, + ) + nc = netCDF4.Dataset(cfa_file, "r") + for ncvar in ("q", "lat", "lon"): + self.assertIsNotNone( + getattr(nc.variables[ncvar], "aggregated_data", None) + ) + + 
nc.close() + + # Check bad values of cfa + for cfa in (False, True, (), []): + with self.assertRaises(ValueError): + cf.write(g, cfa_file, cfa=cfa) + + def test_CFA_subspace(self): + """Test the writing subspaces of aggregations.""" f = cf.example_field(0) - d = f.domain_axis("X") - d.nc_set_unlimited(True) - f.del_construct("X") - cf.write(f, tmpfile1) - g = cf.read(tmpfile1) - cf.write(g, tmpfile2, cfa=True) - # Check that the CFA file can be read - h = cf.read(tmpfile2) - self.assertEqual(len(h), 1) + cf.write(f[:2], tmpfile1) + cf.write(f[2:], tmpfile2) + + a = cf.read(tmpfile1, cfa_write="field")[0] + b = cf.read(tmpfile2, cfa_write="field")[0] + c = cf.Field.concatenate([a, b], axis=0) + + cf.write(c, cfa_file, cfa="field") + + f = cf.read(cfa_file, cfa_write="field")[0] + cf.write(f[:2], cfa_file2, cfa="field") + g = cf.read(cfa_file2)[0] + self.assertTrue(g.equals(a)) + + cf.write(f[2:], cfa_file2, cfa="field") + g = cf.read(cfa_file2)[0] + self.assertTrue(g.equals(b)) + + # Can't straddle Dask chunks + with self.assertRaises(AggregationError): + cf.write(f[1:3], cfa_file2, cfa="field") if __name__ == "__main__": diff --git a/cf/test/test_Data.py b/cf/test/test_Data.py index dcb28cc85f..e007e89b39 100644 --- a/cf/test/test_Data.py +++ b/cf/test/test_Data.py @@ -1490,8 +1490,9 @@ def test_Data__getitem__(self): self.assertTrue(e.equals(f)) # Chained subspaces reading from disk - f = cf.read(self.filename)[0] + f = cf.read(self.filename, netcdf_backend="h5netcdf")[0] d = f.data + a = d[:1, [1, 3, 4], :][:, [True, False, True], ::-2].array b = d.array[:1, [1, 3, 4], :][:, [True, False, True], ::-2] self.assertTrue((a == b).all()) @@ -2387,7 +2388,6 @@ def test_Data_BINARY_AND_UNARY_OPERATORS(self): self.assertTrue( e.equals(cf.Data(a, "m"), verbose=1), message ) - # --- End: for for x in (cf.Data(2, "metre"), cf.Data(2.0, "metre")): self.assertTrue( @@ -4150,14 +4150,6 @@ def test_Data_flat(self): list(d.flat(ignore_masked=False)), [1, np.ma.masked, 3, 4] ) - 
def test_Data_tolist(self): - """Test the Data.tolist""" - for x in (1, [1, 2], [[1, 2], [3, 4]]): - d = cf.Data(x) - e = d.tolist() - self.assertEqual(e, np.array(x).tolist()) - self.assertTrue(d.equals(cf.Data(e))) - def test_Data_masked_invalid(self): """Test the `masked_invalid` Data method.""" a = np.array([0, 1, 2]) @@ -4453,33 +4445,6 @@ def test_Data__init__datetime(self): self.assertTrue((q == d).array.all()) self.assertTrue((d == q).array.all()) - def test_Data_get_filenames(self): - """Test `Data.get_filenames`.""" - d = cf.Data.ones((5, 8), float, chunks=4) - self.assertEqual(d.get_filenames(), set()) - - f = cf.example_field(0) - cf.write(f, file_A) - cf.write(f, file_B) - - a = cf.read(file_A, dask_chunks=4)[0].data - b = cf.read(file_B, dask_chunks=4)[0].data - b += 999 - c = cf.Data(b.array, units=b.Units, chunks=4) - - d = cf.Data.concatenate([a, a + 999, b, c], axis=1) - self.assertEqual(d.shape, (5, 32)) - - self.assertEqual(d.get_filenames(), set([file_A, file_B])) - self.assertEqual(d[:, 2:7].get_filenames(), set([file_A])) - self.assertEqual(d[:, 2:14].get_filenames(), set([file_A])) - self.assertEqual(d[:, 2:20].get_filenames(), set([file_A, file_B])) - self.assertEqual(d[:, 2:30].get_filenames(), set([file_A, file_B])) - self.assertEqual(d[:, 29:30].get_filenames(), set()) - - d[2, 3] = -99 - self.assertEqual(d[2, 3].get_filenames(), set([file_A])) - def test_Data__str__(self): """Test `Data.__str__`""" elements0 = (0, -1, 1) @@ -4588,26 +4553,6 @@ def test_Data_convert_reference_time(self): self.assertEqual(e.Units, units) self.assertTrue((e.array == [72, 48, 24, 0]).all()) - def test_Data_clear_after_dask_update(self): - """Test Data._clear_after_dask_update.""" - d = cf.Data([1, 2, 3], "m") - dx = d.to_dask_array() - - d.first_element() - d.second_element() - d.last_element() - - self.assertTrue(d._get_cached_elements()) - - _ALL = cf.Data._ALL - _CACHE = cf.Data._CACHE - - d._set_dask(dx, clear=_ALL ^ _CACHE) - 
self.assertTrue(d._get_cached_elements()) - - d._set_dask(dx, clear=_ALL) - self.assertFalse(d._get_cached_elements()) - def test_Data_has_deterministic_name(self): """Test Data.has_deterministic_name""" d = cf.Data([1, 2], "m") @@ -4640,108 +4585,6 @@ def test_Data_get_deterministic_name(self): with self.assertRaises(ValueError): d.get_deterministic_name() - def test_Data_cfa_aggregated_data(self): - """Test `Data` CFA aggregated_data methods""" - d = cf.Data(9) - aggregated_data = { - "location": "cfa_location", - "file": "cfa_file", - "address": "cfa_address", - "format": "cfa_format", - "tracking_id": "tracking_id", - } - - self.assertFalse(d.cfa_has_aggregated_data()) - self.assertIsNone(d.cfa_set_aggregated_data(aggregated_data)) - self.assertTrue(d.cfa_has_aggregated_data()) - self.assertEqual(d.cfa_get_aggregated_data(), aggregated_data) - self.assertEqual(d.cfa_del_aggregated_data(), aggregated_data) - self.assertFalse(d.cfa_has_aggregated_data()) - self.assertEqual(d.cfa_get_aggregated_data(), {}) - self.assertEqual(d.cfa_del_aggregated_data(), {}) - - def test_Data_cfa_file_substitutions(self): - """Test `Data` CFA file_substitutions methods""" - d = cf.Data(9) - self.assertFalse(d.cfa_has_file_substitutions()) - self.assertIsNone( - d.cfa_update_file_substitutions({"base": "file:///data/"}) - ) - self.assertTrue(d.cfa_has_file_substitutions()) - self.assertEqual( - d.cfa_file_substitutions(), {"${base}": "file:///data/"} - ) - - d.cfa_update_file_substitutions({"${base2}": "/home/data/"}) - self.assertEqual( - d.cfa_file_substitutions(), - {"${base}": "file:///data/", "${base2}": "/home/data/"}, - ) - - d.cfa_update_file_substitutions({"${base}": "/new/location/"}) - self.assertEqual( - d.cfa_file_substitutions(), - {"${base}": "/new/location/", "${base2}": "/home/data/"}, - ) - self.assertEqual( - d.cfa_del_file_substitution("${base}"), - {"${base}": "/new/location/"}, - ) - self.assertEqual( - d.cfa_clear_file_substitutions(), {"${base2}": 
"/home/data/"} - ) - self.assertFalse(d.cfa_has_file_substitutions()) - self.assertEqual(d.cfa_file_substitutions(), {}) - self.assertEqual(d.cfa_clear_file_substitutions(), {}) - self.assertEqual(d.cfa_del_file_substitution("base"), {}) - - def test_Data_file_location(self): - """Test `Data` file location methods""" - f = cf.example_field(0) - - self.assertEqual( - f.data.add_file_location("/data/model/"), "/data/model" - ) - - cf.write(f, file_A) - d = cf.read(file_A, dask_chunks=4)[0].data - self.assertGreater(d.npartitions, 1) - - e = d.copy() - location = os.path.dirname(os.path.abspath(file_A)) - - self.assertEqual(d.file_locations(), set((location,))) - self.assertEqual(d.add_file_location("/data/model/"), "/data/model") - self.assertEqual(d.file_locations(), set((location, "/data/model"))) - - # Check that we haven't changed 'e' - self.assertEqual(e.file_locations(), set((location,))) - - self.assertEqual(d.del_file_location("/data/model/"), "/data/model") - self.assertEqual(d.file_locations(), set((location,))) - d.del_file_location("/invalid") - self.assertEqual(d.file_locations(), set((location,))) - - def test_Data_todict(self): - """Test Data.todict.""" - d = cf.Data([1, 2, 3, 4], chunks=2) - key = d.to_dask_array(_force_mask_hardness=False).name - - x = d.todict() - self.assertIsInstance(x, dict) - self.assertIn((key, 0), x) - self.assertIn((key, 1), x) - - e = d[0] - x = e.todict() - self.assertIn((key, 0), x) - self.assertNotIn((key, 1), x) - - x = e.todict(optimize_graph=False) - self.assertIsInstance(x, dict) - self.assertIn((key, 0), x) - self.assertIn((key, 1), x) - def test_Data_masked_values(self): """Test Data.masked_values.""" array = np.array([[1, 1.1, 2, 1.1, 3]]) diff --git a/cf/test/test_Field.py b/cf/test/test_Field.py index e282389ec0..c1122ac338 100644 --- a/cf/test/test_Field.py +++ b/cf/test/test_Field.py @@ -930,7 +930,38 @@ def test_Field_cell_area(self): def test_Field_radius(self): f = self.f.copy() - with 
self.assertRaises(Exception): + # Radius exists in coordinate references + a = cf.Data(6371007.0, "m") + + r = f.radius(default=None) + self.assertEqual(r.Units, cf.Units("m")) + self.assertEqual(r, a) + + cr = f.coordinate_reference( + "standard_name:atmosphere_hybrid_height_coordinate" + ) + cr.datum.set_parameter("earth_radius", cf.Data(5678, "km")) + + with self.assertRaises(ValueError): + f.radius(default=None) + + cr = f.coordinate_reference( + "standard_name:atmosphere_hybrid_height_coordinate" + ) + cr.datum.del_parameter("earth_radius") + + cr = f.coordinate_reference( + "grid_mapping_name:rotated_latitude_longitude" + ) + cr.datum.set_parameter("earth_radius", cf.Data([123, 456], "m")) + + # Radius doesn't exist in coordinate references + f = self.f.copy() + + for key in f.coordinate_references(todict=True): + f.del_construct(key) + + with self.assertRaises(ValueError): + f.radius() for default in ("earth", cf.field._earth_radius): @@ -957,51 +988,9 @@ def test_Field_radius(self): with self.assertRaises(ValueError): f.radius(default=[12, 34]) - with self.assertRaises(ValueError): - f.radius(default=[[12, 34]]) - with self.assertRaises(ValueError): f.radius(default="qwerty") - cr = f.coordinate_reference( - "grid_mapping_name:rotated_latitude_longitude" - ) - cr.datum.set_parameter("earth_radius", a.copy()) - - r = f.radius(default=None) - self.assertEqual(r.Units, cf.Units("m")) - self.assertEqual(r, a) - - cr = f.coordinate_reference( - "standard_name:atmosphere_hybrid_height_coordinate" - ) - cr.datum.set_parameter("earth_radius", a.copy()) - - r = f.radius(default=None) - self.assertEqual(r.Units, cf.Units("m")) - self.assertEqual(r, a) - - cr = f.coordinate_reference( - "standard_name:atmosphere_hybrid_height_coordinate" - ) - cr.datum.set_parameter("earth_radius", cf.Data(5678, "km")) - - with self.assertRaises(ValueError): - f.radius(default=None) - - cr = f.coordinate_reference( - "standard_name:atmosphere_hybrid_height_coordinate" - ) -
cr.datum.del_parameter("earth_radius") - - cr = f.coordinate_reference( - "grid_mapping_name:rotated_latitude_longitude" - ) - cr.datum.set_parameter("earth_radius", cf.Data([123, 456], "m")) - - with self.assertRaises(ValueError): - f.radius(default=None) - def test_Field_set_get_del_has_data(self): f = self.f.copy() @@ -1879,15 +1868,17 @@ def test_Field_match_by_construct(self): self.assertTrue(f.match_by_construct("X", "latitude", OR=OR)) self.assertTrue(f.match_by_construct("X", "Y", OR=OR)) self.assertTrue(f.match_by_construct("X", "Y", "latitude", OR=OR)) - self.assertTrue(f.match_by_construct("grid_latitude: max", OR=OR)) + self.assertTrue( + f.match_by_construct("grid_latitude: maximum", OR=OR) + ) self.assertTrue( f.match_by_construct( - "grid_longitude: mean grid_latitude: max", OR=OR + "grid_longitude: mean grid_latitude: maximum", OR=OR ) ) - self.assertTrue(f.match_by_construct("X", "method:max", OR=OR)) + self.assertTrue(f.match_by_construct("X", "method:maximum", OR=OR)) self.assertTrue( - f.match_by_construct("X", "grid_latitude: max", OR=OR) + f.match_by_construct("X", "grid_latitude: maximum", OR=OR) ) self.assertFalse(f.match_by_construct("qwerty")) @@ -1898,12 +1889,12 @@ def test_Field_match_by_construct(self): self.assertTrue(f.match_by_construct("X", "qwerty", OR=True)) self.assertTrue( f.match_by_construct( - "X", "qwerty", "method:max", "over:years", OR=True + "X", "qwerty", "method:maximum", "over:years", OR=True ) ) self.assertTrue( f.match_by_construct( - "X", "qwerty", "grid_latitude: max", "over:years", OR=True + "X", "qwerty", "grid_latitude: maximum", "over:years", OR=True ) ) @@ -2943,28 +2934,6 @@ def test_Field_subspace_ugrid(self): self.assertTrue(g.aux("X").data.range() < 30) self.assertTrue(g.aux("Y").data.range() < 50) - def test_Field_file_location(self): - f = cf.example_field(0) - - self.assertEqual(f.add_file_location("/data/model/"), "/data/model") - - cf.write(f, tmpfile) - f = cf.read(tmpfile)[0] - g = f.copy() - 
location = os.path.dirname(os.path.abspath(tmpfile)) - - self.assertEqual(f.file_locations(), set((location,))) - self.assertEqual(f.add_file_location("/data/model/"), "/data/model") - self.assertEqual(f.file_locations(), set((location, "/data/model"))) - - # Check that we haven't changed 'g' - self.assertEqual(g.file_locations(), set((location,))) - - self.assertEqual(f.del_file_location("/data/model/"), "/data/model") - self.assertEqual(f.file_locations(), set((location,))) - f.del_file_location("/invalid") - self.assertEqual(f.file_locations(), set((location,))) - def test_Field_pad_missing(self): """Test Field.pad_missing.""" f = cf.example_field(0) diff --git a/cf/test/test_NetCDF4Array.py b/cf/test/test_NetCDF4Array.py deleted file mode 100644 index 0d049ff497..0000000000 --- a/cf/test/test_NetCDF4Array.py +++ /dev/null @@ -1,171 +0,0 @@ -import atexit -import datetime -import faulthandler -import os -import tempfile -import unittest - -import numpy as np -from dask.base import tokenize - -faulthandler.enable() # to debug seg faults and timeouts - -import cf - -n_tmpfiles = 1 -tmpfiles = [ - tempfile.mkstemp("_test_NetCDF4Array.nc", dir=os.getcwd())[1] - for i in range(n_tmpfiles) -] -(tmpfile1,) = tmpfiles - - -def _remove_tmpfiles(): - """Try to remove defined temporary files by deleting their paths.""" - for f in tmpfiles: - try: - os.remove(f) - except OSError: - pass - - -atexit.register(_remove_tmpfiles) - - -class NetCDF4ArrayTest(unittest.TestCase): - n = cf.NetCDF4Array( - filename="filename.nc", - address="x", - shape=(5, 8), - dtype=np.dtype(float), - ) - - def test_NetCDF4Array_del_file_location(self): - a = cf.NetCDF4Array(("/data1/file1", "/data2/file2"), ("tas1", "tas2")) - b = a.del_file_location("/data1") - self.assertIsNot(b, a) - self.assertEqual(b.get_filenames(), ("/data2/file2",)) - self.assertEqual(b.get_addresses(), ("tas2",)) - - a = cf.NetCDF4Array( - ("/data1/file1", "/data2/file1", "/data2/file2"), - ("tas1", "tas1", "tas2"), - ) - 
b = a.del_file_location("/data2") - self.assertEqual(b.get_filenames(), ("/data1/file1",)) - self.assertEqual(b.get_addresses(), ("tas1",)) - - # Can't be left with no files - self.assertEqual(b.file_locations(), ("/data1",)) - with self.assertRaises(ValueError): - b.del_file_location("/data1/") - - def test_NetCDF4Array_file_locations(self): - a = cf.NetCDF4Array("/data1/file1") - self.assertEqual(a.file_locations(), ("/data1",)) - - a = cf.NetCDF4Array(("/data1/file1", "/data2/file2")) - self.assertEqual(a.file_locations(), ("/data1", "/data2")) - - a = cf.NetCDF4Array(("/data1/file1", "/data2/file2", "/data1/file2")) - self.assertEqual(a.file_locations(), ("/data1", "/data2", "/data1")) - - def test_NetCDF4Array_add_file_location(self): - a = cf.NetCDF4Array("/data1/file1", "tas") - b = a.add_file_location("/home/user") - self.assertIsNot(b, a) - self.assertEqual( - b.get_filenames(), ("/data1/file1", "/home/user/file1") - ) - self.assertEqual(b.get_addresses(), ("tas", "tas")) - - a = cf.NetCDF4Array(("/data1/file1", "/data2/file2"), ("tas1", "tas2")) - b = a.add_file_location("/home/user") - self.assertEqual( - b.get_filenames(), - ( - "/data1/file1", - "/data2/file2", - "/home/user/file1", - "/home/user/file2", - ), - ) - self.assertEqual(b.get_addresses(), ("tas1", "tas2", "tas1", "tas2")) - - a = cf.NetCDF4Array(("/data1/file1", "/data2/file1"), ("tas1", "tas2")) - b = a.add_file_location("/home/user") - self.assertEqual( - b.get_filenames(), - ("/data1/file1", "/data2/file1", "/home/user/file1"), - ) - self.assertEqual(b.get_addresses(), ("tas1", "tas2", "tas1")) - - a = cf.NetCDF4Array(("/data1/file1", "/data2/file1"), ("tas1", "tas2")) - b = a.add_file_location("/data1/") - self.assertEqual(b.get_filenames(), a.get_filenames()) - self.assertEqual(b.get_addresses(), a.get_addresses()) - - def test_NetCDF4Array__dask_tokenize__(self): - a = cf.NetCDF4Array("/data1/file1", "tas", shape=(12, 2), mask=False) - self.assertEqual(tokenize(a), tokenize(a.copy())) 
- - b = cf.NetCDF4Array("/home/file2", "tas", shape=(12, 2)) - self.assertNotEqual(tokenize(a), tokenize(b)) - - def test_NetCDF4Array_multiple_files(self): - f = cf.example_field(0) - cf.write(f, tmpfile1) - - # Create instance with non-existent file - n = cf.NetCDF4Array( - filename=os.path.join("/bad/location", os.path.basename(tmpfile1)), - address=f.nc_get_variable(), - shape=f.shape, - dtype=f.dtype, - ) - # Add file that exists - n = n.add_file_location(os.path.dirname(tmpfile1)) - - self.assertEqual(len(n.get_filenames()), 2) - self.assertTrue((n[...] == f.array).all()) - - def test_NetCDF4Array_shape(self): - shape = (12, 73, 96) - a = cf.NetCDF4Array("/home/file2", "tas", shape=shape) - self.assertEqual(a.shape, shape) - self.assertEqual(a.original_shape, shape) - a = a[::2] - self.assertEqual(a.shape, (shape[0] // 2,) + shape[1:]) - self.assertEqual(a.original_shape, shape) - - def test_NetCDF4Array_index(self): - shape = (12, 73, 96) - a = cf.NetCDF4Array("/home/file2", "tas", shape=shape) - self.assertEqual( - a.index(), - ( - slice( - None, - ), - ) - * len(shape), - ) - a = a[8:7:-1, 10:19:3, [15, 1, 4, 12]] - a = a[[0], [True, False, True], ::-2] - self.assertEqual(a.shape, (1, 2, 2)) - self.assertEqual( - a.index(), - (slice(8, 9, None), slice(10, 17, 6), slice(12, -1, -11)), - ) - - index = a.index(conform=False) - self.assertTrue((index[0] == [8]).all()) - self.assertTrue((index[1] == [10, 16]).all()) - self.assertTrue((index[2] == [12, 1]).all()) - - -if __name__ == "__main__": - print("Run date:", datetime.datetime.now()) - cf.environment() - print() - unittest.main(verbosity=2) diff --git a/cf/test/test_aggregate.py b/cf/test/test_aggregate.py index 53c0b9b938..e4684d6f87 100644 --- a/cf/test/test_aggregate.py +++ b/cf/test/test_aggregate.py @@ -326,7 +326,7 @@ def test_aggregate_relaxed_units(self): self.assertEqual(i.Units.__dict__, bad_units.__dict__) self.assertTrue((i.array == f.array).all()) - def test_aggregate_field_ancillaries(self): 
+ def test_aggregate_promote_field_ancillaries(self): f = cf.example_field(0) self.assertFalse(f.field_ancillaries()) @@ -341,7 +341,7 @@ def test_aggregate_field_ancillaries(self): self.assertEqual(len(c.field_ancillaries()), 1) anc = c.field_ancillary() - self.assertEqual(anc.shape, c.shape) + self.assertEqual(anc.shape, f.shape[:1]) self.assertTrue((anc[:2] == "bar_a").all()) self.assertTrue((anc[2:] == "bar_b").all()) diff --git a/cf/test/test_functions.py b/cf/test/test_functions.py index ad7a59c4f3..fa7dc24458 100644 --- a/cf/test/test_functions.py +++ b/cf/test/test_functions.py @@ -32,9 +32,9 @@ def test_keyword_deprecation(self): # Use as test case 'i' kwarg, the deprecated old name for # 'inplace': f = cf.example_field(0) - f.squeeze(inplace=True) # new way to specify operation tested below + f.flip(inplace=True) # new way to specify operation tested below with self.assertRaises(cf.functions.DeprecationError): - f.squeeze(i=True) + f.flip(i=True) def test_aliases(self): self.assertEqual(cf.log_level(), cf.LOG_LEVEL()) @@ -367,9 +367,6 @@ def test_size(self): x = da.arange(9) self.assertEqual(cf.size(x), x.size) - def test_CFA(self): - self.assertEqual(cf.CFA(), cf.__cfa_version__) - def test_normalize_slice(self): self.assertEqual(cf.normalize_slice(slice(1, 4), 8), slice(1, 4, 1)) self.assertEqual(cf.normalize_slice(slice(None), 8), slice(0, 8, 1)) diff --git a/cf/test/test_pp.py b/cf/test/test_pp.py index ce75fe1b81..08a85a4eef 100644 --- a/cf/test/test_pp.py +++ b/cf/test/test_pp.py @@ -112,7 +112,7 @@ def test_PP_WGDOS_UNPACKING(self): f = cf.read(self.ppfile)[0] - for cfa in (False, True): + for cfa in (None, "auto"): cf.write(f, tmpfile, cfa=cfa) g = cf.read(tmpfile)[0] diff --git a/cf/test/test_read_write.py b/cf/test/test_read_write.py index 613280eaf3..d4a0c1726e 100644 --- a/cf/test/test_read_write.py +++ b/cf/test/test_read_write.py @@ -9,6 +9,7 @@ import unittest import numpy as np +from cfdm.read_write.exceptions import DatasetTypeError 
faulthandler.enable() # to debug seg faults and timeouts @@ -42,11 +43,13 @@ def _remove_tmpfiles(): atexit.register(_remove_tmpfiles) +filename = os.path.join( + os.path.dirname(os.path.abspath(__file__)), "test_file.nc" +) + class read_writeTest(unittest.TestCase): - filename = os.path.join( - os.path.dirname(os.path.abspath(__file__)), "test_file.nc" - ) + filename = filename broken_bounds = os.path.join( os.path.dirname(os.path.abspath(__file__)), "broken_bounds.cdl" @@ -58,6 +61,7 @@ class read_writeTest(unittest.TestCase): chunk_sizes = (100000, 300) + f = cf.read(filename)[0] f0 = cf.example_field(0) f1 = cf.example_field(1) @@ -171,9 +175,8 @@ def test_read_directory(self): def test_read_select(self): # select on field list - f = cf.read(self.filename, select="eastward_wind") - g = cf.read(self.filename) - self.assertTrue(f.equals(g, verbose=2), "Bad read with select keyword") + f = cf.read(self.filename, select="eastward_wind")[0] + self.assertTrue(f.equals(self.f)) def test_read_squeeze(self): # select on field list @@ -188,7 +191,7 @@ def test_read_aggregate(self): cf.read(self.filename, aggregate={}) def test_read_extra(self): - # Test field keyword of cf.read + # Test 'extra' keyword of cf.read filename = self.filename f = cf.read(filename) @@ -242,8 +245,7 @@ def test_read_write_format(self): cf.write(self.f1, tmpfile) f = cf.read(tmpfile)[0] - # TODO: reinstate "CFA" at version > 3.14 - for fmt in self.netcdf_fmts: # + ["CFA"]: + for fmt in self.netcdf_fmts: cf.write(f, tmpfile2, fmt=fmt) g = cf.read(tmpfile2, verbose=0) self.assertEqual(len(g), 1) @@ -256,7 +258,7 @@ def test_read_write_format(self): def test_write_netcdf_mode(self): """Test the `mode` parameter to `write`, notably append mode.""" - g = cf.read(self.filename) # note 'g' has one field + g = self.f.copy() # Test special case #1: attempt to append fields with groups # (other than 'root') which should be forbidden. 
Using fmt="NETCDF4" @@ -264,16 +266,16 @@ def test_write_netcdf_mode(self): # # Note: this is not the most natural test to do first, but putting # it before the rest reduces spurious seg faults for me, so... - g[0].nc_set_variable_groups(["forecast", "model"]) + g.nc_set_variable_groups(["forecast", "model"]) cf.write(g, tmpfile, fmt="NETCDF4", mode="w") # 1. overwrite to wipe f = cf.read(tmpfile) with self.assertRaises(ValueError): - cf.write(g[0], tmpfile, fmt="NETCDF4", mode="a") + cf.write(g, tmpfile, fmt="NETCDF4", mode="a") # Test special case #2: attempt to append fields with contradictory # featureType to the original file: - g[0].nc_clear_variable_groups() - g[0].nc_set_global_attribute("featureType", "profile") + g.nc_clear_variable_groups() + g.nc_set_global_attribute("featureType", "profile") cf.write( g, tmpfile, @@ -286,20 +288,20 @@ def test_write_netcdf_mode(self): with self.assertRaises(ValueError): cf.write(h, tmpfile, fmt="NETCDF4", mode="a") # Now remove featureType attribute for subsquent tests: - g_attrs = g[0].nc_clear_global_attributes() + g_attrs = g.nc_clear_global_attributes() del g_attrs["featureType"] - g[0].nc_set_global_attributes(g_attrs) + g.nc_set_global_attributes(g_attrs) # Set a non-trivial (i.e. 
not only 'Conventions') global attribute to # make the global attribute testing more robust: add_global_attr = ["remark", "A global comment."] - original_global_attrs = g[0].nc_global_attributes() + original_global_attrs = g.nc_global_attributes() original_global_attrs[add_global_attr[0]] = None # -> None on fields - g[0].nc_set_global_attribute(*add_global_attr) + g.nc_set_global_attribute(*add_global_attr) # First test a bad mode value: with self.assertRaises(ValueError): - cf.write(g[0], tmpfile, mode="g") + cf.write(g, tmpfile, mode="g") g_copy = g.copy() @@ -318,7 +320,7 @@ def test_write_netcdf_mode(self): new_length = 1 # since 1 == len(g) self.assertEqual(len(f), new_length) # Ignore as 'remark' should be 'None' on the field as tested below - self.assertTrue(f[0].equals(g[0], ignore_properties=["remark"])) + self.assertTrue(f[0].equals(g, ignore_properties=["remark"])) self.assertEqual( f[0].nc_global_attributes(), original_global_attrs ) @@ -536,11 +538,11 @@ def test_write_netcdf_mode(self): cf.write(g, tmpfile, fmt=fmt, mode="w") # 1. overwrite to wipe cf.write(g_copy, tmpfile, fmt=fmt, mode="a") # 2. 
now append f = cf.read(tmpfile) - self.assertEqual(len(f), 2 * len(g)) + self.assertEqual(len(f), 2) self.assertTrue( any( [ - file_field.equals(g[0], ignore_properties=["remark"]) + file_field.equals(g, ignore_properties=["remark"]) for file_field in f ] ) @@ -550,9 +552,8 @@ def test_write_netcdf_mode(self): ) def test_read_write_netCDF4_compress_shuffle(self): - f = cf.read(self.filename)[0] - # TODODASK: reinstate "CFA4" at version > 3.14 - for fmt in ("NETCDF4", "NETCDF4_CLASSIC"): # , "CFA4"): + f = self.f + for fmt in ("NETCDF4", "NETCDF4_CLASSIC"): cf.write(f, tmpfile, fmt=fmt, compress=1, shuffle=True) g = cf.read(tmpfile)[0] self.assertTrue( @@ -561,7 +562,7 @@ def test_read_write_netCDF4_compress_shuffle(self): ) def test_write_datatype(self): - f = cf.read(self.filename)[0] + f = self.f self.assertEqual(f.dtype, np.dtype(float)) cf.write( f, @@ -570,36 +571,23 @@ def test_write_datatype(self): datatype={np.dtype(float): np.dtype("float32")}, ) g = cf.read(tmpfile)[0] - self.assertEqual( - g.dtype, - np.dtype("float32"), - "datatype read in is " + str(g.dtype), - ) + self.assertEqual(g.dtype, np.dtype("float32")) # Keyword single - f = cf.read(self.filename)[0] self.assertEqual(f.dtype, np.dtype(float)) cf.write(f, tmpfile, fmt="NETCDF4", single=True) g = cf.read(tmpfile)[0] - self.assertEqual( - g.dtype, - np.dtype("float32"), - "datatype read in is " + str(g.dtype), - ) + self.assertEqual(g.dtype, np.dtype("float32")) # Keyword double f = g self.assertEqual(f.dtype, np.dtype("float32")) - cf.write(f, tmpfile2, fmt="NETCDF4", double=True) - g = cf.read(tmpfile2)[0] - self.assertEqual( - g.dtype, np.dtype(float), "datatype read in is " + str(g.dtype) - ) + cf.write(f, tmpfile1, fmt="NETCDF4", double=True) + g = cf.read(tmpfile1)[0] + self.assertEqual(g.dtype, np.dtype(float)) - for single in (True, False): - for double in (True, False): - with self.assertRaises(Exception): - cf.write(g, double=double, single=single) + with self.assertRaises(Exception): + 
cf.write(g, double=True, single=True) datatype = {np.dtype(float): np.dtype("float32")} with self.assertRaises(Exception): @@ -684,7 +672,7 @@ def test_read_CDL(self): check=True, ) - f0 = cf.read(self.filename)[0] + f0 = self.f # Case (1) as above, so read in and check the fields are as should be f = cf.read(tmpfile)[0] @@ -709,6 +697,9 @@ def test_read_CDL(self): def test_read_cdl_string(self): """Test the `cdl_string` keyword of the `read` function.""" + f = self.f0 + cf.write(f, tmpfile0) + # Test CDL in full, header-only and coordinate-only type: tempfile_to_option_mapping = { tmpfile: None, @@ -718,41 +709,28 @@ def test_read_cdl_string(self): for tempf, option in tempfile_to_option_mapping.items(): # Set up the CDL string to test... - command_to_run = ["ncdump", self.filename, ">", tempf] + command_to_run = ["ncdump", tmpfile0, ">", tempf] if option: command_to_run.insert(1, option) + subprocess.run(" ".join(command_to_run), shell=True, check=True) - with open(tempf, "r") as file: - cdl_string_1 = file.read() - - # ... and now test it as an individual string input - f_from_str = cf.read(cdl_string_1, cdl_string=True) - f_from_file = cf.read(tempf) # len 1 so only one field to check - self.assertEqual(len(f_from_str), len(f_from_file)) - self.assertEqual(f_from_str[0], f_from_file[0]) - - # ... and test further by inputting it in duplicate as a sequence - f_from_str = cf.read([cdl_string_1, cdl_string_1], cdl_string=True) - f_from_file = cf.read(tempf) # len 1 so only one field to check - self.assertEqual(len(f_from_str), 2 * len(f_from_file)) - self.assertEqual(f_from_str[0], f_from_file[0]) - self.assertEqual(f_from_str[1], f_from_file[0]) - - # Check compatibility with the `fmt` kwarg. - f0 = cf.read(cdl_string_1, cdl_string=True, fmt="CDL") # fine - self.assertEqual(len(f0), len(f_from_file)) - self.assertEqual(f0[0], f_from_file[0]) - # If the 'fmt' and 'cdl_string' values contradict each other, - # alert the user to this. 
Note that the default fmt is None but - # it then gets interpreted as NETCDF, so default fmt is fine and - # it is tested in f_from_str above where fmt is not set. + with open(tempf, "rt") as fh: + cdl_string_1 = fh.read() + + for cdl_input in (cdl_string_1, (cdl_string_1,)): + f_from_str = cf.read(cdl_input, cdl_string=True) + self.assertEqual(len(f_from_str), 1) + self.assertEqual(f_from_str[0], f) + + # Check compatibility with the 'file_type' kwarg. + for file_type in ("netCDF", "CDL", "UM", ()): with self.assertRaises(ValueError): - f0 = cf.read(cdl_string_1, cdl_string=True, fmt="NETCDF") + cf.read(cdl_string_1, cdl_string=True, file_type=file_type) # If the user forgets the cdl_string=True argument they will - # accidentally attempt to create a file with a very long name of - # the CDL string, which will in most, if not all, cases result in - # an "OSError: [Errno 36] File name too long" error: + # accidentally attempt to create a file with a very long name + # of the CDL string, which will in most, if not all, cases + # result in an "OSError: [Errno 36] File name too long" error: with self.assertRaises(OSError): cf.read(cdl_string_1) @@ -790,7 +768,7 @@ def test_read_broken_bounds(self): self.assertEqual(len(f), 2) def test_write_coordinates(self): - f = cf.example_field(0) + f = self.f0 cf.write(f, tmpfile, coordinates=True) g = cf.read(tmpfile) @@ -799,7 +777,7 @@ def test_write_coordinates(self): self.assertTrue(g[0].equals(f)) def test_read_write_domain(self): - f = cf.read(self.filename)[0] + f = self.f d = f.domain # 1 domain @@ -846,7 +824,7 @@ def test_read_write_domain(self): def test_write_omit_data(self): """Test the `omit_data` parameter to `write`.""" - f = cf.example_field(1) + f = self.f1 cf.write(f, tmpfile) cf.write(f, tmpfile, omit_data="all") @@ -878,16 +856,69 @@ def test_write_omit_data(self): self.assertTrue(np.ma.count(g.construct("grid_latitude").array)) @unittest.skipUnless( - False, "URL TEST: UNRELIABLE FLAKEY URL DESTINATION. 
TODO REPLACE URL" + True, "URL TEST: UNRELIABLE FLAKEY URL DESTINATION. TODO REPLACE URL" ) def test_read_url(self): """Test reading urls.""" for scheme in ("http", "https"): - remote = f"{scheme}://psl.noaa.gov/thredds/dodsC/Datasets/cru/crutem5/Monthlies/air.mon.anom.nobs.nc" + remote = f"{scheme}:///psl.noaa.gov/thredds/dodsC/Datasets/cru/crutem5/Monthlies/air.mon.anom.nobs.nc" # Check that cf can access it f = cf.read(remote) self.assertEqual(len(f), 1) + def test_read_file_type(self): + """Test the cf.read 'file_type' keyword.""" + # netCDF file + for file_type in ( + None, + "netCDF", + ("netCDF",), + ("netCDF", "CDL"), + ("netCDF", "bad value"), + ): + f = cf.read(self.filename, file_type=file_type) + self.assertEqual(len(f), 1) + + for file_type in ("CDL", "bad value", ()): + f = cf.read(self.filename, file_type=file_type) + self.assertEqual(len(f), 0) + + # CDL file + subprocess.run( + " ".join(["ncdump", self.filename, ">", tmpfile]), + shell=True, + check=True, + ) + for file_type in ( + None, + "CDL", + ("netCDF", "CDL"), + ("CDL", "bad value"), + ): + f = cf.read(tmpfile, file_type=file_type) + self.assertEqual(len(f), 1) + + for file_type in ("netCDF", "bad value", ()): + f = cf.read(tmpfile, file_type=file_type) + self.assertEqual(len(f), 0) + + # UM file + for file_type in (None, "UM", ("UM",), ("UM", "bad value")): + f = cf.read("file1.pp", file_type=file_type) + self.assertEqual(len(f), 1) + + for file_type in ("netCDF", "bad value", ()): + f = cf.read("file1.pp", file_type=file_type) + self.assertEqual(len(f), 0) + + # Not a netCDF, CDL, or UM file + with self.assertRaises(DatasetTypeError): + f = cf.read("test_read_write.py") + + for file_type in ("netCDF", "CDL", "bad value", ()): + f = cf.read("test_read_write.py", file_type=file_type) + self.assertEqual(len(f), 0) + if __name__ == "__main__": print("Run date:", datetime.datetime.now()) diff --git a/cf/umread_lib/umfile.py b/cf/umread_lib/umfile.py index 94fbb33043..7183653e64 100644 --- 
a/cf/umread_lib/umfile.py +++ b/cf/umread_lib/umfile.py @@ -2,6 +2,7 @@ from functools import cmp_to_key import numpy +from cfdm.read_write.exceptions import DatasetTypeError from . import cInterface from .extraData import ExtraDataUnpacker @@ -132,7 +133,9 @@ def _detect_file_type(self): file_type_obj = c.detect_file_type(self.fd) except Exception: self.close_fd() - raise IOError(f"File {self.path} has unsupported format") + raise DatasetTypeError( + f"Can't open {self.path} as a PP or UM dataset" + ) d = c.file_type_obj_to_dict(file_type_obj) self.fmt = d["fmt"] diff --git a/docs/source/installation.rst b/docs/source/installation.rst index a39acd77d6..8ea7cd5d11 100644 --- a/docs/source/installation.rst +++ b/docs/source/installation.rst @@ -206,14 +206,14 @@ Required * `h5netcdf `_, version 1.3.0 newer. -* `h5py `_, version 3.10.0 or newer. +* `h5py `_, version 3.12.0 or newer. * `s3fs `_, version 2024.6.0 or newer. * `scipy `_, version 1.10.0 or newer. -* `cfdm `_, version 1.11.2.0 or up to, - but not including, 1.11.3.0. +* `cfdm `_, version 1.12.0.0 or up to, + but not including, 1.12.1.0. * `cfunits `_, version 3.3.7 or newer. 
diff --git a/requirements.txt b/requirements.txt index 94886c0d57..755493c32b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,12 +1,12 @@ netCDF4>=1.6.5 cftime>=1.6.2 numpy>=1.22,<2.0 -cfdm>=1.11.2.0, <1.11.3.0 +cfdm>=1.12.0.0, <1.12.1.0 psutil>=0.6.0 cfunits>=3.3.7 dask>=2024.6.0,<=2024.7.1 packaging>=20.0 scipy>=1.10.0 h5netcdf>=1.3.0 -h5py>=3.10.0 +h5py>=3.12.0 s3fs>=2024.6.0 diff --git a/setup.py b/setup.py index 326252b4af..af397e6c93 100755 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ import subprocess from distutils.command.build import build -from setuptools import setup +from setuptools import find_packages, setup def find_package_data_files(directory): @@ -300,26 +300,7 @@ def compile(): "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", ], - packages=[ - "cf", - "cf.mixin", - "cf.mixin2", - "cf.data", - "cf.data.array", - "cf.data.array.abstract", - "cf.data.array.mixin", - "cf.data.collapse", - "cf.data.fragment", - "cf.data.fragment.mixin", - "cf.data.mixin", - "cf.docstring", - "cf.read_write", - "cf.read_write.um", - "cf.read_write.netcdf", - "cf.regrid", - "cf.umread_lib", - "cf.test", - ], + packages=find_packages(), package_data={"cf": package_data}, scripts=["scripts/cfa"], python_requires=">=3.8", diff --git a/test_release b/test_release index 6c6ac71dc0..16b2567253 100755 --- a/test_release +++ b/test_release @@ -16,9 +16,9 @@ vn=$1 dir=~/tmp/test_cf-python rm -fr $dir -tar ztvf dist/cf-python-$vn.tar.gz +tar ztvf dist/cf_python-$vn.tar.gz -pip install dist/cf-python-$vn.tar.gz -t $dir --no-deps +pip install dist/cf_python-$vn.tar.gz -t $dir --no-deps export PYTHONPATH=$dir:$PYTHONPATH export PATH=$dir/bin:$PATH diff --git a/upload_to_pypi b/upload_to_pypi index 4957506498..b22fda2b6e 100755 --- a/upload_to_pypi +++ b/upload_to_pypi @@ -19,7 +19,6 @@ fi version=$1 tarball=dist/cf_python-${version}.tar.gz -mv dist/cf-python-${version}.tar.gz dist/$tarball if [[ ! 
-f $tarball ]] ; then echo "Tar ball for version $version does not exist: $tarball"