From ce39fd5cfa04117ada0189b178ab66643bec23b7 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Thu, 7 Nov 2024 17:05:39 +0000 Subject: [PATCH 01/51] dev --- cf/__init__.py | 21 +- cf/aggregate.py | 8 - cf/cfimplementation.py | 78 +- cf/data/array/__init__.py | 5 +- cf/data/array/aggregatedarray.py | 17 + cf/data/array/cfah5netcdfarray.py | 20 +- cf/data/array/cfanetcdf4array.py | 20 +- cf/data/array/mixin/__init__.py | 2 +- cf/data/array/umarray.py | 1 + cf/data/data.py | 1044 ++++++-------- cf/data/fragment/__init__.py | 10 +- cf/field.py | 308 +---- cf/functions.py | 9 +- cf/mixin2/cfanetcdf.py | 976 ++++++------- cf/read_write/netcdf/netcdfread.py | 1984 ++++++++++++++------------- cf/read_write/netcdf/netcdfwrite.py | 1735 +++++++++++------------ cf/read_write/read.py | 61 +- cf/read_write/um/umread.py | 2 +- cf/read_write/write.py | 107 +- cf/test/create_test_files.py | 82 ++ cf/test/setup_create_field.py | 11 +- cf/test/test_CFA.py | 962 ++++++++++--- cf/test/test_Data.py | 158 --- cf/test/test_Field.py | 131 +- cf/test/test_functions.py | 3 - cf/test/test_pp.py | 2 +- 26 files changed, 4014 insertions(+), 3743 deletions(-) create mode 100644 cf/data/array/aggregatedarray.py diff --git a/cf/__init__.py b/cf/__init__.py index 9e630d86ea..dc525e0f87 100644 --- a/cf/__init__.py +++ b/cf/__init__.py @@ -105,7 +105,7 @@ raise ImportError(_error0 + str(error1)) __cf_version__ = cfdm.core.__cf_version__ -__cfa_version__ = "0.6.2" +#__cfa_version__ = "0.6.2" from packaging.version import Version import importlib.util @@ -206,8 +206,8 @@ ) # Check the version of cfdm -_minimum_vn = "1.11.2.0" -_maximum_vn = "1.11.3.0" +_minimum_vn = "1.12.0.0" +_maximum_vn = "1.12.1.0" _cfdm_version = Version(cfdm.__version__) if not Version(_minimum_vn) <= _cfdm_version < Version(_maximum_vn): raise RuntimeError( @@ -273,10 +273,11 @@ from .field import Field from .data import Data from .data.array import ( + AggregatedArray, BoundsFromNodesArray, CellConnectivityArray, - 
CFAH5netcdfArray, - CFANetCDF4Array, +# CFAH5netcdfArray, +# CFANetCDF4Array, FullArray, GatheredArray, H5netcdfArray, @@ -290,11 +291,11 @@ UMArray, ) -from .data.fragment import ( - FullFragmentArray, - NetCDFFragmentArray, - UMFragmentArray, -) +#from .data.fragment import ( +# FullFragmentArray, +# NetCDFFragmentArray, +# UMFragmentArray, +#) from .aggregate import aggregate, climatology_cells from .query import ( diff --git a/cf/aggregate.py b/cf/aggregate.py index c5207e5427..632e99c0a2 100644 --- a/cf/aggregate.py +++ b/cf/aggregate.py @@ -2088,13 +2088,6 @@ def promote_to_field_ancillary(self, properties): ancillary construct that spans the entire domain, with the constant value of the property. - The `Data` of any new field ancillary construct is marked - as a CFA term, meaning that it will only be written to disk if - the parent field construct is written as a CFA aggregation - variable, and in that case the field ancillary is written as a - non-standard CFA aggregation instruction variable, rather than - a CF-netCDF ancillary variable. - If a domain construct is being aggregated then it is always returned unchanged. 
@@ -2125,7 +2118,6 @@ def promote_to_field_ancillary(self, properties): data = Data( FullArray(value, shape=f.shape, dtype=np.array(value).dtype) ) - data._cfa_set_term(True) field_anc = FieldAncillary( data=data, properties={"long_name": prop}, copy=False diff --git a/cf/cfimplementation.py b/cf/cfimplementation.py index 118e73ce7a..6ae37806cb 100644 --- a/cf/cfimplementation.py +++ b/cf/cfimplementation.py @@ -27,10 +27,11 @@ ) from .data import Data from .data.array import ( + AggregatedArray, BoundsFromNodesArray, CellConnectivityArray, - CFAH5netcdfArray, - CFANetCDF4Array, +# CFAH5netcdfArray, + # CFANetCDF4Array, GatheredArray, H5netcdfArray, NetCDF4Array, @@ -114,49 +115,50 @@ def set_construct(self, parent, construct, axes=None, copy=True, **kwargs): parent, construct, axes=axes, copy=copy, **kwargs ) - def initialise_CFANetCDF4Array(self, **kwargs): - """Return a `CFANetCDF4Array` instance. - - :Parameters: - - kwargs: optional - Initialisation parameters to pass to the new instance. - - :Returns: - - `CFANetCDF4Array` - - """ - cls = self.get_class("CFANetCDF4Array") - return cls(**kwargs) - - def initialise_CFAH5netcdfArray(self, **kwargs): - """Return a `CFAH5netcdfArray` instance. - - .. versionadded:: NEXTVERSION - - :Parameters: - - kwargs: optional - Initialisation parameters to pass to the new instance. - - :Returns: - - `CFAH5netcdfArray` - - """ - cls = self.get_class("CFAH5netcdfArray") - return cls(**kwargs) +# def initialise_CFANetCDF4Array(self, **kwargs): +# """Return a `CFANetCDF4Array` instance. +# +# :Parameters: +# +# kwargs: optional +# Initialisation parameters to pass to the new instance. +# +# :Returns: +# +# `CFANetCDF4Array` +# +# """ +# cls = self.get_class("CFANetCDF4Array") +# return cls(**kwargs) +# +# def initialise_CFAH5netcdfArray(self, **kwargs): +# """Return a `CFAH5netcdfArray` instance. +# +# .. versionadded:: NEXTVERSION +# +# :Parameters: +# +# kwargs: optional +# Initialisation parameters to pass to the new instance. 
+# +# :Returns: +# +# `CFAH5netcdfArray` +# +# """ +# cls = self.get_class("CFAH5netcdfArray") +# return cls(**kwargs) _implementation = CFImplementation( cf_version=CF(), + AggregatedArray=AggregatedArray, AuxiliaryCoordinate=AuxiliaryCoordinate, CellConnectivity=CellConnectivity, CellMeasure=CellMeasure, CellMethod=CellMethod, - CFAH5netcdfArray=CFAH5netcdfArray, - CFANetCDF4Array=CFANetCDF4Array, +# CFAH5netcdfArray=CFAH5netcdfArray, +# CFANetCDF4Array=CFANetCDF4Array, CoordinateReference=CoordinateReference, DimensionCoordinate=DimensionCoordinate, Domain=Domain, @@ -214,8 +216,6 @@ def implementation(): 'CellConnectivityArray': cf.data.array.cellconnectivityarray.CellConnectivityArray, 'CellMeasure': cf.cellmeasure.CellMeasure, 'CellMethod': cf.cellmethod.CellMethod, - 'CFAH5netcdfArray': cf.data.array.cfah5netcdfarray.CFAH5netcdfArray, - 'CFANetCDF4Array': cf.data.array.cfanetcdf4array.CFANetCDF4Array, 'CoordinateReference': cf.coordinatereference.CoordinateReference, 'DimensionCoordinate': cf.dimensioncoordinate.DimensionCoordinate, 'Domain': cf.domain.Domain, diff --git a/cf/data/array/__init__.py b/cf/data/array/__init__.py index cd2c53766b..17a75a4312 100644 --- a/cf/data/array/__init__.py +++ b/cf/data/array/__init__.py @@ -1,7 +1,8 @@ +from .aggregatedarray import AggregatedArray from .boundsfromnodesarray import BoundsFromNodesArray from .cellconnectivityarray import CellConnectivityArray -from .cfah5netcdfarray import CFAH5netcdfArray -from .cfanetcdf4array import CFANetCDF4Array +#from .cfah5netcdfarray import CFAH5netcdfArray +#from .cfanetcdf4array import CFANetCDF4Array from .fullarray import FullArray from .gatheredarray import GatheredArray from .h5netcdfarray import H5netcdfArray diff --git a/cf/data/array/aggregatedarray.py b/cf/data/array/aggregatedarray.py new file mode 100644 index 0000000000..3707325bb0 --- /dev/null +++ b/cf/data/array/aggregatedarray.py @@ -0,0 +1,17 @@ +import cfdm + +from ...mixin_container import Container +from 
.mixin import ArrayMixin, FileArrayMixin + + +class AggregatedArray( + FileArrayMixin, + ArrayMixin, + Container, + cfdm.AggregatedArray +): + """An array stored in a CF aggregation variable. + + .. versionadded:: NEXTVERSION + + """ diff --git a/cf/data/array/cfah5netcdfarray.py b/cf/data/array/cfah5netcdfarray.py index 47c58bff06..30e1cd0500 100644 --- a/cf/data/array/cfah5netcdfarray.py +++ b/cf/data/array/cfah5netcdfarray.py @@ -1,10 +1,10 @@ -from .h5netcdfarray import H5netcdfArray -from .mixin import CFAMixin - - -class CFAH5netcdfArray(CFAMixin, H5netcdfArray): - """A CFA-netCDF array accessed with `h5netcdf` - - .. versionadded:: NEXTVERSION - - """ +#from .h5netcdfarray import H5netcdfArray +#from .mixin import CFAMixin +# +# +#class CFAH5netcdfArray(CFAMixin, H5netcdfArray): +# """A CFA-netCDF array accessed with `h5netcdf` +# +# .. versionadded:: NEXTVERSION +# +# """ diff --git a/cf/data/array/cfanetcdf4array.py b/cf/data/array/cfanetcdf4array.py index b3b6b69d7a..65e53d8abd 100644 --- a/cf/data/array/cfanetcdf4array.py +++ b/cf/data/array/cfanetcdf4array.py @@ -1,10 +1,10 @@ -from .mixin import CFAMixin -from .netcdf4array import NetCDF4Array - - -class CFANetCDF4Array(CFAMixin, NetCDF4Array): - """A CFA-netCDF array accessed with `netCDF4`. - - .. versionadded:: NEXTVERSION - - """ +#from .mixin import CFAMixin +#from .netcdf4array import NetCDF4Array +# +# +#class CFANetCDF4Array(CFAMixin, NetCDF4Array): +# """A CFA-netCDF array accessed with `netCDF4`. +# +# .. 
versionadded:: NEXTVERSION +# +# """ diff --git a/cf/data/array/mixin/__init__.py b/cf/data/array/mixin/__init__.py index 5bf63658df..f8f0ebca66 100644 --- a/cf/data/array/mixin/__init__.py +++ b/cf/data/array/mixin/__init__.py @@ -1,6 +1,6 @@ from .activestoragemixin import ActiveStorageMixin from .arraymixin import ArrayMixin -from .cfamixin import CFAMixin +#from .cfamixin import CFAMixin from .compressedarraymixin import CompressedArrayMixin from .filearraymixin import FileArrayMixin diff --git a/cf/data/array/umarray.py b/cf/data/array/umarray.py index a560365d9b..e0d113bddb 100644 --- a/cf/data/array/umarray.py +++ b/cf/data/array/umarray.py @@ -11,6 +11,7 @@ class UMArray( FileArrayMixin, cfdm.data.mixin.IndexMixin, cfdm.data.mixin.FileArrayMixin, + cfdm.data.abstract.FileArray, Array, ): """A sub-array stored in a PP or UM fields file.""" diff --git a/cf/data/data.py b/cf/data/data.py index 9a273f15ab..a009348076 100644 --- a/cf/data/data.py +++ b/cf/data/data.py @@ -32,7 +32,7 @@ free_memory, parse_indices, ) -from ..mixin2 import CFANetCDF, Container +from ..mixin2 import Container from ..units import Units from .collapse import Collapse from .dask_utils import ( @@ -68,7 +68,7 @@ _dtype_bool = np.dtype(bool) -class Data(DataClassDeprecationsMixin, CFANetCDF, Container, cfdm.Data): +class Data(DataClassDeprecationsMixin, Container, cfdm.Data): """An N-dimensional data array with units and masked values. * Contains an N-dimensional, indexable and broadcastable array with @@ -132,20 +132,6 @@ class Data(DataClassDeprecationsMixin, CFANetCDF, Container, cfdm.Data): """ - # Constants used to specify which components should be cleared - # when a new dask array is set. See `_clear_after_dask_update` for - # details. - # - # These must constants must have values 2**N (N>=1), except for - # `_NONE` which must be 0, and `_ALL` which must be the sum of - # other constants. It is therefore convenient to define these - # constants in binary. 
- _NONE = 0b000 - _ARRAY = 0b001 - _CACHE = 0b010 - _CFA = 0b100 - _ALL = 0b111 - def __new__(cls, *args, **kwargs): """Store component classes.""" instance = super().__new__(cls) @@ -702,42 +688,42 @@ def __setitem__(self, indices, value): return - def _cfa_del_write(self): - """Set the CFA write status of the data to `False`. - - .. versionadded:: 3.15.0 - - .. seealso:: `cfa_get_write`, `_cfa_set_write` - - :Returns: - - `bool` - The CFA status prior to deletion. - - """ - return self._custom.pop("cfa_write", False) - - def _cfa_set_term(self, value): - """Set the CFA aggregation instruction term status. - - .. versionadded:: 3.15.0 - - .. seealso:: `cfa_get_term`, `cfa_set_term` - - :Parameters: - - status: `bool` - The new CFA aggregation instruction term status. - - :Returns: - - `None` - - """ - if not value: - self._custom.pop("cfa_term", None) - - self._custom["cfa_term"] = bool(value) + # def _cfa_del_write(self): + # """Set the CFA write status of the data to `False`. + # + # .. versionadded:: 3.15.0 + # + # .. seealso:: `cfa_get_write`, `_cfa_set_write` + # + # :Returns: + # + # `bool` + # The CFA status prior to deletion. + # + # """ + # return self._custom.pop("cfa_write", False) + # + # def _cfa_set_term(self, value): + # """Set the CFA aggregation instruction term status. + # + # .. versionadded:: 3.15.0 + # + # .. seealso:: `cfa_get_term`, `cfa_set_term` + # + # :Parameters: + # + # status: `bool` + # The new CFA aggregation instruction term status. + # + # :Returns: + # + # `None` + # + # """ + # if not value: + # self._custom.pop("cfa_term", None) + # + # self._custom["cfa_term"] = bool(value) def _is_abstract_Array_subclass(self, array): """Whether or not an array is a type of Array. @@ -753,29 +739,29 @@ def _is_abstract_Array_subclass(self, array): """ return isinstance(array, cfdm.Array) - def _cfa_set_write(self, status): - """Set the CFA write status of the data. 
- - If and only if the CFA write status is True then it may be - possible to write the data as an aggregation variable to a - CFA-netCDF file. - - .. versionadded:: 3.15.0 - - .. seealso:: `cfa_get_write`, `cfa_set_write`, - `_cfa_del_write`, `cf.read`, `cf.write`, - - :Parameters: - - status: `bool` - The new CFA write status. - - :Returns: - - `None` - - """ - self._custom["cfa_write"] = bool(status) + # def _cfa_set_write(self, status): + # """Set the CFA write status of the data. + # + # If and only if the CFA write status is True then it may be + # possible to write the data as an aggregation variable to a + # CFA-netCDF file. + # + # .. versionadded:: 3.15.0 + # + # .. seealso:: `cfa_get_write`, `cfa_set_write`, + # `_cfa_del_write`, `cf.read`, `cf.write`, + # + # :Parameters: + # + # status: `bool` + # The new CFA write status. + # + # :Returns: + # + # `None` + # + # """ + # self._custom["cfa_write"] = bool(status) def _update_deterministic(self, other): """Update the deterministic name status. @@ -1685,109 +1671,109 @@ def ceil(self, inplace=False, i=False): d._set_dask(dx) return d - def cfa_get_term(self): - """The CFA aggregation instruction term status. - - If True then the data represents that of a non-standard CFA - aggregation instruction variable. - - .. versionadded:: 3.15.0 - - .. seealso:: `cfa_set_term` - - :Returns: - - `bool` - - **Examples** - - >>> d = cf.Data([1, 2]) - >>> d.cfa_get_term() - False - - """ - return bool(self._custom.get("cfa_term", False)) - - def cfa_get_write(self): - """The CFA write status of the data. - - If and only if the CFA write status is True then it may be - possible to write the data as an aggregation variable to a - CFA-netCDF file. - - .. versionadded:: 3.15.0 - - .. 
seealso:: `cfa_set_write`, `cf.read`, `cf.write` - - :Returns: - - `bool` - - **Examples** - - >>> d = cf.Data([1, 2]) - >>> d.cfa_get_write() - False - - """ - return bool(self._custom.get("cfa_write", False)) - - def cfa_set_term(self, status): - """Set the CFA aggregation instruction term status. - - If True then the data represents that of a non-standard CFA - aggregation instruction variable. - - .. versionadded:: 3.15.0 - - .. seealso:: `cfa_get_term` - - :Parameters: - - status: `bool` - The new CFA aggregation instruction term status. - - :Returns: - - `None` - - """ - if status: - raise ValueError( - "'cfa_set_term' only allows the CFA aggregation instruction " - "term write status to be set to False" - ) - - self._custom.pop("cfa_term", False) - - def cfa_set_write(self, status): - """Set the CFA write status of the data. - - If and only if the CFA write status is True then it may be - possible to write the data as an aggregation variable to a - CFA-netCDF file. - - .. versionadded:: 3.15.0 - - .. seealso:: `cfa_get_write`, `cf.read`, `cf.write` - - :Parameters: - - status: `bool` - The new CFA write status. - - :Returns: - - `None` - - """ - if status: - raise ValueError( - "'cfa_set_write' only allows the CFA write status to be " - "set to False" - ) - - self._cfa_del_write() + # def cfa_get_term(self): + # """The CFA aggregation instruction term status. + # + # If True then the data represents that of a non-standard CFA + # aggregation instruction variable. + # + # .. versionadded:: 3.15.0 + # + # .. seealso:: `cfa_set_term` + # + # :Returns: + # + # `bool` + # + # **Examples** + # + # >>> d = cf.Data([1, 2]) + # >>> d.cfa_get_term() + # False + # + # """ + # return bool(self._custom.get("cfa_term", False)) + # + # def cfa_get_write(self): + # """The CFA write status of the data. + # + # If and only if the CFA write status is True then it may be + # possible to write the data as an aggregation variable to a + # CFA-netCDF file. + # + # .. 
versionadded:: 3.15.0 + # + # .. seealso:: `cfa_set_write`, `cf.read`, `cf.write` + # + # :Returns: + # + # `bool` + # + # **Examples** + # + # >>> d = cf.Data([1, 2]) + # >>> d.cfa_get_write() + # False + # + # """ + # return bool(self._custom.get("cfa_write", False)) + # + # def cfa_set_term(self, status): + # """Set the CFA aggregation instruction term status. + # + # If True then the data represents that of a non-standard CFA + # aggregation instruction variable. + # + # .. versionadded:: 3.15.0 + # + # .. seealso:: `cfa_get_term` + # + # :Parameters: + # + # status: `bool` + # The new CFA aggregation instruction term status. + # + # :Returns: + # + # `None` + # + # """ + # if status: + # raise ValueError( + # "'cfa_set_term' only allows the CFA aggregation instruction " + # "term write status to be set to False" + # ) + # + # self._custom.pop("cfa_term", False) + # + # def cfa_set_write(self, status): + # """Set the CFA write status of the data. + # + # If and only if the CFA write status is True then it may be + # possible to write the data as an aggregation variable to a + # CFA-netCDF file. + # + # .. versionadded:: 3.15.0 + # + # .. seealso:: `cfa_get_write`, `cf.read`, `cf.write` + # + # :Parameters: + # + # status: `bool` + # The new CFA write status. + # + # :Returns: + # + # `None` + # + # """ + # if status: + # raise ValueError( + # "'cfa_set_write' only allows the CFA write status to be " + # "set to False" + # ) + # + # self._cfa_del_write() @_inplace_enabled(default=False) def convolution_filter( @@ -2193,46 +2179,46 @@ def _asreftime(self, inplace=False): return d - def _clear_after_dask_update(self, clear=None): - """Remove components invalidated by updating the `dask` array. - - Removes or modifies components that can't be guaranteed to be - consistent with an updated `dask` array. See the *clear* - parameter for details. - - .. versionadded:: NEXTVERSION - - .. 
seealso:: `_del_Array`, `_del_cached_elements`, - `_set_dask`, `_cfa_del_write` - - :Parameters: - - clear: `int` or `None`, optional - Specify which components to remove, determined by - sequentially combining an integer value of *clear* - with the relevant class-level constants (such as - ``{{class}}._ARRAY``), using the bitwise AND (&) - operator. If ``clear & `` is - True then the corresponding component is cleared. The - default value of `None` is equivalent to *clear* being - set to ``{{class}}._ALL``. - - The bitwise OR (^) operator can be used to retain a - component (or components) but remove all others. For - instance, if *clear* is ``{{class}}._ALL ^ - {{class}}._CACHE`` then all components except the - cached array values will be removed. - - :Returns: - - `int` TODODASK - - """ - clear = super()._clear_after_dask_update(clear) - - if clear & self._CFA: - # Set the CFA write status to False - self._cfa_del_write() + # def _clear_after_dask_update(self, clear=None): + # """Remove components invalidated by updating the `dask` array. + # + # Removes or modifies components that can't be guaranteed to be + # consistent with an updated `dask` array. See the *clear* + # parameter for details. + # + # .. versionadded:: NEXTVERSION + # + # .. seealso:: `_del_Array`, `_del_cached_elements`, + # `_set_dask`, `_cfa_del_write` + # + # :Parameters: + # + # clear: `int` or `None`, optional + # Specify which components to remove, determined by + # sequentially combining an integer value of *clear* + # with the relevant class-level constants (such as + # ``{{class}}._ARRAY``), using the bitwise AND (&) + # operator. If ``clear & `` is + # True then the corresponding component is cleared. The + # default value of `None` is equivalent to *clear* being + # set to ``{{class}}._ALL``. + # + # The bitwise OR (^) operator can be used to retain a + # component (or components) but remove all others. 
For + # instance, if *clear* is ``{{class}}._ALL ^ + # {{class}}._CACHE`` then all components except the + # cached array values will be removed. + # + # :Returns: + # + # `int` TODODASK + # + # """ + # clear = super()._clear_after_dask_update(clear) + # + # if clear & self._CFA: + # # Set the CFA write status to False + # self._cfa_del_write() def _combined_units(self, data1, method, inplace): """Combines by given method the data's units with other units. @@ -2891,243 +2877,6 @@ def _regrid( return d - @classmethod - def concatenate( - cls, data, axis=0, cull_graph=False, relaxed_units=False, copy=True - ): - """Join a sequence of data arrays together. - - .. seealso:: `cull_graph` - - :Parameters: - - data: sequence of `Data` - The data arrays to be concatenated. Concatenation is - carried out in the order given. Each data array must have - equivalent units and the same shape, except in the - concatenation axis. Note that scalar arrays are treated as - if they were one dimensional. - - axis: `int`, optional - The axis along which the arrays will be joined. The - default is 0. Note that scalar arrays are treated as if - they were one dimensional. - - .. note:: If the axis specified is cyclic, it will become - non-cyclic in the output. - - {{cull_graph: `bool`, optional}} - - .. versionadded:: 3.14.0 - - {{relaxed_units: `bool`, optional}} - - .. versionadded:: 3.14.1 - - copy: `bool`, optional - If True (the default) then make copies of the data, if - required, prior to the concatenation, thereby ensuring - that the input data arrays are not changed by the - concatenation process. If False then some or all input - data arrays might be changed in-place, but the - concatenation process will be faster. - - .. versionadded:: 3.15.1 - - :Returns: - - `Data` - The concatenated data. - - **Examples** - - >>> d = cf.Data([[1, 2], [3, 4]], 'km') - >>> e = cf.Data([[5.0, 6.0]], 'metre') - >>> f = cf.Data.concatenate((d, e)) - >>> print(f.array) - [[ 1. 2. ] - [ 3. 4. 
] - [ 0.005 0.006]] - >>> f.equals(cf.Data.concatenate((d, e), axis=-2)) - True - - >>> e = cf.Data([[5.0], [6.0]], 'metre') - >>> f = cf.Data.concatenate((d, e), axis=1) - >>> print(f.array) - [[ 1. 2. 0.005] - [ 3. 4. 0.006]] - - >>> d = cf.Data(1, 'km') - >>> e = cf.Data(50.0, 'metre') - >>> f = cf.Data.concatenate((d, e)) - >>> print(f.array) - [ 1. 0.05] - - >>> e = cf.Data([50.0, 75.0], 'metre') - >>> f = cf.Data.concatenate((d, e)) - >>> print(f.array) - [ 1. 0.05 0.075] - - """ - data = tuple(data) - if len(data) < 2: - raise ValueError( - "Can't concatenate: Must provide at least two data arrays" - ) - - if cull_graph: - # Remove unnecessary components from the graph, which may - # improve performance, and because complicated task graphs - # can sometimes confuse da.concatenate. - for d in data: - d.cull_graph() - - data0 = data[0] - units0 = data0.Units - - if copy: - data0 = data0.copy() - copied = True - else: - copied = False - - processed_data = [] - for index, data1 in enumerate(data): - # Turn any scalar array into a 1-d array - if not data1.ndim: - if not copied: - data1 = data1.copy() - copied = True - - data1.insert_dimension(inplace=True) - - # Check and conform, if necessary, the units of all inputs - units1 = data1.Units - if ( - relaxed_units - and not units0.isvalid - and not units1.isvalid - and units0.__dict__ == units1.__dict__ - ): - # Allow identical invalid units to be equal - pass - elif units0.equals(units1): - pass - elif units0.equivalent(units1): - if not copied: - data1 = data1.copy() - copied = True - - data1.Units = units0 - else: - raise ValueError( - "Can't concatenate: All the input arrays must have " - "equivalent units" - ) - - processed_data.append(data1) - copied = not copy # to avoid making two copies in a given case - - # Get data as dask arrays and apply concatenation - # operation. We can set '_asanyarray=False' because at compute - # time the concatenation operation does not need to access the - # actual data. 
- dxs = [d.to_dask_array(_asanyarray=False) for d in processed_data] - dx = da.concatenate(dxs, axis=axis) - - # Set the CFA write status - # - # Assume at first that all input data instances have True - # status, but ... - cfa = cls._CFA - for d in processed_data: - if not d.cfa_get_write(): - # ... the CFA write status is False when any input - # data instance has False status ... - cfa = cls._NONE - break - - if cfa != cls._NONE: - non_concat_axis_chunks0 = list(processed_data[0].chunks) - non_concat_axis_chunks0.pop(axis) - for d in processed_data[1:]: - non_concat_axis_chunks = list(d.chunks) - non_concat_axis_chunks.pop(axis) - if non_concat_axis_chunks != non_concat_axis_chunks0: - # ... the CFA write status is False when any two - # input data instances have different chunk - # patterns for the non-concatenated axes. - cfa = cls._NONE - break - - # Define the __asanyarray__ status - asanyarray = processed_data[0].__asanyarray__ - for d in processed_data[1:]: - if d.__asanyarray__ != asanyarray: - # If and only if any two input Data objects have - # different __asanyarray__ values, then set - # asanyarray=True on the concatenation. 
- asanyarray = True - break - - # Set the new dask array - data0._set_dask(dx, clear=cls._ALL ^ cfa, asanyarray=asanyarray) - - # Set appropriate cached elements - cached_elements = {} - for i in (0, -1): - element = processed_data[i]._get_cached_elements().get(i) - if element is not None: - cached_elements[i] = element - - if cached_elements: - data0._set_cached_elements(cached_elements) - - # Set whether or not the concatenated name is deterministic - deterministic = True - for d in processed_data: - if not d.has_deterministic_name(): - deterministic = False - break - - data0._update_deterministic(deterministic) - - # Set the CFA-netCDF aggregated data instructions and file - # name substitutions by combining them from all of the input - # data instances, giving precedence to those towards the left - # hand side of the input list. - if data0.cfa_get_write(): - aggregated_data = {} - substitutions = {} - for d in processed_data[::-1]: - aggregated_data.update(d.cfa_get_aggregated_data()) - substitutions.update(d.cfa_file_substitutions()) - - if aggregated_data: - data0.cfa_set_aggregated_data(aggregated_data) - - if substitutions: - data0.cfa_update_file_substitutions(substitutions) - - # Set the CFA aggregation instruction term status - if data0.cfa_get_term(): - for d in processed_data[1:]: - if not d.cfa_get_term(): - data0.cfa_set_term(False) - break - - # Manage cyclicity of axes: if join axis was cyclic, it is no - # longer. - axis = data0._parse_axes(axis)[0] - if axis in data0.cyclic(): - logger.warning( - f"Concatenating along a cyclic axis ({axis}) therefore the " - "axis has been set as non-cyclic in the output." 
- ) - data0.cyclic(axes=axis, iscyclic=False) - - return data0 - def __add__(self, other): """The binary arithmetic operation ``+`` @@ -3526,6 +3275,119 @@ def is_masked(self): return bool(dx.any()) + @classmethod + def _concatenate_conform_units(cls, data1, units0, relaxed_units, copy): + """Check and conform the units of data prior to concatenation. + + This is a helper function for `concatenate` that may be easily + overridden in sublcasses, to allow for customisation of the + concatenation process. + + .. versionadded:: NEXTVERSION + + .. seealso:: `concatenate` + + :Parameters: + + data1: `{{class}}` + Data with units. + + units0: `Units` + The units to conform *data1* to. + + {{relaxed_units: `bool`, optional}} + + copy: `bool` + If False then modify *data1* in-place. Otherwise a + copy of it is modified. + + :Returns: + + `{{class}}` + Returns *data1*, possibly modified so that it conforms + to *units0*. If *copy* is False and *data1* is + modified, then it is done so in-place. + + """ + # Check and conform, if necessary, the units of all inputs + units1 = data1.Units + if ( + relaxed_units + and not units0.isvalid + and not units1.isvalid + and units0.__dict__ == units1.__dict__ + ): + # Allow identical invalid units to be equal + pass + elif units0.equals(units1): + pass + elif units0.equivalent(units1): + if copy: + data1 = data1.copy() + + data1.Units = units0 + else: + raise ValueError( + "Can't concatenate: All the input arrays must have " + "equivalent units. Got {units0!r} and {units1!r}" + ) + + return data1 + + @classmethod + def _concatenate_post_process( + cls, concatenated_data, axis, conformed_data + ): + """Post-process concatenated data. + + This is a helper function for `concatenate` that may be easily + overridden in sublcasses, to allow for customisation of the + concatenation process. + + .. versionadded:: NEXTVERSION + + .. seealso:: `concatenate` + + :Parameters: + + concatenated_data: `{{class}}` + The concatenated data array. 
+ + axis: `int` + The axis of concatenation. + + conformed_data: sequence of `{{class}}` + The ordered sequence of data arrays that were + concatenated. + + :Returns: + + `{{class}}` + Returns *concatenated_data*, possibly modified + in-place. + + """ + # Manage cyclicity of axes: if join axis was cyclic, it is no + # longer. + axis = concatenated_data._parse_axes(axis)[0] + if axis in concatenated_data.cyclic(): + logger.warning( + f"Concatenating along a cyclic axis ({axis}) therefore the " + "axis has been set as non-cyclic in the output." + ) + concatenated_data.cyclic(axes=axis, iscyclic=False) + + # Set whether or not the concatenated name is deterministic + deterministic = True + for d in conformed_data: + if not d.has_deterministic_name(): + deterministic = False + break + + concatenated_data._update_deterministic(deterministic) + + return concatenated_data + @_inplace_enabled(default=False) def arctan(self, inplace=False): """Take the trigonometric inverse tangent of the data element- @@ -4293,57 +4155,57 @@ def get_deterministic_name(self): units._canonical_calendar, ) - def add_file_location(self, location): - """Add a new file location in-place. - - All data definitions that reference files are additionally - referenced from the given location. - - .. versionadded:: 3.15.0 - - .. seealso:: `del_file_location`, `file_locations` - - :Parameters: - - location: `str` - The new location. - - :Returns: - - `str` - The new location as an absolute path with no trailing - path name component separator. - - **Examples** - - >>> d.add_file_location('/data/model/') - '/data/model' - - """ - location = abspath(location).rstrip(sep) - - updated = False - - # The dask graph is never going to be computed, so we can set - # '_asanyarray=False'. 
- dsk = self.todict(_asanyarray=False) - for key, a in dsk.items(): - try: - dsk[key] = a.add_file_location(location) - except AttributeError: - # This chunk doesn't contain a file array - continue - - # This chunk contains a file array and the dask graph has - # been updated - updated = True - - if updated: - dx = self.to_dask_array(_asanyarray=False) - dx = da.Array(dsk, dx.name, dx.chunks, dx.dtype, dx._meta) - self._set_dask(dx, clear=self._NONE, asanyarray=None) - - return location + # def add_file_location(self, location): + # """Add a new file location in-place. + # + # All data definitions that reference files are additionally + # referenced from the given location. + # + # .. versionadded:: 3.15.0 + # + # .. seealso:: `del_file_location`, `file_locations` + # + # :Parameters: + # + # location: `str` + # The new location. + # + # :Returns: + # + # `str` + # The new location as an absolute path with no trailing + # path name component separator. + # + # **Examples** + # + # >>> d.add_file_location('/data/model/') + # '/data/model' + # + # """ + # location = abspath(location).rstrip(sep) + # + # updated = False + # + # # The dask graph is never going to be computed, so we can set + # # '_asanyarray=False'. + # dsk = self.todict(_asanyarray=False) + # for key, a in dsk.items(): + # try: + # dsk[key] = a.add_file_location(location) + # except AttributeError: + # # This chunk doesn't contain a file array + # continue + # + # # This chunk contains a file array and the dask graph has + # # been updated + # updated = True + # + # if updated: + # dx = self.to_dask_array(_asanyarray=False) + # dx = da.Array(dsk, dx.name, dx.chunks, dx.dtype, dx._meta) + # self._set_dask(dx, clear=self._NONE, asanyarray=None) + # + # return location def set_units(self, value): """Set the units. 
@@ -6216,40 +6078,40 @@ def has_deterministic_name(self): """ return self._custom.get("has_deterministic_name", False) - def file_locations(self): - """The locations of files containing parts of the data. - - Returns the locations of any files that may be required to - deliver the computed data array. - - .. versionadded:: 3.15.0 - - .. seealso:: `add_file_location`, `del_file_location` - - :Returns: - - `set` - The unique file locations as absolute paths with no - trailing path name component separator. - - **Examples** - - >>> d.file_locations() - {'/home/data1', 'file:///data2'} - - """ - out = set() - - # The dask graph is never going to be computed, so we can set - # '_asanyarray=False'. - for key, a in self.todict(_asanyarray=False).items(): - try: - out.update(a.file_locations()) - except AttributeError: - # This chunk doesn't contain a file array - pass - - return out + # def file_locations(self): + # """The locations of files containing parts of the data. + # + # Returns the locations of any files that may be required to + # deliver the computed data array. + # + # .. versionadded:: 3.15.0 + # + # .. seealso:: `add_file_location`, `del_file_location` + # + # :Returns: + # + # `set` + # The unique file locations as absolute paths with no + # trailing path name component separator. + # + # **Examples** + # + # >>> d.file_locations() + # {'/home/data1', 'file:///data2'} + # + # """ + # out = set() + # + # # The dask graph is never going to be computed, so we can set + # # '_asanyarray=False'. + # for key, a in self.todict(_asanyarray=False).items(): + # try: + # out.update(a.file_locations()) + # except AttributeError: + # # This chunk doesn't contain a file array + # pass + # + # return out def flat(self, ignore_masked=True): """Return a flat iterator over elements of the data array. @@ -6770,57 +6632,57 @@ def masked_invalid(self, inplace=False): d._set_dask(dx) return d - def del_file_location(self, location): - """Remove a file location in-place. 
- - All data definitions that reference files will have references - to files in the given location removed from them. - - .. versionadded:: 3.15.0 - - .. seealso:: `add_file_location`, `file_locations` - - :Parameters: - - location: `str` - The file location to remove. - - :Returns: - - `str` - The removed location as an absolute path with no - trailing path name component separator. - - **Examples** - - >>> d.del_file_location('/data/model/') - '/data/model' - - """ - location = abspath(location).rstrip(sep) - - updated = False - - # The dask graph is never going to be computed, so we can set - # '_asanyarray=False'. - dsk = self.todict(_asanyarray=False) - for key, a in dsk.items(): - try: - dsk[key] = a.del_file_location(location) - except AttributeError: - # This chunk doesn't contain a file array - continue - - # This chunk contains a file array and the dask graph has - # been updated - updated = True - - if updated: - dx = self.to_dask_array(_asanyarray=False) - dx = da.Array(dsk, dx.name, dx.chunks, dx.dtype, dx._meta) - self._set_dask(dx, clear=self._NONE, asanyarray=None) - - return location + # def del_file_location(self, location): + # """Remove a file location in-place. + # + # All data definitions that reference files will have references + # to files in the given location removed from them. + # + # .. versionadded:: 3.15.0 + # + # .. seealso:: `add_file_location`, `file_locations` + # + # :Parameters: + # + # location: `str` + # The file location to remove. + # + # :Returns: + # + # `str` + # The removed location as an absolute path with no + # trailing path name component separator. + # + # **Examples** + # + # >>> d.del_file_location('/data/model/') + # '/data/model' + # + # """ + # location = abspath(location).rstrip(sep) + # + # updated = False + # + # # The dask graph is never going to be computed, so we can set + # # '_asanyarray=False'. 
+ # dsk = self.todict(_asanyarray=False) + # for key, a in dsk.items(): + # try: + # dsk[key] = a.del_file_location(location) + # except AttributeError: + # # This chunk doesn't contain a file array + # continue + # + # # This chunk contains a file array and the dask graph has + # # been updated + # updated = True + # + # if updated: + # dx = self.to_dask_array(_asanyarray=False) + # dx = da.Array(dsk, dx.name, dx.chunks, dx.dtype, dx._meta) + # self._set_dask(dx, clear=self._NONE, asanyarray=None) + # + # return location @classmethod def masked_all( diff --git a/cf/data/fragment/__init__.py b/cf/data/fragment/__init__.py index b7315107d4..c97dacf225 100644 --- a/cf/data/fragment/__init__.py +++ b/cf/data/fragment/__init__.py @@ -1,5 +1,5 @@ -from .fullfragmentarray import FullFragmentArray -from .h5netcdffragmentarray import H5netcdfFragmentArray -from .netcdffragmentarray import NetCDFFragmentArray -from .netcdf4fragmentarray import NetCDF4FragmentArray -from .umfragmentarray import UMFragmentArray +#from .fullfragmentarray import FullFragmentArray +#from .h5netcdffragmentarray import H5netcdfFragmentArray +#from .netcdffragmentarray import NetCDFFragmentArray +#from .netcdf4fragmentarray import NetCDF4FragmentArray +#from .umfragmentarray import UMFragmentArray diff --git a/cf/field.py b/cf/field.py index d9059cf235..48f80bf42f 100644 --- a/cf/field.py +++ b/cf/field.py @@ -2604,164 +2604,6 @@ def cell_area( return w - def cfa_clear_file_substitutions( - self, - ): - """Remove all of the CFA-netCDF file name substitutions. - - .. 
versionadded:: 3.15.0 - - :Returns: - - `dict` - {{Returns cfa_clear_file_substitutions}} - - **Examples** - - >>> f.cfa_clear_file_substitutions() - {} - - """ - out = super().cfa_clear_file_substitution() - - for c in self.constructs.filter_by_data(todict=True).values(): - out.update(c.cfa_clear_file_substitutions()) - - return out - - def cfa_del_file_substitution( - self, - base, - constructs=True, - ): - """Remove a CFA-netCDF file name substitution. - - .. versionadded:: 3.15.0 - - :Parameters: - - {{cfa base: `str`}} - - constructs: `bool`, optional - If True (the default) then metadata constructs also - have the file substitutions removed from them. - - :Returns: - - `dict` - {{Returns cfa_del_file_substitution}} - - **Examples** - - >>> f.cfa_del_file_substitution('base') - - """ - super().cfa_del_file_substitution(base) - - if constructs: - for c in self.constructs.filter_by_data(todict=True).values(): - c.cfa_del_file_substitution(base) - - def cfa_file_substitutions(self, constructs=True): - """Return the CFA-netCDF file name substitutions. - - .. versionadded:: 3.15.0 - - :Returns: - - `dict` - {{Returns cfa_file_substitutions}} - - **Examples** - - >>> f.cfa_file_substitutions() - {} - - """ - out = super().cfa_file_substitutions() - - if constructs: - for c in self.constructs.filter_by_data(todict=True).values(): - out.update(c.cfa_file_substitutions()) - - return out - - def del_file_location( - self, - location, - constructs=True, - ): - """Remove a file location in-place. - - All data definitions that reference files will have references - to files in the given location removed from them. - - .. versionadded:: 3.15.0 - - .. seealso:: `add_file_location`, `file_locations` - - :Parameters: - - location: `str` - The file location to remove. - - constructs: `bool`, optional - If True (the default) then metadata constructs also - have the new file location removed from them. 
- - :Returns: - - `str` - The removed location as an absolute path with no - trailing path name component separator. - - **Examples** - - >>> d.del_file_location('/data/model/') - '/data/model' - - """ - location = abspath(location).rstrip(sep) - super().del_file_location(location) - - if constructs: - for c in self.constructs.filter_by_data(todict=True).values(): - c.del_file_location(location) - - return location - - def cfa_update_file_substitutions( - self, - substitutions, - constructs=True, - ): - """Set CFA-netCDF file name substitutions. - - .. versionadded:: 3.15.0 - - :Parameters: - - {{cfa substitutions: `dict`}} - - constructs: `bool`, optional - If True (the default) then metadata constructs also - have the file substitutions set on them. - - :Returns: - - `None` - - **Examples** - - >>> f.cfa_update_file_substitutions({'base': '/data/model'}) - - """ - super().cfa_update_file_substitutions(substitutions) - - if constructs: - for c in self.constructs.filter_by_data(todict=True).values(): - c.cfa_update_file_substitutions(substitutions) - def get_domain(self): """Return the domain. @@ -10855,40 +10697,40 @@ def cumsum( return f - def file_locations(self, constructs=True): - """The locations of files containing parts of the data. - - Returns the locations of any files that may be required to - deliver the computed data array. - - .. versionadded:: 3.15.0 - - .. seealso:: `add_file_location`, `del_file_location` - - :Parameters: - - constructs: `bool`, optional - If True (the default) then the file locations from - metadata constructs are also returned. - - :Returns: - - `set` - The unique file locations as absolute paths with no - trailing path name component separator. 
- - **Examples** - - >>> f.file_locations() - {'/home/data1', 'file:///data2'} - - """ - out = super().file_locations() - if constructs: - for c in self.constructs.filter_by_data(todict=True).values(): - out.update(c.file_locations()) - - return out + # def file_locations(self, constructs=True): + # """The locations of files containing parts of the data. + # + # Returns the locations of any files that may be required to + # deliver the computed data array. + # + # .. versionadded:: 3.15.0 + # + # .. seealso:: `add_file_location`, `del_file_location` + # + # :Parameters: + # + # constructs: `bool`, optional + # If True (the default) then the file locations from + # metadata constructs are also returned. + # + # :Returns: + # + # `set` + # The unique file locations as absolute paths with no + # trailing path name component separator. + # + # **Examples** + # + # >>> f.file_locations() + # {'/home/data1', 'file:///data2'} + # + # """ + # out = super().file_locations() + # if constructs: + # for c in self.constructs.filter_by_data(todict=True).values(): + # out.update(c.file_locations()) + # + # return out @_deprecated_kwarg_check("i", version="3.0.0", removed_at="4.0.0") @_inplace_enabled(default=False) @@ -13505,47 +13347,47 @@ def subspace(self): """ return SubspaceField(self) - def add_file_location( - self, - location, - constructs=True, - ): - """Add a new file location in-place. - - All data definitions that reference files are additionally - referenced from the given location. - - .. versionadded:: 3.15.0 - - .. seealso:: `del_file_location`, `file_locations` - - :Parameters: - - location: `str` - The new location. - - constructs: `bool`, optional - If True (the default) then metadata constructs also - have the new file location added to them. - - :Returns: - - `str` - The new location as an absolute path with no trailing - path name component separator. 
- - **Examples** - - >>> f.add_file_location('/data/model/') - '/data/model' - - """ - location = super().add_file_location(location) - if constructs: - for c in self.constructs.filter_by_data(todict=True).values(): - c.add_file_location(location) - - return location + # def add_file_location( + # self, + # location, + # constructs=True, + # ): + # """Add a new file location in-place. + # + # All data definitions that reference files are additionally + # referenced from the given location. + # + # .. versionadded:: 3.15.0 + # + # .. seealso:: `del_file_location`, `file_locations` + # + # :Parameters: + # + # location: `str` + # The new location. + # + # constructs: `bool`, optional + # If True (the default) then metadata constructs also + # have the new file location added to them. + # + # :Returns: + # + # `str` + # The new location as an absolute path with no trailing + # path name component separator. + # + # **Examples** + # + # >>> f.add_file_location('/data/model/') + # '/data/model' + # + # """ + # location = super().add_file_location(location) + # if constructs: + # for c in self.constructs.filter_by_data(todict=True).values(): + # c.add_file_location(location) + # + # return location def section(self, axes=None, stop=None, min_step=1, **kwargs): """Return a FieldList of m dimensional sections of a Field of n diff --git a/cf/functions.py b/cf/functions.py index 04e35e84f1..bf47bb7d19 100644 --- a/cf/functions.py +++ b/cf/functions.py @@ -27,7 +27,7 @@ from dask.base import is_dask_collection from psutil import virtual_memory -from . import __cfa_version__, __file__, __version__ +from . import __file__, __version__ from .constants import ( CONSTANTS, OperandBoundsCombination, @@ -1360,6 +1360,8 @@ def CF(): def CFA(): """The version of the CFA conventions. + Deprecated at version NEXTVERSION and is no longer available. 
+ This indicates which version of the CFA conventions are represented by this release of the cf package, and therefore the version can not be changed. @@ -1380,8 +1382,9 @@ def CFA(): '0.6.2' """ - return __cfa_version__ - + _DEPRECATION_ERROR_FUNCTION( + "CFA", version="NEXTVERSION", removed_at="5.0.0" + ) # pragma: no cover # Module-level alias to avoid name clashes with function keyword # arguments (corresponding to 'import atol as cf_atol' etc. in other diff --git a/cf/mixin2/cfanetcdf.py b/cf/mixin2/cfanetcdf.py index ad1396ce71..390717df47 100644 --- a/cf/mixin2/cfanetcdf.py +++ b/cf/mixin2/cfanetcdf.py @@ -17,491 +17,493 @@ class CFANetCDF(NetCDFMixin): """ - def cfa_del_aggregated_data(self): - """Remove the CFA-netCDF aggregation instruction terms. - The aggregation instructions are stored in the - ``aggregation_data`` attribute of a CFA-netCDF aggregation - variable. - - .. versionadded:: 3.15.0 - - .. seealso:: `cfa_get_aggregated_data`, - `cfa_has_aggregated_data`, - `cfa_set_aggregated_data` - - :Returns: - - `dict` - The removed CFA-netCDF aggregation instruction terms. - - **Examples** - - >>> f.cfa_set_aggregated_data( - ... {'location': 'cfa_location', - ... 'file': 'cfa_file', - ... 'address': 'cfa_address', - ... 'format': 'cfa_format', - ... 'tracking_id': 'tracking_id'} - ... ) - >>> f.cfa_has_aggregated_data() - True - >>> f.cfa_get_aggregated_data() - {'location': 'cfa_location', - 'file': 'cfa_file', - 'address': 'cfa_address', - 'format': 'c ', - 'tracking_id': 'tracking_id'} - >>> f.cfa_del_aggregated_data() - {'location': 'cfa_location', - 'file': 'cfa_file', - 'address': 'cfa_address', - 'format': 'cfa_format', - 'tracking_id': 'tracking_id'} - >>> f.cfa_has_aggregated_data() - False - >>> f.cfa_del_aggregated_data() - {} - >>> f.cfa_get_aggregated_data() - {} - - """ - return self._nc_del("cfa_aggregated_data", {}).copy() - - def cfa_get_aggregated_data(self): - """Return the CFA-netCDF aggregation instruction terms. 
- - The aggregation instructions are stored in the - ``aggregation_data`` attribute of a CFA-netCDF aggregation - variable. - - .. versionadded:: 3.15.0 - - .. seealso:: `cfa_del_aggregated_data`, - `cfa_has_aggregated_data`, - `cfa_set_aggregated_data` - - :Returns: - - `dict` - The aggregation instruction terms and their - corresponding netCDF variable names in a dictionary - whose key/value pairs are the aggregation instruction - terms and their corresponding variable names. - - **Examples** - - >>> f.cfa_set_aggregated_data( - ... {'location': 'cfa_location', - ... 'file': 'cfa_file', - ... 'address': 'cfa_address', - ... 'format': 'cfa_format', - ... 'tracking_id': 'tracking_id'} - ... ) - >>> f.cfa_has_aggregated_data() - True - >>> f.cfa_get_aggregated_data() - {'location': 'cfa_location', - 'file': 'cfa_file', - 'address': 'cfa_address', - 'format': 'cfa_format', - 'tracking_id': 'tracking_id'} - >>> f.cfa_del_aggregated_data() - {'location': 'cfa_location', - 'file': 'cfa_file', - 'address': 'cfa_address', - 'format': 'cfa_format', - 'tracking_id': 'tracking_id'} - >>> f.cfa_has_aggregated_data() - False - >>> f.cfa_del_aggregated_data() - {} - >>> f.cfa_get_aggregated_data() - {} - - """ - out = self._nc_get("cfa_aggregated_data", default=None) - if out is not None: - return out.copy() - - return {} - - def cfa_has_aggregated_data(self): - """Whether any CFA-netCDF aggregation instruction terms have been set. - - The aggregation instructions are stored in the - ``aggregation_data`` attribute of a CFA-netCDF aggregation - variable. - - .. versionadded:: 3.15.0 - - .. seealso:: `cfa_del_aggregated_data`, - `cfa_get_aggregated_data`, - `cfa_set_aggregated_data` - - :Returns: - - `bool` - `True` if the CFA-netCDF aggregation instruction terms - have been set, otherwise `False`. - - **Examples** - - >>> f.cfa_set_aggregated_data( - ... {'location': 'cfa_location', - ... 'file': 'cfa_file', - ... 'address': 'cfa_address', - ... 'format': 'cfa_format', - ... 
'tracking_id': 'tracking_id'} - ... ) - >>> f.cfa_has_aggregated_data() - True - >>> f.cfa_get_aggregated_data() - {'location': 'cfa_location', - 'file': 'cfa_file', - 'address': 'cfa_address', - 'format': 'cfa_format', - 'tracking_id': 'tracking_id'} - >>> f.cfa_del_aggregated_data() - {'location': 'cfa_location', - 'file': 'cfa_file', - 'address': 'cfa_address', - 'format': 'cfa_format', - 'tracking_id': 'tracking_id'} - >>> f.cfa_has_aggregated_data() - False - >>> f.cfa_del_aggregated_data() - {} - >>> f.cfa_get_aggregated_data() - {} - - """ - return self._nc_has("cfa_aggregated_data") - - def cfa_set_aggregated_data(self, value): - """Set the CFA-netCDF aggregation instruction terms. - - The aggregation instructions are stored in the - ``aggregation_data`` attribute of a CFA-netCDF aggregation - variable. - - If there are any ``/`` (slash) characters in the netCDF - variable names then these act as delimiters for a group - hierarchy. By default, or if the name starts with a ``/`` - character and contains no others, the name is assumed to be in - the root group. - - .. versionadded:: 3.15.0 - - .. seealso:: `cfa_del_aggregated_data`, - `cfa_get_aggregated_data`, - `cfa_has_aggregated_data` - - :Parameters: - - value: `str` or `dict` - The aggregation instruction terms and their - corresponding netCDF variable names. Either a - CFA-netCDF-compliant string value of an - ``aggregated_data`` attribute, or a dictionary whose - key/value pairs are the aggregation instruction terms - and their corresponding variable names. - - :Returns: - - `None` - - **Examples** - - >>> f.cfa_set_aggregated_data( - ... {'location': 'cfa_location', - ... 'file': 'cfa_file', - ... 'address': 'cfa_address', - ... 'format': 'cfa_format', - ... 'tracking_id': 'tracking_id'} - ... 
) - >>> f.cfa_has_aggregated_data() - True - >>> f.cfa_get_aggregated_data() - {'location': 'cfa_location', - 'file': 'cfa_file', - 'address': 'cfa_address', - 'format': 'cfa_format', - 'tracking_id': 'tracking_id'} - >>> f.cfa_del_aggregated_data() - {'location': 'cfa_location', - 'file': 'cfa_file', - 'address': 'cfa_address', - 'format': 'cfa_format', - 'tracking_id': 'tracking_id'} - >>> f.cfa_has_aggregated_data() - False - >>> f.cfa_del_aggregated_data() - {} - >>> f.cfa_get_aggregated_data() - {} - - """ - if value: - if isinstance(value, str): - v = split("\s+", value) - value = {term[:-1]: var for term, var in zip(v[::2], v[1::2])} - else: - # 'value' is a dictionary - value = value.copy() - - self._nc_set("cfa_aggregated_data", value) - - def cfa_clear_file_substitutions(self): - """Remove all of the CFA-netCDF file name substitutions. - - .. versionadded:: 3.15.0 - - .. seealso:: `cfa_del_file_substitution`, - `cfa_file_substitutions`, - `cfa_has_file_substitutions`, - `cfa_update_file_substitutions` - - :Returns: - - `dict` - {{Returns cfa_clear_file_substitutions}} - - **Examples** - - >>> f.cfa_update_file_substitutions({'base': 'file:///data/'}) - >>> f.cfa_has_file_substitutions() - True - >>> f.cfa_file_substitutions() - {'${base}': 'file:///data/'} - >>> f.cfa_update_file_substitutions({'${base2}': '/home/data/'}) - >>> f.cfa_file_substitutions() - {'${base}': 'file:///data/', '${base2}': '/home/data/'} - >>> f.cfa_update_file_substitutions({'${base}': '/new/location/'}) - >>> f.cfa_file_substitutions() - {'${base}': '/new/location/', '${base2}': '/home/data/'} - >>> f.cfa_del_file_substitution('${base}') - {'${base}': '/new/location/'} - >>> f.cfa_clear_file_substitutions() - {'${base2}': '/home/data/'} - >>> f.cfa_has_file_substitutions() - False - >>> f.cfa_file_substitutions() - {} - >>> f.cfa_clear_file_substitutions() - {} - >>> print(f.cfa_del_file_substitution('base', None)) - None - - """ - return self._nc_del("cfa_file_substitutions", 
{}).copy() - - def cfa_del_file_substitution(self, base): - """Remove a CFA-netCDF file name substitution. - - .. versionadded:: 3.15.0 - - .. seealso:: `cfa_clear_file_substitutions`, - `cfa_file_substitutions`, - `cfa_has_file_substitutions`, - `cfa_update_file_substitutions` - - :Parameters: - - {{cfa base: `str`}} - - :Returns: - - `dict` - {{Returns cfa_del_file_substitution}} - - **Examples** - - >>> f.cfa_update_file_substitutions({'base': 'file:///data/'}) - >>> f.cfa_has_file_substitutions() - True - >>> f.cfa_file_substitutions() - {'${base}': 'file:///data/'} - >>> f.cfa_update_file_substitutions({'${base2}': '/home/data/'}) - >>> f.cfa_file_substitutions() - {'${base}': 'file:///data/', '${base2}': '/home/data/'} - >>> f.cfa_update_file_substitutions({'${base}': '/new/location/'}) - >>> f.cfa_file_substitutions() - {'${base}': '/new/location/', '${base2}': '/home/data/'} - >>> f.cfa_del_file_substitution('${base}') - {'${base}': '/new/location/'} - >>> f.cfa_clear_file_substitutions() - {'${base2}': '/home/data/'} - >>> f.cfa_has_file_substitutions() - False - >>> f.cfa_file_substitutions() - {} - >>> f.cfa_clear_file_substitutions() - {} - >>> print(f.cfa_del_file_substitution('base')) - {} - - """ - if not (base.startswith("${") and base.endswith("}")): - base = f"${{{base}}}" - - subs = self.cfa_file_substitutions() - if base not in subs: - return {} - - out = {base: subs.pop(base)} - if subs: - self._nc_set("cfa_file_substitutions", subs) - else: - self._nc_del("cfa_file_substitutions", None) - - return out - - def cfa_file_substitutions(self): - """Return the CFA-netCDF file name substitutions. - - .. versionadded:: 3.15.0 - - .. seealso:: `cfa_clear_file_substitutions`, - `cfa_del_file_substitution`, - `cfa_has_file_substitutions`, - `cfa_update_file_substitutions` - :Returns: - - `dict` - The CFA-netCDF file name substitutions. 
- - **Examples** - - >>> f.cfa_update_file_substitutions({'base': 'file:///data/'}) - >>> f.cfa_has_file_substitutions() - True - >>> f.cfa_file_substitutions() - {'${base}': 'file:///data/'} - >>> f.cfa_update_file_substitutions({'${base2}': '/home/data/'}) - >>> f.cfa_file_substitutions() - {'${base}': 'file:///data/', '${base2}': '/home/data/'} - >>> f.cfa_update_file_substitutions({'${base}': '/new/location/'}) - >>> f.cfa_file_substitutions() - {'${base}': '/new/location/', '${base2}': '/home/data/'} - >>> f.cfa_del_file_substitution('${base}') - {'${base}': '/new/location/'} - >>> f.cfa_clear_file_substitutions() - {'${base2}': '/home/data/'} - >>> f.cfa_has_file_substitutions() - False - >>> f.cfa_file_substitutions() - {} - >>> f.cfa_clear_file_substitutions() - {} - >>> print(f.cfa_del_file_substitution('base', None)) - None - - """ - out = self._nc_get("cfa_file_substitutions", default=None) - if out is not None: - return out.copy() - - return {} - - def cfa_has_file_substitutions(self): - """Whether any CFA-netCDF file name substitutions have been set. - - .. versionadded:: 3.15.0 - - .. seealso:: `cfa_clear_file_substitutions`, - `cfa_del_file_substitution`, - `cfa_file_substitutions`, - `cfa_update_file_substitutions` - - :Returns: - - `bool` - `True` if any CFA-netCDF file name substitutions have - been set, otherwise `False`. 
- - **Examples** - - >>> f.cfa_update_file_substitutions({'base': 'file:///data/'}) - >>> f.cfa_has_file_substitutions() - True - >>> f.cfa_file_substitutions() - {'${base}': 'file:///data/'} - >>> f.cfa_update_file_substitutions({'${base2}': '/home/data/'}) - >>> f.cfa_file_substitutions() - {'${base}': 'file:///data/', '${base2}': '/home/data/'} - >>> f.cfa_update_file_substitutions({'${base}': '/new/location/'}) - >>> f.cfa_file_substitutions() - {'${base}': '/new/location/', '${base2}': '/home/data/'} - >>> f.cfa_del_file_substitution('${base}') - {'${base}': '/new/location/'} - >>> f.cfa_clear_file_substitutions() - {'${base2}': '/home/data/'} - >>> f.cfa_has_file_substitutions() - False - >>> f.cfa_file_substitutions() - {} - >>> f.cfa_clear_file_substitutions() - {} - >>> print(f.cfa_del_file_substitution('base', None)) - None - - """ - return self._nc_has("cfa_file_substitutions") - - def cfa_update_file_substitutions(self, substitutions): - """Set CFA-netCDF file name substitutions. - - .. versionadded:: 3.15.0 - - .. 
seealso:: `cfa_clear_file_substitutions`, - `cfa_del_file_substitution`, - `cfa_file_substitutions`, - `cfa_has_file_substitutions` - - :Parameters: - - {{cfa substitutions: `dict`}} - - :Returns: - - `None` - - **Examples** - - >>> f.cfa_update_file_substitutions({'base': 'file:///data/'}) - >>> f.cfa_has_file_substitutions() - True - >>> f.cfa_file_substitutions() - {'${base}': 'file:///data/'} - >>> f.cfa_update_file_substitutions({'${base2}': '/home/data/'}) - >>> f.cfa_file_substitutions() - {'${base}': 'file:///data/', '${base2}': '/home/data/'} - >>> f.cfa_update_file_substitutions({'${base}': '/new/location/'}) - >>> f.cfa_file_substitutions() - {'${base}': '/new/location/', '${base2}': '/home/data/'} - >>> f.cfa_del_file_substitution('${base}') - {'${base}': '/new/location/'} - >>> f.cfa_clear_file_substitutions() - {'${base2}': '/home/data/'} - >>> f.cfa_has_file_substitutions() - False - >>> f.cfa_file_substitutions() - {} - >>> f.cfa_clear_file_substitutions() - {} - >>> print(f.cfa_del_file_substitution('base', None)) - None - - """ - if not substitutions: - return - - substitutions = substitutions.copy() - for base, sub in tuple(substitutions.items()): - if not (base.startswith("${") and base.endswith("}")): - substitutions[f"${{{base}}}"] = substitutions.pop(base) - - subs = self.cfa_file_substitutions() - subs.update(substitutions) - self._nc_set("cfa_file_substitutions", subs) +# +# def cfa_del_aggregated_data(self): +# """Remove the CFA-netCDF aggregation instruction terms. +# +# The aggregation instructions are stored in the +# ``aggregation_data`` attribute of a CFA-netCDF aggregation +# variable. +# +# .. versionadded:: 3.15.0 +# +# .. seealso:: `cfa_get_aggregated_data`, +# `cfa_has_aggregated_data`, +# `cfa_set_aggregated_data` +# +# :Returns: +# +# `dict` +# The removed CFA-netCDF aggregation instruction terms. +# +# **Examples** +# +# >>> f.cfa_set_aggregated_data( +# ... {'location': 'cfa_location', +# ... 'file': 'cfa_file', +# ... 
'address': 'cfa_address', +# ... 'format': 'cfa_format', +# ... 'tracking_id': 'tracking_id'} +# ... ) +# >>> f.cfa_has_aggregated_data() +# True +# >>> f.cfa_get_aggregated_data() +# {'location': 'cfa_location', +# 'file': 'cfa_file', +# 'address': 'cfa_address', +# 'format': 'c ', +# 'tracking_id': 'tracking_id'} +# >>> f.cfa_del_aggregated_data() +# {'location': 'cfa_location', +# 'file': 'cfa_file', +# 'address': 'cfa_address', +# 'format': 'cfa_format', +# 'tracking_id': 'tracking_id'} +# >>> f.cfa_has_aggregated_data() +# False +# >>> f.cfa_del_aggregated_data() +# {} +# >>> f.cfa_get_aggregated_data() +# {} +# +# """ +# return self._nc_del("cfa_aggregated_data", {}).copy() +# +# def cfa_get_aggregated_data(self): +# """Return the CFA-netCDF aggregation instruction terms. +# +# The aggregation instructions are stored in the +# ``aggregation_data`` attribute of a CFA-netCDF aggregation +# variable. +# +# .. versionadded:: 3.15.0 +# +# .. seealso:: `cfa_del_aggregated_data`, +# `cfa_has_aggregated_data`, +# `cfa_set_aggregated_data` +# +# :Returns: +# +# `dict` +# The aggregation instruction terms and their +# corresponding netCDF variable names in a dictionary +# whose key/value pairs are the aggregation instruction +# terms and their corresponding variable names. +# +# **Examples** +# +# >>> f.cfa_set_aggregated_data( +# ... {'location': 'cfa_location', +# ... 'file': 'cfa_file', +# ... 'address': 'cfa_address', +# ... 'format': 'cfa_format', +# ... 'tracking_id': 'tracking_id'} +# ... 
) +# >>> f.cfa_has_aggregated_data() +# True +# >>> f.cfa_get_aggregated_data() +# {'location': 'cfa_location', +# 'file': 'cfa_file', +# 'address': 'cfa_address', +# 'format': 'cfa_format', +# 'tracking_id': 'tracking_id'} +# >>> f.cfa_del_aggregated_data() +# {'location': 'cfa_location', +# 'file': 'cfa_file', +# 'address': 'cfa_address', +# 'format': 'cfa_format', +# 'tracking_id': 'tracking_id'} +# >>> f.cfa_has_aggregated_data() +# False +# >>> f.cfa_del_aggregated_data() +# {} +# >>> f.cfa_get_aggregated_data() +# {} +# +# """ +# out = self._nc_get("cfa_aggregated_data", default=None) +# if out is not None: +# return out.copy() +# +# return {} +# +# def cfa_has_aggregated_data(self): +# """Whether any CFA-netCDF aggregation instruction terms have been set. +# +# The aggregation instructions are stored in the +# ``aggregation_data`` attribute of a CFA-netCDF aggregation +# variable. +# +# .. versionadded:: 3.15.0 +# +# .. seealso:: `cfa_del_aggregated_data`, +# `cfa_get_aggregated_data`, +# `cfa_set_aggregated_data` +# +# :Returns: +# +# `bool` +# `True` if the CFA-netCDF aggregation instruction terms +# have been set, otherwise `False`. +# +# **Examples** +# +# >>> f.cfa_set_aggregated_data( +# ... {'location': 'cfa_location', +# ... 'file': 'cfa_file', +# ... 'address': 'cfa_address', +# ... 'format': 'cfa_format', +# ... 'tracking_id': 'tracking_id'} +# ... 
) +# >>> f.cfa_has_aggregated_data() +# True +# >>> f.cfa_get_aggregated_data() +# {'location': 'cfa_location', +# 'file': 'cfa_file', +# 'address': 'cfa_address', +# 'format': 'cfa_format', +# 'tracking_id': 'tracking_id'} +# >>> f.cfa_del_aggregated_data() +# {'location': 'cfa_location', +# 'file': 'cfa_file', +# 'address': 'cfa_address', +# 'format': 'cfa_format', +# 'tracking_id': 'tracking_id'} +# >>> f.cfa_has_aggregated_data() +# False +# >>> f.cfa_del_aggregated_data() +# {} +# >>> f.cfa_get_aggregated_data() +# {} +# +# """ +# return self._nc_has("cfa_aggregated_data") +# +# def cfa_set_aggregated_data(self, value): +# """Set the CFA-netCDF aggregation instruction terms. +# +# The aggregation instructions are stored in the +# ``aggregation_data`` attribute of a CFA-netCDF aggregation +# variable. +# +# If there are any ``/`` (slash) characters in the netCDF +# variable names then these act as delimiters for a group +# hierarchy. By default, or if the name starts with a ``/`` +# character and contains no others, the name is assumed to be in +# the root group. +# +# .. versionadded:: 3.15.0 +# +# .. seealso:: `cfa_del_aggregated_data`, +# `cfa_get_aggregated_data`, +# `cfa_has_aggregated_data` +# +# :Parameters: +# +# value: `str` or `dict` +# The aggregation instruction terms and their +# corresponding netCDF variable names. Either a +# CFA-netCDF-compliant string value of an +# ``aggregated_data`` attribute, or a dictionary whose +# key/value pairs are the aggregation instruction terms +# and their corresponding variable names. +# +# :Returns: +# +# `None` +# +# **Examples** +# +# >>> f.cfa_set_aggregated_data( +# ... {'location': 'cfa_location', +# ... 'file': 'cfa_file', +# ... 'address': 'cfa_address', +# ... 'format': 'cfa_format', +# ... 'tracking_id': 'tracking_id'} +# ... 
) +# >>> f.cfa_has_aggregated_data() +# True +# >>> f.cfa_get_aggregated_data() +# {'location': 'cfa_location', +# 'file': 'cfa_file', +# 'address': 'cfa_address', +# 'format': 'cfa_format', +# 'tracking_id': 'tracking_id'} +# >>> f.cfa_del_aggregated_data() +# {'location': 'cfa_location', +# 'file': 'cfa_file', +# 'address': 'cfa_address', +# 'format': 'cfa_format', +# 'tracking_id': 'tracking_id'} +# >>> f.cfa_has_aggregated_data() +# False +# >>> f.cfa_del_aggregated_data() +# {} +# >>> f.cfa_get_aggregated_data() +# {} +# +# """ +# if value: +# if isinstance(value, str): +# v = split("\s+", value) +# value = {term[:-1]: var for term, var in zip(v[::2], v[1::2])} +# else: +# # 'value' is a dictionary +# value = value.copy() +# +# self._nc_set("cfa_aggregated_data", value) +# +# def cfa_clear_file_substitutions(self): +# """Remove all of the CFA-netCDF file name substitutions. +# +# .. versionadded:: 3.15.0 +# +# .. seealso:: `cfa_del_file_substitution`, +# `cfa_file_substitutions`, +# `cfa_has_file_substitutions`, +# `cfa_update_file_substitutions` +# +# :Returns: +# +# `dict` +# {{Returns cfa_clear_file_substitutions}} +# +# **Examples** +# +# >>> f.cfa_update_file_substitutions({'base': 'file:///data/'}) +# >>> f.cfa_has_file_substitutions() +# True +# >>> f.cfa_file_substitutions() +# {'${base}': 'file:///data/'} +# >>> f.cfa_update_file_substitutions({'${base2}': '/home/data/'}) +# >>> f.cfa_file_substitutions() +# {'${base}': 'file:///data/', '${base2}': '/home/data/'} +# >>> f.cfa_update_file_substitutions({'${base}': '/new/location/'}) +# >>> f.cfa_file_substitutions() +# {'${base}': '/new/location/', '${base2}': '/home/data/'} +# >>> f.cfa_del_file_substitution('${base}') +# {'${base}': '/new/location/'} +# >>> f.cfa_clear_file_substitutions() +# {'${base2}': '/home/data/'} +# >>> f.cfa_has_file_substitutions() +# False +# >>> f.cfa_file_substitutions() +# {} +# >>> f.cfa_clear_file_substitutions() +# {} +# >>> print(f.cfa_del_file_substitution('base', 
None)) +# None +# +# """ +# return self._nc_del("cfa_file_substitutions", {}).copy() +# +# def cfa_del_file_substitution(self, base): +# """Remove a CFA-netCDF file name substitution. +# +# .. versionadded:: 3.15.0 +# +# .. seealso:: `cfa_clear_file_substitutions`, +# `cfa_file_substitutions`, +# `cfa_has_file_substitutions`, +# `cfa_update_file_substitutions` +# +# :Parameters: +# +# {{cfa base: `str`}} +# +# :Returns: +# +# `dict` +# {{Returns cfa_del_file_substitution}} +# +# **Examples** +# +# >>> f.cfa_update_file_substitutions({'base': 'file:///data/'}) +# >>> f.cfa_has_file_substitutions() +# True +# >>> f.cfa_file_substitutions() +# {'${base}': 'file:///data/'} +# >>> f.cfa_update_file_substitutions({'${base2}': '/home/data/'}) +# >>> f.cfa_file_substitutions() +# {'${base}': 'file:///data/', '${base2}': '/home/data/'} +# >>> f.cfa_update_file_substitutions({'${base}': '/new/location/'}) +# >>> f.cfa_file_substitutions() +# {'${base}': '/new/location/', '${base2}': '/home/data/'} +# >>> f.cfa_del_file_substitution('${base}') +# {'${base}': '/new/location/'} +# >>> f.cfa_clear_file_substitutions() +# {'${base2}': '/home/data/'} +# >>> f.cfa_has_file_substitutions() +# False +# >>> f.cfa_file_substitutions() +# {} +# >>> f.cfa_clear_file_substitutions() +# {} +# >>> print(f.cfa_del_file_substitution('base')) +# {} +# +# """ +# if not (base.startswith("${") and base.endswith("}")): +# base = f"${{{base}}}" +# +# subs = self.cfa_file_substitutions() +# if base not in subs: +# return {} +# +# out = {base: subs.pop(base)} +# if subs: +# self._nc_set("cfa_file_substitutions", subs) +# else: +# self._nc_del("cfa_file_substitutions", None) +# +# return out +# +# def cfa_file_substitutions(self): +# """Return the CFA-netCDF file name substitutions. +# +# .. versionadded:: 3.15.0 +# +# .. 
seealso:: `cfa_clear_file_substitutions`, +# `cfa_del_file_substitution`, +# `cfa_has_file_substitutions`, +# `cfa_update_file_substitutions` +# :Returns: +# +# `dict` +# The CFA-netCDF file name substitutions. +# +# **Examples** +# +# >>> f.cfa_update_file_substitutions({'base': 'file:///data/'}) +# >>> f.cfa_has_file_substitutions() +# True +# >>> f.cfa_file_substitutions() +# {'${base}': 'file:///data/'} +# >>> f.cfa_update_file_substitutions({'${base2}': '/home/data/'}) +# >>> f.cfa_file_substitutions() +# {'${base}': 'file:///data/', '${base2}': '/home/data/'} +# >>> f.cfa_update_file_substitutions({'${base}': '/new/location/'}) +# >>> f.cfa_file_substitutions() +# {'${base}': '/new/location/', '${base2}': '/home/data/'} +# >>> f.cfa_del_file_substitution('${base}') +# {'${base}': '/new/location/'} +# >>> f.cfa_clear_file_substitutions() +# {'${base2}': '/home/data/'} +# >>> f.cfa_has_file_substitutions() +# False +# >>> f.cfa_file_substitutions() +# {} +# >>> f.cfa_clear_file_substitutions() +# {} +# >>> print(f.cfa_del_file_substitution('base', None)) +# None +# +# """ +# out = self._nc_get("cfa_file_substitutions", default=None) +# if out is not None: +# return out.copy() +# +# return {} +# +# def cfa_has_file_substitutions(self): +# """Whether any CFA-netCDF file name substitutions have been set. +# +# .. versionadded:: 3.15.0 +# +# .. seealso:: `cfa_clear_file_substitutions`, +# `cfa_del_file_substitution`, +# `cfa_file_substitutions`, +# `cfa_update_file_substitutions` +# +# :Returns: +# +# `bool` +# `True` if any CFA-netCDF file name substitutions have +# been set, otherwise `False`. 
+# +# **Examples** +# +# >>> f.cfa_update_file_substitutions({'base': 'file:///data/'}) +# >>> f.cfa_has_file_substitutions() +# True +# >>> f.cfa_file_substitutions() +# {'${base}': 'file:///data/'} +# >>> f.cfa_update_file_substitutions({'${base2}': '/home/data/'}) +# >>> f.cfa_file_substitutions() +# {'${base}': 'file:///data/', '${base2}': '/home/data/'} +# >>> f.cfa_update_file_substitutions({'${base}': '/new/location/'}) +# >>> f.cfa_file_substitutions() +# {'${base}': '/new/location/', '${base2}': '/home/data/'} +# >>> f.cfa_del_file_substitution('${base}') +# {'${base}': '/new/location/'} +# >>> f.cfa_clear_file_substitutions() +# {'${base2}': '/home/data/'} +# >>> f.cfa_has_file_substitutions() +# False +# >>> f.cfa_file_substitutions() +# {} +# >>> f.cfa_clear_file_substitutions() +# {} +# >>> print(f.cfa_del_file_substitution('base', None)) +# None +# +# """ +# return self._nc_has("cfa_file_substitutions") +# +# def cfa_update_file_substitutions(self, substitutions): +# """Set CFA-netCDF file name substitutions. +# +# .. versionadded:: 3.15.0 +# +# .. 
seealso:: `cfa_clear_file_substitutions`, +# `cfa_del_file_substitution`, +# `cfa_file_substitutions`, +# `cfa_has_file_substitutions` +# +# :Parameters: +# +# {{cfa substitutions: `dict`}} +# +# :Returns: +# +# `None` +# +# **Examples** +# +# >>> f.cfa_update_file_substitutions({'base': 'file:///data/'}) +# >>> f.cfa_has_file_substitutions() +# True +# >>> f.cfa_file_substitutions() +# {'${base}': 'file:///data/'} +# >>> f.cfa_update_file_substitutions({'${base2}': '/home/data/'}) +# >>> f.cfa_file_substitutions() +# {'${base}': 'file:///data/', '${base2}': '/home/data/'} +# >>> f.cfa_update_file_substitutions({'${base}': '/new/location/'}) +# >>> f.cfa_file_substitutions() +# {'${base}': '/new/location/', '${base2}': '/home/data/'} +# >>> f.cfa_del_file_substitution('${base}') +# {'${base}': '/new/location/'} +# >>> f.cfa_clear_file_substitutions() +# {'${base2}': '/home/data/'} +# >>> f.cfa_has_file_substitutions() +# False +# >>> f.cfa_file_substitutions() +# {} +# >>> f.cfa_clear_file_substitutions() +# {} +# >>> print(f.cfa_del_file_substitution('base', None)) +# None +# +# """ +# if not substitutions: +# return +# +# substitutions = substitutions.copy() +# for base, sub in tuple(substitutions.items()): +# if not (base.startswith("${") and base.endswith("}")): +# substitutions[f"${{{base}}}"] = substitutions.pop(base) +# +# subs = self.cfa_file_substitutions() +# subs.update(substitutions) +# self._nc_set("cfa_file_substitutions", subs) diff --git a/cf/read_write/netcdf/netcdfread.py b/cf/read_write/netcdf/netcdfread.py index 54d92305fd..5c26412ccb 100644 --- a/cf/read_write/netcdf/netcdfread.py +++ b/cf/read_write/netcdf/netcdfread.py @@ -9,995 +9,997 @@ class NetCDFRead(cfdm.read_write.netcdf.NetCDFRead): """ - def _ncdimensions(self, ncvar, ncdimensions=None, parent_ncvar=None): - """Return a list of the netCDF dimensions corresponding to a - netCDF variable. - If the variable has been compressed then the *implied - uncompressed* dimensions are returned. 
- - For a CFA variable, the netCDF dimensions are taken from the - 'aggregated_dimensions' netCDF attribute. - - .. versionadded:: 3.0.0 - - :Parameters: - - ncvar: `str` - The netCDF variable name. - - ncdimensions: sequence of `str`, optional - Use these netCDF dimensions, rather than retrieving them - from the netCDF variable itself. This allows the - dimensions of a domain variable to be parsed. Note that - this only parameter only needs to be used once because the - parsed domain dimensions are automatically stored in - `self.read_var['domain_ncdimensions'][ncvar]`. - - .. versionadded:: 3.11.0 - - parent_ncvar: `str`, optional - TODO - - .. versionadded:: TODO - - :Returns: - - `list` - The netCDF dimension names spanned by the netCDF variable. - - **Examples** - - >>> n._ncdimensions('humidity') - ['time', 'lat', 'lon'] - - For a variable compressed by gathering: - - dimensions: - lat=73; - lon=96; - landpoint=2381; - depth=4; - variables: - int landpoint(landpoint); - landpoint:compress="lat lon"; - float landsoilt(depth,landpoint); - landsoilt:long_name="soil temperature"; - landsoilt:units="K"; - - we would have - - >>> n._ncdimensions('landsoilt') - ['depth', 'lat', 'lon'] - - """ - - if not self._is_cfa_variable(ncvar): - return super()._ncdimensions( - ncvar, ncdimensions=ncdimensions, parent_ncvar=parent_ncvar - ) - - # Still here? Then we have a CFA variable. - ncdimensions = self.read_vars["variable_attributes"][ncvar][ - "aggregated_dimensions" - ].split() - - return list(map(str, ncdimensions)) - - def _get_domain_axes(self, ncvar, allow_external=False, parent_ncvar=None): - """Return the domain axis identifiers that correspond to a - netCDF variable's netCDF dimensions. - - For a CFA variable, the netCDF dimensions are taken from the - 'aggregated_dimensions' netCDF attribute. - - :Parameter: - - ncvar: `str` - The netCDF variable name. - - allow_external: `bool` - If `True` and *ncvar* is an external variable then return an - empty list. 
- - parent_ncvar: `str`, optional - TODO - - .. versionadded:: TODO - - :Returns: - - `list` - - **Examples** - - >>> r._get_domain_axes('areacello') - ['domainaxis0', 'domainaxis1'] - - >>> r._get_domain_axes('areacello', allow_external=True) - [] - - """ - if not self._is_cfa_variable(ncvar): - return super()._get_domain_axes( - ncvar=ncvar, - allow_external=allow_external, - parent_ncvar=parent_ncvar, - ) - - # ------------------------------------------------------------ - # Still here? Then we have a CFA-netCDF variable. - # ------------------------------------------------------------ - g = self.read_vars - - ncdimensions = g["variable_attributes"][ncvar][ - "aggregated_dimensions" - ].split() - - ncdim_to_axis = g["ncdim_to_axis"] - axes = [ - ncdim_to_axis[ncdim] - for ncdim in ncdimensions - if ncdim in ncdim_to_axis - ] - - return axes - - def _create_data( - self, - ncvar, - construct=None, - unpacked_dtype=False, - uncompress_override=None, - parent_ncvar=None, - coord_ncvar=None, - cfa_term=None, - compression_index=False, - ): - """Create data for a netCDF or CFA-netCDF variable. - - .. versionadded:: 3.0.0 - - :Parameters: - - ncvar: `str` - The name of the netCDF variable that contains the - data. See the *cfa_term* parameter. - - construct: optional - - unpacked_dtype: `False` or `numpy.dtype`, optional - - uncompress_override: `bool`, optional - - coord_ncvar: `str`, optional - - cfa_term: `dict`, optional - The name of a non-standard aggregation instruction - term from which to create the data. If set then - *ncvar* must be the value of the term in the - ``aggregation_data`` attribute. - - .. versionadded:: 3.15.0 - - compression_index: `bool`, optional - True if the data being created are compression - indices. - - .. 
versionadded:: 3.15.2 - - :Returns: - - `Data` - - """ - if not cfa_term and not self._is_cfa_variable(ncvar): - # Create data for a normal netCDF variable - data = super()._create_data( - ncvar=ncvar, - construct=construct, - unpacked_dtype=unpacked_dtype, - uncompress_override=uncompress_override, - parent_ncvar=parent_ncvar, - coord_ncvar=coord_ncvar, - ) - - # Set the CFA write status to True when there is exactly - # one dask chunk - if data.npartitions == 1: - data._cfa_set_write(True) - - # if ( - # not compression_index - # and self.read_vars.get("cache") - # and self.implementation.get_construct_type(construct) - # != "field" - # ): - # # Only cache values from non-field data and - # # non-compression-index data, on the assumptions that: - # # - # # a) Field data is, in general, so large that finding - # # the cached values takes too long. - # # - # # b) Cached values are never really required for - # # compression index data. - # self._cache_data_elements(data, ncvar) - - return data - - # ------------------------------------------------------------ - # Still here? 
Create data for a CFA variable - # ------------------------------------------------------------ - if construct is not None: - # Remove the aggregation attributes from the construct - self.implementation.del_property( - construct, "aggregated_dimensions", None - ) - aggregated_data = self.implementation.del_property( - construct, "aggregated_data", None - ) - else: - aggregated_data = None - - if cfa_term: - term, term_ncvar = tuple(cfa_term.items())[0] - cfa_array, kwargs = self._create_cfanetcdfarray_term( - ncvar, term, term_ncvar - ) - else: - cfa_array, kwargs = self._create_cfanetcdfarray( - ncvar, - unpacked_dtype=unpacked_dtype, - coord_ncvar=coord_ncvar, - ) - - attributes = kwargs["attributes"] - data = self._create_Data( - cfa_array, - ncvar, - units=attributes.get("units"), - calendar=attributes.get("calendar"), - ) - - # Note: We don't cache elements from CFA variables, because - # the data are in fragment files which have not been - # opened and may not not even be openable (such as could - # be the case if a fragment file was on tape storage). - - # Set the CFA write status to True iff each non-aggregated - # axis has exactly one dask storage chunk - if cfa_term: - data._cfa_set_term(True) - else: - cfa_write = True - for n, numblocks in zip( - cfa_array.get_fragment_shape(), data.numblocks - ): - if n == 1 and numblocks > 1: - # Note: 'n == 1' is True for non-aggregated axes - cfa_write = False - break - - data._cfa_set_write(cfa_write) - - # Store the 'aggregated_data' attribute - if aggregated_data: - data.cfa_set_aggregated_data(aggregated_data) - - # Store the file substitutions - data.cfa_update_file_substitutions(kwargs.get("substitutions")) - - return data - - def _is_cfa_variable(self, ncvar): - """Return True if *ncvar* is a CFA aggregated variable. - - .. versionadded:: 3.14.0 - - :Parameters: - - ncvar: `str` - The name of the netCDF variable. - - :Returns: - - `bool` - Whether or not *ncvar* is a CFA variable. 
- - """ - g = self.read_vars - return ( - g["cfa"] - and ncvar in g["cfa_aggregated_data"] - and ncvar not in g["external_variables"] - ) - - # def _create_Data( - # self, - # array, - # ncvar, - # units=None, - # calendar=None, - # ncdimensions=(), - # **kwargs, - # ): - # """Create a Data object from a netCDF variable. - # - # .. versionadded:: 3.0.0 - # - # :Parameters: - # - # array: `Array` - # The file array. - # - # ncvar: `str` - # The netCDF variable containing the array. - # - # units: `str`, optional - # The units of *array*. By default, or if `None`, it is - # assumed that there are no units. - # - # calendar: `str`, optional - # The calendar of *array*. By default, or if `None`, it is - # assumed that there is no calendar. - # - # ncdimensions: sequence of `str`, optional - # The netCDF dimensions spanned by the array. - # - # .. versionadded:: 3.14.0 - # - # kwargs: optional - # Extra parameters to pass to the initialisation of the - # returned `Data` object. - # - # :Returns: - # - # `Data` - # - # """ - # if array.dtype is None: - # # The array is based on a netCDF VLEN variable, and - # # therefore has unknown data type. To find the correct - # # data type (e.g. "=1) netCDF string type variable comes out - # # as a numpy object array, so convert it to numpy - # # string array. - # array = array.astype("U", copy=False) - # # NetCDF4 doesn't auto-mask VLEN variables - # array = np.ma.where(array == "", np.ma.masked, array) - # - # # Parse dask chunks - # chunks = self._dask_chunks(array, ncvar, compressed) - # - # data = super()._create_Data( - # array, - # ncvar, - # units=units, - # calendar=calendar, - # chunks=chunks, - # **kwargs, - # ) - # - # return data - - def _customise_read_vars(self): - """Customise the read parameters. - - Take the opportunity to apply CFA updates to - `read_vars['variable_dimensions']` and - `read_vars['do_not_create_field']`. - - .. 
versionadded:: 3.0.0 - - """ - super()._customise_read_vars() - g = self.read_vars - - if not g["cfa"]: - return - - g["cfa_aggregated_data"] = {} - g["cfa_aggregation_instructions"] = {} - g["cfa_file_substitutions"] = {} - - # ------------------------------------------------------------ - # Still here? Then this is a CFA-netCDF file - # ------------------------------------------------------------ - if g["CFA_version"] < Version("0.6.2"): - raise ValueError( - f"Can't read file {g['filename']} that uses obsolete " - f"CFA conventions version CFA-{g['CFA_version']}. " - "(Note that cf version 3.13.1 can be used to read and " - "write CFA-0.4 files.)" - ) - - # Get the directory of the CFA-netCDF file being read - from os.path import abspath - from pathlib import PurePath - - g["cfa_dir"] = PurePath(abspath(g["filename"])).parent - - # Process the aggregation instruction variables, and the - # aggregated dimensions. - dimensions = g["variable_dimensions"] - attributes = g["variable_attributes"] - - for ncvar, attributes in attributes.items(): - if "aggregated_dimensions" not in attributes: - # This is not an aggregated variable - continue - - # Set the aggregated variable's dimensions as its - # aggregated dimensions - ncdimensions = attributes["aggregated_dimensions"].split() - dimensions[ncvar] = tuple(map(str, ncdimensions)) - - # Do not create fields/domains from aggregation - # instruction variables - parsed_aggregated_data = self._cfa_parse_aggregated_data( - ncvar, attributes.get("aggregated_data") - ) - for term_ncvar in parsed_aggregated_data.values(): - g["do_not_create_field"].add(term_ncvar) - - # def _cache_data_elements(self, data, ncvar): - # """Cache selected element values. - # - # Updates *data* in-place to store its first, second, - # penultimate, and last element values (as appropriate). - # - # These values are used by, amongst other things, - # `cf.Data.equals`, `cf.aggregate` and for inspection. 
- # - # Doing this here is quite cheap because only the individual - # elements are read from the already-open file, as opposed to - # being retrieved from *data* (which would require a whole dask - # chunk to be read to get each single value). - # - # However, empirical evidence shows that using netCDF4 to access - # the first and last elements of a large array on disk - # (e.g. shape (1, 75, 1207, 1442)) is slow (e.g. ~2 seconds) and - # doesn't scale well with array size (i.e. it takes - # disproportionally longer for larger arrays). Such arrays are - # usually in field constructs, for which `cf.aggregate` does not - # need to know any array values, so this method should be used - # with caution, if at all, on field construct data. - # - # .. versionadded:: 3.14.0 - # - # :Parameters: - # - # data: `Data` - # The data to be updated with its cached values. - # - # ncvar: `str` - # The name of the netCDF variable that contains the - # data. - # - # :Returns: - # - # `None` - # - # """ - # - # if data.data.get_compression_type(): - # # Don't get cached elements from arrays compressed by - # # convention, as they'll likely be wrong. - # return - # - # g = self.read_vars - # - # # Get the netCDF4.Variable for the data - # if g["has_groups"]: - # group, name = self._netCDF4_group( - # g["variable_grouped_dataset"][ncvar], ncvar - # ) - # variable = group.variables.get(name) - # else: - # variable = g["variables"].get(ncvar) - # - # # Get the required element values - # size = data.size - # ndim = data.ndim - # - # char = False - # if variable.ndim == ndim + 1: - # dtype = variable.dtype - # if dtype is not str and dtype.kind in "SU": - # # This variable is a netCDF classic style char array - # # with a trailing dimension that needs to be collapsed - # char = True - # - # if ndim == 1: - # # Also cache the second element for 1-d data, on the - # # assumption that they may well be dimension coordinate - # # data. 
- # if size == 1: - # indices = (0, -1) - # value = variable[...] - # values = (value, value) - # elif size == 2: - # indices = (0, 1, -1) - # value = variable[-1:] - # values = (variable[:1], value, value) - # else: - # indices = (0, 1, -1) - # values = (variable[:1], variable[1:2], variable[-1:]) - # elif ndim == 2 and data.shape[-1] == 2: - # # Assume that 2-d data with a last dimension of size 2 - # # contains coordinate bounds, for which it is useful to - # # cache the upper and lower bounds of the the first and - # # last cells. - # indices = (0, 1, -2, -1) - # ndim1 = ndim - 1 - # values = ( - # variable[(slice(0, 1),) * ndim1 + (slice(0, 1),)], - # variable[(slice(0, 1),) * ndim1 + (slice(1, 2),)], - # ) - # if data.size == 2: - # values = values + values - # else: - # values += ( - # variable[(slice(-1, None, 1),) * ndim1 + (slice(0, 1),)], - # variable[(slice(-1, None, 1),) * ndim1 + (slice(1, 2),)], - # ) - # elif size == 1: - # indices = (0, -1) - # value = variable[...] - # values = (value, value) - # elif size == 3: - # indices = (0, 1, -1) - # if char: - # values = variable[...].reshape(3, variable.shape[-1]) - # else: - # values = variable[...].flatten() - # else: - # indices = (0, -1) - # values = ( - # variable[(slice(0, 1),) * ndim], - # variable[(slice(-1, None, 1),) * ndim], - # ) - # - # # Create a dictionary of the element values - # elements = {} - # for index, value in zip(indices, values): - # if char: - # # Variable is a netCDF classic style char array, so - # # collapse (by concatenation) the outermost (fastest - # # varying) dimension. E.g. 
[['a','b','c']] becomes - # # ['abc'] - # if value.dtype.kind == "U": - # value = value.astype("S") - # - # a = netCDF4.chartostring(value) - # shape = a.shape - # a = np.array([x.rstrip() for x in a.flat]) - # a = np.reshape(a, shape) - # value = np.ma.masked_where(a == "", a) - # - # if np.ma.is_masked(value): - # value = np.ma.masked - # else: - # try: - # value = value.item() - # except (AttributeError, ValueError): - # # AttributeError: A netCDF string type scalar - # # variable comes out as Python str object, which - # # has no 'item' method. - # # - # # ValueError: A size-0 array can't be converted to - # # a Python scalar. - # pass - # - # elements[index] = value - # - # # Store the elements in the data object - # data._set_cached_elements(elements) - - def _create_cfanetcdfarray( - self, - ncvar, - unpacked_dtype=False, - coord_ncvar=None, - term=None, - ): - """Create a CFA-netCDF variable array. - - .. versionadded:: 3.14.0 - - :Parameters: - - ncvar: `str` - The name of the CFA-netCDF aggregated variable. See - the *term* parameter. - - unpacked_dtype: `False` or `numpy.dtype`, optional - - coord_ncvar: `str`, optional - - term: `str`, optional - The name of a non-standard aggregation instruction - term from which to create the array. If set then - *ncvar* must be the value of the non-standard term in - the ``aggregation_data`` attribute. - - .. versionadded:: 3.15.0 - - :Returns: - - (`CFANetCDFArray`, `dict`) - The new `CFANetCDFArray` instance and dictionary of - the kwargs used to create it. - - """ - g = self.read_vars - - # Get the kwargs needed to instantiate a general netCDF array - # instance - kwargs = self._create_netcdfarray( - ncvar, - unpacked_dtype=unpacked_dtype, - coord_ncvar=coord_ncvar, - return_kwargs_only=True, - ) - - # Get rid of the incorrect shape. This will end up getting set - # correctly by the CFANetCDFArray instance. 
- kwargs.pop("shape", None) - aggregated_data = g["cfa_aggregated_data"][ncvar] - - standardised_terms = ("location", "file", "address", "format") - - instructions = [] - aggregation_instructions = {} - for t, term_ncvar in aggregated_data.items(): - if t not in standardised_terms: - continue - - aggregation_instructions[t] = g["cfa_aggregation_instructions"][ - term_ncvar - ] - instructions.append(f"{t}: {term_ncvar}") - - if t == "file": - kwargs["substitutions"] = g["cfa_file_substitutions"].get( - term_ncvar - ) - - kwargs["x"] = aggregation_instructions - kwargs["instructions"] = " ".join(sorted(instructions)) - - # Use the kwargs to create a CFANetCDFArray instance - if g["original_netCDF4"]: - array = self.implementation.initialise_CFANetCDF4Array(**kwargs) - else: - # h5netcdf - array = self.implementation.initialise_CFAH5netcdfArray(**kwargs) - - return array, kwargs - - def _create_cfanetcdfarray_term( - self, - parent_ncvar, - term, - ncvar, - ): - """Create a CFA-netCDF variable array. - - .. versionadded:: 3.14.0 - - :Parameters: - - parent_ncvar: `str` - The name of the CFA-netCDF aggregated variable. See - the *term* parameter. - - term: `str`, optional - The name of a non-standard aggregation instruction - term from which to create the array. If set then - *ncvar* must be the value of the non-standard term in - the ``aggregation_data`` attribute. - - .. versionadded:: 3.15.0 - - ncvar: `str` - The name of the CFA-netCDF aggregated variable. See - the *term* parameter. - - :Returns: - - (`CFANetCDFArray`, `dict`) - The new `CFANetCDFArray` instance and dictionary of - the kwargs used to create it. - - """ - g = self.read_vars - - # Get the kwargs needed to instantiate a general netCDF array - # instance - kwargs = self._create_netcdfarray( - ncvar, - return_kwargs_only=True, - ) - - # Get rid of the incorrect shape. This will end up getting set - # correctly by the CFANetCDFArray instance. 
- kwargs.pop("shape", None) - - instructions = [] - aggregation_instructions = {} - for t, term_ncvar in g["cfa_aggregated_data"][parent_ncvar].items(): - if t in ("location", term): - aggregation_instructions[t] = g[ - "cfa_aggregation_instructions" - ][term_ncvar] - instructions.append(f"{t}: {ncvar}") - - kwargs["term"] = term - kwargs["dtype"] = aggregation_instructions[term].dtype - kwargs["x"] = aggregation_instructions - kwargs["instructions"] = " ".join(sorted(instructions)) - - if g["original_netCDF4"]: - array = self.implementation.initialise_CFANetCDF4Array(**kwargs) - else: - # h5netcdf - array = self.implementation.initialise_CFAH5netcdfArray(**kwargs) - - return array, kwargs - - # - # def _parse_chunks(self, ncvar): - # """Parse the dask chunks. - # - # .. versionadded:: 3.14.0 - # - # :Parameters: - # - # ncvar: `str` - # The name of the netCDF variable containing the array. - # - # :Returns: - # - # `str`, `int` or `dict` - # The parsed chunks that are suitable for passing to a - # `Data` object containing the variable's array. 
- # - # """ - # g = self.read_vars - # - # default_chunks = "auto" - # chunks = g.get("chunks", default_chunks) - # - # if chunks is None: - # return -1 - # - # if isinstance(chunks, dict): - # if not chunks: - # return default_chunks - # - # # For ncdimensions = ('time', 'lat'): - # # - # # chunks={} -> ["auto", "auto"] - # # chunks={'ncdim%time': 12} -> [12, "auto"] - # # chunks={'ncdim%time': 12, 'ncdim%lat': 10000} -> [12, 10000] - # # chunks={'ncdim%time': 12, 'ncdim%lat': "20MB"} -> [12, "20MB"] - # # chunks={'ncdim%time': 12, 'latitude': -1} -> [12, -1] - # # chunks={'ncdim%time': 12, 'Y': None} -> [12, None] - # # chunks={'ncdim%time': 12, 'ncdim%lat': (30, 90)} -> [12, (30, 90)] - # # chunks={'ncdim%time': 12, 'ncdim%lat': None, 'X': 5} -> [12, None] - # attributes = g["variable_attributes"] - # chunks2 = [] - # for ncdim in g["variable_dimensions"][ncvar]: - # key = f"ncdim%{ncdim}" - # if key in chunks: - # chunks2.append(chunks[key]) - # continue - # - # found_coord_attr = False - # dim_coord_attrs = attributes.get(ncdim) - # if dim_coord_attrs is not None: - # for attr in ("standard_name", "axis"): - # key = dim_coord_attrs.get(attr) - # if key in chunks: - # found_coord_attr = True - # chunks2.append(chunks[key]) - # break - # - # if not found_coord_attr: - # # Use default chunks for this dimension - # chunks2.append(default_chunks) - # - # chunks = chunks2 - # - # return chunks - - def _customise_field_ancillaries(self, parent_ncvar, f): - """Create customised field ancillary constructs. - - This method currently creates: - - * Field ancillary constructs derived from non-standardised - terms in CFA aggregation instructions. Each construct spans - the same domain axes as the parent field construct. - Constructs are never created for `Domain` instances. - - .. versionadded:: 3.15.0 - - :Parameters: - - parent_ncvar: `str` - The netCDF variable name of the parent variable. - - f: `Field` - The parent field construct. 
- - :Returns: - - `dict` - A mapping of netCDF variable names to newly-created - construct identifiers. - - **Examples** - - >>> n._customise_field_ancillaries('tas', f) - {} - - >>> n._customise_field_ancillaries('pr', f) - {'tracking_id': 'fieldancillary1'} - - """ - if not self._is_cfa_variable(parent_ncvar): - return {} - - # ------------------------------------------------------------ - # Still here? Then we have a CFA-netCDF variable: Loop round - # the aggregation instruction terms and convert each - # non-standard term into a field ancillary construct that - # spans the same domain axes as the parent field. - # ------------------------------------------------------------ - g = self.read_vars - - standardised_terms = ("location", "file", "address", "format") - - out = {} - for term, term_ncvar in g["cfa_aggregated_data"][parent_ncvar].items(): - if term in standardised_terms: - continue - - if g["variables"][term_ncvar].ndim != f.ndim: - # Can only create field ancillaries with the same rank - # as the field - continue - - # Still here? Then we've got a non-standard aggregation - # term from which we can create a field - # ancillary construct. - anc = self.implementation.initialise_FieldAncillary() - - self.implementation.set_properties( - anc, g["variable_attributes"][term_ncvar] - ) - anc.set_property("long_name", term) - - # Store the term name as the 'id' attribute. This will be - # used as the term name if the field field ancillary is - # written to disk as a non-standard CFA term. 
- anc.id = term - - data = self._create_data( - parent_ncvar, anc, cfa_term={term: term_ncvar} - ) - - self.implementation.set_data(anc, data, copy=False) - self.implementation.nc_set_variable(anc, term_ncvar) - - key = self.implementation.set_field_ancillary( - f, - anc, - axes=self.implementation.get_field_data_axes(f), - copy=False, - ) - out[term_ncvar] = key - - return out - - def _cfa_parse_aggregated_data(self, ncvar, aggregated_data): - """Parse a CFA-netCDF ``aggregated_data`` attribute. - - .. versionadded:: 3.15.0 - - :Parameters: - - ncvar: `str` - The netCDF variable name. - - aggregated_data: `str` or `None` - The CFA-netCDF ``aggregated_data`` attribute. - - :Returns: - - `dict` - The parsed attribute. - - """ - if not aggregated_data: - return {} - - g = self.read_vars - aggregation_instructions = g["cfa_aggregation_instructions"] - variable_attributes = g["variable_attributes"] - - # Loop round aggregation instruction terms - out = {} - for x in self._parse_x( - ncvar, - aggregated_data, - keys_are_variables=True, - ): - term, term_ncvar = tuple(x.items())[0] - term_ncvar = term_ncvar[0] - out[term] = term_ncvar - - if term_ncvar in aggregation_instructions: - # Already processed this term - continue - - variable = g["variables"][term_ncvar] - array = cfdm.netcdf_indexer( - variable, - mask=True, - unpack=True, - always_masked_array=False, - orthogonal_indexing=False, - copy=False, - ) - aggregation_instructions[term_ncvar] = array[...] - - if term == "file": - # Find URI substitutions that may be stored in the - # CFA file instruction variable's "substitutions" - # attribute - subs = variable_attributes[term_ncvar].get( - "substitutions", - ) - if subs: - # Convert the string "${base}: value" to the - # dictionary {"${base}": "value"} - s = subs.split() - subs = { - base[:-1]: sub for base, sub in zip(s[::2], s[1::2]) - } - - # Apply user-defined substitutions, which take - # precedence over those defined in the file. 
- subs.update(g["cfa_options"].get("substitutions", {})) - g["cfa_file_substitutions"][term_ncvar] = subs - - g["cfa_aggregated_data"][ncvar] = out - return out +# def _ncdimensions(self, ncvar, ncdimensions=None, parent_ncvar=None): +# """Return a list of the netCDF dimensions corresponding to a +# netCDF variable. +# +# If the variable has been compressed then the *implied +# uncompressed* dimensions are returned. +# +# For a CFA variable, the netCDF dimensions are taken from the +# 'aggregated_dimensions' netCDF attribute. +# +# .. versionadded:: 3.0.0 +# +# :Parameters: +# +# ncvar: `str` +# The netCDF variable name. +# +# ncdimensions: sequence of `str`, optional +# Use these netCDF dimensions, rather than retrieving them +# from the netCDF variable itself. This allows the +# dimensions of a domain variable to be parsed. Note that +# this only parameter only needs to be used once because the +# parsed domain dimensions are automatically stored in +# `self.read_var['domain_ncdimensions'][ncvar]`. +# +# .. versionadded:: 3.11.0 +# +# parent_ncvar: `str`, optional +# TODO +# +# .. versionadded:: TODO +# +# :Returns: +# +# `list` +# The netCDF dimension names spanned by the netCDF variable. +# +# **Examples** +# +# >>> n._ncdimensions('humidity') +# ['time', 'lat', 'lon'] +# +# For a variable compressed by gathering: +# +# dimensions: +# lat=73; +# lon=96; +# landpoint=2381; +# depth=4; +# variables: +# int landpoint(landpoint); +# landpoint:compress="lat lon"; +# float landsoilt(depth,landpoint); +# landsoilt:long_name="soil temperature"; +# landsoilt:units="K"; +# +# we would have +# +# >>> n._ncdimensions('landsoilt') +# ['depth', 'lat', 'lon'] +# +# """ +# +# if not self._is_cfa_variable(ncvar): +# return super()._ncdimensions( +# ncvar, ncdimensions=ncdimensions, parent_ncvar=parent_ncvar +# ) +# +# # Still here? Then we have a CFA variable. 
+# ncdimensions = self.read_vars["variable_attributes"][ncvar][ +# "aggregated_dimensions" +# ].split() +# +# return list(map(str, ncdimensions)) +# +# def _get_domain_axes(self, ncvar, allow_external=False, parent_ncvar=None): +# """Return the domain axis identifiers that correspond to a +# netCDF variable's netCDF dimensions. +# +# For a CFA variable, the netCDF dimensions are taken from the +# 'aggregated_dimensions' netCDF attribute. +# +# :Parameter: +# +# ncvar: `str` +# The netCDF variable name. +# +# allow_external: `bool` +# If `True` and *ncvar* is an external variable then return an +# empty list. +# +# parent_ncvar: `str`, optional +# TODO +# +# .. versionadded:: TODO +# +# :Returns: +# +# `list` +# +# **Examples** +# +# >>> r._get_domain_axes('areacello') +# ['domainaxis0', 'domainaxis1'] +# +# >>> r._get_domain_axes('areacello', allow_external=True) +# [] +# +# """ +# if not self._is_cfa_variable(ncvar): +# return super()._get_domain_axes( +# ncvar=ncvar, +# allow_external=allow_external, +# parent_ncvar=parent_ncvar, +# ) +# +# # ------------------------------------------------------------ +# # Still here? Then we have a CFA-netCDF variable. +# # ------------------------------------------------------------ +# g = self.read_vars +# +# ncdimensions = g["variable_attributes"][ncvar][ +# "aggregated_dimensions" +# ].split() +# +# ncdim_to_axis = g["ncdim_to_axis"] +# axes = [ +# ncdim_to_axis[ncdim] +# for ncdim in ncdimensions +# if ncdim in ncdim_to_axis +# ] +# +# return axes +# +# def _create_data( +# self, +# ncvar, +# construct=None, +# unpacked_dtype=False, +# uncompress_override=None, +# parent_ncvar=None, +# coord_ncvar=None, +# cfa_term=None, +# compression_index=False, +# ): +# """Create data for a netCDF or CFA-netCDF variable. +# +# .. versionadded:: 3.0.0 +# +# :Parameters: +# +# ncvar: `str` +# The name of the netCDF variable that contains the +# data. See the *cfa_term* parameter. 
+# +# construct: optional +# +# unpacked_dtype: `False` or `numpy.dtype`, optional +# +# uncompress_override: `bool`, optional +# +# coord_ncvar: `str`, optional +# +# cfa_term: `dict`, optional +# The name of a non-standard aggregation instruction +# term from which to create the data. If set then +# *ncvar* must be the value of the term in the +# ``aggregation_data`` attribute. +# +# .. versionadded:: 3.15.0 +# +# compression_index: `bool`, optional +# True if the data being created are compression +# indices. +# +# .. versionadded:: 3.15.2 +# +# :Returns: +# +# `Data` +# +# """ +# if not cfa_term and not self._is_cfa_variable(ncvar): +# # Create data for a normal netCDF variable +# data = super()._create_data( +# ncvar=ncvar, +# construct=construct, +# unpacked_dtype=unpacked_dtype, +# uncompress_override=uncompress_override, +# parent_ncvar=parent_ncvar, +# coord_ncvar=coord_ncvar, +# ) +# +# # Set the CFA write status to True when there is exactly +# # one dask chunk +# if data.npartitions == 1: +# data._cfa_set_write(True) +# +# # if ( +# # not compression_index +# # and self.read_vars.get("cache") +# # and self.implementation.get_construct_type(construct) +# # != "field" +# # ): +# # # Only cache values from non-field data and +# # # non-compression-index data, on the assumptions that: +# # # +# # # a) Field data is, in general, so large that finding +# # # the cached values takes too long. +# # # +# # # b) Cached values are never really required for +# # # compression index data. +# # self._cache_data_elements(data, ncvar) +# +# return data +# +# # ------------------------------------------------------------ +# # Still here? 
Create data for a CFA variable +# # ------------------------------------------------------------ +# if construct is not None: +# # Remove the aggregation attributes from the construct +# self.implementation.del_property( +# construct, "aggregated_dimensions", None +# ) +# aggregated_data = self.implementation.del_property( +# construct, "aggregated_data", None +# ) +# else: +# aggregated_data = None +# +# if cfa_term: +# term, term_ncvar = tuple(cfa_term.items())[0] +# cfa_array, kwargs = self._create_cfanetcdfarray_term( +# ncvar, term, term_ncvar +# ) +# else: +# cfa_array, kwargs = self._create_cfanetcdfarray( +# ncvar, +# unpacked_dtype=unpacked_dtype, +# coord_ncvar=coord_ncvar, +# ) +# +# attributes = kwargs["attributes"] +# data = self._create_Data( +# cfa_array, +# ncvar, +# units=attributes.get("units"), +# calendar=attributes.get("calendar"), +# ) +# +# # Note: We don't cache elements from CFA variables, because +# # the data are in fragment files which have not been +# # opened and may not not even be openable (such as could +# # be the case if a fragment file was on tape storage). +# +# # Set the CFA write status to True iff each non-aggregated +# # axis has exactly one dask storage chunk +# if cfa_term: +# data._cfa_set_term(True) +# else: +# cfa_write = True +# for n, numblocks in zip( +# cfa_array.get_fragment_shape(), data.numblocks +# ): +# if n == 1 and numblocks > 1: +# # Note: 'n == 1' is True for non-aggregated axes +# cfa_write = False +# break +# +# data._cfa_set_write(cfa_write) +# +# # Store the 'aggregated_data' attribute +# if aggregated_data: +# data.cfa_set_aggregated_data(aggregated_data) +# +# # Store the file substitutions +# data.cfa_update_file_substitutions(kwargs.get("substitutions")) +# +# return data +# +# def _is_cfa_variable(self, ncvar): +# """Return True if *ncvar* is a CFA aggregated variable. +# +# .. versionadded:: 3.14.0 +# +# :Parameters: +# +# ncvar: `str` +# The name of the netCDF variable. 
+# +# :Returns: +# +# `bool` +# Whether or not *ncvar* is a CFA variable. +# +# """ +# g = self.read_vars +# return ( +# g["cfa"] +# and ncvar in g["cfa_aggregated_data"] +# and ncvar not in g["external_variables"] +# ) +# +# def _create_Data( +# self, +# array, +# ncvar, +# units=None, +# calendar=None, +# ncdimensions=(), +# **kwargs, +# ): +# """Create a Data object from a netCDF variable. +# +# .. versionadded:: 3.0.0 +# +# :Parameters: +# +# array: `Array` +# The file array. +# +# ncvar: `str` +# The netCDF variable containing the array. +# +# units: `str`, optional +# The units of *array*. By default, or if `None`, it is +# assumed that there are no units. +# +# calendar: `str`, optional +# The calendar of *array*. By default, or if `None`, it is +# assumed that there is no calendar. +# +# ncdimensions: sequence of `str`, optional +# The netCDF dimensions spanned by the array. +# +# .. versionadded:: 3.14.0 +# +# kwargs: optional +# Extra parameters to pass to the initialisation of the +# returned `Data` object. +# +# :Returns: +# +# `Data` +# +# """ +# if array.dtype is None: +# # The array is based on a netCDF VLEN variable, and +# # therefore has unknown data type. To find the correct +# # data type (e.g. "=1) netCDF string type variable comes out +# # as a numpy object array, so convert it to numpy +# # string array. +# array = array.astype("U", copy=False) +# # NetCDF4 doesn't auto-mask VLEN variables +# array = np.ma.where(array == "", np.ma.masked, array) +# +# # Parse dask chunks +# chunks = self._dask_chunks(array, ncvar, compressed) +# +# data = super()._create_Data( +# array, +# ncvar, +# units=units, +# calendar=calendar, +# chunks=chunks, +# **kwargs, +# ) +# +# return data +# +# def _customise_read_vars(self): +# """Customise the read parameters. +# +# Take the opportunity to apply CFA updates to +# `read_vars['variable_dimensions']` and +# `read_vars['do_not_create_field']`. +# +# .. 
versionadded:: 3.0.0 +# +# """ +# super()._customise_read_vars() +# g = self.read_vars +# +# if not g["cfa"]: +# return +# +# g["cfa_aggregated_data"] = {} +# g["cfa_aggregation_instructions"] = {} +# g["cfa_file_substitutions"] = {} +# +# # ------------------------------------------------------------ +# # Still here? Then this is a CFA-netCDF file +# # ------------------------------------------------------------ +# if g["CFA_version"] < Version("0.6.2"): +# raise ValueError( +# f"Can't read file {g['filename']} that uses obsolete " +# f"CFA conventions version CFA-{g['CFA_version']}. " +# "(Note that cf version 3.13.1 can be used to read and " +# "write CFA-0.4 files.)" +# ) +# +# # Get the directory of the CFA-netCDF file being read +# from os.path import abspath +# from pathlib import PurePath +# +# g["cfa_dir"] = PurePath(abspath(g["filename"])).parent +# +# # Process the aggregation instruction variables, and the +# # aggregated dimensions. +# dimensions = g["variable_dimensions"] +# attributes = g["variable_attributes"] +# +# for ncvar, attributes in attributes.items(): +# if "aggregated_dimensions" not in attributes: +# # This is not an aggregated variable +# continue +# +# # Set the aggregated variable's dimensions as its +# # aggregated dimensions +# ncdimensions = attributes["aggregated_dimensions"].split() +# dimensions[ncvar] = tuple(map(str, ncdimensions)) +# +# # Do not create fields/domains from aggregation +# # instruction variables +# parsed_aggregated_data = self._cfa_parse_aggregated_data( +# ncvar, attributes.get("aggregated_data") +# ) +# for term_ncvar in parsed_aggregated_data.values(): +# g["do_not_create_field"].add(term_ncvar) +# +# def _cache_data_elements(self, data, ncvar): +# """Cache selected element values. +# +# Updates *data* in-place to store its first, second, +# penultimate, and last element values (as appropriate). +# +# These values are used by, amongst other things, +# `cf.Data.equals`, `cf.aggregate` and for inspection. 
+# +# Doing this here is quite cheap because only the individual +# elements are read from the already-open file, as opposed to +# being retrieved from *data* (which would require a whole dask +# chunk to be read to get each single value). +# +# However, empirical evidence shows that using netCDF4 to access +# the first and last elements of a large array on disk +# (e.g. shape (1, 75, 1207, 1442)) is slow (e.g. ~2 seconds) and +# doesn't scale well with array size (i.e. it takes +# disproportionally longer for larger arrays). Such arrays are +# usually in field constructs, for which `cf.aggregate` does not +# need to know any array values, so this method should be used +# with caution, if at all, on field construct data. +# +# .. versionadded:: 3.14.0 +# +# :Parameters: +# +# data: `Data` +# The data to be updated with its cached values. +# +# ncvar: `str` +# The name of the netCDF variable that contains the +# data. +# +# :Returns: +# +# `None` +# +# """ +# +# if data.data.get_compression_type(): +# # Don't get cached elements from arrays compressed by +# # convention, as they'll likely be wrong. +# return +# +# g = self.read_vars +# +# # Get the netCDF4.Variable for the data +# if g["has_groups"]: +# group, name = self._netCDF4_group( +# g["variable_grouped_dataset"][ncvar], ncvar +# ) +# variable = group.variables.get(name) +# else: +# variable = g["variables"].get(ncvar) +# +# # Get the required element values +# size = data.size +# ndim = data.ndim +# +# char = False +# if variable.ndim == ndim + 1: +# dtype = variable.dtype +# if dtype is not str and dtype.kind in "SU": +# # This variable is a netCDF classic style char array +# # with a trailing dimension that needs to be collapsed +# char = True +# +# if ndim == 1: +# # Also cache the second element for 1-d data, on the +# # assumption that they may well be dimension coordinate +# # data. +# if size == 1: +# indices = (0, -1) +# value = variable[...] 
+# values = (value, value) +# elif size == 2: +# indices = (0, 1, -1) +# value = variable[-1:] +# values = (variable[:1], value, value) +# else: +# indices = (0, 1, -1) +# values = (variable[:1], variable[1:2], variable[-1:]) +# elif ndim == 2 and data.shape[-1] == 2: +# # Assume that 2-d data with a last dimension of size 2 +# # contains coordinate bounds, for which it is useful to +# # cache the upper and lower bounds of the the first and +# # last cells. +# indices = (0, 1, -2, -1) +# ndim1 = ndim - 1 +# values = ( +# variable[(slice(0, 1),) * ndim1 + (slice(0, 1),)], +# variable[(slice(0, 1),) * ndim1 + (slice(1, 2),)], +# ) +# if data.size == 2: +# values = values + values +# else: +# values += ( +# variable[(slice(-1, None, 1),) * ndim1 + (slice(0, 1),)], +# variable[(slice(-1, None, 1),) * ndim1 + (slice(1, 2),)], +# ) +# elif size == 1: +# indices = (0, -1) +# value = variable[...] +# values = (value, value) +# elif size == 3: +# indices = (0, 1, -1) +# if char: +# values = variable[...].reshape(3, variable.shape[-1]) +# else: +# values = variable[...].flatten() +# else: +# indices = (0, -1) +# values = ( +# variable[(slice(0, 1),) * ndim], +# variable[(slice(-1, None, 1),) * ndim], +# ) +# +# # Create a dictionary of the element values +# elements = {} +# for index, value in zip(indices, values): +# if char: +# # Variable is a netCDF classic style char array, so +# # collapse (by concatenation) the outermost (fastest +# # varying) dimension. E.g. 
[['a','b','c']] becomes +# # ['abc'] +# if value.dtype.kind == "U": +# value = value.astype("S") +# +# a = netCDF4.chartostring(value) +# shape = a.shape +# a = np.array([x.rstrip() for x in a.flat]) +# a = np.reshape(a, shape) +# value = np.ma.masked_where(a == "", a) +# +# if np.ma.is_masked(value): +# value = np.ma.masked +# else: +# try: +# value = value.item() +# except (AttributeError, ValueError): +# # AttributeError: A netCDF string type scalar +# # variable comes out as Python str object, which +# # has no 'item' method. +# # +# # ValueError: A size-0 array can't be converted to +# # a Python scalar. +# pass +# +# elements[index] = value +# +# # Store the elements in the data object +# data._set_cached_elements(elements) +# +# def _create_cfanetcdfarray( +# self, +# ncvar, +# unpacked_dtype=False, +# coord_ncvar=None, +# term=None, +# ): +# """Create a CFA-netCDF variable array. +# +# .. versionadded:: 3.14.0 +# +# :Parameters: +# +# ncvar: `str` +# The name of the CFA-netCDF aggregated variable. See +# the *term* parameter. +# +# unpacked_dtype: `False` or `numpy.dtype`, optional +# +# coord_ncvar: `str`, optional +# +# term: `str`, optional +# The name of a non-standard aggregation instruction +# term from which to create the array. If set then +# *ncvar* must be the value of the non-standard term in +# the ``aggregation_data`` attribute. +# +# .. versionadded:: 3.15.0 +# +# :Returns: +# +# (`CFANetCDFArray`, `dict`) +# The new `CFANetCDFArray` instance and dictionary of +# the kwargs used to create it. +# +# """ +# g = self.read_vars +# +# # Get the kwargs needed to instantiate a general netCDF array +# # instance +# kwargs = self._create_netcdfarray( +# ncvar, +# unpacked_dtype=unpacked_dtype, +# coord_ncvar=coord_ncvar, +# return_kwargs_only=True, +# ) +# +# # Get rid of the incorrect shape. This will end up getting set +# # correctly by the CFANetCDFArray instance. 
+# kwargs.pop("shape", None) +# aggregated_data = g["cfa_aggregated_data"][ncvar] +# +# standardised_terms = ("location", "file", "address", "format") +# +# instructions = [] +# aggregation_instructions = {} +# for t, term_ncvar in aggregated_data.items(): +# if t not in standardised_terms: +# continue +# +# aggregation_instructions[t] = g["cfa_aggregation_instructions"][ +# term_ncvar +# ] +# instructions.append(f"{t}: {term_ncvar}") +# +# if t == "file": +# kwargs["substitutions"] = g["cfa_file_substitutions"].get( +# term_ncvar +# ) +# +# kwargs["x"] = aggregation_instructions +# kwargs["instructions"] = " ".join(sorted(instructions)) +# +# # Use the kwargs to create a CFANetCDFArray instance +# if g["original_netCDF4"]: +# array = self.implementation.initialise_CFANetCDF4Array(**kwargs) +# else: +# # h5netcdf +# array = self.implementation.initialise_CFAH5netcdfArray(**kwargs) +# +# return array, kwargs +# +# def _create_cfanetcdfarray_term( +# self, +# parent_ncvar, +# term, +# ncvar, +# ): +# """Create a CFA-netCDF variable array. +# +# .. versionadded:: 3.14.0 +# +# :Parameters: +# +# parent_ncvar: `str` +# The name of the CFA-netCDF aggregated variable. See +# the *term* parameter. +# +# term: `str`, optional +# The name of a non-standard aggregation instruction +# term from which to create the array. If set then +# *ncvar* must be the value of the non-standard term in +# the ``aggregation_data`` attribute. +# +# .. versionadded:: 3.15.0 +# +# ncvar: `str` +# The name of the CFA-netCDF aggregated variable. See +# the *term* parameter. +# +# :Returns: +# +# (`CFANetCDFArray`, `dict`) +# The new `CFANetCDFArray` instance and dictionary of +# the kwargs used to create it. +# +# """ +# g = self.read_vars +# +# # Get the kwargs needed to instantiate a general netCDF array +# # instance +# kwargs = self._create_netcdfarray( +# ncvar, +# return_kwargs_only=True, +# ) +# +# # Get rid of the incorrect shape. 
This will end up getting set +# # correctly by the CFANetCDFArray instance. +# kwargs.pop("shape", None) +# +# instructions = [] +# aggregation_instructions = {} +# for t, term_ncvar in g["cfa_aggregated_data"][parent_ncvar].items(): +# if t in ("location", term): +# aggregation_instructions[t] = g[ +# "cfa_aggregation_instructions" +# ][term_ncvar] +# instructions.append(f"{t}: {ncvar}") +# +# kwargs["term"] = term +# kwargs["dtype"] = aggregation_instructions[term].dtype +# kwargs["x"] = aggregation_instructions +# kwargs["instructions"] = " ".join(sorted(instructions)) +# +# if g["original_netCDF4"]: +# array = self.implementation.initialise_CFANetCDF4Array(**kwargs) +# else: +# # h5netcdf +# array = self.implementation.initialise_CFAH5netcdfArray(**kwargs) +# +# return array, kwargs +# +# +# def _parse_chunks(self, ncvar): +# """Parse the dask chunks. +# +# .. versionadded:: 3.14.0 +# +# :Parameters: +# +# ncvar: `str` +# The name of the netCDF variable containing the array. +# +# :Returns: +# +# `str`, `int` or `dict` +# The parsed chunks that are suitable for passing to a +# `Data` object containing the variable's array. 
+# +# """ +# g = self.read_vars +# +# default_chunks = "auto" +# chunks = g.get("chunks", default_chunks) +# +# if chunks is None: +# return -1 +# +# if isinstance(chunks, dict): +# if not chunks: +# return default_chunks +# +# # For ncdimensions = ('time', 'lat'): +# # +# # chunks={} -> ["auto", "auto"] +# # chunks={'ncdim%time': 12} -> [12, "auto"] +# # chunks={'ncdim%time': 12, 'ncdim%lat': 10000} -> [12, 10000] +# # chunks={'ncdim%time': 12, 'ncdim%lat': "20MB"} -> [12, "20MB"] +# # chunks={'ncdim%time': 12, 'latitude': -1} -> [12, -1] +# # chunks={'ncdim%time': 12, 'Y': None} -> [12, None] +# # chunks={'ncdim%time': 12, 'ncdim%lat': (30, 90)} -> [12, (30, 90)] +# # chunks={'ncdim%time': 12, 'ncdim%lat': None, 'X': 5} -> [12, None] +# attributes = g["variable_attributes"] +# chunks2 = [] +# for ncdim in g["variable_dimensions"][ncvar]: +# key = f"ncdim%{ncdim}" +# if key in chunks: +# chunks2.append(chunks[key]) +# continue +# +# found_coord_attr = False +# dim_coord_attrs = attributes.get(ncdim) +# if dim_coord_attrs is not None: +# for attr in ("standard_name", "axis"): +# key = dim_coord_attrs.get(attr) +# if key in chunks: +# found_coord_attr = True +# chunks2.append(chunks[key]) +# break +# +# if not found_coord_attr: +# # Use default chunks for this dimension +# chunks2.append(default_chunks) +# +# chunks = chunks2 +# +# return chunks +# +# def _customise_field_ancillaries(self, parent_ncvar, f): +# """Create customised field ancillary constructs. +# +# This method currently creates: +# +# * Field ancillary constructs derived from non-standardised +# terms in CFA aggregation instructions. Each construct spans +# the same domain axes as the parent field construct. +# Constructs are never created for `Domain` instances. +# +# .. versionadded:: 3.15.0 +# +# :Parameters: +# +# parent_ncvar: `str` +# The netCDF variable name of the parent variable. +# +# f: `Field` +# The parent field construct. 
+# +# :Returns: +# +# `dict` +# A mapping of netCDF variable names to newly-created +# construct identifiers. +# +# **Examples** +# +# >>> n._customise_field_ancillaries('tas', f) +# {} +# +# >>> n._customise_field_ancillaries('pr', f) +# {'tracking_id': 'fieldancillary1'} +# +# """ +# if not self._is_cfa_variable(parent_ncvar): +# return {} +# +# # ------------------------------------------------------------ +# # Still here? Then we have a CFA-netCDF variable: Loop round +# # the aggregation instruction terms and convert each +# # non-standard term into a field ancillary construct that +# # spans the same domain axes as the parent field. +# # ------------------------------------------------------------ +# g = self.read_vars +# +# standardised_terms = ("location", "file", "address", "format") +# +# out = {} +# for term, term_ncvar in g["cfa_aggregated_data"][parent_ncvar].items(): +# if term in standardised_terms: +# continue +# +# if g["variables"][term_ncvar].ndim != f.ndim: +# # Can only create field ancillaries with the same rank +# # as the field +# continue +# +# # Still here? Then we've got a non-standard aggregation +# # term from which we can create a field +# # ancillary construct. +# anc = self.implementation.initialise_FieldAncillary() +# +# self.implementation.set_properties( +# anc, g["variable_attributes"][term_ncvar] +# ) +# anc.set_property("long_name", term) +# +# # Store the term name as the 'id' attribute. This will be +# # used as the term name if the field field ancillary is +# # written to disk as a non-standard CFA term. 
+# anc.id = term +# +# data = self._create_data( +# parent_ncvar, anc, cfa_term={term: term_ncvar} +# ) +# +# self.implementation.set_data(anc, data, copy=False) +# self.implementation.nc_set_variable(anc, term_ncvar) +# +# key = self.implementation.set_field_ancillary( +# f, +# anc, +# axes=self.implementation.get_field_data_axes(f), +# copy=False, +# ) +# out[term_ncvar] = key +# +# return out +# +# def _cfa_parse_aggregated_data(self, ncvar, aggregated_data): +# """Parse a CFA-netCDF ``aggregated_data`` attribute. +# +# .. versionadded:: 3.15.0 +# +# :Parameters: +# +# ncvar: `str` +# The netCDF variable name. +# +# aggregated_data: `str` or `None` +# The CFA-netCDF ``aggregated_data`` attribute. +# +# :Returns: +# +# `dict` +# The parsed attribute. +# +# """ +# if not aggregated_data: +# return {} +# +# g = self.read_vars +# aggregation_instructions = g["cfa_aggregation_instructions"] +# variable_attributes = g["variable_attributes"] +# +# # Loop round aggregation instruction terms +# out = {} +# for x in self._parse_x( +# ncvar, +# aggregated_data, +# keys_are_variables=True, +# ): +# term, term_ncvar = tuple(x.items())[0] +# term_ncvar = term_ncvar[0] +# out[term] = term_ncvar +# +# if term_ncvar in aggregation_instructions: +# # Already processed this term +# continue +# +# variable = g["variables"][term_ncvar] +# array = cfdm.netcdf_indexer( +# variable, +# mask=True, +# unpack=True, +# always_masked_array=False, +# orthogonal_indexing=False, +# copy=False, +# ) +# aggregation_instructions[term_ncvar] = array[...] 
+# +# if term == "file": +# # Find URI substitutions that may be stored in the +# # CFA file instruction variable's "substitutions" +# # attribute +# subs = variable_attributes[term_ncvar].get( +# "substitutions", +# ) +# if subs: +# # Convert the string "${base}: value" to the +# # dictionary {"${base}": "value"} +# s = subs.split() +# subs = { +# base[:-1]: sub for base, sub in zip(s[::2], s[1::2]) +# } +# +# # Apply user-defined substitutions, which take +# # precedence over those defined in the file. +# subs.update(g["cfa_options"].get("substitutions", {})) +# g["cfa_file_substitutions"][term_ncvar] = subs +# +# g["cfa_aggregated_data"][ncvar] = out +# return out +# diff --git a/cf/read_write/netcdf/netcdfwrite.py b/cf/read_write/netcdf/netcdfwrite.py index 068c55b968..6d2d438e25 100644 --- a/cf/read_write/netcdf/netcdfwrite.py +++ b/cf/read_write/netcdf/netcdfwrite.py @@ -23,243 +23,243 @@ def __new__(cls, *args, **kwargs): instance._NetCDFRead = NetCDFRead return instance - def _unlimited(self, field, axis): - """Whether an axis is unlimited. - - If a CFA-netCDF file is being written then no axis can be - unlimited, i.e. `False` is always returned. - - .. versionadded:: 3.15.3 - - :Parameters: - - field: `Field` or `Domain` - - axis: `str` - Domain axis construct identifier, - e.g. ``'domainaxis1'``. - - :Returns: - - `bool` - - """ - if self.write_vars["cfa"]: - return False - - return super()._unlimited(field, axis) - - def _write_as_cfa(self, cfvar, construct_type, domain_axes): - """Whether or not to write as a CFA variable. - - .. versionadded:: 3.0.0 - - :Parameters: - - cfvar: cf instance that contains data - - construct_type: `str` - The construct type of the *cfvar*, or its parent if - *cfvar* is not a construct. - - .. versionadded:: 3.15.0 - - domain_axes: `None`, or `tuple` of `str` - The domain axis construct identifiers for *cfvar*. - - .. versionadded:: 3.15.0 - - :Returns: - - `bool` - True if the variable is to be written as a CFA - variable. 
- - """ - if construct_type is None: - # This prevents recursion whilst writing CFA-netCDF term - # variables. - return False - - g = self.write_vars - if not g["cfa"]: - return False - - data = self.implementation.get_data(cfvar, None) - if data is None: - return False - - cfa_options = g["cfa_options"] - for ctype, ndim in cfa_options.get("constructs", {}).items(): - # Write as CFA if it has an appropriate construct type ... - if ctype in ("all", construct_type): - # ... and then only if it satisfies the - # number-of-dimenions criterion and the data is - # flagged as OK. - if ndim is None or ndim == len(domain_axes): - cfa_get_write = data.cfa_get_write() - if not cfa_get_write and cfa_options["strict"]: - if g["mode"] == "w": - remove(g["filename"]) - - raise ValueError( - f"Can't write {cfvar!r} as a CFA-netCDF " - "aggregation variable. Possible reasons for this " - "include 1) there is more than one Dask chunk " - "per fragment, and 2) data values have been " - "changed relative to those in the fragments." - ) - - return cfa_get_write - - break - - return False - - def _customise_createVariable( - self, cfvar, construct_type, domain_axes, kwargs - ): - """Customise keyword arguments for - `netCDF4.Dataset.createVariable`. - - .. versionadded:: 3.0.0 - - :Parameters: - - cfvar: cf instance that contains data - - construct_type: `str` - The construct type of the *cfvar*, or its parent if - *cfvar* is not a construct. - - .. versionadded:: 3.15.0 - - domain_axes: `None`, or `tuple` of `str` - The domain axis construct identifiers for *cfvar*. - - .. versionadded:: 3.15.0 - - kwargs: `dict` - - :Returns: - - `dict` - Dictionary of keyword arguments to be passed to - `netCDF4.Dataset.createVariable`. 
- - """ - kwargs = super()._customise_createVariable( - cfvar, construct_type, domain_axes, kwargs - ) - - if self._write_as_cfa(cfvar, construct_type, domain_axes): - kwargs["dimensions"] = () - kwargs["chunksizes"] = None - - return kwargs - - def _write_data( - self, - data, - cfvar, - ncvar, - ncdimensions, - domain_axes=None, - unset_values=(), - compressed=False, - attributes={}, - construct_type=None, - ): - """Write a Data object. - - .. versionadded:: 3.0.0 - - :Parameters: - - data: `Data` - - cfvar: cf instance - - ncvar: `str` - - ncdimensions: `tuple` of `str` - - domain_axes: `None`, or `tuple` of `str` - The domain axis construct identifiers for *cfvar*. - - .. versionadded:: 3.15.0 - - unset_values: sequence of numbers - - attributes: `dict`, optional - The netCDF attributes for the constructs that have been - written to the file. - - construct_type: `str`, optional - The construct type of the *cfvar*, or its parent if - *cfvar* is not a construct. - - .. versionadded:: 3.15.0 - - :Returns: - - `None` - - """ - g = self.write_vars - - if self._write_as_cfa(cfvar, construct_type, domain_axes): - # -------------------------------------------------------- - # Write the data as CFA aggregated data - # -------------------------------------------------------- - self._create_cfa_data( - ncvar, - ncdimensions, - data, - cfvar, - ) - return - - # ------------------------------------------------------------ - # Still here? 
The write a normal (non-CFA) variable - # ------------------------------------------------------------ - if compressed: - # Write data in its compressed form - data = data.source().source() - - # Get the dask array - dx = da.asanyarray(data) - - # Convert the data type - new_dtype = g["datatype"].get(dx.dtype) - if new_dtype is not None: - dx = dx.astype(new_dtype) - - # VLEN variables can not be assigned to by masked arrays - # (https://github.com/Unidata/netcdf4-python/pull/465), so - # fill missing data in string (as opposed to char) data types. - if g["fmt"] == "NETCDF4" and dx.dtype.kind in "SU": - dx = dx.map_blocks( - self._filled_string_array, - fill_value="", - meta=np.array((), dx.dtype), - ) - - # Check for out-of-range values - if g["warn_valid"]: - if construct_type: - var = cfvar - else: - var = None - - dx = dx.map_blocks( - self._check_valid, - cfvar=var, - attributes=attributes, - meta=np.array((), dx.dtype), - ) - - da.store(dx, g["nc"][ncvar], compute=True, return_stored=False) + # def _unlimited(self, field, axis): + # """Whether an axis is unlimited. + # + # If a CFA-netCDF file is being written then no axis can be + # unlimited, i.e. `False` is always returned. + # + # .. versionadded:: 3.15.3 + # + # :Parameters: + # + # field: `Field` or `Domain` + # + # axis: `str` + # Domain axis construct identifier, + # e.g. ``'domainaxis1'``. + # + # :Returns: + # + # `bool` + # + # """ + # if self.write_vars["cfa"]: + # return False + # + # return super()._unlimited(field, axis) + # + # def _write_as_cfa(self, cfvar, construct_type, domain_axes): + # """Whether or not to write as a CFA variable. + # + # .. versionadded:: 3.0.0 + # + # :Parameters: + # + # cfvar: cf instance that contains data + # + # construct_type: `str` + # The construct type of the *cfvar*, or its parent if + # *cfvar* is not a construct. + # + # .. versionadded:: 3.15.0 + # + # domain_axes: `None`, or `tuple` of `str` + # The domain axis construct identifiers for *cfvar*. 
+ # + # .. versionadded:: 3.15.0 + # + # :Returns: + # + # `bool` + # True if the variable is to be written as a CFA + # variable. + # + # """ + # if construct_type is None: + # # This prevents recursion whilst writing CFA-netCDF term + # # variables. + # return False + # + # g = self.write_vars + # if not g["cfa"]: + # return False + # + # data = self.implementation.get_data(cfvar, None) + # if data is None: + # return False + # + # cfa_options = g["cfa_options"] + # for ctype, ndim in cfa_options.get("constructs", {}).items(): + # # Write as CFA if it has an appropriate construct type ... + # if ctype in ("all", construct_type): + # # ... and then only if it satisfies the + # # number-of-dimenions criterion and the data is + # # flagged as OK. + # if ndim is None or ndim == len(domain_axes): + # cfa_get_write = data.cfa_get_write() + # if not cfa_get_write and cfa_options["strict"]: + # if g["mode"] == "w": + # remove(g["filename"]) + # + # raise ValueError( + # f"Can't write {cfvar!r} as a CFA-netCDF " + # "aggregation variable. Possible reasons for this " + # "include 1) there is more than one Dask chunk " + # "per fragment, and 2) data values have been " + # "changed relative to those in the fragments." + # ) + # + # return cfa_get_write + # + # break + # + # return False + # + # def _customise_createVariable( + # self, cfvar, construct_type, domain_axes, kwargs + # ): + # """Customise keyword arguments for + # `netCDF4.Dataset.createVariable`. + # + # .. versionadded:: 3.0.0 + # + # :Parameters: + # + # cfvar: cf instance that contains data + # + # construct_type: `str` + # The construct type of the *cfvar*, or its parent if + # *cfvar* is not a construct. + # + # .. versionadded:: 3.15.0 + # + # domain_axes: `None`, or `tuple` of `str` + # The domain axis construct identifiers for *cfvar*. + # + # .. 
versionadded:: 3.15.0 + # + # kwargs: `dict` + # + # :Returns: + # + # `dict` + # Dictionary of keyword arguments to be passed to + # `netCDF4.Dataset.createVariable`. + # + # """ + # kwargs = super()._customise_createVariable( + # cfvar, construct_type, domain_axes, kwargs + # ) + # + # if self._write_as_cfa(cfvar, construct_type, domain_axes): + # kwargs["dimensions"] = () + # kwargs["chunksizes"] = None + # + # return kwargs + # + # def _write_data( + # self, + # data, + # cfvar, + # ncvar, + # ncdimensions, + # domain_axes=None, + # unset_values=(), + # compressed=False, + # attributes={}, + # construct_type=None, + # ): + # """Write a Data object. + # + # .. versionadded:: 3.0.0 + # + # :Parameters: + # + # data: `Data` + # + # cfvar: cf instance + # + # ncvar: `str` + # + # ncdimensions: `tuple` of `str` + # + # domain_axes: `None`, or `tuple` of `str` + # The domain axis construct identifiers for *cfvar*. + # + # .. versionadded:: 3.15.0 + # + # unset_values: sequence of numbers + # + # attributes: `dict`, optional + # The netCDF attributes for the constructs that have been + # written to the file. + # + # construct_type: `str`, optional + # The construct type of the *cfvar*, or its parent if + # *cfvar* is not a construct. + # + # .. versionadded:: 3.15.0 + # + # :Returns: + # + # `None` + # + # """ + # g = self.write_vars + # + # if self._write_as_cfa(cfvar, construct_type, domain_axes): + # # -------------------------------------------------------- + # # Write the data as CFA aggregated data + # # -------------------------------------------------------- + # self._create_cfa_data( + # ncvar, + # ncdimensions, + # data, + # cfvar, + # ) + # return + # + # # ------------------------------------------------------------ + # # Still here? 
The write a normal (non-CFA) variable + # # ------------------------------------------------------------ + # if compressed: + # # Write data in its compressed form + # data = data.source().source() + # + # # Get the dask array + # dx = da.asanyarray(data) + # + # # Convert the data type + # new_dtype = g["datatype"].get(dx.dtype) + # if new_dtype is not None: + # dx = dx.astype(new_dtype) + # + # # VLEN variables can not be assigned to by masked arrays + # # (https://github.com/Unidata/netcdf4-python/pull/465), so + # # fill missing data in string (as opposed to char) data types. + # if g["fmt"] == "NETCDF4" and dx.dtype.kind in "SU": + # dx = dx.map_blocks( + # self._filled_string_array, + # fill_value="", + # meta=np.array((), dx.dtype), + # ) + # + # # Check for out-of-range values + # if g["warn_valid"]: + # if construct_type: + # var = cfvar + # else: + # var = None + # + # dx = dx.map_blocks( + # self._check_valid, + # cfvar=var, + # attributes=attributes, + # meta=np.array((), dx.dtype), + # ) + # + # da.store(dx, g["nc"][ncvar], compute=True, return_stored=False) def _write_dimension_coordinate( self, f, key, coord, ncdim=None, coordinates=None @@ -410,634 +410,635 @@ def _change_reference_datetime(self, coord): else: return coord2 - def _create_cfa_data(self, ncvar, ncdimensions, data, cfvar): - """Write a CFA variable to the netCDF file. - - Any CFA private variables required will be autmatically created - and written to the file. - - .. versionadded:: 3.0.0 - - :Parameters: - - ncvar: `str` - The netCDF name for the variable. - - ncdimensions: sequence of `str` - - netcdf_attrs: `dict` - - data: `Data` - - :Returns: - - `None` - - """ - g = self.write_vars - - ndim = data.ndim - - cfa = self._cfa_aggregation_instructions(data, cfvar) - - # ------------------------------------------------------------ - # Get the location netCDF dimensions. These always start with - # "f_{size}_loc". 
- # ------------------------------------------------------------ - location_ncdimensions = [] - for size in cfa["location"].shape: - l_ncdim = f"f_{size}_loc" - if l_ncdim not in g["dimensions"]: - # Create a new location dimension - self._write_dimension(l_ncdim, None, size=size) - - location_ncdimensions.append(l_ncdim) - - location_ncdimensions = tuple(location_ncdimensions) - - # ------------------------------------------------------------ - # Get the fragment netCDF dimensions. These always start with - # "f_". - # ------------------------------------------------------------ - aggregation_address = cfa["address"] - fragment_ncdimensions = [] - for ncdim, size in zip( - ncdimensions + ("extra",) * (aggregation_address.ndim - ndim), - aggregation_address.shape, - ): - f_ncdim = f"f_{ncdim}" - if f_ncdim not in g["dimensions"]: - # Create a new fragment dimension - self._write_dimension(f_ncdim, None, size=size) - - fragment_ncdimensions.append(f_ncdim) - - fragment_ncdimensions = tuple(fragment_ncdimensions) - - # ------------------------------------------------------------ - # Write the standardised aggregation instruction variables to - # the CFA-netCDF file - # ------------------------------------------------------------ - substitutions = data.cfa_file_substitutions() - substitutions.update(g["cfa_options"].get("substitutions", {})) - - aggregated_data = data.cfa_get_aggregated_data() - aggregated_data_attr = [] - - # Location - term = "location" - data = cfa[term] - self.implementation.nc_set_hdf5_chunksizes(data, data.shape) - term_ncvar = self._cfa_write_term_variable( - data, - aggregated_data.get(term, f"cfa_{term}"), - location_ncdimensions, - ) - aggregated_data_attr.append(f"{term}: {term_ncvar}") - - # File - term = "file" - if substitutions: - # Create the "substitutions" netCDF attribute - subs = [] - for base, sub in substitutions.items(): - subs.append(f"{base}: {sub}") - - attributes = {"substitutions": " ".join(sorted(subs))} - else: - 
attributes = None - - data = cfa[term] - self.implementation.nc_set_hdf5_chunksizes(data, data.shape) - term_ncvar = self._cfa_write_term_variable( - data, - aggregated_data.get(term, f"cfa_{term}"), - fragment_ncdimensions, - attributes=attributes, - ) - aggregated_data_attr.append(f"{term}: {term_ncvar}") - - # Address - term = "address" - - # Attempt to reduce addresses to a common scalar value - u = cfa[term].unique().compressed().persist() - if u.size == 1: - cfa[term] = u.squeeze() - dimensions = () - else: - dimensions = fragment_ncdimensions - - data = cfa[term] - self.implementation.nc_set_hdf5_chunksizes(data, data.shape) - term_ncvar = self._cfa_write_term_variable( - data, - aggregated_data.get(term, f"cfa_{term}"), - dimensions, - ) - aggregated_data_attr.append(f"{term}: {term_ncvar}") - - # Format - term = "format" - - # Attempt to reduce addresses to a common scalar value - u = cfa[term].unique().compressed().persist() - if u.size == 1: - cfa[term] = u.squeeze() - dimensions = () - else: - dimensions = fragment_ncdimensions - - data = cfa[term] - self.implementation.nc_set_hdf5_chunksizes(data, data.shape) - term_ncvar = self._cfa_write_term_variable( - data, - aggregated_data.get(term, f"cfa_{term}"), - dimensions, - ) - aggregated_data_attr.append(f"{term}: {term_ncvar}") - - # ------------------------------------------------------------ - # Look for non-standard CFA terms stored as field ancillaries - # on a field and write them to the CFA-netCDF file - # ------------------------------------------------------------ - if self.implementation.is_field(cfvar): - non_standard_terms = self._cfa_write_non_standard_terms( - cfvar, fragment_ncdimensions[:ndim], aggregated_data - ) - aggregated_data_attr.extend(non_standard_terms) - - # ------------------------------------------------------------ - # Add the CFA aggregation variable attributes - # ------------------------------------------------------------ - self._write_attributes( - None, - ncvar, - 
extra={ - "aggregated_dimensions": " ".join(ncdimensions), - "aggregated_data": " ".join(sorted(aggregated_data_attr)), - }, - ) - - def _check_valid(self, array, cfvar=None, attributes=None): - """Checks for array values outside of the valid range. - - Specifically, checks array for out-of-range values, as - defined by the valid_[min|max|range] attributes. - - .. versionadded:: 3.14.0 - - :Parameters: - - array: `numpy.ndarray` - The array to be checked. - - cfvar: construct - The CF construct containing the array. - - attributes: `dict` - The variable's CF properties. - - :Returns: - - `numpy.ndarray` - The input array, unchanged. - - """ - super()._check_valid(cfvar, array, attributes) - return array - - def _filled_string_array(self, array, fill_value=""): - """Fill a string array. - - .. versionadded:: 3.14.0 - - :Parameters: - - array: `numpy.ndarray` - The `numpy` array with string (byte or unicode) data - type. - - :Returns: - - `numpy.ndarray` - The string array array with any missing data replaced - by the fill value. - - """ - if np.ma.isMA(array): - return array.filled(fill_value) - - return array - - def _write_field_ancillary(self, f, key, anc): - """Write a field ancillary to the netCDF file. - - If an equal field ancillary has already been written to the file - then it is not re-written. - - .. versionadded:: 3.15.0 - - :Parameters: - - f: `Field` - - key: `str` - - anc: `FieldAncillary` - - :Returns: - - `str` - The netCDF variable name of the field ancillary - object. If no ancillary variable was written then an - empty string is returned. - - """ - if anc.data.cfa_get_term(): - # This field ancillary construct is to be written as a - # non-standard CFA term belonging to the parent field, or - # else not at all. - return "" - - return super()._write_field_ancillary(f, key, anc) - - def _cfa_write_term_variable( - self, data, ncvar, ncdimensions, attributes=None - ): - """Write a CFA aggregation instruction term variable - - .. 
versionadded:: 3.15.0 - - :Parameters: - - data `Data` - The data to write. - - ncvar: `str` - The netCDF variable name. - - ncdimensions: `tuple` of `str` - The variable's netCDF dimensions. - - attributes: `dict`, optional - Any attributes to attach to the variable. - - :Returns: - - `str` - The netCDF variable name of the CFA term variable. - - """ - create = not self._already_in_file(data, ncdimensions) - - if create: - # Create a new CFA term variable in the file - ncvar = self._netcdf_name(ncvar) - self._write_netcdf_variable( - ncvar, ncdimensions, data, None, extra=attributes - ) - else: - # This CFA term variable has already been written to the - # file - ncvar = self.write_vars["seen"][id(data)]["ncvar"] - - return ncvar - - def _cfa_write_non_standard_terms( - self, field, fragment_ncdimensions, aggregated_data - ): - """Write a non-standard CFA aggregation instruction term variable. - - Writes non-standard CFA terms stored as field ancillaries. - - .. versionadded:: 3.15.0 - - :Parameters: - - field: `Field` - - fragment_ncdimensions: `list` of `str` - - aggregated_data: `dict` - - """ - aggregated_data_attr = [] - terms = ["location", "file", "address", "format"] - for key, field_anc in self.implementation.get_field_ancillaries( - field - ).items(): - if not field_anc.data.cfa_get_term(): - continue - - data = self.implementation.get_data(field_anc, None) - if data is None: - continue - - # Check that the field ancillary has the same axes as its - # parent field, and in the same order. - if field.get_data_axes(key) != field.get_data_axes(): - continue - - # Still here? Then this field ancillary can be represented - # by a non-standard aggregation term. - - # Then transform the data so that it spans the fragment - # dimensions, with one value per fragment. If a chunk has - # more than one unique value then the fragment's value is - # missing data. - # - # '_cfa_unique' has its own call to 'cfdm_asanyarray', so - # we can set '_asanyarray=False'. 
- dx = data.to_dask_array(_asanyarray=False) - dx_ind = tuple(range(dx.ndim)) - out_ind = dx_ind - dx = da.blockwise( - self._cfa_unique, - out_ind, - dx, - dx_ind, - adjust_chunks={i: 1 for i in out_ind}, - dtype=dx.dtype, - ) - - # Get the non-standard term name from the field - # ancillary's 'id' attribute - term = getattr(field_anc, "id", "term") - term = term.replace(" ", "_") - name = term - n = 0 - while term in terms: - n += 1 - term = f"{name}_{n}" - - terms.append(term) - - # Create the new CFA term variable - data = type(data)(dx) - self.implementation.nc_set_hdf5_chunksizes(data, data.shape) - term_ncvar = self._cfa_write_term_variable( - data=data, - ncvar=aggregated_data.get(term, f"cfa_{term}"), - ncdimensions=fragment_ncdimensions, - ) - - aggregated_data_attr.append(f"{term}: {term_ncvar}") - - return aggregated_data_attr - - @classmethod - def _cfa_unique(cls, a): - """Return the unique value of an array. - - If there are multiple unique vales then missing data is - returned. - - .. versionadded:: 3.15.0 - - :Parameters: - - a: `numpy.ndarray` - The array. - - :Returns: - - `numpy.ndarray` - A size 1 array containing the unique value, or missing - data if there is not a unique value. - - """ - a = cfdm_asanyarray(a) - - out_shape = (1,) * a.ndim - a = np.unique(a) - if np.ma.isMA(a): - # Remove a masked element - a = a.compressed() - - if a.size == 1: - return a.reshape(out_shape) - - return np.ma.masked_all(out_shape, dtype=a.dtype) - - def _cfa_aggregation_instructions(self, data, cfvar): - """Convert data to standardised CFA aggregation instruction terms. - - .. versionadded:: 3.15.0 - - :Parameters: - - data: `Data` - The data to be converted to standardised CFA - aggregation instruction terms. - - cfvar: construct - The construct that contains the *data*. - - :Returns: - - `dict` - A dictionary whose keys are the standardised CFA - aggregation instruction terms, with values of `Data` - instances containing the corresponding variables. 
- - **Examples** - - >>> n._cfa_aggregation_instructions(data, cfvar) - {'location': , - 'file': , - 'format': , - 'address': } - - """ - from os.path import abspath, join, relpath - from pathlib import PurePath - from urllib.parse import urlparse - - g = self.write_vars - - # Define the CFA file susbstitutions, giving precedence over - # those set on the Data object to those provided by the CFA - # options. - substitutions = data.cfa_file_substitutions() - substitutions.update(g["cfa_options"].get("substitutions", {})) - - absolute_paths = g["cfa_options"].get("absolute_paths") - cfa_dir = g["cfa_dir"] - - # Size of the trailing dimension - n_trailing = 0 - - aggregation_file = [] - aggregation_address = [] - aggregation_format = [] - for indices in data.chunk_indices(): - file_details = self._cfa_get_file_details(data[indices]) - - if len(file_details) != 1: - if file_details: - raise ValueError( - f"Can't write {cfvar!r} as a CFA-netCDF " - "aggregation variable: Dask chunk defined by index " - f"{indices} spans two or more fragments. " - "A possible fix for this is to set chunks=None as " - "an argument of a prior call to cf.read" - ) - - raise ValueError( - f"Can't write {cfvar!r} as a CFA-netCDF " - "aggregation variable: Dask chunk defined by index " - f"{indices} spans zero fragments." 
- ) - - filenames, addresses, formats = file_details.pop() - - if len(filenames) > n_trailing: - n_trailing = len(filenames) - - filenames2 = [] - for filename in filenames: - uri = urlparse(filename) - uri_scheme = uri.scheme - if not uri_scheme: - filename = abspath(join(cfa_dir, filename)) - if absolute_paths: - filename = PurePath(filename).as_uri() - else: - filename = relpath(filename, start=cfa_dir) - elif not absolute_paths and uri_scheme == "file": - filename = relpath(uri.path, start=cfa_dir) - - if substitutions: - # Apply the CFA file susbstitutions - for base, sub in substitutions.items(): - filename = filename.replace(sub, base) - - filenames2.append(filename) - - aggregation_file.append(tuple(filenames2)) - aggregation_address.append(addresses) - aggregation_format.append(formats) - - # Pad each value of the aggregation instruction arrays so that - # it has 'n_trailing' elements - a_shape = data.numblocks - pad = None - if n_trailing > 1: - a_shape += (n_trailing,) - - # Pad the ... - for i, (filenames, addresses, formats) in enumerate( - zip(aggregation_file, aggregation_address, aggregation_format) - ): - n = n_trailing - len(filenames) - if n: - # This chunk has fewer fragment files than some - # others, so some padding is required. - pad = ("",) * n - aggregation_file[i] = filenames + pad - aggregation_format[i] = formats + pad - if isinstance(addresses[0], int): - pad = (-1,) * n - - aggregation_address[i] = addresses + pad - - # Reshape the 1-d aggregation instruction arrays to span the - # data dimensions, plus the extra trailing dimension if there - # is one. 
- aggregation_file = np.array(aggregation_file).reshape(a_shape) - aggregation_address = np.array(aggregation_address).reshape(a_shape) - aggregation_format = np.array(aggregation_format).reshape(a_shape) - - # Mask any padded elements - if pad: - aggregation_file = np.ma.where( - aggregation_file == "", np.ma.masked, aggregation_file - ) - mask = aggregation_file.mask - aggregation_address = np.ma.array(aggregation_address, mask=mask) - aggregation_format = np.ma.array(aggregation_format, mask=mask) - - # ------------------------------------------------------------ - # Create the location array - # ------------------------------------------------------------ - dtype = np.dtype(np.int32) - if ( - max(data.to_dask_array(_asanyarray=False).chunksize) - > np.iinfo(dtype).max - ): - dtype = np.dtype(np.int64) - - ndim = data.ndim - aggregation_location = np.ma.masked_all( - (ndim, max(a_shape[:ndim])), dtype=dtype - ) - - for i, chunks in enumerate(data.chunks): - aggregation_location[i, : len(chunks)] = chunks - - # ------------------------------------------------------------ - # Return Data objects - # ------------------------------------------------------------ - data = type(data) - return { - "location": data(aggregation_location), - "file": data(aggregation_file), - "format": data(aggregation_format), - "address": data(aggregation_address), - } - - def _customise_write_vars(self): - """Customise the write parameters. - - .. versionadded:: 3.15.0 - - """ - g = self.write_vars - - if g.get("cfa"): - from os.path import abspath - from pathlib import PurePath - - # Find the absolute directory path of the output - # CFA-netCDF file URI - g["cfa_dir"] = PurePath(abspath(g["filename"])).parent - - def _cfa_get_file_details(self, data): - """Get the details of all files referenced by the data. - - .. 
versionadded:: 3.15.0 - - :Parameters: - - data: `Data` - The data - - :Returns: - - `set` of 3-tuples - A set containing 3-tuples giving the file names, - the addresses in the files, and the file formats. If - no files are required to compute the data then - an empty `set` is returned. - - **Examples** - - >>> n._cfa_get_file_details(data): - {(('/home/file.nc',), ('tas',), ('nc',))} - - >>> n._cfa_get_file_details(data): - {(('/home/file.pp',), (34556,), ('um',))} - - """ - out = [] - out_append = out.append - for a in data.todict().values(): - try: - out_append( - (a.get_filenames(), a.get_addresses(), a.get_formats()) - ) - except AttributeError: - pass - return set(out) +# def _create_cfa_data(self, ncvar, ncdimensions, data, cfvar): +# """Write a CFA variable to the netCDF file. +# +# Any CFA private variables required will be autmatically created +# and written to the file. +# +# .. versionadded:: 3.0.0 +# +# :Parameters: +# +# ncvar: `str` +# The netCDF name for the variable. +# +# ncdimensions: sequence of `str` +# +# netcdf_attrs: `dict` +# +# data: `Data` +# +# :Returns: +# +# `None` +# +# """ +# g = self.write_vars +# +# ndim = data.ndim +# +# cfa = self._cfa_aggregation_instructions(data, cfvar) +# +# # ------------------------------------------------------------ +# # Get the location netCDF dimensions. These always start with +# # "f_{size}_loc". +# # ------------------------------------------------------------ +# location_ncdimensions = [] +# for size in cfa["location"].shape: +# l_ncdim = f"f_{size}_loc" +# if l_ncdim not in g["dimensions"]: +# # Create a new location dimension +# self._write_dimension(l_ncdim, None, size=size) +# +# location_ncdimensions.append(l_ncdim) +# +# location_ncdimensions = tuple(location_ncdimensions) +# +# # ------------------------------------------------------------ +# # Get the fragment netCDF dimensions. These always start with +# # "f_". 
+# # ------------------------------------------------------------ +# aggregation_address = cfa["address"] +# fragment_ncdimensions = [] +# for ncdim, size in zip( +# ncdimensions + ("extra",) * (aggregation_address.ndim - ndim), +# aggregation_address.shape, +# ): +# f_ncdim = f"f_{ncdim}" +# if f_ncdim not in g["dimensions"]: +# # Create a new fragment dimension +# self._write_dimension(f_ncdim, None, size=size) +# +# fragment_ncdimensions.append(f_ncdim) +# +# fragment_ncdimensions = tuple(fragment_ncdimensions) +# +# # ------------------------------------------------------------ +# # Write the standardised aggregation instruction variables to +# # the CFA-netCDF file +# # ------------------------------------------------------------ +# substitutions = data.cfa_file_substitutions() +# substitutions.update(g["cfa_options"].get("substitutions", {})) +# +# aggregated_data = data.cfa_get_aggregated_data() +# aggregated_data_attr = [] +# +# # Location +# term = "location" +# data = cfa[term] +# self.implementation.nc_set_hdf5_chunksizes(data, data.shape) +# term_ncvar = self._cfa_write_term_variable( +# data, +# aggregated_data.get(term, f"cfa_{term}"), +# location_ncdimensions, +# ) +# aggregated_data_attr.append(f"{term}: {term_ncvar}") +# +# # File +# term = "file" +# if substitutions: +# # Create the "substitutions" netCDF attribute +# subs = [] +# for base, sub in substitutions.items(): +# subs.append(f"{base}: {sub}") +# +# attributes = {"substitutions": " ".join(sorted(subs))} +# else: +# attributes = None +# +# data = cfa[term] +# self.implementation.nc_set_hdf5_chunksizes(data, data.shape) +# term_ncvar = self._cfa_write_term_variable( +# data, +# aggregated_data.get(term, f"cfa_{term}"), +# fragment_ncdimensions, +# attributes=attributes, +# ) +# aggregated_data_attr.append(f"{term}: {term_ncvar}") +# +# # Address +# term = "address" +# +# # Attempt to reduce addresses to a common scalar value +# u = cfa[term].unique().compressed().persist() +# if u.size == 
1: +# cfa[term] = u.squeeze() +# dimensions = () +# else: +# dimensions = fragment_ncdimensions +# +# data = cfa[term] +# self.implementation.nc_set_hdf5_chunksizes(data, data.shape) +# term_ncvar = self._cfa_write_term_variable( +# data, +# aggregated_data.get(term, f"cfa_{term}"), +# dimensions, +# ) +# aggregated_data_attr.append(f"{term}: {term_ncvar}") +# +# # Format +# term = "format" +# +# # Attempt to reduce addresses to a common scalar value +# u = cfa[term].unique().compressed().persist() +# if u.size == 1: +# cfa[term] = u.squeeze() +# dimensions = () +# else: +# dimensions = fragment_ncdimensions +# +# data = cfa[term] +# self.implementation.nc_set_hdf5_chunksizes(data, data.shape) +# term_ncvar = self._cfa_write_term_variable( +# data, +# aggregated_data.get(term, f"cfa_{term}"), +# dimensions, +# ) +# aggregated_data_attr.append(f"{term}: {term_ncvar}") +# +# # ------------------------------------------------------------ +# # Look for non-standard CFA terms stored as field ancillaries +# # on a field and write them to the CFA-netCDF file +# # ------------------------------------------------------------ +# if self.implementation.is_field(cfvar): +# non_standard_terms = self._cfa_write_non_standard_terms( +# cfvar, fragment_ncdimensions[:ndim], aggregated_data +# ) +# aggregated_data_attr.extend(non_standard_terms) +# +# # ------------------------------------------------------------ +# # Add the CFA aggregation variable attributes +# # ------------------------------------------------------------ +# self._write_attributes( +# None, +# ncvar, +# extra={ +# "aggregated_dimensions": " ".join(ncdimensions), +# "aggregated_data": " ".join(sorted(aggregated_data_attr)), +# }, +# ) +# +# def _check_valid(self, array, cfvar=None, attributes=None): +# """Checks for array values outside of the valid range. +# +# Specifically, checks array for out-of-range values, as +# defined by the valid_[min|max|range] attributes. +# +# .. 
versionadded:: 3.14.0 +# +# :Parameters: +# +# array: `numpy.ndarray` +# The array to be checked. +# +# cfvar: construct +# The CF construct containing the array. +# +# attributes: `dict` +# The variable's CF properties. +# +# :Returns: +# +# `numpy.ndarray` +# The input array, unchanged. +# +# """ +# super()._check_valid(cfvar, array, attributes) +# return array +# +# def _filled_string_array(self, array, fill_value=""): +# """Fill a string array. +# +# .. versionadded:: 3.14.0 +# +# :Parameters: +# +# array: `numpy.ndarray` +# The `numpy` array with string (byte or unicode) data +# type. +# +# :Returns: +# +# `numpy.ndarray` +# The string array array with any missing data replaced +# by the fill value. +# +# """ +# if np.ma.isMA(array): +# return array.filled(fill_value) +# +# return array +# +# def _write_field_ancillary(self, f, key, anc): +# """Write a field ancillary to the netCDF file. +# +# If an equal field ancillary has already been written to the file +# then it is not re-written. +# +# .. versionadded:: 3.15.0 +# +# :Parameters: +# +# f: `Field` +# +# key: `str` +# +# anc: `FieldAncillary` +# +# :Returns: +# +# `str` +# The netCDF variable name of the field ancillary +# object. If no ancillary variable was written then an +# empty string is returned. +# +# """ +# if anc.data.cfa_get_term(): +# # This field ancillary construct is to be written as a +# # non-standard CFA term belonging to the parent field, or +# # else not at all. +# return "" +# +# return super()._write_field_ancillary(f, key, anc) +# +# def _cfa_write_term_variable( +# self, data, ncvar, ncdimensions, attributes=None +# ): +# """Write a CFA aggregation instruction term variable +# +# .. versionadded:: 3.15.0 +# +# :Parameters: +# +# data `Data` +# The data to write. +# +# ncvar: `str` +# The netCDF variable name. +# +# ncdimensions: `tuple` of `str` +# The variable's netCDF dimensions. +# +# attributes: `dict`, optional +# Any attributes to attach to the variable. 
+# +# :Returns: +# +# `str` +# The netCDF variable name of the CFA term variable. +# +# """ +# create = not self._already_in_file(data, ncdimensions) +# +# if create: +# # Create a new CFA term variable in the file +# ncvar = self._netcdf_name(ncvar) +# self._write_netcdf_variable( +# ncvar, ncdimensions, data, None, extra=attributes +# ) +# else: +# # This CFA term variable has already been written to the +# # file +# ncvar = self.write_vars["seen"][id(data)]["ncvar"] +# +# return ncvar +# +# def _cfa_write_non_standard_terms( +# self, field, fragment_ncdimensions, aggregated_data +# ): +# """Write a non-standard CFA aggregation instruction term variable. +# +# Writes non-standard CFA terms stored as field ancillaries. +# +# .. versionadded:: 3.15.0 +# +# :Parameters: +# +# field: `Field` +# +# fragment_ncdimensions: `list` of `str` +# +# aggregated_data: `dict` +# +# """ +# aggregated_data_attr = [] +# terms = ["location", "file", "address", "format"] +# for key, field_anc in self.implementation.get_field_ancillaries( +# field +# ).items(): +# if not field_anc.data.cfa_get_term(): +# continue +# +# data = self.implementation.get_data(field_anc, None) +# if data is None: +# continue +# +# # Check that the field ancillary has the same axes as its +# # parent field, and in the same order. +# if field.get_data_axes(key) != field.get_data_axes(): +# continue +# +# # Still here? Then this field ancillary can be represented +# # by a non-standard aggregation term. +# +# # Then transform the data so that it spans the fragment +# # dimensions, with one value per fragment. If a chunk has +# # more than one unique value then the fragment's value is +# # missing data. +# # +# # '_cfa_unique' has its own call to 'cfdm_asanyarray', so +# # we can set '_asanyarray=False'. 
+# dx = data.to_dask_array(_asanyarray=False) +# dx_ind = tuple(range(dx.ndim)) +# out_ind = dx_ind +# dx = da.blockwise( +# self._cfa_unique, +# out_ind, +# dx, +# dx_ind, +# adjust_chunks={i: 1 for i in out_ind}, +# dtype=dx.dtype, +# ) +# +# # Get the non-standard term name from the field +# # ancillary's 'id' attribute +# term = getattr(field_anc, "id", "term") +# term = term.replace(" ", "_") +# name = term +# n = 0 +# while term in terms: +# n += 1 +# term = f"{name}_{n}" +# +# terms.append(term) +# +# # Create the new CFA term variable +# data = type(data)(dx) +# self.implementation.nc_set_hdf5_chunksizes(data, data.shape) +# term_ncvar = self._cfa_write_term_variable( +# data=data, +# ncvar=aggregated_data.get(term, f"cfa_{term}"), +# ncdimensions=fragment_ncdimensions, +# ) +# +# aggregated_data_attr.append(f"{term}: {term_ncvar}") +# +# return aggregated_data_attr +# +# @classmethod +# def _cfa_unique(cls, a): +# """Return the unique value of an array. +# +# If there are multiple unique vales then missing data is +# returned. +# +# .. versionadded:: 3.15.0 +# +# :Parameters: +# +# a: `numpy.ndarray` +# The array. +# +# :Returns: +# +# `numpy.ndarray` +# A size 1 array containing the unique value, or missing +# data if there is not a unique value. +# +# """ +# a = cfdm_asanyarray(a) +# +# out_shape = (1,) * a.ndim +# a = np.unique(a) +# if np.ma.isMA(a): +# # Remove a masked element +# a = a.compressed() +# +# if a.size == 1: +# return a.reshape(out_shape) +# +# return np.ma.masked_all(out_shape, dtype=a.dtype) +# +# def _cfa_aggregation_instructions(self, data, cfvar): +# """Convert data to standardised CFA aggregation instruction terms. +# +# .. versionadded:: 3.15.0 +# +# :Parameters: +# +# data: `Data` +# The data to be converted to standardised CFA +# aggregation instruction terms. +# +# cfvar: construct +# The construct that contains the *data*. 
+# +# :Returns: +# +# `dict` +# A dictionary whose keys are the standardised CFA +# aggregation instruction terms, with values of `Data` +# instances containing the corresponding variables. +# +# **Examples** +# +# >>> n._cfa_aggregation_instructions(data, cfvar) +# {'location': , +# 'file': , +# 'format': , +# 'address': } +# +# """ +# from os.path import abspath, join, relpath +# from pathlib import PurePath +# from urllib.parse import urlparse +# +# g = self.write_vars +# +# # Define the CFA file susbstitutions, giving precedence over +# # those set on the Data object to those provided by the CFA +# # options. +# substitutions = data.cfa_file_substitutions() +# substitutions.update(g["cfa_options"].get("substitutions", {})) +# +# absolute_paths = g["cfa_options"].get("absolute_paths") +# cfa_dir = g["cfa_dir"] +# +# # Size of the trailing dimension +# n_trailing = 0 +# +# aggregation_file = [] +# aggregation_address = [] +# aggregation_format = [] +# for indices in data.chunk_indices(): +# file_details = self._cfa_get_file_details(data[indices]) +# +# if len(file_details) != 1: +# if file_details: +# raise ValueError( +# f"Can't write {cfvar!r} as a CFA-netCDF " +# "aggregation variable: Dask chunk defined by index " +# f"{indices} spans two or more fragments. " +# "A possible fix for this is to set chunks=None as " +# "an argument of a prior call to cf.read" +# ) +# +# raise ValueError( +# f"Can't write {cfvar!r} as a CFA-netCDF " +# "aggregation variable: Dask chunk defined by index " +# f"{indices} spans zero fragments." 
+# ) +# +# filenames, addresses, formats = file_details.pop() +# +# if len(filenames) > n_trailing: +# n_trailing = len(filenames) +# +# filenames2 = [] +# for filename in filenames: +# uri = urlparse(filename) +# uri_scheme = uri.scheme +# if not uri_scheme: +# filename = abspath(join(cfa_dir, filename)) +# if absolute_paths: +# filename = PurePath(filename).as_uri() +# else: +# filename = relpath(filename, start=cfa_dir) +# elif not absolute_paths and uri_scheme == "file": +# filename = relpath(uri.path, start=cfa_dir) +# +# if substitutions: +# # Apply the CFA file susbstitutions +# for base, sub in substitutions.items(): +# filename = filename.replace(sub, base) +# +# filenames2.append(filename) +# +# aggregation_file.append(tuple(filenames2)) +# aggregation_address.append(addresses) +# aggregation_format.append(formats) +# +# # Pad each value of the aggregation instruction arrays so that +# # it has 'n_trailing' elements +# a_shape = data.numblocks +# pad = None +# if n_trailing > 1: +# a_shape += (n_trailing,) +# +# # Pad the ... +# for i, (filenames, addresses, formats) in enumerate( +# zip(aggregation_file, aggregation_address, aggregation_format) +# ): +# n = n_trailing - len(filenames) +# if n: +# # This chunk has fewer fragment files than some +# # others, so some padding is required. +# pad = ("",) * n +# aggregation_file[i] = filenames + pad +# aggregation_format[i] = formats + pad +# if isinstance(addresses[0], int): +# pad = (-1,) * n +# +# aggregation_address[i] = addresses + pad +# +# # Reshape the 1-d aggregation instruction arrays to span the +# # data dimensions, plus the extra trailing dimension if there +# # is one. 
+# aggregation_file = np.array(aggregation_file).reshape(a_shape) +# aggregation_address = np.array(aggregation_address).reshape(a_shape) +# aggregation_format = np.array(aggregation_format).reshape(a_shape) +# +# # Mask any padded elements +# if pad: +# aggregation_file = np.ma.where( +# aggregation_file == "", np.ma.masked, aggregation_file +# ) +# mask = aggregation_file.mask +# aggregation_address = np.ma.array(aggregation_address, mask=mask) +# aggregation_format = np.ma.array(aggregation_format, mask=mask) +# +# # ------------------------------------------------------------ +# # Create the location array +# # ------------------------------------------------------------ +# dtype = np.dtype(np.int32) +# if ( +# max(data.to_dask_array(_asanyarray=False).chunksize) +# > np.iinfo(dtype).max +# ): +# dtype = np.dtype(np.int64) +# +# ndim = data.ndim +# aggregation_location = np.ma.masked_all( +# (ndim, max(a_shape[:ndim])), dtype=dtype +# ) +# +# for i, chunks in enumerate(data.chunks): +# aggregation_location[i, : len(chunks)] = chunks +# +# # ------------------------------------------------------------ +# # Return Data objects +# # ------------------------------------------------------------ +# data = type(data) +# return { +# "location": data(aggregation_location), +# "file": data(aggregation_file), +# "format": data(aggregation_format), +# "address": data(aggregation_address), +# } +# +# def _customise_write_vars(self): +# """Customise the write parameters. +# +# .. versionadded:: 3.15.0 +# +# """ +# g = self.write_vars +# +# if g.get("cfa"): +# from os.path import abspath +# from pathlib import PurePath +# +# # Find the absolute directory path of the output +# # CFA-netCDF file URI +# g["cfa_dir"] = PurePath(abspath(g["filename"])).parent +# +# def _cfa_get_file_details(self, data): +# """Get the details of all files referenced by the data. +# +# .. 
versionadded:: 3.15.0 +# +# :Parameters: +# +# data: `Data` +# The data +# +# :Returns: +# +# `set` of 3-tuples +# A set containing 3-tuples giving the file names, +# the addresses in the files, and the file formats. If +# no files are required to compute the data then +# an empty `set` is returned. +# +# **Examples** +# +# >>> n._cfa_get_file_details(data): +# {(('/home/file.nc',), ('tas',), ('nc',))} +# +# >>> n._cfa_get_file_details(data): +# {(('/home/file.pp',), (34556,), ('um',))} +# +# """ +# out = [] +# out_append = out.append +# for a in data.todict().values(): +# try: +# out_append( +# (a.get_filenames(), a.get_addresses(), a.get_formats()) +# ) +# except AttributeError: +# pass +# +# return set(out) diff --git a/cf/read_write/read.py b/cf/read_write/read.py index 9b18944e87..07c9132e53 100644 --- a/cf/read_write/read.py +++ b/cf/read_write/read.py @@ -7,6 +7,7 @@ from urllib.parse import urlparse from cfdm import is_log_level_info +from cfdm.read_write.netcdf import NetCDFRead from numpy.ma.core import MaskError from ..aggregate import aggregate as cf_aggregate @@ -16,7 +17,8 @@ from ..fieldlist import FieldList from ..functions import _DEPRECATION_ERROR_FUNCTION_KWARGS, flat from ..query import Query -from .netcdf import NetCDFRead + +# from .netcdf import NetCDFRead from .um import UMRead _cached_temporary_files = {} @@ -63,6 +65,7 @@ def read( store_hdf5_chunks=True, domain=False, cfa=None, + cfa_write=None, netcdf_backend=None, storage_options=None, cache=True, @@ -1014,28 +1017,28 @@ def read( info = is_log_level_info(logger) - # Parse the 'cfa' parameter - if cfa is None: - cfa_options = {} - else: - cfa_options = cfa.copy() - keys = ("substitutions",) - if not set(cfa_options).issubset(keys): - raise ValueError( - "Invalid dictionary key to the 'cfa' parameter." - f"Valid keys are {keys}. 
Got: {cfa_options}" - ) - - if "substitutions" in cfa_options: - substitutions = cfa_options["substitutions"].copy() - for base, sub in tuple(substitutions.items()): - if not (base.startswith("${") and base.endswith("}")): - # Add missing ${...} - substitutions[f"${{{base}}}"] = substitutions.pop(base) - else: - substitutions = {} - - cfa_options["substitutions"] = substitutions + # # Parse the 'cfa' parameter + # if cfa is None: + # cfa_options = {} + # else: + # cfa_options = cfa.copy() + # keys = ("substitutions",) + # if not set(cfa_options).issubset(keys): + # raise ValueError( + # "Invalid dictionary key to the 'cfa' parameter." + # f"Valid keys are {keys}. Got: {cfa_options}" + # ) + # + # if "substitutions" in cfa_options: + # substitutions = cfa_options["substitutions"].copy() + # for base, sub in tuple(substitutions.items()): + # if not (base.startswith("${") and base.endswith("}")): + # # Add missing ${...} + # substitutions[f"${{{base}}}"] = substitutions.pop(base) + # else: + # substitutions = {} + # + # cfa_options["substitutions"] = substitutions # Initialise the output list of fields/domains if domain: @@ -1170,7 +1173,8 @@ def read( warn_valid=warn_valid, select=select, domain=domain, - cfa_options=cfa_options, + cfa=cfa, + cfa_write=cfa_write, netcdf_backend=netcdf_backend, storage_options=storage_options, cache=cache, @@ -1289,7 +1293,8 @@ def _read_a_file( store_hdf5_chunks=True, select=None, domain=False, - cfa_options=None, + cfa=None, + cfa_write=None, netcdf_backend=None, storage_options=None, cache=True, @@ -1326,7 +1331,7 @@ def _read_a_file( domain: `bool`, optional See `cf.read` for details. - cfa_options: `dict`, optional + cfa: `dict`, optional See `cf.read` for details. .. 
versionadded:: 3.15.0 @@ -1378,7 +1383,7 @@ def _read_a_file( extra_read_vars = { "fmt": selected_fmt, "ignore_read_error": ignore_read_error, - "cfa_options": cfa_options, + # "cfa_options": cfa_options, } # ---------------------------------------------------------------- @@ -1424,6 +1429,8 @@ def _read_a_file( dask_chunks=dask_chunks, store_hdf5_chunks=store_hdf5_chunks, cache=cache, + cfa=cfa, + cfa_write=cfa_write, ) except MaskError: # Some data required for field interpretation is missing, diff --git a/cf/read_write/um/umread.py b/cf/read_write/um/umread.py index e73166eba1..98c8fbd630 100644 --- a/cf/read_write/um/umread.py +++ b/cf/read_write/um/umread.py @@ -2150,7 +2150,7 @@ def create_data(self): # Create the Data object data = Data(dx, units=um_Units, fill_value=fill_value) - data._cfa_set_write(True) + data._nc_set_aggregation_write_status(True) self.data = data self.data_axes = data_axes diff --git a/cf/read_write/write.py b/cf/read_write/write.py index 23a8dda3cd..5ca803eb86 100644 --- a/cf/read_write/write.py +++ b/cf/read_write/write.py @@ -32,7 +32,7 @@ def write( shuffle=True, reference_datetime=None, verbose=None, - cfa=False, + cfa="auto", single=None, double=None, variable_attributes=None, @@ -41,6 +41,7 @@ def write( group=True, coordinates=False, omit_data=None, + hdf5_chunks="4 MiB", cfa_options=None, ): """Write field constructs to a netCDF file. 
@@ -752,57 +753,57 @@ def write( # Extra write variables extra_write_vars = {"reference_datetime": reference_datetime} - # ------------------------------------------------------------ - # CFA - # ------------------------------------------------------------ - if isinstance(cfa, dict): - cfa_options = cfa.copy() - cfa = True - else: - cfa_options = {} - cfa = bool(cfa) - - if cfa: - # Add CFA to the Conventions - cfa_conventions = f"CFA-{CFA()}" - if not Conventions: - Conventions = cfa_conventions - elif isinstance(Conventions, str): - Conventions = (Conventions, cfa_conventions) - else: - Conventions = tuple(Conventions) + (cfa_conventions,) - - keys = ("constructs", "absolute_paths", "strict", "substitutions") - if not set(cfa_options).issubset(keys): - raise ValueError( - "Invalid dictionary key to the 'cfa_options' " - f"parameter. Valid keys are {keys}. Got: {cfa_options}" - ) - - cfa_options.setdefault("constructs", "field") - cfa_options.setdefault("absolute_paths", True) - cfa_options.setdefault("strict", True) - cfa_options.setdefault("substitutions", {}) - - constructs = cfa_options["constructs"] - if isinstance(constructs, dict): - cfa_options["constructs"] = constructs.copy() - else: - if isinstance(constructs, str): - constructs = (constructs,) - - cfa_options["constructs"] = {c: None for c in constructs} - - substitutions = cfa_options["substitutions"].copy() - for base, sub in tuple(substitutions.items()): - if not (base.startswith("${") and base.endswith("}")): - # Add missing ${...} - substitutions[f"${{{base}}}"] = substitutions.pop(base) - - cfa_options["substitutions"] = substitutions - - extra_write_vars["cfa"] = cfa - extra_write_vars["cfa_options"] = cfa_options + # # ------------------------------------------------------------ + # # CFA + # # ------------------------------------------------------------ + # if isinstance(cfa, dict): + # cfa_options = cfa.copy() + # cfa = True + # else: + # cfa_options = {} + # cfa = bool(cfa) + # + # if cfa: + 
# # Add CFA to the Conventions + # cfa_conventions = f"CFA-{CFA()}" + # if not Conventions: + # Conventions = cfa_conventions + # elif isinstance(Conventions, str): + # Conventions = (Conventions, cfa_conventions) + # else: + # Conventions = tuple(Conventions) + (cfa_conventions,) + # + # keys = ("constructs", "absolute_paths", "strict", "substitutions") + # if not set(cfa_options).issubset(keys): + # raise ValueError( + # "Invalid dictionary key to the 'cfa_options' " + # f"parameter. Valid keys are {keys}. Got: {cfa_options}" + # ) + # + # cfa_options.setdefault("constructs", "field") + # cfa_options.setdefault("absolute_paths", True) + # cfa_options.setdefault("strict", True) + # cfa_options.setdefault("substitutions", {}) + # + # constructs = cfa_options["constructs"] + # if isinstance(constructs, dict): + # cfa_options["constructs"] = constructs.copy() + # else: + # if isinstance(constructs, str): + # constructs = (constructs,) + # + # cfa_options["constructs"] = {c: None for c in constructs} + # + # substitutions = cfa_options["substitutions"].copy() + # for base, sub in tuple(substitutions.items()): + # if not (base.startswith("${") and base.endswith("}")): + # # Add missing ${...} + # substitutions[f"${{{base}}}"] = substitutions.pop(base) + # + # cfa_options["substitutions"] = substitutions + # + # extra_write_vars["cfa"] = cfa + # extra_write_vars["cfa_options"] = cfa_options netcdf.write( fields, @@ -828,4 +829,6 @@ def write( coordinates=coordinates, extra_write_vars=extra_write_vars, omit_data=omit_data, + hdf5_chunks=hdf5_chunks, + cfa=cfa, ) diff --git a/cf/test/create_test_files.py b/cf/test/create_test_files.py index 80f3be4a9f..2321ce0a95 100644 --- a/cf/test/create_test_files.py +++ b/cf/test/create_test_files.py @@ -2222,6 +2222,86 @@ def _make_ugrid_2(filename): return filename +def _make_aggregation_value(filename): + """Create an aggregation variable with a 'value' fragment array.""" + n = netCDF4.Dataset(filename, "w") + + n.Conventions = 
f"CF-{VN}" + n.comment = "A netCDF file with a 'value' aggregation variable." + + n.createDimension("time", 12) + n.createDimension("level", 1) + n.createDimension("latitude", 73) + n.createDimension("longitude", 144) + n.createDimension("f_time", 2) + n.createDimension("f_level", 1) + n.createDimension("f_latitude", 1) + n.createDimension("f_longitude", 1) + n.createDimension("i", 2) + n.createDimension("j", 4) + n.createDimension("j_uid", 1) + + temperature = n.createVariable("temperature", "f8", ()) + temperature.standard_name = "air_temperature" + temperature.units = "K" + temperature.cell_methods = "time: mean" + temperature.ancillary_variables = "uid" + temperature.aggregated_dimensions = "time level latitude longitude" + temperature.aggregated_data = "location: fragment_location address: fragment_address shape: fragment_shape" + + uid = n.createVariable("uid", str, ()) + uid.long_name = "Fragment dataset unique identifiers" + uid.aggregated_dimensions = "time" + uid.aggregated_data = "value: fragment_value_uid shape: fragment_shape_uid" + + time = n.createVariable("time", "f4", ("time",)) + time.standard_name = "time" + time.units = "days since 2001-01-01" + time[...] 
= [0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334] + + level = n.createVariable("level", "f4", ("level",)) + level.standard_name = "height_above_mean_sea_level" + level.units = "m" + + latitude = n.createVariable("latitude", "f4", ("latitude",)) + latitude.standard_name = "latitude" + latitude.units = "degrees_north" + + longitude = n.createVariable("longitude", "f4", ("longitude",)) + longitude.standard_name = "longitude" + longitude.units = "degrees_east" + + # Fragment array variables + fragment_location = n.createVariable( + "fragment_location", + str, + ("f_time", "f_level", "f_latitude", "f_longitude"), + ) + fragment_location[0, 0, 0, 0] = "January-March.nc" + fragment_location[1, 0, 0, 0] = "April-December.nc" + + fragment_address = n.createVariable("fragment_address", str, ()) + fragment_address[...] = "temperature" + + fragment_shape = n.createVariable("fragment_shape", "i4", ("j", "i")) + fragment_shape[...] = [[3, 9], [1, -1], [73, -1], [144, -1]] + fragment_shape[1:, 1] = np.ma.masked + + fragment_value_uid = n.createVariable( + "fragment_value_uid", str, ("f_time",) + ) + fragment_value_uid[0] = "04b9-7eb5-4046-97b-0bf8" + fragment_value_uid[1] = "05ee0-a183-43b3-a67-1eca" + + fragment_shape_uid = n.createVariable( + "fragment_shape_uid", "i4", ("j_uid", "i") + ) + fragment_shape_uid[...] 
= [3, 9] + + n.close() + return filename + + contiguous_file = _make_contiguous_file("DSG_timeSeries_contiguous.nc") indexed_file = _make_indexed_file("DSG_timeSeries_indexed.nc") indexed_contiguous_file = _make_indexed_contiguous_file( @@ -2252,6 +2332,8 @@ def _make_ugrid_2(filename): ugrid_1 = _make_ugrid_1("ugrid_1.nc") ugrid_2 = _make_ugrid_2("ugrid_2.nc") +aggregation_value = _make_aggregation_value("aggregation_value.nc") + if __name__ == "__main__": print("Run date:", datetime.datetime.now()) cfdm.environment() diff --git a/cf/test/setup_create_field.py b/cf/test/setup_create_field.py index 34e4bdcd58..dfff3bd16b 100644 --- a/cf/test/setup_create_field.py +++ b/cf/test/setup_create_field.py @@ -133,8 +133,11 @@ def test_create_field(self): "grid_north_pole_longitude": 190.0, } ) + datum = cf.Datum(parameters={"earth_radius": 6371007}) + ref0 = cf.CoordinateReference( coordinate_conversion=coordinate_conversion, + datum=datum, coordinates=[x, y, lat, lon], ) @@ -156,10 +159,12 @@ def test_create_field(self): domain_ancillaries={"orog": orog_key, "a": ak, "b": bk}, ) ref1 = cf.CoordinateReference( - coordinate_conversion=coordinate_conversion, coordinates=[z] + coordinates=[z], + datum=datum, + coordinate_conversion=coordinate_conversion, ) - f.set_construct(ref1) + ref1 = f.set_construct(ref1) # Field ancillary variables g = cf.FieldAncillary() @@ -193,7 +198,7 @@ def test_create_field(self): f.flag_meanings = ["a", "bb", "ccc"] for cm in cf.CellMethod.create( - "grid_longitude: mean grid_latitude: max" + "grid_longitude: mean grid_latitude: maximum" ): f.set_construct(cm) diff --git a/cf/test/test_CFA.py b/cf/test/test_CFA.py index 6b005aef70..f0bc1100c8 100644 --- a/cf/test/test_CFA.py +++ b/cf/test/test_CFA.py @@ -20,9 +20,9 @@ ( tmpfile1, tmpfile2, - tmpfile3, - tmpfile4, - tmpfile5, + nc_file, + cfa_file, + cfa_file2, ) = tmpfiles @@ -39,6 +39,8 @@ def _remove_tmpfiles(): class CFATest(unittest.TestCase): + """Unit test for aggregation variables.""" + 
netcdf3_fmts = [ "NETCDF3_CLASSIC", "NETCDF3_64BIT", @@ -48,33 +50,35 @@ class CFATest(unittest.TestCase): netcdf4_fmts = ["NETCDF4", "NETCDF4_CLASSIC"] netcdf_fmts = netcdf3_fmts + netcdf4_fmts + aggregation_value = os.path.join( + os.path.dirname(os.path.abspath(__file__)), "aggregation_value.nc" + ) + def test_CFA_fmt(self): - """Test the cf.read 'fmt' and 'cfa' keywords.""" + """Test the cf.read 'fmt' keyword with cfa.""" f = cf.example_field(0) cf.write(f, tmpfile1) f = cf.read(tmpfile1)[0] for fmt in self.netcdf_fmts: - cf.write(f, tmpfile2, fmt=fmt, cfa=True) - g = cf.read(tmpfile2) + cf.write(f, cfa_file, fmt=fmt, cfa="field") + g = cf.read(cfa_file) self.assertEqual(len(g), 1) self.assertTrue(f.equals(g[0])) def test_CFA_multiple_fragments(self): - """Test CFA with more than one fragment.""" + """Test aggregation variables with more than one fragment.""" f = cf.example_field(0) cf.write(f[:2], tmpfile1) cf.write(f[2:], tmpfile2) - a = cf.read([tmpfile1, tmpfile2]) - self.assertEqual(len(a), 1) - a = a[0] + a = cf.read(tmpfile1)[0] + b = cf.read(tmpfile2)[0] + a = cf.Field.concatenate([a, b], axis=0) - nc_file = tmpfile3 - cfa_file = tmpfile4 cf.write(a, nc_file) - cf.write(a, cfa_file, cfa=True) + cf.write(a, cfa_file, cfa="field") n = cf.read(nc_file) c = cf.read(cfa_file) @@ -84,108 +88,59 @@ def test_CFA_multiple_fragments(self): self.assertTrue(n[0].equals(c[0])) def test_CFA_strict(self): - """Test CFA 'strict' option to the cfa.write 'cfa' keyword.""" + """Test 'strict' option to the cf.write 'cfa' keyword.""" f = cf.example_field(0) - # By default, can't write as CF-netCDF those variables - # selected for CFA treatment, but which aren't suitable. 
+ # By default, can't write in-memory arrays as aggregation + # variables with self.assertRaises(ValueError): - cf.write(f, tmpfile1, cfa=True) + cf.write(f, cfa_file, cfa="field") # The previous line should have deleted the output file - self.assertFalse(os.path.exists(tmpfile1)) + self.assertFalse(os.path.exists(cfa_file)) - cf.write(f, tmpfile1, cfa={"strict": False}) - g = cf.read(tmpfile1) + cf.write(f, nc_file, cfa={"constructs": "field", "strict": False}) + g = cf.read(nc_file) self.assertEqual(len(g), 1) self.assertTrue(g[0].equals(f)) - cf.write(g, tmpfile2, cfa={"strict": True}) - g = cf.read(tmpfile2) + cf.write(g, cfa_file, cfa={"constructs": "field", "strict": True}) + g = cf.read(cfa_file) self.assertEqual(len(g), 1) self.assertTrue(g[0].equals(f)) - def test_CFA_field_ancillaries(self): - """Test creation of field ancillaries from non-standard CFA terms.""" - f = cf.example_field(0) - self.assertFalse(f.field_ancillaries()) - - a = f[:2] - b = f[2:] - a.set_property("foo", "bar_a") - b.set_property("foo", "bar_b") - cf.write(a, tmpfile1) - cf.write(b, tmpfile2) - - c = cf.read( - [tmpfile1, tmpfile2], aggregate={"field_ancillaries": "foo"} - ) - self.assertEqual(len(c), 1) - c = c[0] - self.assertEqual(len(c.field_ancillaries()), 1) - anc = c.field_ancillary() - self.assertTrue(anc.data.cfa_get_term()) - self.assertFalse(anc.data.cfa_get_write()) - - cf.write(c, tmpfile3, cfa=False) - c2 = cf.read(tmpfile3) - self.assertEqual(len(c2), 1) - self.assertFalse(c2[0].field_ancillaries()) - - cf.write(c, tmpfile4, cfa=True) - d = cf.read(tmpfile4) - self.assertEqual(len(d), 1) - d = d[0] - - self.assertEqual(len(d.field_ancillaries()), 1) - anc = d.field_ancillary() - self.assertTrue(anc.data.cfa_get_term()) - self.assertFalse(anc.data.cfa_get_write()) - self.assertTrue(d.equals(c)) - - cf.write(d, tmpfile5, cfa=False) - e = cf.read(tmpfile5) - self.assertEqual(len(e), 1) - self.assertFalse(e[0].field_ancillaries()) - - cf.write(d, tmpfile5, cfa=True) - e 
= cf.read(tmpfile5) - self.assertEqual(len(e), 1) - self.assertTrue(e[0].equals(d)) - def test_CFA_substitutions_0(self): - """Test CFA substitution URI substitutions (0).""" + """Test aggregation substitution URI substitutions (0).""" f = cf.example_field(0) cf.write(f, tmpfile1) f = cf.read(tmpfile1)[0] cwd = os.getcwd() - - f.data.cfa_update_file_substitutions({"base": cwd}) + f.data.nc_update_aggregation_substitutions({"base": cwd}) cf.write( f, - tmpfile2, - cfa={"absolute_paths": True}, + cfa_file, + cfa={"constructs": "field", "uri": "absolute"}, ) - nc = netCDF4.Dataset(tmpfile2, "r") - cfa_file = nc.variables["cfa_file"] + nc = netCDF4.Dataset(cfa_file, "r") + cfa_location = nc.variables["cfa_location"] self.assertEqual( - cfa_file.getncattr("substitutions"), + cfa_location.getncattr("substitutions"), f"${{base}}: {cwd}", ) self.assertEqual( - cfa_file[...], f"file://${{base}}/{os.path.basename(tmpfile1)}" + cfa_location[...], f"${{base}}/{os.path.basename(tmpfile1)}" ) nc.close() - g = cf.read(tmpfile2) + g = cf.read(cfa_file) self.assertEqual(len(g), 1) self.assertTrue(f.equals(g[0])) def test_CFA_substitutions_1(self): - """Test CFA substitution URI substitutions (1).""" + """Test aggregation substitution URI substitutions (1).""" f = cf.example_field(0) cf.write(f, tmpfile1) f = cf.read(tmpfile1)[0] @@ -194,136 +149,286 @@ def test_CFA_substitutions_1(self): for base in ("base", "${base}"): cf.write( f, - tmpfile2, - cfa={"absolute_paths": True, "substitutions": {base: cwd}}, + cfa_file, + cfa={ + "constructs": "field", + "uri": "absolute", + "substitutions": {base: cwd}, + }, ) - nc = netCDF4.Dataset(tmpfile2, "r") - cfa_file = nc.variables["cfa_file"] + nc = netCDF4.Dataset(cfa_file, "r") + cfa_location = nc.variables["cfa_location"] self.assertEqual( - cfa_file.getncattr("substitutions"), + cfa_location.getncattr("substitutions"), f"${{base}}: {cwd}", ) self.assertEqual( - cfa_file[...], f"file://${{base}}/{os.path.basename(tmpfile1)}" + 
cfa_location[...], + f"${{base}}/{os.path.basename(tmpfile1)}", ) nc.close() - g = cf.read(tmpfile2) + g = cf.read(cfa_file) self.assertEqual(len(g), 1) self.assertTrue(f.equals(g[0])) def test_CFA_substitutions_2(self): - """Test CFA substitution URI substitutions (2).""" + """Test aggregation substitution URI substitutions (2).""" + # TODOCFA: delete + tmpfile1 = "tmpfile1.nc" + f = cf.example_field(0) + cf.write(f, tmpfile1) f = cf.read(tmpfile1)[0] cwd = os.getcwd() + basename = os.path.basename(tmpfile1) - f.data.cfa_clear_file_substitutions() - f.data.cfa_update_file_substitutions({"base": cwd}) + # TODOCFA: delete + cfa_file = "cfa_file.nc" + f.data.nc_clear_aggregation_substitutions() + f.data.nc_update_aggregation_substitutions({"base": f"{cwd}"}) cf.write( f, - tmpfile2, + cfa_file, cfa={ - "absolute_paths": True, + "constructs": "field", + "uri": "absolute", "substitutions": {"base2": "/bad/location"}, }, ) - nc = netCDF4.Dataset(tmpfile2, "r") - cfa_file = nc.variables["cfa_file"] + nc = netCDF4.Dataset(cfa_file, "r") + cfa_location = nc.variables["cfa_location"] self.assertEqual( - cfa_file.getncattr("substitutions"), + cfa_location.getncattr("substitutions"), f"${{base2}}: /bad/location ${{base}}: {cwd}", ) - self.assertEqual( - cfa_file[...], f"file://${{base}}/{os.path.basename(tmpfile1)}" - ) + self.assertEqual(cfa_location[...], f"file://${{base}}/{basename}") nc.close() - g = cf.read(tmpfile2) + g = cf.read(cfa_file) self.assertEqual(len(g), 1) self.assertTrue(f.equals(g[0])) - f.data.cfa_clear_file_substitutions() - f.data.cfa_update_file_substitutions({"base": "/bad/location"}) + f.data.nc_clear_aggregation_substitutions() + f.data.nc_update_aggregation_substitutions({"base": "/bad/location"}) cf.write( f, - tmpfile2, - cfa={"absolute_paths": True, "substitutions": {"base": cwd}}, + cfa_file, + cfa={ + "constructs": "field", + "uri": "absolute", + "substitutions": {"base": cwd}, + }, ) - nc = netCDF4.Dataset(tmpfile2, "r") - cfa_file = 
nc.variables["cfa_file"] + nc = netCDF4.Dataset(cfa_file, "r") + cfa_location = nc.variables["cfa_location"] self.assertEqual( - cfa_file.getncattr("substitutions"), + cfa_location.getncattr("substitutions"), f"${{base}}: {cwd}", ) - self.assertEqual( - cfa_file[...], f"file://${{base}}/{os.path.basename(tmpfile1)}" - ) + self.assertEqual(cfa_location[...], f"file://${{base}}/{basename}") nc.close() - g = cf.read(tmpfile2) + g = cf.read(cfa_file) self.assertEqual(len(g), 1) self.assertTrue(f.equals(g[0])) - f.data.cfa_clear_file_substitutions() - f.data.cfa_update_file_substitutions({"base2": "/bad/location"}) + f.data.nc_clear_aggregation_substitutions() + f.data.nc_update_aggregation_substitutions({"base2": "/bad/location"}) cf.write( f, - tmpfile2, - cfa={"absolute_paths": True, "substitutions": {"base": cwd}}, + cfa_file, + cfa={ + "constructs": "field", + "uri": "absolute", + "substitutions": {"base": cwd}, + }, ) - nc = netCDF4.Dataset(tmpfile2, "r") - cfa_file = nc.variables["cfa_file"] + nc = netCDF4.Dataset(cfa_file, "r") + cfa_location = nc.variables["cfa_location"] self.assertEqual( - cfa_file.getncattr("substitutions"), + cfa_location.getncattr("substitutions"), f"${{base2}}: /bad/location ${{base}}: {cwd}", ) self.assertEqual( - cfa_file[...], f"file://${{base}}/{os.path.basename(tmpfile1)}" + cfa_location[...], f"file://${{base}}/{os.path.basename(tmpfile1)}" ) nc.close() - g = cf.read(tmpfile2) + g = cf.read(cfa_file) self.assertEqual(len(g), 1) - self.assertTrue(f.equals(g[0])) + g = g[0] + self.assertTrue(f.equals(g)) + + self.assertEqual( + g.data.get_filenames(normalise=False), + set((f"file://${{base}}/{os.path.basename(tmpfile1)}",)), + ) + g.data.nc_update_aggregation_substitutions({"base": "/new/location"}) + self.assertEqual( + g.data.nc_aggregation_substitutions(), + {"${base2}": "/bad/location", "${base}": "/new/location"}, + ) + self.assertEqual( + g.data.get_filenames(normalise=False), + 
set((f"file://${{base}}/{os.path.basename(tmpfile1)}",)), + ) + + # TODOCFA: delete + cfa_file2 = "cfa_file2.nc" + cf.write( + g, + cfa_file2, + cfa={ + "constructs": "field", + "uri": "absolute", + }, + ) + nc = netCDF4.Dataset(cfa_file2, "r") + cfa_location = nc.variables["cfa_location"] + self.assertEqual( + cfa_location.getncattr("substitutions"), + "${base2}: /bad/location ${base}: /new/location", + ) + self.assertEqual( + cfa_location[...], f"file://${{base}}/{os.path.basename(tmpfile1)}" + ) + nc.close() + + def test_CFA_substitutions_3(self): + """Test aggregation substitution URI substitutions (2).""" + f = cf.example_field(0) + cf.write(f, tmpfile1) + f = cf.read(tmpfile1)[0] + + cwd = os.getcwd() + basename = os.path.basename(tmpfile1) + + f.data.nc_clear_aggregation_substitutions() + f.data.nc_update_aggregation_substitutions({"base": f"{cwd}/"}) + cf.write( + f, + cfa_file, + cfa={ + "constructs": "field", + "uri": "absolute", + }, + ) + + nc = netCDF4.Dataset(cfa_file, "r") + cfa_location = nc.variables["cfa_location"] + self.assertEqual( + cfa_location.getncattr("substitutions"), f"${{base}}: {cwd}/" + ) + self.assertEqual(cfa_location[...], f"${{base}}{basename}") + nc.close() + + # TODOCFA: delete + cfa_file2 = "cfa_file2.nc" + + g = cf.read(cfa_file)[0] + self.assertTrue(f.equals(g)) + cf.write( + g, + cfa_file2, + cfa={ + "constructs": "field", + "uri": "absolute", + }, + ) + + def test_CFA_substitutions_4(self): + """Test aggregation substitution URI substitutions (2).""" + f = cf.example_field(0) + + # TODOCFA: delete + tmpfile1 = "tmpfile1.nc" + + cf.write(f, tmpfile1) + f = cf.read(tmpfile1)[0] + + cwd = os.getcwd() + basename = os.path.basename(tmpfile1) + + # TODOCFA: delete + cfa_file = "cfa_file.nc" + + replacement = f"{cwd}/" + f.data.nc_clear_aggregation_substitutions() + f.data.nc_update_aggregation_substitutions({"base": replacement}) + cf.write( + f, + cfa_file, + cfa={ + "constructs": "field", + "uri": "relative", + }, + ) + + nc = 
netCDF4.Dataset(cfa_file, "r") + cfa_location = nc.variables["cfa_location"] + self.assertEqual( + cfa_location.getncattr("substitutions"), + f"${{base}}: {replacement}", + ) + self.assertEqual(cfa_location[...], basename) + nc.close() + + cf.write( + f, + cfa_file, + cfa={"constructs": "field", "uri": "absolute"}, + ) + nc = netCDF4.Dataset(cfa_file, "r") + cfa_location = nc.variables["cfa_location"] + self.assertEqual( + cfa_location.getncattr("substitutions"), + f"${{base}}: {replacement}", + ) + self.assertEqual(cfa_location[...], f"file://${{base}}{basename}") + nc.close() - def test_CFA_absolute_paths(self): - """Test CFA 'absolute_paths' option to the cfa.write 'cfa' keyword.""" + def test_CFA_uri(self): + """Test aggregation 'uri' option to cf.write.""" f = cf.example_field(0) cf.write(f, tmpfile1) f = cf.read(tmpfile1)[0] - for absolute_paths, filename in zip( - (True, False), + for uri, filename in zip( + ("absolute", "relative"), ( PurePath(os.path.abspath(tmpfile1)).as_uri(), os.path.basename(tmpfile1), ), ): - cf.write(f, tmpfile2, cfa={"absolute_paths": absolute_paths}) + cf.write( + f, + cfa_file, + cfa={"constructs": "field", "uri": uri}, + ) - nc = netCDF4.Dataset(tmpfile2, "r") - cfa_file = nc.variables["cfa_file"] - self.assertEqual(cfa_file[...], filename) + nc = netCDF4.Dataset(cfa_file, "r") + cfa_location = nc.variables["cfa_location"] + self.assertEqual(cfa_location[...], filename) nc.close() - g = cf.read(tmpfile2) + g = cf.read(cfa_file) self.assertEqual(len(g), 1) self.assertTrue(f.equals(g[0])) def test_CFA_constructs(self): - """Test choice of constructs to write as CFA-netCDF variables.""" + """Test aggregation 'constructs' option to cf.write.""" f = cf.example_field(1) - f.del_construct("T") + f.del_construct("time") f.del_construct("long_name=Grid latitude name") cf.write(f, tmpfile1) f = cf.read(tmpfile1)[0] @@ -359,7 +464,6 @@ def test_CFA_constructs(self): ["dimension_coordinate"], {"dimension_coordinate": None}, 
{"dimension_coordinate": 1}, - {"dimension_coordinate": cf.eq(1)}, ): cf.write(f, tmpfile2, cfa={"constructs": constructs}) nc = netCDF4.Dataset(tmpfile2, "r") @@ -385,7 +489,7 @@ def test_CFA_constructs(self): # Dimension and auxiliary constructs for constructs in ( ["dimension_coordinate", "auxiliary_coordinate"], - {"dimension_coordinate": None, "auxiliary_coordinate": cf.ge(2)}, + {"dimension_coordinate": None, "auxiliary_coordinate": 2}, ): cf.write(f, tmpfile2, cfa={"constructs": constructs}) nc = netCDF4.Dataset(tmpfile2, "r") @@ -410,38 +514,15 @@ def test_CFA_constructs(self): nc.close() - def test_CFA_PP(self): - """Test writing CFA-netCDF with PP format fragments.""" - f = cf.read("file1.pp")[0] - cf.write(f, tmpfile1, cfa=True) - - # Check that only the fields have been aggregated - nc = netCDF4.Dataset(tmpfile1, "r") - for ncvar, var in nc.variables.items(): - attrs = var.ncattrs() - if ncvar in ("UM_m01s15i201_vn405",): - self.assertFalse(var.ndim) - self.assertIn("aggregated_dimensions", attrs) - self.assertIn("aggregated_data", attrs) - else: - self.assertNotIn("aggregated_dimensions", attrs) - self.assertNotIn("aggregated_data", attrs) - - nc.close() - - g = cf.read(tmpfile1) - self.assertEqual(len(g), 1) - self.assertTrue(f.equals(g[0])) - def test_CFA_multiple_files(self): - """Test storing multiple CFA frgament locations.""" + """Test storing multiple locations for the same fragment.""" f = cf.example_field(0) cf.write(f, tmpfile1) f = cf.read(tmpfile1)[0] - f.add_file_location("/new/location") + f.add_file_directory("/new/path") - cf.write(f, tmpfile2, cfa=True) - g = cf.read(tmpfile2) + cf.write(f, cfa_file, cfa="field") + g = cf.read(cfa_file) self.assertEqual(len(g), 1) g = g[0] self.assertTrue(f.equals(g)) @@ -450,20 +531,99 @@ def test_CFA_multiple_files(self): self.assertEqual(len(g.get_filenames()), 3) def test_CFA_unlimited_dimension(self): - """Test CFA with unlimited dimensions""" - # Create a CFA file from a field that has an 
unlimited - # dimension and no metadata constructs spanning that dimension + """Test aggregation files with unlimited dimensions.""" + # Aggregated dimensions cannot be unlimited f = cf.example_field(0) - d = f.domain_axis("X") - d.nc_set_unlimited(True) - f.del_construct("X") + axis = f.domain_axis("longitude") + axis.nc_set_unlimited(True) cf.write(f, tmpfile1) g = cf.read(tmpfile1) - cf.write(g, tmpfile2, cfa=True) + with self.assertRaises(ValueError): + cf.write(g, cfa_file, cfa="field") + + def test_CFA_scalar(self): + """Test scalar aggregation variable.""" + f = cf.example_field(0) + f = f[0, 0].squeeze() + cf.write(f, tmpfile1) + g = cf.read(tmpfile1)[0] + cf.write(g, cfa_file, cfa="field") + h = cf.read(cfa_file)[0] + self.assertTrue(h.equals(f)) + + def test_CFA_value(self): + """Test the value fragment array variable.""" + write = True + for aggregation_value_file in (self.aggregation_value, cfa_file): + f = cf.read(aggregation_value_file) + self.assertEqual(len(f), 1) + f = f[0] + fa = f.field_ancillary() + self.assertEqual(fa.shape, (12,)) + self.assertEqual(fa.data.chunks, ((3, 9),)) + self.assertEqual( + fa.data.nc_get_aggregation_fragment_type(), "value" + ) + self.assertEqual( + fa.data.nc_get_aggregated_data(), + {"shape": "fragment_shape_uid", "value": "fragment_value_uid"}, + ) + + nc = netCDF4.Dataset(aggregation_value_file, "r") + fragment_value_uid = nc.variables["fragment_value_uid"][...] 
+ nc.close() + + self.assertTrue((fa[:3].array == fragment_value_uid[0]).all()) + self.assertTrue((fa[3:].array == fragment_value_uid[1]).all()) + + if write: + cf.write(f, cfa_file) + write = False - # Check that the CFA file can be read - h = cf.read(tmpfile2) - self.assertEqual(len(h), 1) + def test_CFA_cfa(self): + """Test the cf.write 'cfa' keyword.""" + f = cf.example_field(0) + cf.write(f, tmpfile1) + f = cf.read(tmpfile1)[0] + cf.write(f, tmpfile2, cfa="field") + g = cf.read(tmpfile2)[0] + + # Default of cfa="auto" - check that aggregation variable + # gets written + cf.write(g, cfa_file) + nc = netCDF4.Dataset(cfa_file, "r") + self.assertIsNotNone( + getattr(nc.variables["q"], "aggregated_data", None) + ) + nc.close() + + cf.write(g, cfa_file, cfa={"constructs": {"auto": 2}}) + nc = netCDF4.Dataset(cfa_file, "r") + self.assertIsNotNone( + getattr(nc.variables["q"], "aggregated_data", None) + ) + nc.close() + + cf.write( + g, + cfa_file, + cfa={ + "constructs": ["auto", "dimension_coordinate"], + "strict": False, + }, + ) + nc = netCDF4.Dataset(cfa_file, "r") + for ncvar in ("q", "lat", "lon"): + self.assertIsNotNone( + getattr(nc.variables[ncvar], "aggregated_data", None) + ) + + nc.close() + + # Check bad values of cfa + for cfa in (False, True, (), []): + with self.assertRaises(ValueError): + cf.write(g, cfa_file, cfa=cfa) if __name__ == "__main__": @@ -471,3 +631,463 @@ def test_CFA_unlimited_dimension(self): cf.environment() print() unittest.main(verbosity=2) + +# n_tmpfiles = 5 +# tmpfiles = [ +# tempfile.mkstemp("_test_CFA.nc", dir=os.getcwd())[1] +# for i in range(n_tmpfiles) +# ] +# ( +# tmpfile1, +# tmpfile2, +# tmpfile3, +# tmpfile4, +# tmpfile5, +# ) = tmpfiles +# +# +# def _remove_tmpfiles(): +# """Try to remove defined temporary files by deleting their paths.""" +# for f in tmpfiles: +# try: +# os.remove(f) +# except OSError: +# pass +# +# +# atexit.register(_remove_tmpfiles) +# +# +# class CFATest(unittest.TestCase): +# netcdf3_fmts = [ +# 
"NETCDF3_CLASSIC", +# "NETCDF3_64BIT", +# "NETCDF3_64BIT_OFFSET", +# "NETCDF3_64BIT_DATA", +# ] +# netcdf4_fmts = ["NETCDF4", "NETCDF4_CLASSIC"] +# netcdf_fmts = netcdf3_fmts + netcdf4_fmts +# +# def test_CFA_fmt(self): +# """Test the cf.read 'fmt' and 'cfa' keywords.""" +# f = cf.example_field(0) +# cf.write(f, tmpfile1) +# f = cf.read(tmpfile1)[0] +# +# for fmt in self.netcdf_fmts: +# cf.write(f, tmpfile2, fmt=fmt, cfa=True) +# g = cf.read(tmpfile2) +# self.assertEqual(len(g), 1) +# self.assertTrue(f.equals(g[0])) +# +# def test_CFA_multiple_fragments(self): +# """Test CFA with more than one fragment.""" +# f = cf.example_field(0) +# +# cf.write(f[:2], tmpfile1) +# cf.write(f[2:], tmpfile2) +# +# a = cf.read([tmpfile1, tmpfile2]) +# self.assertEqual(len(a), 1) +# a = a[0] +# +# nc_file = tmpfile3 +# cfa_file = tmpfile4 +# cf.write(a, nc_file) +# cf.write(a, cfa_file, cfa=True) +# +# n = cf.read(nc_file) +# c = cf.read(cfa_file) +# self.assertEqual(len(n), 1) +# self.assertEqual(len(c), 1) +# self.assertTrue(c[0].equals(f)) +# self.assertTrue(n[0].equals(c[0])) +# +# def test_CFA_strict(self): +# """Test CFA 'strict' option to the cfa.write 'cfa' keyword.""" +# f = cf.example_field(0) +# +# # By default, can't write as CF-netCDF those variables +# # selected for CFA treatment, but which aren't suitable. 
+# with self.assertRaises(ValueError): +# cf.write(f, tmpfile1, cfa=True) +# +# # The previous line should have deleted the output file +# self.assertFalse(os.path.exists(tmpfile1)) +# +# cf.write(f, tmpfile1, cfa={"strict": False}) +# g = cf.read(tmpfile1) +# self.assertEqual(len(g), 1) +# self.assertTrue(g[0].equals(f)) +# +# cf.write(g, tmpfile2, cfa={"strict": True}) +# g = cf.read(tmpfile2) +# self.assertEqual(len(g), 1) +# self.assertTrue(g[0].equals(f)) +# +# def test_CFA_field_ancillaries(self): +# """Test creation of field ancillaries from non-standard CFA terms.""" +# f = cf.example_field(0) +# self.assertFalse(f.field_ancillaries()) +# +# a = f[:2] +# b = f[2:] +# a.set_property("foo", "bar_a") +# b.set_property("foo", "bar_b") +# cf.write(a, tmpfile1) +# cf.write(b, tmpfile2) +# +# c = cf.read( +# [tmpfile1, tmpfile2], aggregate={"field_ancillaries": "foo"} +# ) +# self.assertEqual(len(c), 1) +# c = c[0] +# self.assertEqual(len(c.field_ancillaries()), 1) +# anc = c.field_ancillary() +# self.assertTrue(anc.data.cfa_get_term()) +# self.assertFalse(anc.data.cfa_get_write()) +# +# cf.write(c, tmpfile3, cfa=False) +# c2 = cf.read(tmpfile3) +# self.assertEqual(len(c2), 1) +# self.assertFalse(c2[0].field_ancillaries()) +# +# cf.write(c, tmpfile4, cfa=True) +# d = cf.read(tmpfile4) +# self.assertEqual(len(d), 1) +# d = d[0] +# +# self.assertEqual(len(d.field_ancillaries()), 1) +# anc = d.field_ancillary() +# self.assertTrue(anc.data.cfa_get_term()) +# self.assertFalse(anc.data.cfa_get_write()) +# self.assertTrue(d.equals(c)) +# +# cf.write(d, tmpfile5, cfa=False) +# e = cf.read(tmpfile5) +# self.assertEqual(len(e), 1) +# self.assertFalse(e[0].field_ancillaries()) +# +# cf.write(d, tmpfile5, cfa=True) +# e = cf.read(tmpfile5) +# self.assertEqual(len(e), 1) +# self.assertTrue(e[0].equals(d)) +# +# def test_CFA_substitutions_0(self): +# """Test CFA substitution URI substitutions (0).""" +# f = cf.example_field(0) +# cf.write(f, tmpfile1) +# f = 
cf.read(tmpfile1)[0] +# +# cwd = os.getcwd() +# +# f.data.cfa_update_file_substitutions({"base": cwd}) +# +# cf.write( +# f, +# tmpfile2, +# cfa={"absolute_paths": True}, +# ) +# +# nc = netCDF4.Dataset(tmpfile2, "r") +# cfa_file = nc.variables["cfa_file"] +# self.assertEqual( +# cfa_file.getncattr("substitutions"), +# f"${{base}}: {cwd}", +# ) +# self.assertEqual( +# cfa_file[...], f"file://${{base}}/{os.path.basename(tmpfile1)}" +# ) +# nc.close() +# +# g = cf.read(tmpfile2) +# self.assertEqual(len(g), 1) +# self.assertTrue(f.equals(g[0])) +# +# def test_CFA_substitutions_1(self): +# """Test CFA substitution URI substitutions (1).""" +# f = cf.example_field(0) +# cf.write(f, tmpfile1) +# f = cf.read(tmpfile1)[0] +# +# cwd = os.getcwd() +# for base in ("base", "${base}"): +# cf.write( +# f, +# tmpfile2, +# cfa={"absolute_paths": True, "substitutions": {base: cwd}}, +# ) +# +# nc = netCDF4.Dataset(tmpfile2, "r") +# cfa_file = nc.variables["cfa_file"] +# self.assertEqual( +# cfa_file.getncattr("substitutions"), +# f"${{base}}: {cwd}", +# ) +# self.assertEqual( +# cfa_file[...], f"file://${{base}}/{os.path.basename(tmpfile1)}" +# ) +# nc.close() +# +# g = cf.read(tmpfile2) +# self.assertEqual(len(g), 1) +# self.assertTrue(f.equals(g[0])) +# +# def test_CFA_substitutions_2(self): +# """Test CFA substitution URI substitutions (2).""" +# f = cf.example_field(0) +# cf.write(f, tmpfile1) +# f = cf.read(tmpfile1)[0] +# +# cwd = os.getcwd() +# +# f.data.cfa_clear_file_substitutions() +# f.data.cfa_update_file_substitutions({"base": cwd}) +# +# cf.write( +# f, +# tmpfile2, +# cfa={ +# "absolute_paths": True, +# "substitutions": {"base2": "/bad/location"}, +# }, +# ) +# +# nc = netCDF4.Dataset(tmpfile2, "r") +# cfa_file = nc.variables["cfa_file"] +# self.assertEqual( +# cfa_file.getncattr("substitutions"), +# f"${{base2}}: /bad/location ${{base}}: {cwd}", +# ) +# self.assertEqual( +# cfa_file[...], f"file://${{base}}/{os.path.basename(tmpfile1)}" +# ) +# nc.close() +# +# g = 
cf.read(tmpfile2) +# self.assertEqual(len(g), 1) +# self.assertTrue(f.equals(g[0])) +# +# f.data.cfa_clear_file_substitutions() +# f.data.cfa_update_file_substitutions({"base": "/bad/location"}) +# +# cf.write( +# f, +# tmpfile2, +# cfa={"absolute_paths": True, "substitutions": {"base": cwd}}, +# ) +# +# nc = netCDF4.Dataset(tmpfile2, "r") +# cfa_file = nc.variables["cfa_file"] +# self.assertEqual( +# cfa_file.getncattr("substitutions"), +# f"${{base}}: {cwd}", +# ) +# self.assertEqual( +# cfa_file[...], f"file://${{base}}/{os.path.basename(tmpfile1)}" +# ) +# nc.close() +# +# g = cf.read(tmpfile2) +# self.assertEqual(len(g), 1) +# self.assertTrue(f.equals(g[0])) +# +# f.data.cfa_clear_file_substitutions() +# f.data.cfa_update_file_substitutions({"base2": "/bad/location"}) +# +# cf.write( +# f, +# tmpfile2, +# cfa={"absolute_paths": True, "substitutions": {"base": cwd}}, +# ) +# +# nc = netCDF4.Dataset(tmpfile2, "r") +# cfa_file = nc.variables["cfa_file"] +# self.assertEqual( +# cfa_file.getncattr("substitutions"), +# f"${{base2}}: /bad/location ${{base}}: {cwd}", +# ) +# self.assertEqual( +# cfa_file[...], f"file://${{base}}/{os.path.basename(tmpfile1)}" +# ) +# nc.close() +# +# g = cf.read(tmpfile2) +# self.assertEqual(len(g), 1) +# self.assertTrue(f.equals(g[0])) +# +# def test_CFA_absolute_paths(self): +# """Test CFA 'absolute_paths' option to the cfa.write 'cfa' keyword.""" +# f = cf.example_field(0) +# cf.write(f, tmpfile1) +# f = cf.read(tmpfile1)[0] +# +# for absolute_paths, filename in zip( +# (True, False), +# ( +# PurePath(os.path.abspath(tmpfile1)).as_uri(), +# os.path.basename(tmpfile1), +# ), +# ): +# cf.write(f, tmpfile2, cfa={"absolute_paths": absolute_paths}) +# +# nc = netCDF4.Dataset(tmpfile2, "r") +# cfa_file = nc.variables["cfa_file"] +# self.assertEqual(cfa_file[...], filename) +# nc.close() +# +# g = cf.read(tmpfile2) +# self.assertEqual(len(g), 1) +# self.assertTrue(f.equals(g[0])) +# +# def test_CFA_constructs(self): +# """Test choice of 
constructs to write as CFA-netCDF variables.""" +# f = cf.example_field(1) +# f.del_construct("T") +# f.del_construct("long_name=Grid latitude name") +# cf.write(f, tmpfile1) +# f = cf.read(tmpfile1)[0] +# +# # No constructs +# cf.write(f, tmpfile2, cfa={"constructs": []}) +# nc = netCDF4.Dataset(tmpfile2, "r") +# for var in nc.variables.values(): +# attrs = var.ncattrs() +# self.assertNotIn("aggregated_dimensions", attrs) +# self.assertNotIn("aggregated_data", attrs) +# +# nc.close() +# +# # Field construct +# cf.write(f, tmpfile2, cfa={"constructs": "field"}) +# nc = netCDF4.Dataset(tmpfile2, "r") +# for ncvar, var in nc.variables.items(): +# attrs = var.ncattrs() +# if ncvar in ("ta",): +# self.assertFalse(var.ndim) +# self.assertIn("aggregated_dimensions", attrs) +# self.assertIn("aggregated_data", attrs) +# else: +# self.assertNotIn("aggregated_dimensions", attrs) +# self.assertNotIn("aggregated_data", attrs) +# +# nc.close() +# +# # Dimension construct +# for constructs in ( +# "dimension_coordinate", +# ["dimension_coordinate"], +# {"dimension_coordinate": None}, +# {"dimension_coordinate": 1}, +# {"dimension_coordinate": cf.eq(1)}, +# ): +# cf.write(f, tmpfile2, cfa={"constructs": constructs}) +# nc = netCDF4.Dataset(tmpfile2, "r") +# for ncvar, var in nc.variables.items(): +# attrs = var.ncattrs() +# if ncvar in ( +# "x", +# "x_bnds", +# "y", +# "y_bnds", +# "atmosphere_hybrid_height_coordinate", +# "atmosphere_hybrid_height_coordinate_bounds", +# ): +# self.assertFalse(var.ndim) +# self.assertIn("aggregated_dimensions", attrs) +# self.assertIn("aggregated_data", attrs) +# else: +# self.assertNotIn("aggregated_dimensions", attrs) +# self.assertNotIn("aggregated_data", attrs) +# +# nc.close() +# +# # Dimension and auxiliary constructs +# for constructs in ( +# ["dimension_coordinate", "auxiliary_coordinate"], +# {"dimension_coordinate": None, "auxiliary_coordinate": cf.ge(2)}, +# ): +# cf.write(f, tmpfile2, cfa={"constructs": constructs}) +# nc = 
netCDF4.Dataset(tmpfile2, "r") +# for ncvar, var in nc.variables.items(): +# attrs = var.ncattrs() +# if ncvar in ( +# "x", +# "x_bnds", +# "y", +# "y_bnds", +# "atmosphere_hybrid_height_coordinate", +# "atmosphere_hybrid_height_coordinate_bounds", +# "latitude_1", +# "longitude_1", +# ): +# self.assertFalse(var.ndim) +# self.assertIn("aggregated_dimensions", attrs) +# self.assertIn("aggregated_data", attrs) +# else: +# self.assertNotIn("aggregated_dimensions", attrs) +# self.assertNotIn("aggregated_data", attrs) +# +# nc.close() +# +# def test_CFA_PP(self): +# """Test writing CFA-netCDF with PP format fragments.""" +# f = cf.read("file1.pp")[0] +# cf.write(f, tmpfile1, cfa=True) +# +# # Check that only the fields have been aggregated +# nc = netCDF4.Dataset(tmpfile1, "r") +# for ncvar, var in nc.variables.items(): +# attrs = var.ncattrs() +# if ncvar in ("UM_m01s15i201_vn405",): +# self.assertFalse(var.ndim) +# self.assertIn("aggregated_dimensions", attrs) +# self.assertIn("aggregated_data", attrs) +# else: +# self.assertNotIn("aggregated_dimensions", attrs) +# self.assertNotIn("aggregated_data", attrs) +# +# nc.close() +# +# g = cf.read(tmpfile1) +# self.assertEqual(len(g), 1) +# self.assertTrue(f.equals(g[0])) +# +# def test_CFA_multiple_files(self): +# """Test storing multiple CFA frgament locations.""" +# f = cf.example_field(0) +# cf.write(f, tmpfile1) +# f = cf.read(tmpfile1)[0] +# f.add_file_location("/new/location") +# +# cf.write(f, tmpfile2, cfa=True) +# g = cf.read(tmpfile2) +# self.assertEqual(len(g), 1) +# g = g[0] +# self.assertTrue(f.equals(g)) +# +# self.assertEqual(len(g.data.get_filenames()), 2) +# self.assertEqual(len(g.get_filenames()), 3) +# +# def test_CFA_unlimited_dimension(self): +# """Test CFA with unlimited dimensions""" +# # Create a CFA file from a field that has an unlimited +# # dimension and no metadata constructs spanning that dimension +# f = cf.example_field(0) +# d = f.domain_axis("X") +# d.nc_set_unlimited(True) +# 
f.del_construct("X") +# cf.write(f, tmpfile1) +# g = cf.read(tmpfile1) +# cf.write(g, tmpfile2, cfa=True) +# +# # Check that the CFA file can be read +# h = cf.read(tmpfile2) +# self.assertEqual(len(h), 1) +# +# +# if __name__ == "__main__": +# print("Run date:", datetime.datetime.now()) +# cf.environment() +# print() +# unittest.main(verbosity=2) diff --git a/cf/test/test_Data.py b/cf/test/test_Data.py index 3b51206af5..2c4b619ae0 100644 --- a/cf/test/test_Data.py +++ b/cf/test/test_Data.py @@ -2387,7 +2387,6 @@ def test_Data_BINARY_AND_UNARY_OPERATORS(self): self.assertTrue( e.equals(cf.Data(a, "m"), verbose=1), message ) - # --- End: for for x in (cf.Data(2, "metre"), cf.Data(2.0, "metre")): self.assertTrue( @@ -4150,14 +4149,6 @@ def test_Data_flat(self): list(d.flat(ignore_masked=False)), [1, np.ma.masked, 3, 4] ) - def test_Data_tolist(self): - """Test the Data.tolist""" - for x in (1, [1, 2], [[1, 2], [3, 4]]): - d = cf.Data(x) - e = d.tolist() - self.assertEqual(e, np.array(x).tolist()) - self.assertTrue(d.equals(cf.Data(e))) - def test_Data_masked_invalid(self): """Test the `masked_invalid` Data method.""" a = np.array([0, 1, 2]) @@ -4453,33 +4444,6 @@ def test_Data__init__datetime(self): self.assertTrue((q == d).array.all()) self.assertTrue((d == q).array.all()) - def test_Data_get_filenames(self): - """Test `Data.get_filenames`.""" - d = cf.Data.ones((5, 8), float, chunks=4) - self.assertEqual(d.get_filenames(), set()) - - f = cf.example_field(0) - cf.write(f, file_A) - cf.write(f, file_B) - - a = cf.read(file_A, dask_chunks=4)[0].data - b = cf.read(file_B, dask_chunks=4)[0].data - b += 999 - c = cf.Data(b.array, units=b.Units, chunks=4) - - d = cf.Data.concatenate([a, a + 999, b, c], axis=1) - self.assertEqual(d.shape, (5, 32)) - - self.assertEqual(d.get_filenames(), set([file_A, file_B])) - self.assertEqual(d[:, 2:7].get_filenames(), set([file_A])) - self.assertEqual(d[:, 2:14].get_filenames(), set([file_A])) - self.assertEqual(d[:, 
2:20].get_filenames(), set([file_A, file_B])) - self.assertEqual(d[:, 2:30].get_filenames(), set([file_A, file_B])) - self.assertEqual(d[:, 29:30].get_filenames(), set()) - - d[2, 3] = -99 - self.assertEqual(d[2, 3].get_filenames(), set([file_A])) - def test_Data__str__(self): """Test `Data.__str__`""" elements0 = (0, -1, 1) @@ -4588,26 +4552,6 @@ def test_Data_convert_reference_time(self): self.assertEqual(e.Units, units) self.assertTrue((e.array == [72, 48, 24, 0]).all()) - def test_Data_clear_after_dask_update(self): - """Test Data._clear_after_dask_update.""" - d = cf.Data([1, 2, 3], "m") - dx = d.to_dask_array() - - d.first_element() - d.second_element() - d.last_element() - - self.assertTrue(d._get_cached_elements()) - - _ALL = cf.Data._ALL - _CACHE = cf.Data._CACHE - - d._set_dask(dx, clear=_ALL ^ _CACHE) - self.assertTrue(d._get_cached_elements()) - - d._set_dask(dx, clear=_ALL) - self.assertFalse(d._get_cached_elements()) - def test_Data_has_deterministic_name(self): """Test Data.has_deterministic_name""" d = cf.Data([1, 2], "m") @@ -4640,108 +4584,6 @@ def test_Data_get_deterministic_name(self): with self.assertRaises(ValueError): d.get_deterministic_name() - def test_Data_cfa_aggregated_data(self): - """Test `Data` CFA aggregated_data methods""" - d = cf.Data(9) - aggregated_data = { - "location": "cfa_location", - "file": "cfa_file", - "address": "cfa_address", - "format": "cfa_format", - "tracking_id": "tracking_id", - } - - self.assertFalse(d.cfa_has_aggregated_data()) - self.assertIsNone(d.cfa_set_aggregated_data(aggregated_data)) - self.assertTrue(d.cfa_has_aggregated_data()) - self.assertEqual(d.cfa_get_aggregated_data(), aggregated_data) - self.assertEqual(d.cfa_del_aggregated_data(), aggregated_data) - self.assertFalse(d.cfa_has_aggregated_data()) - self.assertEqual(d.cfa_get_aggregated_data(), {}) - self.assertEqual(d.cfa_del_aggregated_data(), {}) - - def test_Data_cfa_file_substitutions(self): - """Test `Data` CFA file_substitutions methods""" 
- d = cf.Data(9) - self.assertFalse(d.cfa_has_file_substitutions()) - self.assertIsNone( - d.cfa_update_file_substitutions({"base": "file:///data/"}) - ) - self.assertTrue(d.cfa_has_file_substitutions()) - self.assertEqual( - d.cfa_file_substitutions(), {"${base}": "file:///data/"} - ) - - d.cfa_update_file_substitutions({"${base2}": "/home/data/"}) - self.assertEqual( - d.cfa_file_substitutions(), - {"${base}": "file:///data/", "${base2}": "/home/data/"}, - ) - - d.cfa_update_file_substitutions({"${base}": "/new/location/"}) - self.assertEqual( - d.cfa_file_substitutions(), - {"${base}": "/new/location/", "${base2}": "/home/data/"}, - ) - self.assertEqual( - d.cfa_del_file_substitution("${base}"), - {"${base}": "/new/location/"}, - ) - self.assertEqual( - d.cfa_clear_file_substitutions(), {"${base2}": "/home/data/"} - ) - self.assertFalse(d.cfa_has_file_substitutions()) - self.assertEqual(d.cfa_file_substitutions(), {}) - self.assertEqual(d.cfa_clear_file_substitutions(), {}) - self.assertEqual(d.cfa_del_file_substitution("base"), {}) - - def test_Data_file_location(self): - """Test `Data` file location methods""" - f = cf.example_field(0) - - self.assertEqual( - f.data.add_file_location("/data/model/"), "/data/model" - ) - - cf.write(f, file_A) - d = cf.read(file_A, dask_chunks=4)[0].data - self.assertGreater(d.npartitions, 1) - - e = d.copy() - location = os.path.dirname(os.path.abspath(file_A)) - - self.assertEqual(d.file_locations(), set((location,))) - self.assertEqual(d.add_file_location("/data/model/"), "/data/model") - self.assertEqual(d.file_locations(), set((location, "/data/model"))) - - # Check that we haven't changed 'e' - self.assertEqual(e.file_locations(), set((location,))) - - self.assertEqual(d.del_file_location("/data/model/"), "/data/model") - self.assertEqual(d.file_locations(), set((location,))) - d.del_file_location("/invalid") - self.assertEqual(d.file_locations(), set((location,))) - - def test_Data_todict(self): - """Test Data.todict.""" 
- d = cf.Data([1, 2, 3, 4], chunks=2) - key = d.to_dask_array(_apply_mask_hardness=False).name - - x = d.todict() - self.assertIsInstance(x, dict) - self.assertIn((key, 0), x) - self.assertIn((key, 1), x) - - e = d[0] - x = e.todict() - self.assertIn((key, 0), x) - self.assertNotIn((key, 1), x) - - x = e.todict(optimize_graph=False) - self.assertIsInstance(x, dict) - self.assertIn((key, 0), x) - self.assertIn((key, 1), x) - def test_Data_masked_values(self): """Test Data.masked_values.""" array = np.array([[1, 1.1, 2, 1.1, 3]]) diff --git a/cf/test/test_Field.py b/cf/test/test_Field.py index b514fdbed3..6e147010cd 100644 --- a/cf/test/test_Field.py +++ b/cf/test/test_Field.py @@ -930,7 +930,38 @@ def test_Field_cell_area(self): def test_Field_radius(self): f = self.f.copy() - with self.assertRaises(Exception): + # Radius exists in coordiante references + a = cf.Data(6371007.0, "m") + + r = f.radius(default=None) + self.assertEqual(r.Units, cf.Units("m")) + self.assertEqual(r, a) + + cr = f.coordinate_reference( + "standard_name:atmosphere_hybrid_height_coordinate" + ) + cr.datum.set_parameter("earth_radius", cf.Data(5678, "km")) + + with self.assertRaises(ValueError): + f.radius(default=None) + + cr = f.coordinate_reference( + "standard_name:atmosphere_hybrid_height_coordinate" + ) + cr.datum.del_parameter("earth_radius") + + cr = f.coordinate_reference( + "grid_mapping_name:rotated_latitude_longitude" + ) + cr.datum.set_parameter("earth_radius", cf.Data([123, 456], "m")) + + # Radius doesn't exist in in coordiante references + f = self.f.copy() + + for key in f.coordinate_references(todict=True): + f.del_construct(key) + + with self.assertRaises(ValueError): f.radius() for default in ("earth", cf.field._earth_radius): @@ -957,51 +988,9 @@ def test_Field_radius(self): with self.assertRaises(ValueError): f.radius(default=[12, 34]) - with self.assertRaises(ValueError): - f.radius(default=[[12, 34]]) - with self.assertRaises(ValueError): f.radius(default="qwerty") - 
cr = f.coordinate_reference( - "grid_mapping_name:rotated_latitude_longitude" - ) - cr.datum.set_parameter("earth_radius", a.copy()) - - r = f.radius(default=None) - self.assertEqual(r.Units, cf.Units("m")) - self.assertEqual(r, a) - - cr = f.coordinate_reference( - "standard_name:atmosphere_hybrid_height_coordinate" - ) - cr.datum.set_parameter("earth_radius", a.copy()) - - r = f.radius(default=None) - self.assertEqual(r.Units, cf.Units("m")) - self.assertEqual(r, a) - - cr = f.coordinate_reference( - "standard_name:atmosphere_hybrid_height_coordinate" - ) - cr.datum.set_parameter("earth_radius", cf.Data(5678, "km")) - - with self.assertRaises(ValueError): - f.radius(default=None) - - cr = f.coordinate_reference( - "standard_name:atmosphere_hybrid_height_coordinate" - ) - cr.datum.del_parameter("earth_radius") - - cr = f.coordinate_reference( - "grid_mapping_name:rotated_latitude_longitude" - ) - cr.datum.set_parameter("earth_radius", cf.Data([123, 456], "m")) - - with self.assertRaises(ValueError): - f.radius(default=None) - def test_Field_set_get_del_has_data(self): f = self.f.copy() @@ -1800,15 +1789,17 @@ def test_Field_match_by_construct(self): self.assertTrue(f.match_by_construct("X", "latitude", OR=OR)) self.assertTrue(f.match_by_construct("X", "Y", OR=OR)) self.assertTrue(f.match_by_construct("X", "Y", "latitude", OR=OR)) - self.assertTrue(f.match_by_construct("grid_latitude: max", OR=OR)) + self.assertTrue( + f.match_by_construct("grid_latitude: maximum", OR=OR) + ) self.assertTrue( f.match_by_construct( - "grid_longitude: mean grid_latitude: max", OR=OR + "grid_longitude: mean grid_latitude: maximum", OR=OR ) ) - self.assertTrue(f.match_by_construct("X", "method:max", OR=OR)) + self.assertTrue(f.match_by_construct("X", "method:maximum", OR=OR)) self.assertTrue( - f.match_by_construct("X", "grid_latitude: max", OR=OR) + f.match_by_construct("X", "grid_latitude: maximum", OR=OR) ) self.assertFalse(f.match_by_construct("qwerty")) @@ -1819,12 +1810,12 @@ def 
test_Field_match_by_construct(self): self.assertTrue(f.match_by_construct("X", "qwerty", OR=True)) self.assertTrue( f.match_by_construct( - "X", "qwerty", "method:max", "over:years", OR=True + "X", "qwerty", "method:maximum", "over:years", OR=True ) ) self.assertTrue( f.match_by_construct( - "X", "qwerty", "grid_latitude: max", "over:years", OR=True + "X", "qwerty", "grid_latitude: maximum", "over:years", OR=True ) ) @@ -2864,27 +2855,27 @@ def test_Field_subspace_ugrid(self): self.assertTrue(g.aux("X").data.range() < 30) self.assertTrue(g.aux("Y").data.range() < 50) - def test_Field_file_location(self): - f = cf.example_field(0) - - self.assertEqual(f.add_file_location("/data/model/"), "/data/model") - - cf.write(f, tmpfile) - f = cf.read(tmpfile)[0] - g = f.copy() - location = os.path.dirname(os.path.abspath(tmpfile)) - - self.assertEqual(f.file_locations(), set((location,))) - self.assertEqual(f.add_file_location("/data/model/"), "/data/model") - self.assertEqual(f.file_locations(), set((location, "/data/model"))) - - # Check that we haven't changed 'g' - self.assertEqual(g.file_locations(), set((location,))) - - self.assertEqual(f.del_file_location("/data/model/"), "/data/model") - self.assertEqual(f.file_locations(), set((location,))) - f.del_file_location("/invalid") - self.assertEqual(f.file_locations(), set((location,))) + # def test_Field_file_location(self): + # f = cf.example_field(0) + # + # self.assertEqual(f.add_file_location("/data/model/"), "/data/model") + # + # cf.write(f, tmpfile) + # f = cf.read(tmpfile)[0] + # g = f.copy() + # location = os.path.dirname(os.path.abspath(tmpfile)) + # + # self.assertEqual(f.file_locations(), set((location,))) + # self.assertEqual(f.add_file_location("/data/model/"), "/data/model") + # self.assertEqual(f.file_locations(), set((location, "/data/model"))) + # + # # Check that we haven't changed 'g' + # self.assertEqual(g.file_locations(), set((location,))) + # + # 
self.assertEqual(f.del_file_location("/data/model/"), "/data/model") + # self.assertEqual(f.file_locations(), set((location,))) + # f.del_file_location("/invalid") + # self.assertEqual(f.file_locations(), set((location,))) def test_Field_pad_missing(self): """Test Field.pad_missing.""" diff --git a/cf/test/test_functions.py b/cf/test/test_functions.py index ad7a59c4f3..431ab4ad14 100644 --- a/cf/test/test_functions.py +++ b/cf/test/test_functions.py @@ -367,9 +367,6 @@ def test_size(self): x = da.arange(9) self.assertEqual(cf.size(x), x.size) - def test_CFA(self): - self.assertEqual(cf.CFA(), cf.__cfa_version__) - def test_normalize_slice(self): self.assertEqual(cf.normalize_slice(slice(1, 4), 8), slice(1, 4, 1)) self.assertEqual(cf.normalize_slice(slice(None), 8), slice(0, 8, 1)) diff --git a/cf/test/test_pp.py b/cf/test/test_pp.py index ce75fe1b81..08a85a4eef 100644 --- a/cf/test/test_pp.py +++ b/cf/test/test_pp.py @@ -112,7 +112,7 @@ def test_PP_WGDOS_UNPACKING(self): f = cf.read(self.ppfile)[0] - for cfa in (False, True): + for cfa in (None, "auto"): cf.write(f, tmpfile, cfa=cfa) g = cf.read(tmpfile)[0] From 37127663085242b082a22d8769a913933a6c038b Mon Sep 17 00:00:00 2001 From: David Hassell Date: Thu, 7 Nov 2024 22:22:16 +0000 Subject: [PATCH 02/51] dev --- cf/__init__.py | 10 +- cf/cfimplementation.py | 9 +- cf/data/array/__init__.py | 5 +- cf/data/array/abstract/__init__.py | 3 +- cf/data/array/abstract/array.py | 5 +- cf/data/array/abstract/filearray.py | 160 +++--- cf/data/array/aggregatedarray.py | 9 +- cf/data/array/boundsfromnodesarray.py | 7 +- cf/data/array/cellconnectivityarray.py | 7 +- cf/data/array/cfah5netcdfarray.py | 6 +- cf/data/array/cfanetcdf4array.py | 6 +- cf/data/array/gatheredarray.py | 7 +- cf/data/array/h5netcdfarray.py | 8 +- cf/data/array/locks.py | 8 +- cf/data/array/mixin/__init__.py | 9 +- cf/data/array/mixin/arraymixin.py | 63 +-- cf/data/array/mixin/compressedarraymixin.py | 241 ++++---- cf/data/array/mixin/filearraymixin.py | 
409 +++++++------- cf/data/array/netcdf4array.py | 6 +- cf/data/array/pointtopologyarray.py | 7 +- cf/data/array/raggedcontiguousarray.py | 7 +- cf/data/array/raggedindexedarray.py | 7 +- cf/data/array/raggedindexedcontiguousarray.py | 5 +- cf/data/array/subsampledarray.py | 7 +- cf/data/array/umarray.py | 7 +- cf/data/fragment/__init__.py | 10 +- cf/data/fragment/fullfragmentarray.py | 182 +++--- cf/data/fragment/h5netcdffragmentarray.py | 194 +++---- cf/data/fragment/mixin/fragmentarraymixin.py | 516 +++++++++--------- cf/data/fragment/netcdf4fragmentarray.py | 216 ++++---- cf/data/fragment/netcdffragmentarray.py | 478 ++++++++-------- cf/data/fragment/umfragmentarray.py | 7 +- cf/functions.py | 1 + cf/read_write/netcdf/netcdfread.py | 3 +- cf/read_write/netcdf/netcdfwrite.py | 11 +- cf/read_write/write.py | 3 +- 36 files changed, 1324 insertions(+), 1315 deletions(-) diff --git a/cf/__init__.py b/cf/__init__.py index dc525e0f87..e22a94b116 100644 --- a/cf/__init__.py +++ b/cf/__init__.py @@ -105,7 +105,7 @@ raise ImportError(_error0 + str(error1)) __cf_version__ = cfdm.core.__cf_version__ -#__cfa_version__ = "0.6.2" +# __cfa_version__ = "0.6.2" from packaging.version import Version import importlib.util @@ -276,8 +276,8 @@ AggregatedArray, BoundsFromNodesArray, CellConnectivityArray, -# CFAH5netcdfArray, -# CFANetCDF4Array, + # CFAH5netcdfArray, + # CFANetCDF4Array, FullArray, GatheredArray, H5netcdfArray, @@ -291,11 +291,11 @@ UMArray, ) -#from .data.fragment import ( +# from .data.fragment import ( # FullFragmentArray, # NetCDFFragmentArray, # UMFragmentArray, -#) +# ) from .aggregate import aggregate, climatology_cells from .query import ( diff --git a/cf/cfimplementation.py b/cf/cfimplementation.py index 6ae37806cb..5a059d9bb3 100644 --- a/cf/cfimplementation.py +++ b/cf/cfimplementation.py @@ -26,12 +26,10 @@ TiePointIndex, ) from .data import Data -from .data.array import ( +from .data.array import ( # CFAH5netcdfArray,; CFANetCDF4Array, AggregatedArray, 
BoundsFromNodesArray, CellConnectivityArray, -# CFAH5netcdfArray, - # CFANetCDF4Array, GatheredArray, H5netcdfArray, NetCDF4Array, @@ -115,6 +113,7 @@ def set_construct(self, parent, construct, axes=None, copy=True, **kwargs): parent, construct, axes=axes, copy=copy, **kwargs ) + # def initialise_CFANetCDF4Array(self, **kwargs): # """Return a `CFANetCDF4Array` instance. # @@ -157,8 +156,8 @@ def set_construct(self, parent, construct, axes=None, copy=True, **kwargs): CellConnectivity=CellConnectivity, CellMeasure=CellMeasure, CellMethod=CellMethod, -# CFAH5netcdfArray=CFAH5netcdfArray, -# CFANetCDF4Array=CFANetCDF4Array, + # CFAH5netcdfArray=CFAH5netcdfArray, + # CFANetCDF4Array=CFANetCDF4Array, CoordinateReference=CoordinateReference, DimensionCoordinate=DimensionCoordinate, Domain=Domain, diff --git a/cf/data/array/__init__.py b/cf/data/array/__init__.py index 17a75a4312..54edaa65ec 100644 --- a/cf/data/array/__init__.py +++ b/cf/data/array/__init__.py @@ -1,8 +1,9 @@ from .aggregatedarray import AggregatedArray from .boundsfromnodesarray import BoundsFromNodesArray from .cellconnectivityarray import CellConnectivityArray -#from .cfah5netcdfarray import CFAH5netcdfArray -#from .cfanetcdf4array import CFANetCDF4Array + +# from .cfah5netcdfarray import CFAH5netcdfArray +# from .cfanetcdf4array import CFANetCDF4Array from .fullarray import FullArray from .gatheredarray import GatheredArray from .h5netcdfarray import H5netcdfArray diff --git a/cf/data/array/abstract/__init__.py b/cf/data/array/abstract/__init__.py index ed8033a8e0..afe3c59ad7 100644 --- a/cf/data/array/abstract/__init__.py +++ b/cf/data/array/abstract/__init__.py @@ -1,2 +1,3 @@ from .array import Array -from .filearray import FileArray + +# from .filearray import FileArray diff --git a/cf/data/array/abstract/array.py b/cf/data/array/abstract/array.py index c70931d797..36f0a7ad39 100644 --- a/cf/data/array/abstract/array.py +++ b/cf/data/array/abstract/array.py @@ -1,10 +1,11 @@ import cfdm from 
....mixin_container import Container -from ..mixin import ArrayMixin +# from ..mixin import ArrayMixin -class Array(ArrayMixin, Container, cfdm.Array): + +class Array(Container, cfdm.Array): """Abstract base class for a container of an underlying array. The form of the array is defined by the initialisation parameters diff --git a/cf/data/array/abstract/filearray.py b/cf/data/array/abstract/filearray.py index 750a7f8f31..0681111390 100644 --- a/cf/data/array/abstract/filearray.py +++ b/cf/data/array/abstract/filearray.py @@ -1,80 +1,80 @@ -from ....functions import _DEPRECATION_ERROR_ATTRIBUTE -from ..mixin import FileArrayMixin -from .array import Array - - -class FileArray(FileArrayMixin, Array): - """Abstract base class for an array stored in a file.""" - - def __getitem__(self, indices): - """Return a subspace of the array. - - x.__getitem__(indices) <==> x[indices] - - Returns a subspace of the array as an independent numpy array. - - """ - raise NotImplementedError( - f"Must implement {self.__class__.__name__}.__getitem__" - ) # pragma: no cover - - def __repr__(self): - """x.__repr__() <==> repr(x)""" - return f"" - - def __str__(self): - """x.__str__() <==> str(x)""" - return f"{self.get_filename()}, {self.get_address()}" - - @property - def dtype(self): - """Data-type of the array.""" - return self._get_component("dtype") - - @property - def filename(self): - """The name of the file containing the array. - - Deprecated at version 3.14.0. Use method `get_filename` instead. 
- - """ - _DEPRECATION_ERROR_ATTRIBUTE( - self, - "filename", - message="Use method 'get_filename' instead.", - version="3.14.0", - removed_at="5.0.0", - ) # pragma: no cover - - @property - def shape(self): - """Shape of the array.""" - return self._get_component("shape") - - def close(self): - """Close the dataset containing the data.""" - raise NotImplementedError( - f"Must implement {self.__class__.__name__}.close" - ) # pragma: no cover - - def get_address(self): - """The address in the file of the variable. - - .. versionadded:: 3.14.0 - - :Returns: - - `str` or `None` - The address, or `None` if there isn't one. - - """ - raise NotImplementedError( - f"Must implement {self.__class__.__name__}.get_address " - "in subclasses" - ) # pragma: no cover - - def open(self): - """Returns an open dataset containing the data array.""" - raise NotImplementedError( - f"Must implement {self.__class__.__name__}.open" - ) # pragma: no cover +# from ....functions import _DEPRECATION_ERROR_ATTRIBUTE +# from ..mixin import FileArrayMixin +# from .array import Array +# +# +# class FileArray(FileArrayMixin, Array): +# """Abstract base class for an array stored in a file.""" +# +# def __getitem__(self, indices): +# """Return a subspace of the array. +# +# x.__getitem__(indices) <==> x[indices] +# +# Returns a subspace of the array as an independent numpy array. +# +# """ +# raise NotImplementedError( +# f"Must implement {self.__class__.__name__}.__getitem__" +# ) # pragma: no cover +# +# def __repr__(self): +# """x.__repr__() <==> repr(x)""" +# return f"" +# +# def __str__(self): +# """x.__str__() <==> str(x)""" +# return f"{self.get_filename()}, {self.get_address()}" +# +## @property +## def dtype(self): +## """Data-type of the array.""" +## return self._get_component("dtype") +# +# @property +# def filename(self): +# """The name of the file containing the array. +# +# Deprecated at version 3.14.0. Use method `get_filename` instead. 
+# +# """ +# _DEPRECATION_ERROR_ATTRIBUTE( +# self, +# "filename", +# message="Use method 'get_filename' instead.", +# version="3.14.0", +# removed_at="5.0.0", +# ) # pragma: no cover +# +# @property +# def shape(self): +# """Shape of the array.""" +# return self._get_component("shape") +# +# def close(self): +# """Close the dataset containing the data.""" +# raise NotImplementedError( +# f"Must implement {self.__class__.__name__}.close" +# ) # pragma: no cover +# +# def get_address(self): +# """The address in the file of the variable. +# +# .. versionadded:: 3.14.0 +# +# :Returns: +# +# `str` or `None` +# The address, or `None` if there isn't one. +# +# """ +# raise NotImplementedError( +# f"Must implement {self.__class__.__name__}.get_address " +# "in subclasses" +# ) # pragma: no cover +# +# def open(self): +# """Returns an open dataset containing the data array.""" +# raise NotImplementedError( +# f"Must implement {self.__class__.__name__}.open" +# ) # pragma: no cover diff --git a/cf/data/array/aggregatedarray.py b/cf/data/array/aggregatedarray.py index 3707325bb0..14db9edf9b 100644 --- a/cf/data/array/aggregatedarray.py +++ b/cf/data/array/aggregatedarray.py @@ -1,14 +1,15 @@ import cfdm from ...mixin_container import Container -from .mixin import ArrayMixin, FileArrayMixin + +# from .mixin import ArrayMixin, FileArrayMixin class AggregatedArray( - FileArrayMixin, - ArrayMixin, + # FileArrayMixin, + # ArrayMixin, Container, - cfdm.AggregatedArray + cfdm.AggregatedArray, ): """An array stored in a CF aggregation variable. 
diff --git a/cf/data/array/boundsfromnodesarray.py b/cf/data/array/boundsfromnodesarray.py index e65177fcd1..b8a32f6c1f 100644 --- a/cf/data/array/boundsfromnodesarray.py +++ b/cf/data/array/boundsfromnodesarray.py @@ -1,12 +1,13 @@ import cfdm from ...mixin_container import Container -from .mixin import ArrayMixin, CompressedArrayMixin + +# from .mixin import ArrayMixin, CompressedArrayMixin class BoundsFromNodesArray( - CompressedArrayMixin, - ArrayMixin, + # CompressedArrayMixin, + # ArrayMixin, Container, cfdm.BoundsFromNodesArray, ): diff --git a/cf/data/array/cellconnectivityarray.py b/cf/data/array/cellconnectivityarray.py index 6f631176d8..5202b3f5c7 100644 --- a/cf/data/array/cellconnectivityarray.py +++ b/cf/data/array/cellconnectivityarray.py @@ -1,12 +1,13 @@ import cfdm from ...mixin_container import Container -from .mixin import ArrayMixin, CompressedArrayMixin + +# from .mixin import ArrayMixin, CompressedArrayMixin class CellConnectivityArray( - CompressedArrayMixin, - ArrayMixin, + # CompressedArrayMixin, + # ArrayMixin, Container, cfdm.CellConnectivityArray, ): diff --git a/cf/data/array/cfah5netcdfarray.py b/cf/data/array/cfah5netcdfarray.py index 30e1cd0500..950b9d7b44 100644 --- a/cf/data/array/cfah5netcdfarray.py +++ b/cf/data/array/cfah5netcdfarray.py @@ -1,8 +1,8 @@ -#from .h5netcdfarray import H5netcdfArray -#from .mixin import CFAMixin +# from .h5netcdfarray import H5netcdfArray +# from .mixin import CFAMixin # # -#class CFAH5netcdfArray(CFAMixin, H5netcdfArray): +# class CFAH5netcdfArray(CFAMixin, H5netcdfArray): # """A CFA-netCDF array accessed with `h5netcdf` # # .. 
versionadded:: NEXTVERSION diff --git a/cf/data/array/cfanetcdf4array.py b/cf/data/array/cfanetcdf4array.py index 65e53d8abd..c0c991e496 100644 --- a/cf/data/array/cfanetcdf4array.py +++ b/cf/data/array/cfanetcdf4array.py @@ -1,8 +1,8 @@ -#from .mixin import CFAMixin -#from .netcdf4array import NetCDF4Array +# from .mixin import CFAMixin +# from .netcdf4array import NetCDF4Array # # -#class CFANetCDF4Array(CFAMixin, NetCDF4Array): +# class CFANetCDF4Array(CFAMixin, NetCDF4Array): # """A CFA-netCDF array accessed with `netCDF4`. # # .. versionadded:: NEXTVERSION diff --git a/cf/data/array/gatheredarray.py b/cf/data/array/gatheredarray.py index 607b1fb0b7..c110d879d1 100644 --- a/cf/data/array/gatheredarray.py +++ b/cf/data/array/gatheredarray.py @@ -1,12 +1,11 @@ import cfdm from ...mixin_container import Container -from .mixin import ArrayMixin, CompressedArrayMixin +# from .mixin import CompressedArrayMixin -class GatheredArray( - CompressedArrayMixin, ArrayMixin, Container, cfdm.GatheredArray -): + +class GatheredArray(Container, cfdm.GatheredArray): """An underlying gathered array. 
Compression by gathering combines axes of a multidimensional array diff --git a/cf/data/array/h5netcdfarray.py b/cf/data/array/h5netcdfarray.py index 465a9ebf81..4d083f5186 100644 --- a/cf/data/array/h5netcdfarray.py +++ b/cf/data/array/h5netcdfarray.py @@ -3,18 +3,16 @@ from ...mixin_container import Container # from .locks import netcdf_lock -from .mixin import ( # , IndexMixin +from .mixin import ( # , IndexMixin; ArrayMixin,; FileArrayMixin, ActiveStorageMixin, - ArrayMixin, - FileArrayMixin, ) class H5netcdfArray( ActiveStorageMixin, # IndexMixin, - FileArrayMixin, - ArrayMixin, + # FileArrayMixin, + # ArrayMixin, Container, cfdm.H5netcdfArray, ): diff --git a/cf/data/array/locks.py b/cf/data/array/locks.py index 5a7b2bd333..e29bdf6387 100644 --- a/cf/data/array/locks.py +++ b/cf/data/array/locks.py @@ -1,4 +1,4 @@ -from dask.utils import SerializableLock - -# Global lock for netCDF file access -netcdf_lock = SerializableLock() +# from dask.utils import SerializableLock +# +## Global lock for netCDF file access +# netcdf_lock = SerializableLock() diff --git a/cf/data/array/mixin/__init__.py b/cf/data/array/mixin/__init__.py index f8f0ebca66..0cd2ce966c 100644 --- a/cf/data/array/mixin/__init__.py +++ b/cf/data/array/mixin/__init__.py @@ -1,7 +1,8 @@ from .activestoragemixin import ActiveStorageMixin -from .arraymixin import ArrayMixin -#from .cfamixin import CFAMixin -from .compressedarraymixin import CompressedArrayMixin -from .filearraymixin import FileArrayMixin + +# from .arraymixin import ArrayMixin +# from .cfamixin import CFAMixin +# from .compressedarraymixin import CompressedArrayMixin +# from .filearraymixin import FileArrayMixin # from .indexmixin import IndexMixin diff --git a/cf/data/array/mixin/arraymixin.py b/cf/data/array/mixin/arraymixin.py index 3468253b36..809f5f38a6 100644 --- a/cf/data/array/mixin/arraymixin.py +++ b/cf/data/array/mixin/arraymixin.py @@ -10,35 +10,36 @@ class ArrayMixin: """ - def __array_function__(self, func, types, 
args, kwargs): - """Implement the `numpy` ``__array_function__`` protocol. - .. versionadded:: 3.14.0 - - """ - return NotImplemented - - @property - def _meta(self): - """Normalise the array to an appropriate Dask meta object. - - The Dask meta can be thought of as a suggestion to Dask. Dask - uses this meta to generate the task graph until it can infer - the actual metadata from the values. It does not force the - output to have the structure or dtype of the specified meta. - - .. versionadded:: NEXTVERSION - - .. seealso:: `dask.utils.meta_from_array` - - """ - return np.array((), dtype=self.dtype) - - @property - def Units(self): - """The `cf.Units` object containing the units of the array. - - .. versionadded:: 3.14.0 - - """ - return Units(self.get_units(None), self.get_calendar(None)) +# def __array_function__(self, func, types, args, kwargs): +# """Implement the `numpy` ``__array_function__`` protocol. +# +# .. versionadded:: 3.14.0 +# +# """ +# return NotImplemented +# +# @property +# def _meta(self): +# """Normalise the array to an appropriate Dask meta object. +# +# The Dask meta can be thought of as a suggestion to Dask. Dask +# uses this meta to generate the task graph until it can infer +# the actual metadata from the values. It does not force the +# output to have the structure or dtype of the specified meta. +# +# .. versionadded:: NEXTVERSION +# +# .. seealso:: `dask.utils.meta_from_array` +# +# """ +# # return np.array((), dtype=self.dtype) +# +# @property +# def Units(self): +# """The `cf.Units` object containing the units of the array. +# +# .. 
versionadded:: 3.14.0 +# +# """ +# return Units(self.get_units(None), self.get_calendar(None)) diff --git a/cf/data/array/mixin/compressedarraymixin.py b/cf/data/array/mixin/compressedarraymixin.py index 8a1d5dfbe1..0a471e7e57 100644 --- a/cf/data/array/mixin/compressedarraymixin.py +++ b/cf/data/array/mixin/compressedarraymixin.py @@ -8,124 +8,125 @@ class CompressedArrayMixin: """ - def _lock_file_read(self, array): - """Try to return a dask array that does not support concurrent - reads. - .. versionadded:: 3.14.0 - - :Parameters: - - array: array_like - The array to process. - - :Returns" - - `dask.array.Array` or array_like - The new `dask` array, or the orginal array if it - couldn't be ascertained how to form the `dask` array. - - """ - try: - return array.to_dask_array() - except AttributeError: - pass - - try: - chunks = array.chunks - except AttributeError: - chunks = "auto" - - try: - array = array.source() - except (ValueError, AttributeError): - pass - - try: - array.get_filenames() - except AttributeError: - pass - else: - array = da.from_array(array, chunks=chunks, lock=True) - - return array - - def to_dask_array(self, chunks="auto"): - """Convert the data to a `dask` array. - - .. versionadded:: 3.14.0 - - :Parameters: - - chunks: `int`, `tuple`, `dict` or `str`, optional - Specify the chunking of the returned dask array. - - Any value accepted by the *chunks* parameter of the - `dask.array.from_array` function is allowed. - - The chunk sizes implied by *chunks* for a dimension that - has been fragmented are ignored and replaced with values - that are implied by that dimensions fragment sizes. - - :Returns: - - `dask.array.Array` - The `dask` array representation. 
- - """ - from functools import partial - - import dask.array as da - from cfdm.data.utils import normalize_chunks - from dask import config - from dask.array.core import getter - from dask.base import tokenize - - name = (f"{self.__class__.__name__}-{tokenize(self)}",) - - dtype = self.dtype - - context = partial(config.set, scheduler="synchronous") - - # If possible, convert the compressed data to a dask array - # that doesn't support concurrent reads. This prevents - # "compute called by compute" failures problems at compute - # time. - # - # TODO: This won't be necessary if this is refactored so that - # the compressed data is part of the same dask graph as - # the compressed subarrays. - conformed_data = self.conformed_data() - conformed_data = { - k: self._lock_file_read(v) for k, v in conformed_data.items() - } - subarray_kwargs = {**conformed_data, **self.subarray_parameters()} - - # Get the (cfdm) subarray class - Subarray = self.get_Subarray() - subarray_name = Subarray().__class__.__name__ - - # Set the chunk sizes for the dask array - chunks = normalize_chunks( - self.subarray_shapes(chunks), - shape=self.shape, - dtype=dtype, - ) - - dsk = {} - for u_indices, u_shape, c_indices, chunk_location in zip( - *self.subarrays(chunks) - ): - subarray = Subarray( - indices=c_indices, - shape=u_shape, - context_manager=context, - **subarray_kwargs, - ) - - key = f"{subarray_name}-{tokenize(subarray)}" - dsk[key] = subarray - dsk[name + chunk_location] = (getter, key, Ellipsis, False, False) - - # Return the dask array - return da.Array(dsk, name[0], chunks=chunks, dtype=dtype) +# def _lock_file_read(self, array): +# """Try to return a dask array that does not support concurrent +# reads. +# +# .. versionadded:: 3.14.0 +# +# :Parameters: +# +# array: array_like +# The array to process. +# +# :Returns" +# +# `dask.array.Array` or array_like +# The new `dask` array, or the orginal array if it +# couldn't be ascertained how to form the `dask` array. 
+# +# """ +# try: +# return array.to_dask_array() +# except AttributeError: +# pass +# +# try: +# chunks = array.chunks +# except AttributeError: +# chunks = "auto" +# +# try: +# array = array.source() +# except (ValueError, AttributeError): +# pass +# +# try: +# array.get_filenames() +# except AttributeError: +# pass +# else: +# array = da.from_array(array, chunks=chunks, lock=True) +# +# return array +# +# def to_dask_array(self, chunks="auto"): +# """Convert the data to a `dask` array. +# +# .. versionadded:: 3.14.0 +# +# :Parameters: +# +# chunks: `int`, `tuple`, `dict` or `str`, optional +# Specify the chunking of the returned dask array. +# +# Any value accepted by the *chunks* parameter of the +# `dask.array.from_array` function is allowed. +# +# The chunk sizes implied by *chunks* for a dimension that +# has been fragmented are ignored and replaced with values +# that are implied by that dimensions fragment sizes. +# +# :Returns: +# +# `dask.array.Array` +# The `dask` array representation. +# +# """ +# from functools import partial +# +# import dask.array as da +# from cfdm.data.utils import normalize_chunks +# from dask import config +# from dask.array.core import getter +# from dask.base import tokenize +# +# name = (f"{self.__class__.__name__}-{tokenize(self)}",) +# +# dtype = self.dtype +# +# context = partial(config.set, scheduler="synchronous") +# +# # If possible, convert the compressed data to a dask array +# # that doesn't support concurrent reads. This prevents +# # "compute called by compute" failures problems at compute +# # time. +# # +# # TODO: This won't be necessary if this is refactored so that +# # the compressed data is part of the same dask graph as +# # the compressed subarrays. 
+# conformed_data = self.conformed_data() +# conformed_data = { +# k: self._lock_file_read(v) for k, v in conformed_data.items() +# } +# subarray_kwargs = {**conformed_data, **self.subarray_parameters()} +# +# # Get the (cfdm) subarray class +# Subarray = self.get_Subarray() +# subarray_name = Subarray().__class__.__name__ +# +# # Set the chunk sizes for the dask array +# chunks = normalize_chunks( +# self.subarray_shapes(chunks), +# shape=self.shape, +# dtype=dtype, +# ) +# +# dsk = {} +# for u_indices, u_shape, c_indices, chunk_location in zip( +# *self.subarrays(chunks) +# ): +# subarray = Subarray( +# indices=c_indices, +# shape=u_shape, +# context_manager=context, +# **subarray_kwargs, +# ) +# +# key = f"{subarray_name}-{tokenize(subarray)}" +# dsk[key] = subarray +# dsk[name + chunk_location] = (getter, key, Ellipsis, False, False) +# +# # Return the dask array +# return da.Array(dsk, name[0], chunks=chunks, dtype=dtype) diff --git a/cf/data/array/mixin/filearraymixin.py b/cf/data/array/mixin/filearraymixin.py index b5b314b9e2..238d3ceb88 100644 --- a/cf/data/array/mixin/filearraymixin.py +++ b/cf/data/array/mixin/filearraymixin.py @@ -11,208 +11,209 @@ class FileArrayMixin: """ - def __dask_tokenize__(self): - """Return a value fully representative of the object. - .. versionadded:: 3.15.0 - - """ - return ( - self.__class__, - self.shape, - self.get_filenames(), - self.get_addresses(), - ) - - @property - def filename(self): - """The name of the file containing the array. - - Deprecated at version 3.14.0. Use method `get_filename` instead. - - """ - _DEPRECATION_ERROR_ATTRIBUTE( - self, - "filename", - message="Use method 'get_filename' instead.", - version="3.14.0", - removed_at="5.0.0", - ) # pragma: no cover - - def del_file_location(self, location): - """Remove reference to files in the given location. - - .. versionadded:: 3.15.0 - - :Parameters: - - location: `str` - The file location to remove. 
- - :Returns: - - `{{class}}` - A new {{class}} with reference to files in *location* - removed. - - **Examples** - - >>> a.get_filenames() - ('/data1/file1', '/data2/file2') - >>> a.get_addresses() - ('tas1', 'tas2') - >>> b = a.del_file_location('/data1') - >>> b = get_filenames() - ('/data2/file2',) - >>> b.get_addresses() - ('tas2',) - - >>> a.get_filenames() - ('/data1/file1', '/data2/file1', '/data2/file2') - >>> a.get_addresses() - ('tas1', 'tas1', 'tas2') - >>> b = a.del_file_location('/data2') - >>> b.get_filenames() - ('/data1/file1',) - >>> b.get_addresses() - ('tas1',) - - """ - location = abspath(location).rstrip(sep) - - new_filenames = [] - new_addresses = [] - for filename, address in zip( - self.get_filenames(), self.get_addresses() - ): - if dirname(filename) != location: - new_filenames.append(filename) - new_addresses.append(address) - - if not new_filenames: - raise ValueError( - "Can't delete a file location when it results in there " - "being no files" - ) - - a = self.copy() - a._set_component("filename", tuple(new_filenames), copy=False) - a._set_component("address", tuple(new_addresses), copy=False) - return a - - def file_locations(self): - """The locations of the files, any of which may contain the data. - - .. versionadded:: 3.15.0 - - :Returns: - - `tuple` - The file locations, one for each file, as absolute - paths with no trailing path name component separator. - - **Examples** - - >>> a.get_filenames() - ('/data1/file1',) - >>> a.file_locations() - ('/data1,) - - >>> a.get_filenames() - ('/data1/file1', '/data2/file2') - >>> a.file_locations() - ('/data1', '/data2') - - >>> a.get_filenames() - ('/data1/file1', '/data2/file2', '/data1/file2') - >>> a.file_locations() - ('/data1', '/data2', '/data1') - - """ - return tuple(map(dirname, self.get_filenames())) - - def add_file_location(self, location): - """Add a new file location. - - All existing files are additionally referenced from the given - location. - - .. 
versionadded:: 3.15.0 - - :Parameters: - - location: `str` - The new location. - - :Returns: - - `{{class}}` - A new {{class}} with all previous files additionally - referenced from *location*. - - **Examples** - - >>> a.get_filenames() - ('/data1/file1',) - >>> a.get_addresses() - ('tas',) - >>> b = a.add_file_location('/home') - >>> b.get_filenames() - ('/data1/file1', '/home/file1') - >>> b.get_addresses() - ('tas', 'tas') - - >>> a.get_filenames() - ('/data1/file1', '/data2/file2',) - >>> a.get_addresses() - ('tas', 'tas') - >>> b = a.add_file_location('/home/') - >>> b = get_filenames() - ('/data1/file1', '/data2/file2', '/home/file1', '/home/file2') - >>> b.get_addresses() - ('tas', 'tas', 'tas', 'tas') - - >>> a.get_filenames() - ('/data1/file1', '/data2/file1',) - >>> a.get_addresses() - ('tas1', 'tas2') - >>> b = a.add_file_location('/home/') - >>> b.get_filenames() - ('/data1/file1', '/data2/file1', '/home/file1') - >>> b.get_addresses() - ('tas1', 'tas2', 'tas1') - - >>> a.get_filenames() - ('/data1/file1', '/data2/file1',) - >>> a.get_addresses() - ('tas1', 'tas2') - >>> b = a.add_file_location('/data1') - >>> b.get_filenames() - ('/data1/file1', '/data2/file1') - >>> b.get_addresses() - ('tas1', 'tas2') - - """ - location = abspath(location).rstrip(sep) - - filenames = self.get_filenames() - addresses = self.get_addresses() - - # Note: It is assumed that each existing file name is either - # an absolute path or a fully qualified URI. 
- new_filenames = list(filenames) - new_addresses = list(addresses) - for filename, address in zip(filenames, addresses): - new_filename = join(location, basename(filename)) - if new_filename not in new_filenames: - new_filenames.append(new_filename) - new_addresses.append(address) - - a = self.copy() - a._set_component("filename", tuple(new_filenames), copy=False) - a._set_component( - "address", - tuple(new_addresses), - copy=False, - ) - return a +# def __dask_tokenize__(self): +# """Return a value fully representative of the object. +# +# .. versionadded:: 3.15.0 +# +# """ +# return ( +# self.__class__, +# self.shape, +# self.get_filenames(), +# self.get_addresses(), +# ) +# +# @property +# def filename(self): +# """The name of the file containing the array. +# +# Deprecated at version 3.14.0. Use method `get_filename` instead. +# +# """ +# _DEPRECATION_ERROR_ATTRIBUTE( +# self, +# "filename", +# message="Use method 'get_filename' instead.", +# version="3.14.0", +# removed_at="5.0.0", +# ) # pragma: no cover +# +# def del_file_location(self, location): +# """Remove reference to files in the given location. +# +# .. versionadded:: 3.15.0 +# +# :Parameters: +# +# location: `str` +# The file location to remove. +# +# :Returns: +# +# `{{class}}` +# A new {{class}} with reference to files in *location* +# removed. 
+# +# **Examples** +# +# >>> a.get_filenames() +# ('/data1/file1', '/data2/file2') +# >>> a.get_addresses() +# ('tas1', 'tas2') +# >>> b = a.del_file_location('/data1') +# >>> b = get_filenames() +# ('/data2/file2',) +# >>> b.get_addresses() +# ('tas2',) +# +# >>> a.get_filenames() +# ('/data1/file1', '/data2/file1', '/data2/file2') +# >>> a.get_addresses() +# ('tas1', 'tas1', 'tas2') +# >>> b = a.del_file_location('/data2') +# >>> b.get_filenames() +# ('/data1/file1',) +# >>> b.get_addresses() +# ('tas1',) +# +# """ +# location = abspath(location).rstrip(sep) +# +# new_filenames = [] +# new_addresses = [] +# for filename, address in zip( +# self.get_filenames(), self.get_addresses() +# ): +# if dirname(filename) != location: +# new_filenames.append(filename) +# new_addresses.append(address) +# +# if not new_filenames: +# raise ValueError( +# "Can't delete a file location when it results in there " +# "being no files" +# ) +# +# a = self.copy() +# a._set_component("filename", tuple(new_filenames), copy=False) +# a._set_component("address", tuple(new_addresses), copy=False) +# return a +# +# def file_locations(self): +# """The locations of the files, any of which may contain the data. +# +# .. versionadded:: 3.15.0 +# +# :Returns: +# +# `tuple` +# The file locations, one for each file, as absolute +# paths with no trailing path name component separator. +# +# **Examples** +# +# >>> a.get_filenames() +# ('/data1/file1',) +# >>> a.file_locations() +# ('/data1,) +# +# >>> a.get_filenames() +# ('/data1/file1', '/data2/file2') +# >>> a.file_locations() +# ('/data1', '/data2') +# +# >>> a.get_filenames() +# ('/data1/file1', '/data2/file2', '/data1/file2') +# >>> a.file_locations() +# ('/data1', '/data2', '/data1') +# +# """ +# return tuple(map(dirname, self.get_filenames())) +# +# def add_file_location(self, location): +# """Add a new file location. +# +# All existing files are additionally referenced from the given +# location. +# +# .. 
versionadded:: 3.15.0 +# +# :Parameters: +# +# location: `str` +# The new location. +# +# :Returns: +# +# `{{class}}` +# A new {{class}} with all previous files additionally +# referenced from *location*. +# +# **Examples** +# +# >>> a.get_filenames() +# ('/data1/file1',) +# >>> a.get_addresses() +# ('tas',) +# >>> b = a.add_file_location('/home') +# >>> b.get_filenames() +# ('/data1/file1', '/home/file1') +# >>> b.get_addresses() +# ('tas', 'tas') +# +# >>> a.get_filenames() +# ('/data1/file1', '/data2/file2',) +# >>> a.get_addresses() +# ('tas', 'tas') +# >>> b = a.add_file_location('/home/') +# >>> b = get_filenames() +# ('/data1/file1', '/data2/file2', '/home/file1', '/home/file2') +# >>> b.get_addresses() +# ('tas', 'tas', 'tas', 'tas') +# +# >>> a.get_filenames() +# ('/data1/file1', '/data2/file1',) +# >>> a.get_addresses() +# ('tas1', 'tas2') +# >>> b = a.add_file_location('/home/') +# >>> b.get_filenames() +# ('/data1/file1', '/data2/file1', '/home/file1') +# >>> b.get_addresses() +# ('tas1', 'tas2', 'tas1') +# +# >>> a.get_filenames() +# ('/data1/file1', '/data2/file1',) +# >>> a.get_addresses() +# ('tas1', 'tas2') +# >>> b = a.add_file_location('/data1') +# >>> b.get_filenames() +# ('/data1/file1', '/data2/file1') +# >>> b.get_addresses() +# ('tas1', 'tas2') +# +# """ +# location = abspath(location).rstrip(sep) +# +# filenames = self.get_filenames() +# addresses = self.get_addresses() +# +# # Note: It is assumed that each existing file name is either +# # an absolute path or a fully qualified URI. 
+# new_filenames = list(filenames) +# new_addresses = list(addresses) +# for filename, address in zip(filenames, addresses): +# new_filename = join(location, basename(filename)) +# if new_filename not in new_filenames: +# new_filenames.append(new_filename) +# new_addresses.append(address) +# +# a = self.copy() +# a._set_component("filename", tuple(new_filenames), copy=False) +# a._set_component( +# "address", +# tuple(new_addresses), +# copy=False, +# ) +# return a diff --git a/cf/data/array/netcdf4array.py b/cf/data/array/netcdf4array.py index f8eb5e8ad5..4046305671 100644 --- a/cf/data/array/netcdf4array.py +++ b/cf/data/array/netcdf4array.py @@ -1,13 +1,13 @@ import cfdm from ...mixin_container import Container -from .mixin import ActiveStorageMixin, ArrayMixin, FileArrayMixin +from .mixin import ActiveStorageMixin # , ArrayMixin #,FileArrayMixin class NetCDF4Array( ActiveStorageMixin, - FileArrayMixin, - ArrayMixin, + # FileArrayMixin, + # ArrayMixin, Container, cfdm.NetCDF4Array, ): diff --git a/cf/data/array/pointtopologyarray.py b/cf/data/array/pointtopologyarray.py index d5c00a2ae4..f63d407ef2 100644 --- a/cf/data/array/pointtopologyarray.py +++ b/cf/data/array/pointtopologyarray.py @@ -1,12 +1,13 @@ import cfdm from ...mixin_container import Container -from .mixin import ArrayMixin, CompressedArrayMixin + +# from .mixin import ArrayMixin, CompressedArrayMixin class PointTopologyArray( - CompressedArrayMixin, - ArrayMixin, + # CompressedArrayMixin, + # ArrayMixin, Container, cfdm.PointTopologyArray, ): diff --git a/cf/data/array/raggedcontiguousarray.py b/cf/data/array/raggedcontiguousarray.py index 145e8c22db..365d263423 100644 --- a/cf/data/array/raggedcontiguousarray.py +++ b/cf/data/array/raggedcontiguousarray.py @@ -1,12 +1,11 @@ import cfdm from ...mixin_container import Container -from .mixin import ArrayMixin, CompressedArrayMixin +# from .mixin import ArrayMixin, CompressedArrayMixin -class RaggedContiguousArray( - CompressedArrayMixin, ArrayMixin, 
Container, cfdm.RaggedContiguousArray -): + +class RaggedContiguousArray(Container, cfdm.RaggedContiguousArray): """An underlying contiguous ragged array. A collection of features stored using a contiguous ragged array diff --git a/cf/data/array/raggedindexedarray.py b/cf/data/array/raggedindexedarray.py index 974327ed26..73f39e7007 100644 --- a/cf/data/array/raggedindexedarray.py +++ b/cf/data/array/raggedindexedarray.py @@ -1,12 +1,11 @@ import cfdm from ...mixin_container import Container -from .mixin import ArrayMixin, CompressedArrayMixin +# from .mixin import ArrayMixin, CompressedArrayMixin -class RaggedIndexedArray( - CompressedArrayMixin, ArrayMixin, Container, cfdm.RaggedIndexedArray -): + +class RaggedIndexedArray(Container, cfdm.RaggedIndexedArray): """An underlying indexed ragged array. A collection of features stored using an indexed ragged array diff --git a/cf/data/array/raggedindexedcontiguousarray.py b/cf/data/array/raggedindexedcontiguousarray.py index 13f65737be..3bd28f4e1f 100644 --- a/cf/data/array/raggedindexedcontiguousarray.py +++ b/cf/data/array/raggedindexedcontiguousarray.py @@ -1,12 +1,11 @@ import cfdm from ...mixin_container import Container -from .mixin import ArrayMixin, CompressedArrayMixin + +# from .mixin import ArrayMixin, CompressedArrayMixin class RaggedIndexedContiguousArray( - CompressedArrayMixin, - ArrayMixin, Container, cfdm.RaggedIndexedContiguousArray, ): diff --git a/cf/data/array/subsampledarray.py b/cf/data/array/subsampledarray.py index 7d8e24f5c5..71fae12925 100644 --- a/cf/data/array/subsampledarray.py +++ b/cf/data/array/subsampledarray.py @@ -1,12 +1,11 @@ import cfdm from ...mixin_container import Container -from .mixin import ArrayMixin, CompressedArrayMixin +# from .mixin import ArrayMixin, CompressedArrayMixin -class SubsampledArray( - CompressedArrayMixin, ArrayMixin, Container, cfdm.SubsampledArray -): + +class SubsampledArray(Container, cfdm.SubsampledArray): """An underlying subsampled array. 
For some structured coordinate data (e.g. coordinates describing diff --git a/cf/data/array/umarray.py b/cf/data/array/umarray.py index e0d113bddb..416bd96820 100644 --- a/cf/data/array/umarray.py +++ b/cf/data/array/umarray.py @@ -4,13 +4,14 @@ from ...functions import _DEPRECATION_ERROR_ATTRIBUTE, load_stash2standard_name from ...umread_lib.umfile import File, Rec from .abstract import Array -from .mixin import FileArrayMixin + +# from .mixin import FileArrayMixin class UMArray( - FileArrayMixin, + # FileArrayMixin, cfdm.data.mixin.IndexMixin, - cfdm.data.mixin.FileArrayMixin, + # cfdm.data.mixin.FileArrayMixin, cfdm.data.abstract.FileArray, Array, ): diff --git a/cf/data/fragment/__init__.py b/cf/data/fragment/__init__.py index c97dacf225..34903d248c 100644 --- a/cf/data/fragment/__init__.py +++ b/cf/data/fragment/__init__.py @@ -1,5 +1,5 @@ -#from .fullfragmentarray import FullFragmentArray -#from .h5netcdffragmentarray import H5netcdfFragmentArray -#from .netcdffragmentarray import NetCDFFragmentArray -#from .netcdf4fragmentarray import NetCDF4FragmentArray -#from .umfragmentarray import UMFragmentArray +# from .fullfragmentarray import FullFragmentArray +# from .h5netcdffragmentarray import H5netcdfFragmentArray +# from .netcdffragmentarray import NetCDFFragmentArray +# from .netcdf4fragmentarray import NetCDF4FragmentArray +from .umfragmentarray import UMFragmentArray diff --git a/cf/data/fragment/fullfragmentarray.py b/cf/data/fragment/fullfragmentarray.py index e2855b3003..56d6aead43 100644 --- a/cf/data/fragment/fullfragmentarray.py +++ b/cf/data/fragment/fullfragmentarray.py @@ -1,91 +1,91 @@ -from ..array.fullarray import FullArray -from .mixin import FragmentArrayMixin - - -class FullFragmentArray(FragmentArrayMixin, FullArray): - """A CFA fragment array that is filled with a value. - - .. 
versionadded:: 3.15.0 - - """ - - def __init__( - self, - fill_value=None, - dtype=None, - shape=None, - aggregated_units=False, - aggregated_calendar=False, - attributes=None, - source=None, - copy=True, - ): - """**Initialisation** - - :Parameters: - - fill_value: scalar - The fill value. - - dtype: `numpy.dtype` - The data type of the aggregated array. May be `None` - if the numpy data-type is not known (which can be the - case for netCDF string types, for example). This may - differ from the data type of the netCDF fragment - variable. - - shape: `tuple` - The shape of the fragment within the aggregated - array. This may differ from the shape of the netCDF - fragment variable in that the latter may have fewer - size 1 dimensions. - - {{init attributes: `dict` or `None`, optional}} - - .. versionadded:: NEXTVERSION - - {{aggregated_units: `str` or `None`, optional}} - - {{aggregated_calendar: `str` or `None`, optional}} - - {{init source: optional}} - - {{init copy: `bool`, optional}} - - units: `str` or `None`, optional - Deprecated at version NEXTVERSION. Use the - *attributes* parameter instead. - - calendar: `str` or `None`, optional - Deprecated at version NEXTVERSION. Use the - *attributes* parameter instead. 
- - """ - super().__init__( - fill_value=fill_value, - dtype=dtype, - shape=shape, - attributes=attributes, - source=source, - copy=False, - ) - - if source is not None: - try: - aggregated_units = source._get_component( - "aggregated_units", False - ) - except AttributeError: - aggregated_units = False - - try: - aggregated_calendar = source._get_component( - "aggregated_calendar", False - ) - except AttributeError: - aggregated_calendar = False - - self._set_component("aggregated_units", aggregated_units, copy=False) - self._set_component( - "aggregated_calendar", aggregated_calendar, copy=False - ) +# from ..array.fullarray import FullArray +# from .mixin import FragmentArrayMixin +# +# +# class FullFragmentArray(FragmentArrayMixin, FullArray): +# """A CFA fragment array that is filled with a value. +# +# .. versionadded:: 3.15.0 +# +# """ +# +# def __init__( +# self, +# fill_value=None, +# dtype=None, +# shape=None, +# aggregated_units=False, +# aggregated_calendar=False, +# attributes=None, +# source=None, +# copy=True, +# ): +# """**Initialisation** +# +# :Parameters: +# +# fill_value: scalar +# The fill value. +# +# dtype: `numpy.dtype` +# The data type of the aggregated array. May be `None` +# if the numpy data-type is not known (which can be the +# case for netCDF string types, for example). This may +# differ from the data type of the netCDF fragment +# variable. +# +# shape: `tuple` +# The shape of the fragment within the aggregated +# array. This may differ from the shape of the netCDF +# fragment variable in that the latter may have fewer +# size 1 dimensions. +# +# {{init attributes: `dict` or `None`, optional}} +# +# .. versionadded:: NEXTVERSION +# +# {{aggregated_units: `str` or `None`, optional}} +# +# {{aggregated_calendar: `str` or `None`, optional}} +# +# {{init source: optional}} +# +# {{init copy: `bool`, optional}} +# +# units: `str` or `None`, optional +# Deprecated at version NEXTVERSION. Use the +# *attributes* parameter instead. 
+# +# calendar: `str` or `None`, optional +# Deprecated at version NEXTVERSION. Use the +# *attributes* parameter instead. +# +# """ +# super().__init__( +# fill_value=fill_value, +# dtype=dtype, +# shape=shape, +# attributes=attributes, +# source=source, +# copy=False, +# ) +# +# if source is not None: +# try: +# aggregated_units = source._get_component( +# "aggregated_units", False +# ) +# except AttributeError: +# aggregated_units = False +# +# try: +# aggregated_calendar = source._get_component( +# "aggregated_calendar", False +# ) +# except AttributeError: +# aggregated_calendar = False +# +# self._set_component("aggregated_units", aggregated_units, copy=False) +# self._set_component( +# "aggregated_calendar", aggregated_calendar, copy=False +# ) diff --git a/cf/data/fragment/h5netcdffragmentarray.py b/cf/data/fragment/h5netcdffragmentarray.py index 0b70976c7f..0f4caf4210 100644 --- a/cf/data/fragment/h5netcdffragmentarray.py +++ b/cf/data/fragment/h5netcdffragmentarray.py @@ -1,97 +1,97 @@ -from ..array.h5netcdfarray import H5netcdfArray -from .mixin import FragmentArrayMixin - - -class H5netcdfFragmentArray(FragmentArrayMixin, H5netcdfArray): - """A netCDF fragment array accessed with `h5netcdf`. - - .. versionadded:: NEXTVERSION - - """ - - def __init__( - self, - filename=None, - address=None, - dtype=None, - shape=None, - aggregated_units=False, - aggregated_calendar=False, - attributes=None, - storage_options=None, - source=None, - copy=True, - ): - """**Initialisation** - - :Parameters: - - filename: (sequence of `str`), optional - The names of the netCDF fragment files containing the - array. - - address: (sequence of `str`), optional - The name of the netCDF variable containing the - fragment array. Required unless *varid* is set. - - dtype: `numpy.dtype`, optional - The data type of the aggregated array. May be `None` - if the numpy data-type is not known (which can be the - case for netCDF string types, for example). 
This may - differ from the data type of the netCDF fragment - variable. - - shape: `tuple`, optional - The shape of the fragment within the aggregated - array. This may differ from the shape of the netCDF - fragment variable in that the latter may have fewer - size 1 dimensions. - - {{init attributes: `dict` or `None`, optional}} - - If *attributes* is `None`, the default, then the - attributes will be set from the netCDF variable during - the first `__getitem__` call. - - {{aggregated_units: `str` or `None`, optional}} - - {{aggregated_calendar: `str` or `None`, optional}} - - {{init storage_options: `dict` or `None`, optional}} - - {{init source: optional}} - - {{init copy: `bool`, optional}} - - """ - super().__init__( - filename=filename, - address=address, - dtype=dtype, - shape=shape, - mask=True, - attributes=attributes, - storage_options=storage_options, - source=source, - copy=copy, - ) - - if source is not None: - try: - aggregated_units = source._get_component( - "aggregated_units", False - ) - except AttributeError: - aggregated_units = False - - try: - aggregated_calendar = source._get_component( - "aggregated_calendar", False - ) - except AttributeError: - aggregated_calendar = False - - self._set_component("aggregated_units", aggregated_units, copy=False) - self._set_component( - "aggregated_calendar", aggregated_calendar, copy=False - ) +# from ..array.h5netcdfarray import H5netcdfArray +# from .mixin import FragmentArrayMixin +# +# +# class H5netcdfFragmentArray(FragmentArrayMixin, H5netcdfArray): +# """A netCDF fragment array accessed with `h5netcdf`. +# +# .. 
versionadded:: NEXTVERSION +# +# """ +# +# def __init__( +# self, +# filename=None, +# address=None, +# dtype=None, +# shape=None, +# aggregated_units=False, +# aggregated_calendar=False, +# attributes=None, +# storage_options=None, +# source=None, +# copy=True, +# ): +# """**Initialisation** +# +# :Parameters: +# +# filename: (sequence of `str`), optional +# The names of the netCDF fragment files containing the +# array. +# +# address: (sequence of `str`), optional +# The name of the netCDF variable containing the +# fragment array. Required unless *varid* is set. +# +# dtype: `numpy.dtype`, optional +# The data type of the aggregated array. May be `None` +# if the numpy data-type is not known (which can be the +# case for netCDF string types, for example). This may +# differ from the data type of the netCDF fragment +# variable. +# +# shape: `tuple`, optional +# The shape of the fragment within the aggregated +# array. This may differ from the shape of the netCDF +# fragment variable in that the latter may have fewer +# size 1 dimensions. +# +# {{init attributes: `dict` or `None`, optional}} +# +# If *attributes* is `None`, the default, then the +# attributes will be set from the netCDF variable during +# the first `__getitem__` call. 
+# +# {{aggregated_units: `str` or `None`, optional}} +# +# {{aggregated_calendar: `str` or `None`, optional}} +# +# {{init storage_options: `dict` or `None`, optional}} +# +# {{init source: optional}} +# +# {{init copy: `bool`, optional}} +# +# """ +# super().__init__( +# filename=filename, +# address=address, +# dtype=dtype, +# shape=shape, +# mask=True, +# attributes=attributes, +# storage_options=storage_options, +# source=source, +# copy=copy, +# ) +# +# if source is not None: +# try: +# aggregated_units = source._get_component( +# "aggregated_units", False +# ) +# except AttributeError: +# aggregated_units = False +# +# try: +# aggregated_calendar = source._get_component( +# "aggregated_calendar", False +# ) +# except AttributeError: +# aggregated_calendar = False +# +# self._set_component("aggregated_units", aggregated_units, copy=False) +# self._set_component( +# "aggregated_calendar", aggregated_calendar, copy=False +# ) diff --git a/cf/data/fragment/mixin/fragmentarraymixin.py b/cf/data/fragment/mixin/fragmentarraymixin.py index f02c779002..7567d7f667 100644 --- a/cf/data/fragment/mixin/fragmentarraymixin.py +++ b/cf/data/fragment/mixin/fragmentarraymixin.py @@ -1,258 +1,258 @@ -from math import prod - -import numpy as np - -from ....units import Units - - -class FragmentArrayMixin: - """Mixin class for a CFA fragment array. - - .. versionadded:: 3.15.0 - - """ - - def _get_array(self, index=None): - """Returns a subspace of the dataset variable. - - .. versionadded:: NEXTVERSION - - .. seealso:: `__array__`, `index` - - :Parameters: - - {{index: `tuple` or `None`, optional}} - - It is important that there is a distinct value for each - fragment dimension, which is guaranteed when the - default of the `index` attribute is being used. - - :Returns: - - `numpy.ndarray` - The subspace. 
- - """ - if index is None: - index = self.index() - - try: - array = super()._get_array(index) - except ValueError: - # A ValueError is expected to be raised when the fragment - # variable has fewer than 'self.ndim' dimensions (we know - # that this is the case because 'index' has 'self.ndim' - # elements). - axis = self._size_1_axis(index) - if axis is not None: - # There is a unique size 1 index that must correspond - # to the missing dimension => Remove it from the - # indices, get the fragment array with the new - # indices; and then insert the missing size one - # dimension. - index = list(index) - index.pop(axis) - array = super()._get_array(tuple(index)) - array = np.expand_dims(array, axis) - else: - # There are multiple size 1 indices so we don't know - # how many missing dimensions the fragment has, nor - # their positions => Get the full fragment array and - # then reshape it to the shape of the dask compute - # chunk; and then apply the index. - array = super()._get_array(Ellipsis) - if array.size > prod(self.original_shape): - raise ValueError( - f"Can't get CFA fragment data from ({self}) when " - "the fragment has two or more missing size 1 " - "dimensions, whilst also spanning two or more " - "Dask compute chunks." - "\n\n" - "Consider re-creating the data with exactly one " - "Dask compute chunk per fragment (e.g. by setting " - "'chunks=None' as a keyword to cf.read)." - ) - - array = array.reshape(self.original_shape) - array = array[index] - - array = self._conform_to_aggregated_units(array) - return array - - def _conform_to_aggregated_units(self, array): - """Conform the array to have the aggregated units. - - .. versionadded:: 3.15.0 - - :Parameters: - - array: `numpy.ndarray` or `dict` - The array to be conformed. If *array* is a `dict` with - `numpy` array values then selected values are - conformed. - - :Returns: - - `numpy.ndarray` or `dict` - The conformed array. 
The returned array may or may not - be the input array updated in-place, depending on its - data type and the nature of its units and the - aggregated units. - - If *array* is a `dict` then a dictionary of conformed - arrays is returned. - - """ - units = self.Units - if units: - aggregated_units = self.aggregated_Units - if not units.equivalent(aggregated_units): - raise ValueError( - f"Can't convert fragment data with units {units!r} to " - f"have aggregated units {aggregated_units!r}" - ) - - if units != aggregated_units: - if isinstance(array, dict): - # 'array' is a dictionary. - raise ValueError( - "TODOACTIVE. Placeholder notification that " - "we can't yet deal with active " - "storage reductions on CFA fragments." - ) - else: - # 'array' is a numpy array - array = Units.conform( - array, units, aggregated_units, inplace=True - ) - - return array - - def _size_1_axis(self, indices): - """Find the position of a unique size 1 index. - - .. versionadded:: 3.15.0 - - .. seealso:: `_parse_indices`, `__getitem__` - - :Paramealso:: `_parse_indices`, `__getitem__` - - :Parameters: - - indices: sequence of index - The array indices to be parsed, as returned by - `_parse_indices`. - - :Returns: - - `int` or `None` - The position of the unique size 1 index, or `None` if - there are zero or at least two of them. - - **Examples** - - >>> a._size_1_axis(([2, 4, 5], slice(0, 1), slice(0, 73))) - 1 - >>> a._size_1_axis(([2, 4, 5], slice(3, 4), slice(0, 73))) - 1 - >>> a._size_1_axis(([2, 4, 5], [0], slice(0, 73))) - 1 - >>> a._size_1_axis(([2, 4, 5], slice(0, 144), slice(0, 73))) - None - >>> a._size_1_axis(([2, 4, 5], slice(3, 7), [0, 1])) - None - >>> a._size_1_axis(([2, 4, 5], slice(0, 1), [0])) - None - - """ - original_shape = self.original_shape - if original_shape.count(1): - return original_shape.index(1) - - return - - @property - def aggregated_Units(self): - """The units of the aggregated data. - - .. 
versionadded:: 3.15.0 - - :Returns: - - `Units` - The units of the aggregated data. - - """ - return Units( - self.get_aggregated_units(), self.get_aggregated_calendar(None) - ) - - def get_aggregated_calendar(self, default=ValueError()): - """The calendar of the aggregated array. - - If the calendar is `None` then the CF default calendar is - assumed, if applicable. - - .. versionadded:: 3.15.0 - - :Parameters: - - default: optional - Return the value of the *default* parameter if the - aggregated calendar has not been set. If set to an - `Exception` instance then it will be raised instead. - - :Returns: - - `str` or `None` - The calendar value. - - """ - calendar = self._get_component("aggregated_calendar", False) - if calendar is False: - if default is None: - return - - return self._default( - default, - f"{self.__class__.__name__} 'aggregated_calendar' has not " - "been set", - ) - - return calendar - - def get_aggregated_units(self, default=ValueError()): - """The units of the aggregated array. - - If the units are `None` then the aggregated array has no - defined units. - - .. versionadded:: 3.15.0 - - .. seealso:: `get_aggregated_calendar` - - :Parameters: - - default: optional - Return the value of the *default* parameter if the - aggregated units have not been set. If set to an - `Exception` instance then it will be raised instead. - - :Returns: - - `str` or `None` - The units value. - - """ - units = self._get_component("aggregated_units", False) - if units is False: - if default is None: - return - - return self._default( - default, - f"{self.__class__.__name__} 'aggregated_units' have not " - "been set", - ) - - return units +# from math import prod +# +# import numpy as np +# +# from ....units import Units +# +# +# class FragmentArrayMixin: +# """Mixin class for a CFA fragment array. +# +# .. versionadded:: 3.15.0 +# +# """ +# +# def _get_array(self, index=None): +# """Returns a subspace of the dataset variable. +# +# ..
versionadded:: NEXTVERSION +# +# .. seealso:: `__array__`, `index` +# +# :Parameters: +# +# {{index: `tuple` or `None`, optional}} +# +# It is important that there is a distinct value for each +# fragment dimension, which is guaranteed when the +# default of the `index` attribute is being used. +# +# :Returns: +# +# `numpy.ndarray` +# The subspace. +# +# """ +# if index is None: +# index = self.index() +# +# try: +# array = super()._get_array(index) +# except ValueError: +# # A ValueError is expected to be raised when the fragment +# # variable has fewer than 'self.ndim' dimensions (we know +# # that this is the case because 'index' has 'self.ndim' +# # elements). +# axis = self._size_1_axis(index) +# if axis is not None: +# # There is a unique size 1 index that must correspond +# # to the missing dimension => Remove it from the +# # indices, get the fragment array with the new +# # indices; and then insert the missing size one +# # dimension. +# index = list(index) +# index.pop(axis) +# array = super()._get_array(tuple(index)) +# array = np.expand_dims(array, axis) +# else: +# # There are multiple size 1 indices so we don't know +# # how many missing dimensions the fragment has, nor +# # their positions => Get the full fragment array and +# # then reshape it to the shape of the dask compute +# # chunk; and then apply the index. +# array = super()._get_array(Ellipsis) +# if array.size > prod(self.original_shape): +# raise ValueError( +# f"Can't get CFA fragment data from ({self}) when " +# "the fragment has two or more missing size 1 " +# "dimensions, whilst also spanning two or more " +# "Dask compute chunks." +# "\n\n" +# "Consider re-creating the data with exactly one " +# "Dask compute chunk per fragment (e.g. by setting " +# "'chunks=None' as a keyword to cf.read)." 
+# ) +# +# array = array.reshape(self.original_shape) +# array = array[index] +# +# array = self._conform_to_aggregated_units(array) +# return array +# +# def _conform_to_aggregated_units(self, array): +# """Conform the array to have the aggregated units. +# +# .. versionadded:: 3.15.0 +# +# :Parameters: +# +# array: `numpy.ndarray` or `dict` +# The array to be conformed. If *array* is a `dict` with +# `numpy` array values then selected values are +# conformed. +# +# :Returns: +# +# `numpy.ndarray` or `dict` +# The conformed array. The returned array may or may not +# be the input array updated in-place, depending on its +# data type and the nature of its units and the +# aggregated units. +# +# If *array* is a `dict` then a dictionary of conformed +# arrays is returned. +# +# """ +# units = self.Units +# if units: +# aggregated_units = self.aggregated_Units +# if not units.equivalent(aggregated_units): +# raise ValueError( +# f"Can't convert fragment data with units {units!r} to " +# f"have aggregated units {aggregated_units!r}" +# ) +# +# if units != aggregated_units: +# if isinstance(array, dict): +# # 'array' is a dictionary. +# raise ValueError( +# "TODOACTIVE. Placeholder notification that " +# "we can't yet deal with active " +# "storage reductions on CFA fragments." +# ) +# else: +# # 'array' is a numpy array +# array = Units.conform( +# array, units, aggregated_units, inplace=True +# ) +# +# return array +# +# def _size_1_axis(self, indices): +# """Find the position of a unique size 1 index. +# +# .. versionadded:: 3.15.0 +# +# .. seealso:: `_parse_indices`, `__getitem__` +# +# :Paramealso:: `_parse_indices`, `__getitem__` +# +# :Parameters: +# +# indices: sequence of index +# The array indices to be parsed, as returned by +# `_parse_indices`. +# +# :Returns: +# +# `int` or `None` +# The position of the unique size 1 index, or `None` if +# there are zero or at least two of them. 
+# +# **Examples** +# +# >>> a._size_1_axis(([2, 4, 5], slice(0, 1), slice(0, 73))) +# 1 +# >>> a._size_1_axis(([2, 4, 5], slice(3, 4), slice(0, 73))) +# 1 +# >>> a._size_1_axis(([2, 4, 5], [0], slice(0, 73))) +# 1 +# >>> a._size_1_axis(([2, 4, 5], slice(0, 144), slice(0, 73))) +# None +# >>> a._size_1_axis(([2, 4, 5], slice(3, 7), [0, 1])) +# None +# >>> a._size_1_axis(([2, 4, 5], slice(0, 1), [0])) +# None +# +# """ +# original_shape = self.original_shape +# if original_shape.count(1): +# return original_shape.index(1) +# +# return +# +# @property +# def aggregated_Units(self): +# """The units of the aggregated data. +# +# .. versionadded:: 3.15.0 +# +# :Returns: +# +# `Units` +# The units of the aggregated data. +# +# """ +# return Units( +# self.get_aggregated_units(), self.get_aggregated_calendar(None) +# ) +# +# def get_aggregated_calendar(self, default=ValueError()): +# """The calendar of the aggregated array. +# +# If the calendar is `None` then the CF default calendar is +# assumed, if applicable. +# +# .. versionadded:: 3.15.0 +# +# :Parameters: +# +# default: optional +# Return the value of the *default* parameter if the +# aggregated calendar has not been set. If set to an +# `Exception` instance then it will be raised instead. +# +# :Returns: +# +# `str` or `None` +# The calendar value. +# +# """ +# calendar = self._get_component("aggregated_calendar", False) +# if calendar is False: +# if default is None: +# return +# +# return self._default( +# default, +# f"{self.__class__.__name__} 'aggregated_calendar' has not " +# "been set", +# ) +# +# return calendar +# +# def get_aggregated_units(self, default=ValueError()): +# """The units of the aggregated array. +# +# If the units are `None` then the aggregated array has no +# defined units. +# +# .. versionadded:: 3.15.0 +# +# .. seealso:: `get_aggregated_calendar` +# +# :Parameters: +# +# default: optional +# Return the value of the *default* parameter if the +# aggregated units have not been set. 
If set to an +# `Exception` instance then it will be raised instead. +# +# :Returns: +# +# `str` or `None` +# The units value. +# +# """ +# units = self._get_component("aggregated_units", False) +# if units is False: +# if default is None: +# return +# +# return self._default( +# default, +# f"{self.__class__.__name__} 'aggregated_units' have not " +# "been set", +# ) +# +# return units diff --git a/cf/data/fragment/netcdf4fragmentarray.py b/cf/data/fragment/netcdf4fragmentarray.py index 91f87dc2e8..ab1b761c47 100644 --- a/cf/data/fragment/netcdf4fragmentarray.py +++ b/cf/data/fragment/netcdf4fragmentarray.py @@ -1,108 +1,108 @@ -from ..array.netcdf4array import NetCDF4Array -from .mixin import FragmentArrayMixin - - -class NetCDF4FragmentArray(FragmentArrayMixin, NetCDF4Array): - """A netCDF fragment array accessed with `netCDF4`. - - .. versionadded:: NEXTVERSION - - """ - - def __init__( - self, - filename=None, - address=None, - dtype=None, - shape=None, - aggregated_units=False, - aggregated_calendar=False, - attributes=None, - storage_options=None, - source=None, - copy=True, - ): - """**Initialisation** - - :Parameters: - - filename: (sequence of `str`), optional - The names of the netCDF fragment files containing the - array. - - address: (sequence of `str`), optional - The name of the netCDF variable containing the - fragment array. Required unless *varid* is set. - - dtype: `numpy.dtype`, optional - The data type of the aggregated array. May be `None` - if the numpy data-type is not known (which can be the - case for netCDF string types, for example). This may - differ from the data type of the netCDF fragment - variable. - - shape: `tuple`, optional - The shape of the fragment within the aggregated - array. This may differ from the shape of the netCDF - fragment variable in that the latter may have fewer - size 1 dimensions. - - units: `str` or `None`, optional - The units of the fragment data. Set to `None` to - indicate that there are no units. 
If unset then the - units will be set during the first `__getitem__` call. - - calendar: `str` or `None`, optional - The calendar of the fragment data. Set to `None` to - indicate the CF default calendar, if applicable. If - unset then the calendar will be set during the first - `__getitem__` call. - - {{init attributes: `dict` or `None`, optional}} - - If *attributes* is `None`, the default, then the - attributes will be set from the netCDF variable during - the first `__getitem__` call. - - {{aggregated_units: `str` or `None`, optional}} - - {{aggregated_calendar: `str` or `None`, optional}} - - {{init storage_options: `dict` or `None`, optional}} - - {{init source: optional}} - - {{init copy: `bool`, optional}} - - """ - super().__init__( - filename=filename, - address=address, - dtype=dtype, - shape=shape, - mask=True, - attributes=attributes, - storage_options=storage_options, - source=source, - copy=copy, - ) - - if source is not None: - try: - aggregated_units = source._get_component( - "aggregated_units", False - ) - except AttributeError: - aggregated_units = False - - try: - aggregated_calendar = source._get_component( - "aggregated_calendar", False - ) - except AttributeError: - aggregated_calendar = False - - self._set_component("aggregated_units", aggregated_units, copy=False) - self._set_component( - "aggregated_calendar", aggregated_calendar, copy=False - ) +# from ..array.netcdf4array import NetCDF4Array +# from .mixin import FragmentArrayMixin +# +# +# class NetCDF4FragmentArray(FragmentArrayMixin, NetCDF4Array): +# """A netCDF fragment array accessed with `netCDF4`. +# +# .. 
versionadded:: NEXTVERSION +# +# """ +# +# def __init__( +# self, +# filename=None, +# address=None, +# dtype=None, +# shape=None, +# aggregated_units=False, +# aggregated_calendar=False, +# attributes=None, +# storage_options=None, +# source=None, +# copy=True, +# ): +# """**Initialisation** +# +# :Parameters: +# +# filename: (sequence of `str`), optional +# The names of the netCDF fragment files containing the +# array. +# +# address: (sequence of `str`), optional +# The name of the netCDF variable containing the +# fragment array. Required unless *varid* is set. +# +# dtype: `numpy.dtype`, optional +# The data type of the aggregated array. May be `None` +# if the numpy data-type is not known (which can be the +# case for netCDF string types, for example). This may +# differ from the data type of the netCDF fragment +# variable. +# +# shape: `tuple`, optional +# The shape of the fragment within the aggregated +# array. This may differ from the shape of the netCDF +# fragment variable in that the latter may have fewer +# size 1 dimensions. +# +# units: `str` or `None`, optional +# The units of the fragment data. Set to `None` to +# indicate that there are no units. If unset then the +# units will be set during the first `__getitem__` call. +# +# calendar: `str` or `None`, optional +# The calendar of the fragment data. Set to `None` to +# indicate the CF default calendar, if applicable. If +# unset then the calendar will be set during the first +# `__getitem__` call. +# +# {{init attributes: `dict` or `None`, optional}} +# +# If *attributes* is `None`, the default, then the +# attributes will be set from the netCDF variable during +# the first `__getitem__` call. 
+# +# {{aggregated_units: `str` or `None`, optional}} +# +# {{aggregated_calendar: `str` or `None`, optional}} +# +# {{init storage_options: `dict` or `None`, optional}} +# +# {{init source: optional}} +# +# {{init copy: `bool`, optional}} +# +# """ +# super().__init__( +# filename=filename, +# address=address, +# dtype=dtype, +# shape=shape, +# mask=True, +# attributes=attributes, +# storage_options=storage_options, +# source=source, +# copy=copy, +# ) +# +# if source is not None: +# try: +# aggregated_units = source._get_component( +# "aggregated_units", False +# ) +# except AttributeError: +# aggregated_units = False +# +# try: +# aggregated_calendar = source._get_component( +# "aggregated_calendar", False +# ) +# except AttributeError: +# aggregated_calendar = False +# +# self._set_component("aggregated_units", aggregated_units, copy=False) +# self._set_component( +# "aggregated_calendar", aggregated_calendar, copy=False +# ) diff --git a/cf/data/fragment/netcdffragmentarray.py b/cf/data/fragment/netcdffragmentarray.py index 4719741904..e701f6b862 100644 --- a/cf/data/fragment/netcdffragmentarray.py +++ b/cf/data/fragment/netcdffragmentarray.py @@ -1,239 +1,239 @@ -import cfdm - -from ..array.abstract import Array -from ..array.mixin import FileArrayMixin -from .h5netcdffragmentarray import H5netcdfFragmentArray -from .mixin import FragmentArrayMixin -from .netcdf4fragmentarray import NetCDF4FragmentArray - - -class NetCDFFragmentArray( - FragmentArrayMixin, - cfdm.data.mixin.NetCDFFileMixin, - FileArrayMixin, - cfdm.data.mixin.IndexMixin, - cfdm.data.mixin.FileArrayMixin, - Array, -): - """A netCDF fragment array. - - Access will be with either `netCDF4` or `h5netcdf`. - - .. 
versionadded:: 3.15.0 - - """ - - def __init__( - self, - filename=None, - address=None, - dtype=None, - shape=None, - aggregated_units=False, - aggregated_calendar=False, - attributes=None, - storage_options=None, - source=None, - copy=True, - ): - """**Initialisation** - - :Parameters: - - filename: (sequence of `str`), optional - The names of the netCDF fragment files containing the - array. - - address: (sequence of `str`), optional - The name of the netCDF variable containing the - fragment array. Required unless *varid* is set. - - dtype: `numpy.dtype`, optional - The data type of the aggregated array. May be `None` - if the numpy data-type is not known (which can be the - case for netCDF string types, for example). This may - differ from the data type of the netCDF fragment - variable. - - shape: `tuple`, optional - The shape of the fragment within the aggregated - array. This may differ from the shape of the netCDF - fragment variable in that the latter may have fewer - size 1 dimensions. - - {{init attributes: `dict` or `None`, optional}} - - If *attributes* is `None`, the default, then the - attributes will be set from the netCDF variable during - the first `__getitem__` call. - - .. versionadded:: NEXTVERSION - - {{aggregated_units: `str` or `None`, optional}} - - {{aggregated_calendar: `str` or `None`, optional}} - - {{init storage_options: `dict` or `None`, optional}} - - .. versionadded:: NEXTVERSION - - {{init source: optional}} - - {{init copy: `bool`, optional}} - - units: `str` or `None`, optional - Deprecated at version NEXTVERSION. Use the - *attributes* parameter instead. - - calendar: `str` or `None`, optional - Deprecated at version NEXTVERSION. Use the - *attributes* parameter instead. 
- - """ - super().__init__( - source=source, - copy=copy, - ) - - if source is not None: - try: - shape = source._get_component("shape", None) - except AttributeError: - shape = None - - try: - filename = source._get_component("filename", None) - except AttributeError: - filename = None - - try: - address = source._get_component("address", None) - except AttributeError: - address = None - - try: - dtype = source._get_component("dtype", None) - except AttributeError: - dtype = None - - try: - attributes = source._get_component("attributes", None) - except AttributeError: - attributes = None - - try: - aggregated_units = source._get_component( - "aggregated_units", False - ) - except AttributeError: - aggregated_units = False - - try: - aggregated_calendar = source._get_component( - "aggregated_calendar", False - ) - except AttributeError: - aggregated_calendar = False - - try: - storage_options = source._get_component( - "storage_options", None - ) - except AttributeError: - storage_options = None - - if filename is not None: - if isinstance(filename, str): - filename = (filename,) - else: - filename = tuple(filename) - - self._set_component("filename", filename, copy=False) - - if address is not None: - if isinstance(address, int): - address = (address,) - else: - address = tuple(address) - - self._set_component("address", address, copy=False) - - if storage_options is not None: - self._set_component("storage_options", storage_options, copy=False) - - self._set_component("shape", shape, copy=False) - self._set_component("dtype", dtype, copy=False) - self._set_component("attributes", attributes, copy=False) - self._set_component("mask", True, copy=False) - - self._set_component("aggregated_units", aggregated_units, copy=False) - self._set_component( - "aggregated_calendar", aggregated_calendar, copy=False - ) - - # By default, close the file after data array access - self._set_component("close", True, copy=False) - - def _get_array(self, index=None): - """Returns a 
subspace of the dataset variable. - - The method acts as a factory for either a - `NetCDF4FragmentArray` or a `H5netcdfFragmentArray` class, and - it is the result of calling `!_get_array` on the newly created - instance that is returned. - - `H5netcdfFragmentArray` will only be used if - `NetCDF4FragmentArray` returns a `FileNotFoundError` exception. - - .. versionadded:: NEXTVERSION - - .. seealso:: `__array__`, `index` - - :Parameters: - - {{index: `tuple` or `None`, optional}} - - It is important that there is a distinct value for each - fragment dimension, which is guaranteed when the - default of the `index` attribute is being used. - - :Returns: - - `numpy.ndarray` - The subspace. - - """ - kwargs = { - "dtype": self.dtype, - "shape": self.shape, - "aggregated_units": self.get_aggregated_units(None), - "aggregated_calendar": self.get_aggregated_calendar(None), - "attributes": self.get_attributes(None), - "copy": False, - } - - # Loop round the files, returning as soon as we find one that - # is accessible. - filenames = self.get_filenames() - for filename, address in zip(filenames, self.get_addresses()): - kwargs["filename"] = filename - kwargs["address"] = address - kwargs["storage_options"] = self.get_storage_options( - create_endpoint_url=False - ) - - try: - return NetCDF4FragmentArray(**kwargs)._get_array(index) - except FileNotFoundError: - pass - except Exception: - return H5netcdfFragmentArray(**kwargs)._get_array(index) - - # Still here? 
- if not filenames: - raise FileNotFoundError("No fragment files") - - if len(filenames) == 1: - raise FileNotFoundError(f"No such fragment file: {filenames[0]}") - - raise FileNotFoundError(f"No such fragment files: {filenames}") +# import cfdm +# +# from ..array.abstract import Array +# from ..array.mixin import FileArrayMixin +# from .h5netcdffragmentarray import H5netcdfFragmentArray +# from .mixin import FragmentArrayMixin +# from .netcdf4fragmentarray import NetCDF4FragmentArray +# +# +# class NetCDFFragmentArray( +# FragmentArrayMixin, +# cfdm.data.mixin.NetCDFFileMixin, +# FileArrayMixin, +# cfdm.data.mixin.IndexMixin, +# cfdm.data.mixin.FileArrayMixin, +# Array, +# ): +# """A netCDF fragment array. +# +# Access will be with either `netCDF4` or `h5netcdf`. +# +# .. versionadded:: 3.15.0 +# +# """ +# +# def __init__( +# self, +# filename=None, +# address=None, +# dtype=None, +# shape=None, +# aggregated_units=False, +# aggregated_calendar=False, +# attributes=None, +# storage_options=None, +# source=None, +# copy=True, +# ): +# """**Initialisation** +# +# :Parameters: +# +# filename: (sequence of `str`), optional +# The names of the netCDF fragment files containing the +# array. +# +# address: (sequence of `str`), optional +# The name of the netCDF variable containing the +# fragment array. Required unless *varid* is set. +# +# dtype: `numpy.dtype`, optional +# The data type of the aggregated array. May be `None` +# if the numpy data-type is not known (which can be the +# case for netCDF string types, for example). This may +# differ from the data type of the netCDF fragment +# variable. +# +# shape: `tuple`, optional +# The shape of the fragment within the aggregated +# array. This may differ from the shape of the netCDF +# fragment variable in that the latter may have fewer +# size 1 dimensions. 
+# +# {{init attributes: `dict` or `None`, optional}} +# +# If *attributes* is `None`, the default, then the +# attributes will be set from the netCDF variable during +# the first `__getitem__` call. +# +# .. versionadded:: NEXTVERSION +# +# {{aggregated_units: `str` or `None`, optional}} +# +# {{aggregated_calendar: `str` or `None`, optional}} +# +# {{init storage_options: `dict` or `None`, optional}} +# +# .. versionadded:: NEXTVERSION +# +# {{init source: optional}} +# +# {{init copy: `bool`, optional}} +# +# units: `str` or `None`, optional +# Deprecated at version NEXTVERSION. Use the +# *attributes* parameter instead. +# +# calendar: `str` or `None`, optional +# Deprecated at version NEXTVERSION. Use the +# *attributes* parameter instead. +# +# """ +# super().__init__( +# source=source, +# copy=copy, +# ) +# +# if source is not None: +# try: +# shape = source._get_component("shape", None) +# except AttributeError: +# shape = None +# +# try: +# filename = source._get_component("filename", None) +# except AttributeError: +# filename = None +# +# try: +# address = source._get_component("address", None) +# except AttributeError: +# address = None +# +# try: +# dtype = source._get_component("dtype", None) +# except AttributeError: +# dtype = None +# +# try: +# attributes = source._get_component("attributes", None) +# except AttributeError: +# attributes = None +# +# try: +# aggregated_units = source._get_component( +# "aggregated_units", False +# ) +# except AttributeError: +# aggregated_units = False +# +# try: +# aggregated_calendar = source._get_component( +# "aggregated_calendar", False +# ) +# except AttributeError: +# aggregated_calendar = False +# +# try: +# storage_options = source._get_component( +# "storage_options", None +# ) +# except AttributeError: +# storage_options = None +# +# if filename is not None: +# if isinstance(filename, str): +# filename = (filename,) +# else: +# filename = tuple(filename) +# +# self._set_component("filename", filename, 
copy=False) +# +# if address is not None: +# if isinstance(address, int): +# address = (address,) +# else: +# address = tuple(address) +# +# self._set_component("address", address, copy=False) +# +# if storage_options is not None: +# self._set_component("storage_options", storage_options, copy=False) +# +# self._set_component("shape", shape, copy=False) +# self._set_component("dtype", dtype, copy=False) +# self._set_component("attributes", attributes, copy=False) +# self._set_component("mask", True, copy=False) +# +# self._set_component("aggregated_units", aggregated_units, copy=False) +# self._set_component( +# "aggregated_calendar", aggregated_calendar, copy=False +# ) +# +# # By default, close the file after data array access +# self._set_component("close", True, copy=False) +# +# def _get_array(self, index=None): +# """Returns a subspace of the dataset variable. +# +# The method acts as a factory for either a +# `NetCDF4FragmentArray` or a `H5netcdfFragmentArray` class, and +# it is the result of calling `!_get_array` on the newly created +# instance that is returned. +# +# `H5netcdfFragmentArray` will only be used if +# `NetCDF4FragmentArray` returns a `FileNotFoundError` exception. +# +# .. versionadded:: NEXTVERSION +# +# .. seealso:: `__array__`, `index` +# +# :Parameters: +# +# {{index: `tuple` or `None`, optional}} +# +# It is important that there is a distinct value for each +# fragment dimension, which is guaranteed when the +# default of the `index` attribute is being used. +# +# :Returns: +# +# `numpy.ndarray` +# The subspace. +# +# """ +# kwargs = { +# "dtype": self.dtype, +# "shape": self.shape, +# "aggregated_units": self.get_aggregated_units(None), +# "aggregated_calendar": self.get_aggregated_calendar(None), +# "attributes": self.get_attributes(None), +# "copy": False, +# } +# +# # Loop round the files, returning as soon as we find one that +# # is accessible. 
+# filenames = self.get_filenames() +# for filename, address in zip(filenames, self.get_addresses()): +# kwargs["filename"] = filename +# kwargs["address"] = address +# kwargs["storage_options"] = self.get_storage_options( +# create_endpoint_url=False +# ) +# +# try: +# return NetCDF4FragmentArray(**kwargs)._get_array(index) +# except FileNotFoundError: +# pass +# except Exception: +# return H5netcdfFragmentArray(**kwargs)._get_array(index) +# +# # Still here? +# if not filenames: +# raise FileNotFoundError("No fragment files") +# +# if len(filenames) == 1: +# raise FileNotFoundError(f"No such fragment file: {filenames[0]}") +# +# raise FileNotFoundError(f"No such fragment files: {filenames}") diff --git a/cf/data/fragment/umfragmentarray.py b/cf/data/fragment/umfragmentarray.py index 9168225945..d4c141c896 100644 --- a/cf/data/fragment/umfragmentarray.py +++ b/cf/data/fragment/umfragmentarray.py @@ -1,8 +1,11 @@ +import cfdm + from ..array.umarray import UMArray -from .mixin import FragmentArrayMixin + +# from .mixin import FragmentArrayMixin -class UMFragmentArray(FragmentArrayMixin, UMArray): +class UMFragmentArray(cfdm.data.fragment.mixin.FragmentArrayMixin, UMArray): """A CFA fragment array stored in a UM or PP file. .. versionadded:: 3.14.0 diff --git a/cf/functions.py b/cf/functions.py index bf47bb7d19..9eb1b9eb4b 100644 --- a/cf/functions.py +++ b/cf/functions.py @@ -1386,6 +1386,7 @@ def CFA(): "CFA", version="NEXTVERSION", removed_at="5.0.0" ) # pragma: no cover + # Module-level alias to avoid name clashes with function keyword # arguments (corresponding to 'import atol as cf_atol' etc. 
in other # modules) diff --git a/cf/read_write/netcdf/netcdfread.py b/cf/read_write/netcdf/netcdfread.py index 5c26412ccb..95cbf8911d 100644 --- a/cf/read_write/netcdf/netcdfread.py +++ b/cf/read_write/netcdf/netcdfread.py @@ -1,5 +1,6 @@ import cfdm -from packaging.version import Version + +# from packaging.version import Version class NetCDFRead(cfdm.read_write.netcdf.NetCDFRead): diff --git a/cf/read_write/netcdf/netcdfwrite.py b/cf/read_write/netcdf/netcdfwrite.py index 6d2d438e25..81217bff10 100644 --- a/cf/read_write/netcdf/netcdfwrite.py +++ b/cf/read_write/netcdf/netcdfwrite.py @@ -1,12 +1,13 @@ -from os import remove - +# from os import remove +# import cfdm -import dask.array as da -import numpy as np -from cfdm.data.dask_utils import cfdm_asanyarray from .netcdfread import NetCDFRead +# import dask.array as da +# import numpy as np +# from cfdm.data.dask_utils import cfdm_asanyarray + class NetCDFWrite(cfdm.read_write.netcdf.NetCDFWrite): """A container for writing Fields to a netCDF dataset.""" diff --git a/cf/read_write/write.py b/cf/read_write/write.py index 5ca803eb86..47d05bbeee 100644 --- a/cf/read_write/write.py +++ b/cf/read_write/write.py @@ -2,10 +2,9 @@ from ..cfimplementation import implementation from ..decorators import _manage_log_level_via_verbosity -from ..functions import ( +from ..functions import ( # CFA, _DEPRECATION_ERROR_FUNCTION_KWARG, _DEPRECATION_ERROR_FUNCTION_KWARG_VALUE, - CFA, flat, ) from .netcdf import NetCDFWrite From fc0564ef8f0d39812a46eba2e6c39f4f2d961ed7 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Fri, 8 Nov 2024 08:51:36 +0000 Subject: [PATCH 03/51] dev --- cf/test/test_Maths.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/cf/test/test_Maths.py b/cf/test/test_Maths.py index add50ae710..5c02305e51 100644 --- a/cf/test/test_Maths.py +++ b/cf/test/test_Maths.py @@ -43,13 +43,23 @@ def test_curl_xy(self): c0 = (term1 - term2) / (sin_theta * r) + print('p', 
c.data._get_cached_elements()) + print('p', c0.data._get_cached_elements()) # Check the data with cf.rtol(1e-10): self.assertTrue(c.data.allclose(c0.data)) del c.long_name c0.set_data(c.data) - self.assertTrue(c.equals(c0)) + print (wrap, one_sided) +# print (c.array) +# print(c0.array) +# print(c0.array- c.array) + print(c.data._get_cached_elements()) + print(c0.data._get_cached_elements()) + print( "________________") + with cf.rtol(1e-10): + self.assertTrue(c.equals(c0, verbose=-1)) # Cartesian coordinates dim_x = f.dimension_coordinate("X") From bf07d57a4415430322484308dd6dee4353a4d508 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Fri, 8 Nov 2024 16:11:30 +0000 Subject: [PATCH 04/51] dev --- cf/test/test_Maths.py | 12 +- cf/test/test_NetCDF4Array.py | 342 +++++++++++++++++------------------ 2 files changed, 172 insertions(+), 182 deletions(-) diff --git a/cf/test/test_Maths.py b/cf/test/test_Maths.py index 5c02305e51..add50ae710 100644 --- a/cf/test/test_Maths.py +++ b/cf/test/test_Maths.py @@ -43,23 +43,13 @@ def test_curl_xy(self): c0 = (term1 - term2) / (sin_theta * r) - print('p', c.data._get_cached_elements()) - print('p', c0.data._get_cached_elements()) # Check the data with cf.rtol(1e-10): self.assertTrue(c.data.allclose(c0.data)) del c.long_name c0.set_data(c.data) - print (wrap, one_sided) -# print (c.array) -# print(c0.array) -# print(c0.array- c.array) - print(c.data._get_cached_elements()) - print(c0.data._get_cached_elements()) - print( "________________") - with cf.rtol(1e-10): - self.assertTrue(c.equals(c0, verbose=-1)) + self.assertTrue(c.equals(c0)) # Cartesian coordinates dim_x = f.dimension_coordinate("X") diff --git a/cf/test/test_NetCDF4Array.py b/cf/test/test_NetCDF4Array.py index 0d049ff497..11ee5d9095 100644 --- a/cf/test/test_NetCDF4Array.py +++ b/cf/test/test_NetCDF4Array.py @@ -1,171 +1,171 @@ -import atexit -import datetime -import faulthandler -import os -import tempfile -import unittest - -import numpy as np -from dask.base 
import tokenize - -faulthandler.enable() # to debug seg faults and timeouts - -import cf - -n_tmpfiles = 1 -tmpfiles = [ - tempfile.mkstemp("_test_NetCDF4Array.nc", dir=os.getcwd())[1] - for i in range(n_tmpfiles) -] -(tmpfile1,) = tmpfiles - - -def _remove_tmpfiles(): - """Try to remove defined temporary files by deleting their paths.""" - for f in tmpfiles: - try: - os.remove(f) - except OSError: - pass - - -atexit.register(_remove_tmpfiles) - - -class NetCDF4ArrayTest(unittest.TestCase): - n = cf.NetCDF4Array( - filename="filename.nc", - address="x", - shape=(5, 8), - dtype=np.dtype(float), - ) - - def test_NetCDF4Array_del_file_location(self): - a = cf.NetCDF4Array(("/data1/file1", "/data2/file2"), ("tas1", "tas2")) - b = a.del_file_location("/data1") - self.assertIsNot(b, a) - self.assertEqual(b.get_filenames(), ("/data2/file2",)) - self.assertEqual(b.get_addresses(), ("tas2",)) - - a = cf.NetCDF4Array( - ("/data1/file1", "/data2/file1", "/data2/file2"), - ("tas1", "tas1", "tas2"), - ) - b = a.del_file_location("/data2") - self.assertEqual(b.get_filenames(), ("/data1/file1",)) - self.assertEqual(b.get_addresses(), ("tas1",)) - - # Can't be left with no files - self.assertEqual(b.file_locations(), ("/data1",)) - with self.assertRaises(ValueError): - b.del_file_location("/data1/") - - def test_NetCDF4Array_file_locations(self): - a = cf.NetCDF4Array("/data1/file1") - self.assertEqual(a.file_locations(), ("/data1",)) - - a = cf.NetCDF4Array(("/data1/file1", "/data2/file2")) - self.assertEqual(a.file_locations(), ("/data1", "/data2")) - - a = cf.NetCDF4Array(("/data1/file1", "/data2/file2", "/data1/file2")) - self.assertEqual(a.file_locations(), ("/data1", "/data2", "/data1")) - - def test_NetCDF4Array_add_file_location(self): - a = cf.NetCDF4Array("/data1/file1", "tas") - b = a.add_file_location("/home/user") - self.assertIsNot(b, a) - self.assertEqual( - b.get_filenames(), ("/data1/file1", "/home/user/file1") - ) - self.assertEqual(b.get_addresses(), ("tas", 
"tas")) - - a = cf.NetCDF4Array(("/data1/file1", "/data2/file2"), ("tas1", "tas2")) - b = a.add_file_location("/home/user") - self.assertEqual( - b.get_filenames(), - ( - "/data1/file1", - "/data2/file2", - "/home/user/file1", - "/home/user/file2", - ), - ) - self.assertEqual(b.get_addresses(), ("tas1", "tas2", "tas1", "tas2")) - - a = cf.NetCDF4Array(("/data1/file1", "/data2/file1"), ("tas1", "tas2")) - b = a.add_file_location("/home/user") - self.assertEqual( - b.get_filenames(), - ("/data1/file1", "/data2/file1", "/home/user/file1"), - ) - self.assertEqual(b.get_addresses(), ("tas1", "tas2", "tas1")) - - a = cf.NetCDF4Array(("/data1/file1", "/data2/file1"), ("tas1", "tas2")) - b = a.add_file_location("/data1/") - self.assertEqual(b.get_filenames(), a.get_filenames()) - self.assertEqual(b.get_addresses(), a.get_addresses()) - - def test_NetCDF4Array__dask_tokenize__(self): - a = cf.NetCDF4Array("/data1/file1", "tas", shape=(12, 2), mask=False) - self.assertEqual(tokenize(a), tokenize(a.copy())) - - b = cf.NetCDF4Array("/home/file2", "tas", shape=(12, 2)) - self.assertNotEqual(tokenize(a), tokenize(b)) - - def test_NetCDF4Array_multiple_files(self): - f = cf.example_field(0) - cf.write(f, tmpfile1) - - # Create instance with non-existent file - n = cf.NetCDF4Array( - filename=os.path.join("/bad/location", os.path.basename(tmpfile1)), - address=f.nc_get_variable(), - shape=f.shape, - dtype=f.dtype, - ) - # Add file that exists - n = n.add_file_location(os.path.dirname(tmpfile1)) - - self.assertEqual(len(n.get_filenames()), 2) - self.assertTrue((n[...] 
== f.array).all()) - - def test_NetCDF4Array_shape(self): - shape = (12, 73, 96) - a = cf.NetCDF4Array("/home/file2", "tas", shape=shape) - self.assertEqual(a.shape, shape) - self.assertEqual(a.original_shape, shape) - a = a[::2] - self.assertEqual(a.shape, (shape[0] // 2,) + shape[1:]) - self.assertEqual(a.original_shape, shape) - - def test_NetCDF4Array_index(self): - shape = (12, 73, 96) - a = cf.NetCDF4Array("/home/file2", "tas", shape=shape) - self.assertEqual( - a.index(), - ( - slice( - None, - ), - ) - * len(shape), - ) - a = a[8:7:-1, 10:19:3, [15, 1, 4, 12]] - a = a[[0], [True, False, True], ::-2] - self.assertEqual(a.shape, (1, 2, 2)) - self.assertEqual( - a.index(), - (slice(8, 9, None), slice(10, 17, 6), slice(12, -1, -11)), - ) - - index = a.index(conform=False) - self.assertTrue((index[0] == [8]).all()) - self.assertTrue((index[1] == [10, 16]).all()) - self.assertTrue((index[2] == [12, 1]).all()) - - -if __name__ == "__main__": - print("Run date:", datetime.datetime.now()) - cf.environment() - print() - unittest.main(verbosity=2) +#import atexit +#import datetime +#import faulthandler +#import os +#import tempfile +#import unittest +# +#import numpy as np +#from dask.base import tokenize +# +#faulthandler.enable() # to debug seg faults and timeouts +# +#import cf +# +#n_tmpfiles = 1 +#tmpfiles = [ +# tempfile.mkstemp("_test_NetCDF4Array.nc", dir=os.getcwd())[1] +# for i in range(n_tmpfiles) +#] +#(tmpfile1,) = tmpfiles +# +# +#def _remove_tmpfiles(): +# """Try to remove defined temporary files by deleting their paths.""" +# for f in tmpfiles: +# try: +# os.remove(f) +# except OSError: +# pass +# +# +#atexit.register(_remove_tmpfiles) +# +# +#class NetCDF4ArrayTest(unittest.TestCase): +# n = cf.NetCDF4Array( +# filename="filename.nc", +# address="x", +# shape=(5, 8), +# dtype=np.dtype(float), +# ) +# +# def test_NetCDF4Array_del_file_location(self): +# a = cf.NetCDF4Array(("/data1/file1", "/data2/file2"), ("tas1", "tas2")) +# b = 
a.del_file_location("/data1") +# self.assertIsNot(b, a) +# self.assertEqual(b.get_filenames(), ("/data2/file2",)) +# self.assertEqual(b.get_addresses(), ("tas2",)) +# +# a = cf.NetCDF4Array( +# ("/data1/file1", "/data2/file1", "/data2/file2"), +# ("tas1", "tas1", "tas2"), +# ) +# b = a.del_file_location("/data2") +# self.assertEqual(b.get_filenames(), ("/data1/file1",)) +# self.assertEqual(b.get_addresses(), ("tas1",)) +# +# # Can't be left with no files +# self.assertEqual(b.file_locations(), ("/data1",)) +# with self.assertRaises(ValueError): +# b.del_file_location("/data1/") +# +# def test_NetCDF4Array_file_locations(self): +# a = cf.NetCDF4Array("/data1/file1") +# self.assertEqual(a.file_locations(), ("/data1",)) +# +# a = cf.NetCDF4Array(("/data1/file1", "/data2/file2")) +# self.assertEqual(a.file_locations(), ("/data1", "/data2")) +# +# a = cf.NetCDF4Array(("/data1/file1", "/data2/file2", "/data1/file2")) +# self.assertEqual(a.file_locations(), ("/data1", "/data2", "/data1")) +# +# def test_NetCDF4Array_add_file_location(self): +# a = cf.NetCDF4Array("/data1/file1", "tas") +# b = a.add_file_location("/home/user") +# self.assertIsNot(b, a) +# self.assertEqual( +# b.get_filenames(), ("/data1/file1", "/home/user/file1") +# ) +# self.assertEqual(b.get_addresses(), ("tas", "tas")) +# +# a = cf.NetCDF4Array(("/data1/file1", "/data2/file2"), ("tas1", "tas2")) +# b = a.add_file_location("/home/user") +# self.assertEqual( +# b.get_filenames(), +# ( +# "/data1/file1", +# "/data2/file2", +# "/home/user/file1", +# "/home/user/file2", +# ), +# ) +# self.assertEqual(b.get_addresses(), ("tas1", "tas2", "tas1", "tas2")) +# +# a = cf.NetCDF4Array(("/data1/file1", "/data2/file1"), ("tas1", "tas2")) +# b = a.add_file_location("/home/user") +# self.assertEqual( +# b.get_filenames(), +# ("/data1/file1", "/data2/file1", "/home/user/file1"), +# ) +# self.assertEqual(b.get_addresses(), ("tas1", "tas2", "tas1")) +# +# a = cf.NetCDF4Array(("/data1/file1", "/data2/file1"), ("tas1", 
"tas2")) +# b = a.add_file_location("/data1/") +# self.assertEqual(b.get_filenames(), a.get_filenames()) +# self.assertEqual(b.get_addresses(), a.get_addresses()) +# +# def test_NetCDF4Array__dask_tokenize__(self): +# a = cf.NetCDF4Array("/data1/file1", "tas", shape=(12, 2), mask=False) +# self.assertEqual(tokenize(a), tokenize(a.copy())) +# +# b = cf.NetCDF4Array("/home/file2", "tas", shape=(12, 2)) +# self.assertNotEqual(tokenize(a), tokenize(b)) +# +# def test_NetCDF4Array_multiple_files(self): +# f = cf.example_field(0) +# cf.write(f, tmpfile1) +# +# # Create instance with non-existent file +# n = cf.NetCDF4Array( +# filename=os.path.join("/bad/location", os.path.basename(tmpfile1)), +# address=f.nc_get_variable(), +# shape=f.shape, +# dtype=f.dtype, +# ) +# # Add file that exists +# n = n.add_file_location(os.path.dirname(tmpfile1)) +# +# self.assertEqual(len(n.get_filenames()), 2) +# self.assertTrue((n[...] == f.array).all()) +# +# def test_NetCDF4Array_shape(self): +# shape = (12, 73, 96) +# a = cf.NetCDF4Array("/home/file2", "tas", shape=shape) +# self.assertEqual(a.shape, shape) +# self.assertEqual(a.original_shape, shape) +# a = a[::2] +# self.assertEqual(a.shape, (shape[0] // 2,) + shape[1:]) +# self.assertEqual(a.original_shape, shape) +# +# def test_NetCDF4Array_index(self): +# shape = (12, 73, 96) +# a = cf.NetCDF4Array("/home/file2", "tas", shape=shape) +# self.assertEqual( +# a.index(), +# ( +# slice( +# None, +# ), +# ) +# * len(shape), +# ) +# a = a[8:7:-1, 10:19:3, [15, 1, 4, 12]] +# a = a[[0], [True, False, True], ::-2] +# self.assertEqual(a.shape, (1, 2, 2)) +# self.assertEqual( +# a.index(), +# (slice(8, 9, None), slice(10, 17, 6), slice(12, -1, -11)), +# ) +# +# index = a.index(conform=False) +# self.assertTrue((index[0] == [8]).all()) +# self.assertTrue((index[1] == [10, 16]).all()) +# self.assertTrue((index[2] == [12, 1]).all()) +# +# +#if __name__ == "__main__": +# print("Run date:", datetime.datetime.now()) +# cf.environment() +# print() 
+# unittest.main(verbosity=2) From 68a61c77cd67ea07fa2ae2ca8cb2aa41af6f65c3 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Fri, 8 Nov 2024 18:49:20 +0000 Subject: [PATCH 05/51] dev --- cf/test/test_NetCDF4Array.py | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/cf/test/test_NetCDF4Array.py b/cf/test/test_NetCDF4Array.py index 11ee5d9095..a956daed54 100644 --- a/cf/test/test_NetCDF4Array.py +++ b/cf/test/test_NetCDF4Array.py @@ -1,26 +1,26 @@ -#import atexit -#import datetime -#import faulthandler -#import os -#import tempfile -#import unittest +# import atexit +# import datetime +# import faulthandler +# import os +# import tempfile +# import unittest # -#import numpy as np -#from dask.base import tokenize +# import numpy as np +# from dask.base import tokenize # -#faulthandler.enable() # to debug seg faults and timeouts +# faulthandler.enable() # to debug seg faults and timeouts # -#import cf +# import cf # -#n_tmpfiles = 1 -#tmpfiles = [ +# n_tmpfiles = 1 +# tmpfiles = [ # tempfile.mkstemp("_test_NetCDF4Array.nc", dir=os.getcwd())[1] # for i in range(n_tmpfiles) -#] -#(tmpfile1,) = tmpfiles +# ] +# (tmpfile1,) = tmpfiles # # -#def _remove_tmpfiles(): +# def _remove_tmpfiles(): # """Try to remove defined temporary files by deleting their paths.""" # for f in tmpfiles: # try: @@ -29,10 +29,10 @@ # pass # # -#atexit.register(_remove_tmpfiles) +# atexit.register(_remove_tmpfiles) # # -#class NetCDF4ArrayTest(unittest.TestCase): +# class NetCDF4ArrayTest(unittest.TestCase): # n = cf.NetCDF4Array( # filename="filename.nc", # address="x", @@ -164,7 +164,7 @@ # self.assertTrue((index[2] == [12, 1]).all()) # # -#if __name__ == "__main__": +# if __name__ == "__main__": # print("Run date:", datetime.datetime.now()) # cf.environment() # print() From df5d574dc02e7030656f54955d3d1a9c612f7bb5 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Sat, 9 Nov 2024 17:22:13 +0000 Subject: [PATCH 06/51] dev --- 
cf/data/array/abstract/filearray.py | 8 +-- cf/data/array/locks.py | 2 +- cf/regrid/regrid.py | 87 +++++++++++++++-------------- cf/regrid/regridoperator.py | 32 +++++------ cf/test/test_Field.py | 2 +- 5 files changed, 66 insertions(+), 65 deletions(-) diff --git a/cf/data/array/abstract/filearray.py b/cf/data/array/abstract/filearray.py index 0681111390..96e7dcbd7c 100644 --- a/cf/data/array/abstract/filearray.py +++ b/cf/data/array/abstract/filearray.py @@ -26,10 +26,10 @@ # """x.__str__() <==> str(x)""" # return f"{self.get_filename()}, {self.get_address()}" # -## @property -## def dtype(self): -## """Data-type of the array.""" -## return self._get_component("dtype") +# @property +# def dtype(self): +# """Data-type of the array.""" +# return self._get_component("dtype") # # @property # def filename(self): diff --git a/cf/data/array/locks.py b/cf/data/array/locks.py index e29bdf6387..a52ce7a28e 100644 --- a/cf/data/array/locks.py +++ b/cf/data/array/locks.py @@ -1,4 +1,4 @@ # from dask.utils import SerializableLock # -## Global lock for netCDF file access +# Global lock for netCDF file access # netcdf_lock = SerializableLock() diff --git a/cf/regrid/regrid.py b/cf/regrid/regrid.py index eb7ee6656a..b2ce54ee55 100644 --- a/cf/regrid/regrid.py +++ b/cf/regrid/regrid.py @@ -2462,10 +2462,10 @@ def create_esmpy_weights( # Write the weights to a netCDF file (copying the # dimension and variable names and structure of a weights # file created by ESMF). + from cfdm.data.locks import netcdf_lock from netCDF4 import Dataset from .. 
import __version__ - from ..data.array.locks import netcdf_lock if ( max(dst_esmpy_field.data.size, src_esmpy_field.data.size) @@ -2491,48 +2491,51 @@ def create_esmpy_weights( if src_grid.ln_z: regrid_method += f", ln {src_grid.method} in vertical" - netcdf_lock.acquire() - nc = Dataset(weights_file, "w", format="NETCDF4") + with netcdf_lock: + nc = Dataset(weights_file, "w", format="NETCDF4") - nc.title = ( - f"Regridding weights from source {src_grid.type} " - f"with shape {src_shape} to destination " - f"{dst_grid.type} with shape {dst_shape}" - ) - nc.source = f"cf v{__version__}, esmpy v{esmpy.__version__}" - nc.history = f"Created at {datetime.now()}" - nc.regrid_method = regrid_method - nc.ESMF_unmapped_action = r.unmapped_action - nc.ESMF_ignore_degenerate = int(r.ignore_degenerate) - - nc.createDimension("n_s", weights.size) - nc.createDimension("src_grid_rank", src_esmpy_grid.rank) - nc.createDimension("dst_grid_rank", dst_esmpy_grid.rank) - - v = nc.createVariable("src_grid_dims", i_dtype, ("src_grid_rank",)) - v.long_name = "Source grid shape" - v[...] = src_shape - - v = nc.createVariable("dst_grid_dims", i_dtype, ("dst_grid_rank",)) - v.long_name = "Destination grid shape" - v[...] = dst_shape - - v = nc.createVariable("S", weights.dtype, ("n_s",)) - v.long_name = "Weights values" - v[...] = weights - - v = nc.createVariable("row", i_dtype, ("n_s",), zlib=True) - v.long_name = "Destination/row indices" - v.start_index = start_index - v[...] = row - - v = nc.createVariable("col", i_dtype, ("n_s",), zlib=True) - v.long_name = "Source/col indices" - v.start_index = start_index - v[...] 
= col - - nc.close() - netcdf_lock.release() + nc.title = ( + f"Regridding weights from source {src_grid.type} " + f"with shape {src_shape} to destination " + f"{dst_grid.type} with shape {dst_shape}" + ) + nc.source = f"cf v{__version__}, esmpy v{esmpy.__version__}" + nc.history = f"Created at {datetime.now()}" + nc.regrid_method = regrid_method + nc.ESMF_unmapped_action = r.unmapped_action + nc.ESMF_ignore_degenerate = int(r.ignore_degenerate) + + nc.createDimension("n_s", weights.size) + nc.createDimension("src_grid_rank", src_esmpy_grid.rank) + nc.createDimension("dst_grid_rank", dst_esmpy_grid.rank) + + v = nc.createVariable( + "src_grid_dims", i_dtype, ("src_grid_rank",) + ) + v.long_name = "Source grid shape" + v[...] = src_shape + + v = nc.createVariable( + "dst_grid_dims", i_dtype, ("dst_grid_rank",) + ) + v.long_name = "Destination grid shape" + v[...] = dst_shape + + v = nc.createVariable("S", weights.dtype, ("n_s",)) + v.long_name = "Weights values" + v[...] = weights + + v = nc.createVariable("row", i_dtype, ("n_s",), zlib=True) + v.long_name = "Destination/row indices" + v.start_index = start_index + v[...] = row + + v = nc.createVariable("col", i_dtype, ("n_s",), zlib=True) + v.long_name = "Source/col indices" + v.start_index = start_index + v[...] = col + + nc.close() if esmpy_regrid_operator is None: # Destroy esmpy objects (the esmpy.Grid objects exist even if diff --git a/cf/regrid/regridoperator.py b/cf/regrid/regridoperator.py index 10a77bc641..7621addc7e 100644 --- a/cf/regrid/regridoperator.py +++ b/cf/regrid/regridoperator.py @@ -725,28 +725,26 @@ def tosparse(self): weights_file = self.weights_file if weights_file is not None: # Read the weights from the weights file + from cfdm.data.locks import netcdf_lock from netCDF4 import Dataset - from ..data.array.locks import netcdf_lock + with netcdf_lock: + nc = Dataset(weights_file, "r") + weights = nc.variables["S"][...] + row = nc.variables["row"][...] + col = nc.variables["col"][...] 
- netcdf_lock.acquire() - nc = Dataset(weights_file, "r") - weights = nc.variables["S"][...] - row = nc.variables["row"][...] - col = nc.variables["col"][...] + try: + col_start_index = nc.variables["col"].start_index + except AttributeError: + col_start_index = 1 - try: - col_start_index = nc.variables["col"].start_index - except AttributeError: - col_start_index = 1 + try: + row_start_index = nc.variables["row"].start_index + except AttributeError: + row_start_index = 1 - try: - row_start_index = nc.variables["row"].start_index - except AttributeError: - row_start_index = 1 - - nc.close() - netcdf_lock.release() + nc.close() else: raise ValueError( "Conversion to sparse array format requires at least " diff --git a/cf/test/test_Field.py b/cf/test/test_Field.py index 6e147010cd..fdc2baa01b 100644 --- a/cf/test/test_Field.py +++ b/cf/test/test_Field.py @@ -2471,7 +2471,7 @@ def test_Field_percentile(self): # TODO: add loop to check get same shape and close enough data # for every possible axis combo (see also test_Data_percentile). 
- def test_Field_grad_xy(self): + def test_Field__aaa_grad_xy(self): f = cf.example_field(0) # theta=0 is at the north pole From 5a320016d8c02ebb57153b71b4b6803582d8c975 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Mon, 11 Nov 2024 09:02:33 +0000 Subject: [PATCH 07/51] dev --- cf/data/array/mixin/__init__.py | 7 - cf/data/array/mixin/arraymixin.py | 26 ++-- cf/data/array/mixin/compressedarraymixin.py | 22 +-- cf/data/array/mixin/filearraymixin.py | 28 ++-- cf/data/data.py | 2 - cf/field.py | 2 - cf/functions.py | 146 +++++++------------- cf/mixin2/__init__.py | 2 +- cf/mixin2/cfanetcdf.py | 40 +++--- 9 files changed, 107 insertions(+), 168 deletions(-) diff --git a/cf/data/array/mixin/__init__.py b/cf/data/array/mixin/__init__.py index 0cd2ce966c..309087cfeb 100644 --- a/cf/data/array/mixin/__init__.py +++ b/cf/data/array/mixin/__init__.py @@ -1,8 +1 @@ from .activestoragemixin import ActiveStorageMixin - -# from .arraymixin import ArrayMixin -# from .cfamixin import CFAMixin -# from .compressedarraymixin import CompressedArrayMixin -# from .filearraymixin import FileArrayMixin - -# from .indexmixin import IndexMixin diff --git a/cf/data/array/mixin/arraymixin.py b/cf/data/array/mixin/arraymixin.py index 809f5f38a6..5225723d80 100644 --- a/cf/data/array/mixin/arraymixin.py +++ b/cf/data/array/mixin/arraymixin.py @@ -1,16 +1,16 @@ -import numpy as np - -from ....units import Units - - -class ArrayMixin: - """Mixin class for a container of an array. - - .. versionadded:: 3.14.0 - - """ - - +# import numpy as np +# +# from ....units import Units +# +# +# class ArrayMixin: +# """Mixin class for a container of an array. +# +# .. versionadded:: 3.14.0 +# +# """ +# +# # def __array_function__(self, func, types, args, kwargs): # """Implement the `numpy` ``__array_function__`` protocol. 
# diff --git a/cf/data/array/mixin/compressedarraymixin.py b/cf/data/array/mixin/compressedarraymixin.py index 0a471e7e57..103c61199c 100644 --- a/cf/data/array/mixin/compressedarraymixin.py +++ b/cf/data/array/mixin/compressedarraymixin.py @@ -1,14 +1,14 @@ -import dask.array as da - - -class CompressedArrayMixin: - """Mixin class for compressed arrays. - - .. versionadded:: 3.14.0 - - """ - - +# import dask.array as da +# +# +# class CompressedArrayMixin: +# """Mixin class for compressed arrays. +# +# .. versionadded:: 3.14.0 +# +# """ +# +# # def _lock_file_read(self, array): # """Try to return a dask array that does not support concurrent # reads. diff --git a/cf/data/array/mixin/filearraymixin.py b/cf/data/array/mixin/filearraymixin.py index 238d3ceb88..a4092262f0 100644 --- a/cf/data/array/mixin/filearraymixin.py +++ b/cf/data/array/mixin/filearraymixin.py @@ -1,17 +1,17 @@ -from os import sep -from os.path import basename, dirname, join - -from ....functions import _DEPRECATION_ERROR_ATTRIBUTE, abspath - - -class FileArrayMixin: - """Mixin class for an array stored in a file. - - .. versionadded:: 3.14.0 - - """ - - +# from os import sep +# from os.path import basename, dirname, join +# +# from ....functions import _DEPRECATION_ERROR_ATTRIBUTE, abspath +# +# +# class FileArrayMixin: +# """Mixin class for an array stored in a file. +# +# .. versionadded:: 3.14.0 +# +# """ +# +# # def __dask_tokenize__(self): # """Return a value fully representative of the object. 
# diff --git a/cf/data/data.py b/cf/data/data.py index a009348076..4ecc5e8fb1 100644 --- a/cf/data/data.py +++ b/cf/data/data.py @@ -3,7 +3,6 @@ from functools import partial, reduce from itertools import product from operator import mul -from os import sep import cfdm import cftime @@ -28,7 +27,6 @@ from ..functions import ( _DEPRECATION_ERROR_KWARGS, _section, - abspath, free_memory, parse_indices, ) diff --git a/cf/field.py b/cf/field.py index 48f80bf42f..b25bc5b1ef 100644 --- a/cf/field.py +++ b/cf/field.py @@ -2,7 +2,6 @@ from dataclasses import dataclass from functools import reduce from operator import mul as operator_mul -from os import sep import cfdm import numpy as np @@ -49,7 +48,6 @@ _DEPRECATION_ERROR_METHOD, DeprecationError, _section, - abspath, flat, parse_indices, ) diff --git a/cf/functions.py b/cf/functions.py index 9eb1b9eb4b..be4c70d62d 100644 --- a/cf/functions.py +++ b/cf/functions.py @@ -11,7 +11,6 @@ from collections.abc import Iterable from itertools import product from math import isnan -from numbers import Integral from os import mkdir from os.path import abspath as _os_path_abspath from os.path import dirname as _os_path_dirname @@ -2129,6 +2128,10 @@ def parse_indices(shape, indices, cyclic=False, keepdims=True): indices: `tuple` The indices to be applied. + cyclic: `bool`, optional + If True then allow cyclic slices (such as ``slice(-4, 3, + 1)``). + keepdims: `bool`, optional If True then an integral index is converted to a slice. For instance, ``3`` would become ``slice(3, 4)``. 
@@ -2157,108 +2160,55 @@ def parse_indices(shape, indices, cyclic=False, keepdims=True): >>> cf.parse_indices((5, 8), (cf.Data([1, 3]),)) [dask.array, slice(None, None, None)] - """ - parsed_indices = [] - roll = {} - - if not isinstance(indices, tuple): - indices = (indices,) - - # Initialise the list of parsed indices as the input indices with any - # Ellipsis objects expanded - length = len(indices) - n = len(shape) - ndim = n - for index in indices: - if index is Ellipsis: - m = n - length + 1 - parsed_indices.extend([slice(None)] * m) - n -= m - else: - parsed_indices.append(index) - n -= 1 - - length -= 1 - - len_parsed_indices = len(parsed_indices) + parsed_indices = cfdm.parse_indices(shape, indices, keepdims=keepdims) - if ndim and len_parsed_indices > ndim: - raise IndexError( - f"Invalid indices {parsed_indices} for array with shape {shape}" - ) - - if len_parsed_indices < ndim: - parsed_indices.extend([slice(None)] * (ndim - len_parsed_indices)) - - if not ndim and parsed_indices: - raise IndexError( - "Scalar array can only be indexed with () or Ellipsis" - ) + if not cyclic: + return parsed_indices + roll = {} for i, (index, size) in enumerate(zip(parsed_indices, shape)): - if cyclic and isinstance(index, slice): - # Check for a cyclic slice - try: - index = normalize_slice(index, size, cyclic=True) - except IndexError: - # Non-cyclic slice - pass - else: - # Cyclic slice - start = index.start - stop = index.stop - step = index.step - if ( - step > 0 - and -size <= start < 0 - and 0 <= stop <= size + start - ): - # x = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] - # x[ -1:0:1] => [9] - # x[ -1:1:1] => [9, 0] - # x[ -1:3:1] => [9, 0, 1, 2] - # x[ -1:9:1] => [9, 0, 1, 2, 3, 4, 5, 6, 7, 8] - # x[ -4:0:1] => [6, 7, 8, 9] - # x[ -4:1:1] => [6, 7, 8, 9, 0] - # x[ -4:3:1] => [6, 7, 8, 9, 0, 1, 2] - # x[ -4:6:1] => [6, 7, 8, 9, 0, 1, 2, 3, 4, 5] - # x[ -9:0:1] => [1, 2, 3, 4, 5, 6, 7, 8, 9] - # x[ -9:1:1] => [1, 2, 3, 4, 5, 6, 7, 8, 9, 0] - # x[-10:0:1] => [0, 1, 2, 3, 4, 
5, 6, 7, 8, 9] - index = slice(0, stop - start, step) - roll[i] = -start - - elif ( - step < 0 and 0 <= start < size and start - size <= stop < 0 - ): - # x = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] - # x[0: -4:-1] => [0, 9, 8, 7] - # x[6: -1:-1] => [6, 5, 4, 3, 2, 1, 0] - # x[6: -2:-1] => [6, 5, 4, 3, 2, 1, 0, 9] - # x[6: -4:-1] => [6, 5, 4, 3, 2, 1, 0, 9, 8, 7] - # x[0: -2:-1] => [0, 9] - # x[0:-10:-1] => [0, 9, 8, 7, 6, 5, 4, 3, 2, 1] - index = slice(start - stop - 1, None, step) - roll[i] = -1 - stop - - elif keepdims and isinstance(index, Integral): - # Convert an integral index to a slice - if index == -1: - index = slice(-1, None, None) - else: - index = slice(index, index + 1, 1) - - elif hasattr(index, "to_dask_array"): - to_dask_array = index.to_dask_array - if callable(to_dask_array): - # Replace index with its Dask array - index = to_dask_array() - - parsed_indices[i] = index + if not isinstance(index, slice): + continue - if not cyclic: - return parsed_indices + try: + index = normalize_slice(index, size, cyclic=True) + except IndexError: + # Non-cyclic slice + pass + else: + # Cyclic slice + start = index.start + stop = index.stop + step = index.step + if step > 0 and -size <= start < 0 and 0 <= stop <= size + start: + # x = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + # x[ -1:0:1] => [9] + # x[ -1:1:1] => [9, 0] + # x[ -1:3:1] => [9, 0, 1, 2] + # x[ -1:9:1] => [9, 0, 1, 2, 3, 4, 5, 6, 7, 8] + # x[ -4:0:1] => [6, 7, 8, 9] + # x[ -4:1:1] => [6, 7, 8, 9, 0] + # x[ -4:3:1] => [6, 7, 8, 9, 0, 1, 2] + # x[ -4:6:1] => [6, 7, 8, 9, 0, 1, 2, 3, 4, 5] + # x[ -9:0:1] => [1, 2, 3, 4, 5, 6, 7, 8, 9] + # x[ -9:1:1] => [1, 2, 3, 4, 5, 6, 7, 8, 9, 0] + # x[-10:0:1] => [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + index = slice(0, stop - start, step) + roll[i] = -start + + elif step < 0 and 0 <= start < size and start - size <= stop < 0: + # x = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + # x[0: -4:-1] => [0, 9, 8, 7] + # x[6: -1:-1] => [6, 5, 4, 3, 2, 1, 0] + # x[6: -2:-1] => [6, 5, 4, 3, 2, 1, 0, 9] + # x[6: 
-4:-1] => [6, 5, 4, 3, 2, 1, 0, 9, 8, 7] + # x[0: -2:-1] => [0, 9] + # x[0:-10:-1] => [0, 9, 8, 7, 6, 5, 4, 3, 2, 1] + index = slice(start - stop - 1, None, step) + roll[i] = -1 - stop + + parsed_indices[i] = index return parsed_indices, roll diff --git a/cf/mixin2/__init__.py b/cf/mixin2/__init__.py index 3dc304f232..4531d515c3 100644 --- a/cf/mixin2/__init__.py +++ b/cf/mixin2/__init__.py @@ -1,2 +1,2 @@ -from .cfanetcdf import CFANetCDF +# from .cfanetcdf import CFANetCDF from .container import Container diff --git a/cf/mixin2/cfanetcdf.py b/cf/mixin2/cfanetcdf.py index 390717df47..80575ae8cd 100644 --- a/cf/mixin2/cfanetcdf.py +++ b/cf/mixin2/cfanetcdf.py @@ -1,23 +1,23 @@ -"""This class is not in the cf.mixin package because it needs to be -imported by cf.Data, and some of the other mixin classes in cf.mixin -themsleves import cf.Data, which would lead to a circular import -situation. - -""" - -from re import split - -from cfdm.mixin import NetCDFMixin - - -class CFANetCDF(NetCDFMixin): - """Mixin class for CFA-netCDF. - - .. versionadded:: 3.15.0 - - """ - - +# """This class is not in the cf.mixin package because it needs to be +# imported by cf.Data, and some of the other mixin classes in cf.mixin +# themsleves import cf.Data, which would lead to a circular import +# situation. +# +# """ +# +# from re import split +# +# from cfdm.mixin import NetCDFMixin +# +# +# class CFANetCDF(NetCDFMixin): +# """Mixin class for CFA-netCDF. +# +# .. versionadded:: 3.15.0 +# +# """ +# +# # # def cfa_del_aggregated_data(self): # """Remove the CFA-netCDF aggregation instruction terms. 
From 5932aeaf30f0314cf7bbc326b1e101bfa47b2f45 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Tue, 12 Nov 2024 18:37:52 +0000 Subject: [PATCH 08/51] dev --- cf/functions.py | 33 +++------------------------------ 1 file changed, 3 insertions(+), 30 deletions(-) diff --git a/cf/functions.py b/cf/functions.py index be4c70d62d..7f420ac044 100644 --- a/cf/functions.py +++ b/cf/functions.py @@ -2784,39 +2784,12 @@ def relpath(filename, start=None): return _os_path_relpath(filename) -def dirname(filename): - """Return the directory name of a file. +def dirname(path, isdir=False): + return cfdm.dirname(path, isdir=isdir) - If a string containing URL is provided then everything up to, but - not including, the last slash (/) is returned. - .. seealso:: `cf.abspath`, `cf.pathjoin`, `cf.relpath` +dirname.__doc__ = cfdm.dirname.__doc__.replace("cfdm.", "cf.") - :Parameters: - - filename: `str` - The name of the file. - - :Returns: - - `str` - The directory name. - - **Examples** - - >>> cf.dirname('/data/archive/file.nc') - '/data/archive' - >>> cf.dirname('..//file.nc') - '..' 
- >>> cf.dirname('http://data/archive/file.nc') - 'http://data/archive' - - """ - u = urlparse(filename) - if u.scheme != "": - return filename.rpartition("/")[0] - - return _os_path_dirname(filename) def pathjoin(path1, path2): From b4bd7c229d5d57c7a1abafd151a773e543a0dfcb Mon Sep 17 00:00:00 2001 From: David Hassell Date: Wed, 13 Nov 2024 17:08:48 +0000 Subject: [PATCH 09/51] dev --- cf/domain.py | 194 ++-- cf/mixin/propertiesdata.py | 186 ++-- cf/mixin/propertiesdatabounds.py | 228 ++--- cf/read_write/read.py | 8 +- cf/test/test_CFA.py | 1628 +++++++++++++++--------------- 5 files changed, 1122 insertions(+), 1122 deletions(-) diff --git a/cf/domain.py b/cf/domain.py index 8889fdf97a..49d7e9f8e6 100644 --- a/cf/domain.py +++ b/cf/domain.py @@ -162,103 +162,103 @@ def add_file_location( return location - def cfa_clear_file_substitutions( - self, - ): - """Remove all of the CFA-netCDF file name substitutions. - - .. versionadded:: 3.15.0 - - :Returns: - - `dict` - {{Returns cfa_clear_file_substitutions}} - - **Examples** - - >>> d.cfa_clear_file_substitutions() - {} - - """ - out = {} - for c in self.constructs.filter_by_data(todict=True).values(): - out.update(c.cfa_clear_file_substitutions()) - - return out - - def cfa_file_substitutions(self): - """Return the CFA-netCDF file name substitutions. - - .. versionadded:: 3.15.0 - - :Returns: - - `dict` - {{Returns cfa_file_substitutions}} - - **Examples** - - >>> d.cfa_file_substitutions() - {} - - """ - out = {} - for c in self.constructs.filter_by_data(todict=True).values(): - out.update(c.cfa_file_substitutions()) - - return out - - def cfa_del_file_substitution( - self, - base, - ): - """Remove a CFA-netCDF file name substitution. - - .. 
versionadded:: 3.15.0 - - :Parameters: - - base: `str` - {{cfa base: `str`}} - - :Returns: - - `dict` - {{Returns cfa_del_file_substitution}} - - **Examples** - - >>> f.cfa_del_file_substitution('base') - - """ - for c in self.constructs.filter_by_data(todict=True).values(): - c.cfa_del_file_substitution( - base, - ) - - def cfa_update_file_substitutions( - self, - substitutions, - ): - """Set CFA-netCDF file name substitutions. - - .. versionadded:: 3.15.0 - - :Parameters: - - {{cfa substitutions: `dict`}} - - :Returns: - - `None` - - **Examples** - - >>> d.cfa_update_file_substitutions({'base': '/data/model'}) - - """ - for c in self.constructs.filter_by_data(todict=True).values(): - c.cfa_update_file_substitutions(substitutions) +# def cfa_clear_file_substitutions( +# self, +# ): +# """Remove all of the CFA-netCDF file name substitutions. +# +# .. versionadded:: 3.15.0 +# +# :Returns: +# +# `dict` +# {{Returns cfa_clear_file_substitutions}} +# +# **Examples** +# +# >>> d.cfa_clear_file_substitutions() +# {} +# +# """ +# out = {} +# for c in self.constructs.filter_by_data(todict=True).values(): +# out.update(c.cfa_clear_file_substitutions()) +# +# return out +# +# def cfa_file_substitutions(self): +# """Return the CFA-netCDF file name substitutions. +# +# .. versionadded:: 3.15.0 +# +# :Returns: +# +# `dict` +# {{Returns cfa_file_substitutions}} +# +# **Examples** +# +# >>> d.cfa_file_substitutions() +# {} +# +# """ +# out = {} +# for c in self.constructs.filter_by_data(todict=True).values(): +# out.update(c.cfa_file_substitutions()) +# +# return out +# +# def cfa_del_file_substitution( +# self, +# base, +# ): +# """Remove a CFA-netCDF file name substitution. +# +# .. 
versionadded:: 3.15.0 +# +# :Parameters: +# +# base: `str` +# {{cfa base: `str`}} +# +# :Returns: +# +# `dict` +# {{Returns cfa_del_file_substitution}} +# +# **Examples** +# +# >>> f.cfa_del_file_substitution('base') +# +# """ +# for c in self.constructs.filter_by_data(todict=True).values(): +# c.cfa_del_file_substitution( +# base, +# ) +# +# def cfa_update_file_substitutions( +# self, +# substitutions, +# ): +# """Set CFA-netCDF file name substitutions. +# +# .. versionadded:: 3.15.0 +# +# :Parameters: +# +# {{cfa substitutions: `dict`}} +# +# :Returns: +# +# `None` +# +# **Examples** +# +# >>> d.cfa_update_file_substitutions({'base': '/data/model'}) +# +# """ +# for c in self.constructs.filter_by_data(todict=True).values(): +# c.cfa_update_file_substitutions(substitutions) def close(self): """Close all files referenced by the domain construct. diff --git a/cf/mixin/propertiesdata.py b/cf/mixin/propertiesdata.py index 6437ed3fc7..100095c07d 100644 --- a/cf/mixin/propertiesdata.py +++ b/cf/mixin/propertiesdata.py @@ -2577,99 +2577,99 @@ def ceil(self, inplace=False, i=False): delete_props=True, ) - def cfa_update_file_substitutions(self, substitutions): - """Set CFA-netCDF file name substitutions. - - .. versionadded:: 3.15.0 - - :Parameters: - - {{cfa substitutions: `dict`}} - - :Returns: - - `None` - - **Examples** - - >>> f.cfa_update_file_substitutions({'base', '/data/model'}) - - """ - data = self.get_data(None, _fill_value=False, _units=False) - if data is not None: - data.cfa_update_file_substitutions(substitutions) - - @_inplace_enabled(default=False) - def cfa_clear_file_substitutions(self, inplace=False): - """Remove all of the CFA-netCDF file name substitutions. - - .. 
versionadded:: 3.15.0 - - :Parameters: - - {{inplace: `bool`, optional}} - - :Returns: - - `dict` - {{Returns cfa_clear_file_substitutions}} - - **Examples** - - >>> f.cfa_clear_file_substitutions() - {} - - """ - data = self.get_data(None) - if data is None: - return {} - - return data.cfa_clear_file_substitutions({}) - - def cfa_del_file_substitution( - self, - base, - ): - """Remove a CFA-netCDF file name substitution. - - .. versionadded:: 3.15.0 - - :Parameters: - - `dict` - {{Returns cfa_del_file_substitution}} - - **Examples** - - >>> f.cfa_del_file_substitution('base') - - """ - data = self.get_data(None, _fill_value=False, _units=False) - if data is not None: - data.cfa_del_file_substitution(base) - - def cfa_file_substitutions( - self, - ): - """Return the CFA-netCDF file name substitutions. - - .. versionadded:: 3.15.0 - - :Returns: - - `dict` - {{Returns cfa_file_substitutions}} - - **Examples** - - >>> g = f.cfa_file_substitutions() - - """ - data = self.get_data(None) - if data is None: - return {} - - return data.cfa_file_substitutions({}) +# def cfa_update_file_substitutions(self, substitutions): +# """Set CFA-netCDF file name substitutions. +# +# .. versionadded:: 3.15.0 +# +# :Parameters: +# +# {{cfa substitutions: `dict`}} +# +# :Returns: +# +# `None` +# +# **Examples** +# +# >>> f.cfa_update_file_substitutions({'base', '/data/model'}) +# +# """ +# data = self.get_data(None, _fill_value=False, _units=False) +# if data is not None: +# data.cfa_update_file_substitutions(substitutions) +# +# @_inplace_enabled(default=False) +# def cfa_clear_file_substitutions(self, inplace=False): +# """Remove all of the CFA-netCDF file name substitutions. +# +# .. 
versionadded:: 3.15.0 +# +# :Parameters: +# +# {{inplace: `bool`, optional}} +# +# :Returns: +# +# `dict` +# {{Returns cfa_clear_file_substitutions}} +# +# **Examples** +# +# >>> f.cfa_clear_file_substitutions() +# {} +# +# """ +# data = self.get_data(None) +# if data is None: +# return {} +# +# return data.cfa_clear_file_substitutions({}) +# +# def cfa_del_file_substitution( +# self, +# base, +# ): +# """Remove a CFA-netCDF file name substitution. +# +# .. versionadded:: 3.15.0 +# +# :Parameters: +# +# `dict` +# {{Returns cfa_del_file_substitution}} +# +# **Examples** +# +# >>> f.cfa_del_file_substitution('base') +# +# """ +# data = self.get_data(None, _fill_value=False, _units=False) +# if data is not None: +# data.cfa_del_file_substitution(base) +# +# def cfa_file_substitutions( +# self, +# ): +# """Return the CFA-netCDF file name substitutions. +# +# .. versionadded:: 3.15.0 +# +# :Returns: +# +# `dict` +# {{Returns cfa_file_substitutions}} +# +# **Examples** +# +# >>> g = f.cfa_file_substitutions() +# +# """ +# data = self.get_data(None) +# if data is None: +# return {} +# +# return data.cfa_file_substitutions({}) def chunk(self, chunksize=None): """Partition the data array. diff --git a/cf/mixin/propertiesdatabounds.py b/cf/mixin/propertiesdatabounds.py index b369db4336..1f41c2b464 100644 --- a/cf/mixin/propertiesdatabounds.py +++ b/cf/mixin/propertiesdatabounds.py @@ -1236,120 +1236,120 @@ def ceil(self, bounds=True, inplace=False, i=False): i=i, ) - def cfa_clear_file_substitutions( - self, - ): - """Remove all of the CFA-netCDF file name substitutions. - - .. 
versionadded:: 3.15.0 - - :Returns: - - `dict` - {{Returns cfa_clear_file_substitutions}} - - **Examples** - - >>> f.cfa_clear_file_substitutions() - {} - - """ - out = super().cfa_clear_file_substitutions() - - bounds = self.get_bounds(None) - if bounds is not None: - out.update(bounds.cfa_clear_file_substitutions()) - - interior_ring = self.get_interior_ring(None) - if interior_ring is not None: - out.update(interior_ring.cfa_clear_file_substitutions()) - - return out - - def cfa_del_file_substitution(self, base): - """Remove a CFA-netCDF file name substitution. - - .. versionadded:: 3.15.0 - - :Parameters: - - {{cfa base: `str`}} - - :Returns: - - `dict` - {{Returns cfa_del_file_substitution}} - - **Examples** - - >>> c.cfa_del_file_substitution('base') - - """ - super().cfa_del_file_substitution(base) - - bounds = self.get_bounds(None) - if bounds is not None: - bounds.cfa_del_file_substitution(base) - - interior_ring = self.get_interior_ring(None) - if interior_ring is not None: - interior_ring.cfa_del_file_substitution(base) - - def cfa_file_substitutions(self): - """Return the CFA-netCDF file name substitutions. - - .. versionadded:: 3.15.0 - - :Returns: - - `dict` - {{Returns cfa_file_substitutions}} - - **Examples** - - >>> c.cfa_file_substitutions() - {} - - """ - out = super().cfa_file_substitutions() - - bounds = self.get_bounds(None) - if bounds is not None: - out.update(bounds.cfa_file_substitutions({})) - - interior_ring = self.get_interior_ring(None) - if interior_ring is not None: - out.update(interior_ring.cfa_file_substitutions({})) - - return out - - def cfa_update_file_substitutions(self, substitutions): - """Set CFA-netCDF file name substitutions. - - .. 
versionadded:: 3.15.0 - - :Parameters: - - {{cfa substitutions: `dict`}} - - :Returns: - - `None` - - **Examples** - - >>> c.cfa_add_file_substitutions({'base', '/data/model'}) - - """ - super().cfa_update_file_substitutions(substitutions) - - bounds = self.get_bounds(None) - if bounds is not None: - bounds.cfa_update_file_substitutions(substitutions) - - interior_ring = self.get_interior_ring(None) - if interior_ring is not None: - interior_ring.cfa_update_file_substitutions(substitutions) +# def cfa_clear_file_substitutions( +# self, +# ): +# """Remove all of the CFA-netCDF file name substitutions. +# +# .. versionadded:: 3.15.0 +# +# :Returns: +# +# `dict` +# {{Returns cfa_clear_file_substitutions}} +# +# **Examples** +# +# >>> f.cfa_clear_file_substitutions() +# {} +# +# """ +# out = super().cfa_clear_file_substitutions() +# +# bounds = self.get_bounds(None) +# if bounds is not None: +# out.update(bounds.cfa_clear_file_substitutions()) +# +# interior_ring = self.get_interior_ring(None) +# if interior_ring is not None: +# out.update(interior_ring.cfa_clear_file_substitutions()) +# +# return out +# +# def cfa_del_file_substitution(self, base): +# """Remove a CFA-netCDF file name substitution. +# +# .. versionadded:: 3.15.0 +# +# :Parameters: +# +# {{cfa base: `str`}} +# +# :Returns: +# +# `dict` +# {{Returns cfa_del_file_substitution}} +# +# **Examples** +# +# >>> c.cfa_del_file_substitution('base') +# +# """ +# super().cfa_del_file_substitution(base) +# +# bounds = self.get_bounds(None) +# if bounds is not None: +# bounds.cfa_del_file_substitution(base) +# +# interior_ring = self.get_interior_ring(None) +# if interior_ring is not None: +# interior_ring.cfa_del_file_substitution(base) +# +# def cfa_file_substitutions(self): +# """Return the CFA-netCDF file name substitutions. +# +# .. 
versionadded:: 3.15.0 +# +# :Returns: +# +# `dict` +# {{Returns cfa_file_substitutions}} +# +# **Examples** +# +# >>> c.cfa_file_substitutions() +# {} +# +# """ +# out = super().cfa_file_substitutions() +# +# bounds = self.get_bounds(None) +# if bounds is not None: +# out.update(bounds.cfa_file_substitutions({})) +# +# interior_ring = self.get_interior_ring(None) +# if interior_ring is not None: +# out.update(interior_ring.cfa_file_substitutions({})) +# +# return out +# +# def cfa_update_file_substitutions(self, substitutions): +# """Set CFA-netCDF file name substitutions. +# +# .. versionadded:: 3.15.0 +# +# :Parameters: +# +# {{cfa substitutions: `dict`}} +# +# :Returns: +# +# `None` +# +# **Examples** +# +# >>> c.cfa_add_file_substitutions({'base', '/data/model'}) +# +# """ +# super().cfa_update_file_substitutions(substitutions) +# +# bounds = self.get_bounds(None) +# if bounds is not None: +# bounds.cfa_update_file_substitutions(substitutions) +# +# interior_ring = self.get_interior_ring(None) +# if interior_ring is not None: +# interior_ring.cfa_update_file_substitutions(substitutions) def chunk(self, chunksize=None): """Partition the data array. 
diff --git a/cf/read_write/read.py b/cf/read_write/read.py index 07c9132e53..158fc4d5d4 100644 --- a/cf/read_write/read.py +++ b/cf/read_write/read.py @@ -64,7 +64,7 @@ def read( dask_chunks="storage-aligned", store_hdf5_chunks=True, domain=False, - cfa=None, +# cfa=None, cfa_write=None, netcdf_backend=None, storage_options=None, @@ -1173,7 +1173,7 @@ def read( warn_valid=warn_valid, select=select, domain=domain, - cfa=cfa, +# cfa=cfa, cfa_write=cfa_write, netcdf_backend=netcdf_backend, storage_options=storage_options, @@ -1293,7 +1293,7 @@ def _read_a_file( store_hdf5_chunks=True, select=None, domain=False, - cfa=None, +# cfa=None, cfa_write=None, netcdf_backend=None, storage_options=None, @@ -1429,7 +1429,7 @@ def _read_a_file( dask_chunks=dask_chunks, store_hdf5_chunks=store_hdf5_chunks, cache=cache, - cfa=cfa, +# cfa=cfa, cfa_write=cfa_write, ) except MaskError: diff --git a/cf/test/test_CFA.py b/cf/test/test_CFA.py index f0bc1100c8..e13a81c77e 100644 --- a/cf/test/test_CFA.py +++ b/cf/test/test_CFA.py @@ -1,652 +1,32 @@ -import atexit -import datetime -import faulthandler -import os -import tempfile -import unittest -from pathlib import PurePath - -import netCDF4 - -faulthandler.enable() # to debug seg faults and timeouts - -import cf - -n_tmpfiles = 5 -tmpfiles = [ - tempfile.mkstemp("_test_CFA.nc", dir=os.getcwd())[1] - for i in range(n_tmpfiles) -] -( - tmpfile1, - tmpfile2, - nc_file, - cfa_file, - cfa_file2, -) = tmpfiles - - -def _remove_tmpfiles(): - """Try to remove defined temporary files by deleting their paths.""" - for f in tmpfiles: - try: - os.remove(f) - except OSError: - pass - - -atexit.register(_remove_tmpfiles) - - -class CFATest(unittest.TestCase): - """Unit test for aggregation variables.""" - - netcdf3_fmts = [ - "NETCDF3_CLASSIC", - "NETCDF3_64BIT", - "NETCDF3_64BIT_OFFSET", - "NETCDF3_64BIT_DATA", - ] - netcdf4_fmts = ["NETCDF4", "NETCDF4_CLASSIC"] - netcdf_fmts = netcdf3_fmts + netcdf4_fmts - - aggregation_value = os.path.join( - 
os.path.dirname(os.path.abspath(__file__)), "aggregation_value.nc" - ) - - def test_CFA_fmt(self): - """Test the cf.read 'fmt' keyword with cfa.""" - f = cf.example_field(0) - cf.write(f, tmpfile1) - f = cf.read(tmpfile1)[0] - - for fmt in self.netcdf_fmts: - cf.write(f, cfa_file, fmt=fmt, cfa="field") - g = cf.read(cfa_file) - self.assertEqual(len(g), 1) - self.assertTrue(f.equals(g[0])) - - def test_CFA_multiple_fragments(self): - """Test aggregation variables with more than one fragment.""" - f = cf.example_field(0) - - cf.write(f[:2], tmpfile1) - cf.write(f[2:], tmpfile2) - - a = cf.read(tmpfile1)[0] - b = cf.read(tmpfile2)[0] - a = cf.Field.concatenate([a, b], axis=0) - - cf.write(a, nc_file) - cf.write(a, cfa_file, cfa="field") - - n = cf.read(nc_file) - c = cf.read(cfa_file) - self.assertEqual(len(n), 1) - self.assertEqual(len(c), 1) - self.assertTrue(c[0].equals(f)) - self.assertTrue(n[0].equals(c[0])) - - def test_CFA_strict(self): - """Test 'strict' option to the cf.write 'cfa' keyword.""" - f = cf.example_field(0) - - # By default, can't write in-memory arrays as aggregation - # variables - with self.assertRaises(ValueError): - cf.write(f, cfa_file, cfa="field") - - # The previous line should have deleted the output file - self.assertFalse(os.path.exists(cfa_file)) - - cf.write(f, nc_file, cfa={"constructs": "field", "strict": False}) - g = cf.read(nc_file) - self.assertEqual(len(g), 1) - self.assertTrue(g[0].equals(f)) - - cf.write(g, cfa_file, cfa={"constructs": "field", "strict": True}) - g = cf.read(cfa_file) - self.assertEqual(len(g), 1) - self.assertTrue(g[0].equals(f)) - - def test_CFA_substitutions_0(self): - """Test aggregation substitution URI substitutions (0).""" - f = cf.example_field(0) - cf.write(f, tmpfile1) - f = cf.read(tmpfile1)[0] - - cwd = os.getcwd() - f.data.nc_update_aggregation_substitutions({"base": cwd}) - - cf.write( - f, - cfa_file, - cfa={"constructs": "field", "uri": "absolute"}, - ) - - nc = netCDF4.Dataset(cfa_file, "r") 
- cfa_location = nc.variables["cfa_location"] - self.assertEqual( - cfa_location.getncattr("substitutions"), - f"${{base}}: {cwd}", - ) - self.assertEqual( - cfa_location[...], f"${{base}}/{os.path.basename(tmpfile1)}" - ) - nc.close() - - g = cf.read(cfa_file) - self.assertEqual(len(g), 1) - self.assertTrue(f.equals(g[0])) - - def test_CFA_substitutions_1(self): - """Test aggregation substitution URI substitutions (1).""" - f = cf.example_field(0) - cf.write(f, tmpfile1) - f = cf.read(tmpfile1)[0] - - cwd = os.getcwd() - for base in ("base", "${base}"): - cf.write( - f, - cfa_file, - cfa={ - "constructs": "field", - "uri": "absolute", - "substitutions": {base: cwd}, - }, - ) - - nc = netCDF4.Dataset(cfa_file, "r") - cfa_location = nc.variables["cfa_location"] - self.assertEqual( - cfa_location.getncattr("substitutions"), - f"${{base}}: {cwd}", - ) - self.assertEqual( - cfa_location[...], - f"${{base}}/{os.path.basename(tmpfile1)}", - ) - nc.close() - - g = cf.read(cfa_file) - self.assertEqual(len(g), 1) - self.assertTrue(f.equals(g[0])) - - def test_CFA_substitutions_2(self): - """Test aggregation substitution URI substitutions (2).""" - # TODOCFA: delete - tmpfile1 = "tmpfile1.nc" - - f = cf.example_field(0) - - cf.write(f, tmpfile1) - f = cf.read(tmpfile1)[0] - - cwd = os.getcwd() - basename = os.path.basename(tmpfile1) - - # TODOCFA: delete - cfa_file = "cfa_file.nc" - - f.data.nc_clear_aggregation_substitutions() - f.data.nc_update_aggregation_substitutions({"base": f"{cwd}"}) - cf.write( - f, - cfa_file, - cfa={ - "constructs": "field", - "uri": "absolute", - "substitutions": {"base2": "/bad/location"}, - }, - ) - - nc = netCDF4.Dataset(cfa_file, "r") - cfa_location = nc.variables["cfa_location"] - self.assertEqual( - cfa_location.getncattr("substitutions"), - f"${{base2}}: /bad/location ${{base}}: {cwd}", - ) - self.assertEqual(cfa_location[...], f"file://${{base}}/{basename}") - nc.close() - - g = cf.read(cfa_file) - self.assertEqual(len(g), 1) - 
self.assertTrue(f.equals(g[0])) - - f.data.nc_clear_aggregation_substitutions() - f.data.nc_update_aggregation_substitutions({"base": "/bad/location"}) - - cf.write( - f, - cfa_file, - cfa={ - "constructs": "field", - "uri": "absolute", - "substitutions": {"base": cwd}, - }, - ) - - nc = netCDF4.Dataset(cfa_file, "r") - cfa_location = nc.variables["cfa_location"] - self.assertEqual( - cfa_location.getncattr("substitutions"), - f"${{base}}: {cwd}", - ) - self.assertEqual(cfa_location[...], f"file://${{base}}/{basename}") - nc.close() - - g = cf.read(cfa_file) - self.assertEqual(len(g), 1) - self.assertTrue(f.equals(g[0])) - - f.data.nc_clear_aggregation_substitutions() - f.data.nc_update_aggregation_substitutions({"base2": "/bad/location"}) - - cf.write( - f, - cfa_file, - cfa={ - "constructs": "field", - "uri": "absolute", - "substitutions": {"base": cwd}, - }, - ) - - nc = netCDF4.Dataset(cfa_file, "r") - cfa_location = nc.variables["cfa_location"] - self.assertEqual( - cfa_location.getncattr("substitutions"), - f"${{base2}}: /bad/location ${{base}}: {cwd}", - ) - self.assertEqual( - cfa_location[...], f"file://${{base}}/{os.path.basename(tmpfile1)}" - ) - nc.close() - - g = cf.read(cfa_file) - self.assertEqual(len(g), 1) - g = g[0] - self.assertTrue(f.equals(g)) - - self.assertEqual( - g.data.get_filenames(normalise=False), - set((f"file://${{base}}/{os.path.basename(tmpfile1)}",)), - ) - g.data.nc_update_aggregation_substitutions({"base": "/new/location"}) - self.assertEqual( - g.data.nc_aggregation_substitutions(), - {"${base2}": "/bad/location", "${base}": "/new/location"}, - ) - self.assertEqual( - g.data.get_filenames(normalise=False), - set((f"file://${{base}}/{os.path.basename(tmpfile1)}",)), - ) - - # TODOCFA: delete - cfa_file2 = "cfa_file2.nc" - cf.write( - g, - cfa_file2, - cfa={ - "constructs": "field", - "uri": "absolute", - }, - ) - nc = netCDF4.Dataset(cfa_file2, "r") - cfa_location = nc.variables["cfa_location"] - self.assertEqual( - 
cfa_location.getncattr("substitutions"), - "${base2}: /bad/location ${base}: /new/location", - ) - self.assertEqual( - cfa_location[...], f"file://${{base}}/{os.path.basename(tmpfile1)}" - ) - nc.close() - - def test_CFA_substitutions_3(self): - """Test aggregation substitution URI substitutions (2).""" - f = cf.example_field(0) - cf.write(f, tmpfile1) - f = cf.read(tmpfile1)[0] - - cwd = os.getcwd() - basename = os.path.basename(tmpfile1) - - f.data.nc_clear_aggregation_substitutions() - f.data.nc_update_aggregation_substitutions({"base": f"{cwd}/"}) - cf.write( - f, - cfa_file, - cfa={ - "constructs": "field", - "uri": "absolute", - }, - ) - - nc = netCDF4.Dataset(cfa_file, "r") - cfa_location = nc.variables["cfa_location"] - self.assertEqual( - cfa_location.getncattr("substitutions"), f"${{base}}: {cwd}/" - ) - self.assertEqual(cfa_location[...], f"${{base}}{basename}") - nc.close() - - # TODOCFA: delete - cfa_file2 = "cfa_file2.nc" - - g = cf.read(cfa_file)[0] - self.assertTrue(f.equals(g)) - cf.write( - g, - cfa_file2, - cfa={ - "constructs": "field", - "uri": "absolute", - }, - ) - - def test_CFA_substitutions_4(self): - """Test aggregation substitution URI substitutions (2).""" - f = cf.example_field(0) - - # TODOCFA: delete - tmpfile1 = "tmpfile1.nc" - - cf.write(f, tmpfile1) - f = cf.read(tmpfile1)[0] - - cwd = os.getcwd() - basename = os.path.basename(tmpfile1) - - # TODOCFA: delete - cfa_file = "cfa_file.nc" - - replacement = f"{cwd}/" - f.data.nc_clear_aggregation_substitutions() - f.data.nc_update_aggregation_substitutions({"base": replacement}) - cf.write( - f, - cfa_file, - cfa={ - "constructs": "field", - "uri": "relative", - }, - ) - - nc = netCDF4.Dataset(cfa_file, "r") - cfa_location = nc.variables["cfa_location"] - self.assertEqual( - cfa_location.getncattr("substitutions"), - f"${{base}}: {replacement}", - ) - self.assertEqual(cfa_location[...], basename) - nc.close() - - cf.write( - f, - cfa_file, - cfa={"constructs": "field", "uri": 
"absolute"}, - ) - nc = netCDF4.Dataset(cfa_file, "r") - cfa_location = nc.variables["cfa_location"] - self.assertEqual( - cfa_location.getncattr("substitutions"), - f"${{base}}: {replacement}", - ) - self.assertEqual(cfa_location[...], f"file://${{base}}{basename}") - nc.close() - - def test_CFA_uri(self): - """Test aggregation 'uri' option to cf.write.""" - f = cf.example_field(0) - cf.write(f, tmpfile1) - f = cf.read(tmpfile1)[0] - - for uri, filename in zip( - ("absolute", "relative"), - ( - PurePath(os.path.abspath(tmpfile1)).as_uri(), - os.path.basename(tmpfile1), - ), - ): - cf.write( - f, - cfa_file, - cfa={"constructs": "field", "uri": uri}, - ) - - nc = netCDF4.Dataset(cfa_file, "r") - cfa_location = nc.variables["cfa_location"] - self.assertEqual(cfa_location[...], filename) - nc.close() - - g = cf.read(cfa_file) - self.assertEqual(len(g), 1) - self.assertTrue(f.equals(g[0])) - - def test_CFA_constructs(self): - """Test aggregation 'constructs' option to cf.write.""" - f = cf.example_field(1) - f.del_construct("time") - f.del_construct("long_name=Grid latitude name") - cf.write(f, tmpfile1) - f = cf.read(tmpfile1)[0] - - # No constructs - cf.write(f, tmpfile2, cfa={"constructs": []}) - nc = netCDF4.Dataset(tmpfile2, "r") - for var in nc.variables.values(): - attrs = var.ncattrs() - self.assertNotIn("aggregated_dimensions", attrs) - self.assertNotIn("aggregated_data", attrs) - - nc.close() - - # Field construct - cf.write(f, tmpfile2, cfa={"constructs": "field"}) - nc = netCDF4.Dataset(tmpfile2, "r") - for ncvar, var in nc.variables.items(): - attrs = var.ncattrs() - if ncvar in ("ta",): - self.assertFalse(var.ndim) - self.assertIn("aggregated_dimensions", attrs) - self.assertIn("aggregated_data", attrs) - else: - self.assertNotIn("aggregated_dimensions", attrs) - self.assertNotIn("aggregated_data", attrs) - - nc.close() - - # Dimension construct - for constructs in ( - "dimension_coordinate", - ["dimension_coordinate"], - {"dimension_coordinate": None}, 
- {"dimension_coordinate": 1}, - ): - cf.write(f, tmpfile2, cfa={"constructs": constructs}) - nc = netCDF4.Dataset(tmpfile2, "r") - for ncvar, var in nc.variables.items(): - attrs = var.ncattrs() - if ncvar in ( - "x", - "x_bnds", - "y", - "y_bnds", - "atmosphere_hybrid_height_coordinate", - "atmosphere_hybrid_height_coordinate_bounds", - ): - self.assertFalse(var.ndim) - self.assertIn("aggregated_dimensions", attrs) - self.assertIn("aggregated_data", attrs) - else: - self.assertNotIn("aggregated_dimensions", attrs) - self.assertNotIn("aggregated_data", attrs) - - nc.close() - - # Dimension and auxiliary constructs - for constructs in ( - ["dimension_coordinate", "auxiliary_coordinate"], - {"dimension_coordinate": None, "auxiliary_coordinate": 2}, - ): - cf.write(f, tmpfile2, cfa={"constructs": constructs}) - nc = netCDF4.Dataset(tmpfile2, "r") - for ncvar, var in nc.variables.items(): - attrs = var.ncattrs() - if ncvar in ( - "x", - "x_bnds", - "y", - "y_bnds", - "atmosphere_hybrid_height_coordinate", - "atmosphere_hybrid_height_coordinate_bounds", - "latitude_1", - "longitude_1", - ): - self.assertFalse(var.ndim) - self.assertIn("aggregated_dimensions", attrs) - self.assertIn("aggregated_data", attrs) - else: - self.assertNotIn("aggregated_dimensions", attrs) - self.assertNotIn("aggregated_data", attrs) - - nc.close() - - def test_CFA_multiple_files(self): - """Test storing multiple locations for the same fragment.""" - f = cf.example_field(0) - cf.write(f, tmpfile1) - f = cf.read(tmpfile1)[0] - f.add_file_directory("/new/path") - - cf.write(f, cfa_file, cfa="field") - g = cf.read(cfa_file) - self.assertEqual(len(g), 1) - g = g[0] - self.assertTrue(f.equals(g)) - - self.assertEqual(len(g.data.get_filenames()), 2) - self.assertEqual(len(g.get_filenames()), 3) - - def test_CFA_unlimited_dimension(self): - """Test aggregation files with unlimited dimensions.""" - # Aggregated dimensions cannot be unlimited - f = cf.example_field(0) - axis = 
f.domain_axis("longitude") - axis.nc_set_unlimited(True) - cf.write(f, tmpfile1) - g = cf.read(tmpfile1) - with self.assertRaises(ValueError): - cf.write(g, cfa_file, cfa="field") - - def test_CFA_scalar(self): - """Test scalar aggregation variable.""" - f = cf.example_field(0) - f = f[0, 0].squeeze() - cf.write(f, tmpfile1) - g = cf.read(tmpfile1)[0] - cf.write(g, cfa_file, cfa="field") - h = cf.read(cfa_file)[0] - self.assertTrue(h.equals(f)) - - def test_CFA_value(self): - """Test the value fragment array variable.""" - write = True - for aggregation_value_file in (self.aggregation_value, cfa_file): - f = cf.read(aggregation_value_file) - self.assertEqual(len(f), 1) - f = f[0] - fa = f.field_ancillary() - self.assertEqual(fa.shape, (12,)) - self.assertEqual(fa.data.chunks, ((3, 9),)) - self.assertEqual( - fa.data.nc_get_aggregation_fragment_type(), "value" - ) - self.assertEqual( - fa.data.nc_get_aggregated_data(), - {"shape": "fragment_shape_uid", "value": "fragment_value_uid"}, - ) - - nc = netCDF4.Dataset(aggregation_value_file, "r") - fragment_value_uid = nc.variables["fragment_value_uid"][...] 
- nc.close() - - self.assertTrue((fa[:3].array == fragment_value_uid[0]).all()) - self.assertTrue((fa[3:].array == fragment_value_uid[1]).all()) - - if write: - cf.write(f, cfa_file) - write = False - - def test_CFA_cfa(self): - """Test the cf.write 'cfa' keyword.""" - f = cf.example_field(0) - cf.write(f, tmpfile1) - f = cf.read(tmpfile1)[0] - cf.write(f, tmpfile2, cfa="field") - g = cf.read(tmpfile2)[0] - - # Default of cfa="auto" - check that aggregation variable - # gets written - cf.write(g, cfa_file) - nc = netCDF4.Dataset(cfa_file, "r") - self.assertIsNotNone( - getattr(nc.variables["q"], "aggregated_data", None) - ) - nc.close() - - cf.write(g, cfa_file, cfa={"constructs": {"auto": 2}}) - nc = netCDF4.Dataset(cfa_file, "r") - self.assertIsNotNone( - getattr(nc.variables["q"], "aggregated_data", None) - ) - nc.close() - - cf.write( - g, - cfa_file, - cfa={ - "constructs": ["auto", "dimension_coordinate"], - "strict": False, - }, - ) - nc = netCDF4.Dataset(cfa_file, "r") - for ncvar in ("q", "lat", "lon"): - self.assertIsNotNone( - getattr(nc.variables[ncvar], "aggregated_data", None) - ) - - nc.close() - - # Check bad values of cfa - for cfa in (False, True, (), []): - with self.assertRaises(ValueError): - cf.write(g, cfa_file, cfa=cfa) - - -if __name__ == "__main__": - print("Run date:", datetime.datetime.now()) - cf.environment() - print() - unittest.main(verbosity=2) - -# n_tmpfiles = 5 -# tmpfiles = [ +#import atexit +#import datetime +#import faulthandler +#import os +#import tempfile +#import unittest +#from pathlib import PurePath +# +#import netCDF4 +# +#faulthandler.enable() # to debug seg faults and timeouts +# +#import cf +# +#n_tmpfiles = 5 +#tmpfiles = [ # tempfile.mkstemp("_test_CFA.nc", dir=os.getcwd())[1] # for i in range(n_tmpfiles) -# ] -# ( +#] +#( # tmpfile1, # tmpfile2, -# tmpfile3, -# tmpfile4, -# tmpfile5, -# ) = tmpfiles +# nc_file, +# cfa_file, +# cfa_file2, +#) = tmpfiles # # -# def _remove_tmpfiles(): +#def _remove_tmpfiles(): # 
"""Try to remove defined temporary files by deleting their paths.""" # for f in tmpfiles: # try: @@ -655,10 +35,12 @@ def test_CFA_cfa(self): # pass # # -# atexit.register(_remove_tmpfiles) +#atexit.register(_remove_tmpfiles) +# # +#class CFATest(unittest.TestCase): +# """Unit test for aggregation variables.""" # -# class CFATest(unittest.TestCase): # netcdf3_fmts = [ # "NETCDF3_CLASSIC", # "NETCDF3_64BIT", @@ -668,33 +50,35 @@ def test_CFA_cfa(self): # netcdf4_fmts = ["NETCDF4", "NETCDF4_CLASSIC"] # netcdf_fmts = netcdf3_fmts + netcdf4_fmts # +# aggregation_value = os.path.join( +# os.path.dirname(os.path.abspath(__file__)), "aggregation_value.nc" +# ) +# # def test_CFA_fmt(self): -# """Test the cf.read 'fmt' and 'cfa' keywords.""" +# """Test the cf.read 'fmt' keyword with cfa.""" # f = cf.example_field(0) # cf.write(f, tmpfile1) # f = cf.read(tmpfile1)[0] # # for fmt in self.netcdf_fmts: -# cf.write(f, tmpfile2, fmt=fmt, cfa=True) -# g = cf.read(tmpfile2) +# cf.write(f, cfa_file, fmt=fmt, cfa="field") +# g = cf.read(cfa_file) # self.assertEqual(len(g), 1) # self.assertTrue(f.equals(g[0])) # # def test_CFA_multiple_fragments(self): -# """Test CFA with more than one fragment.""" +# """Test aggregation variables with more than one fragment.""" # f = cf.example_field(0) # # cf.write(f[:2], tmpfile1) # cf.write(f[2:], tmpfile2) # -# a = cf.read([tmpfile1, tmpfile2]) -# self.assertEqual(len(a), 1) -# a = a[0] +# a = cf.read(tmpfile1)[0] +# b = cf.read(tmpfile2)[0] +# a = cf.Field.concatenate([a, b], axis=0) # -# nc_file = tmpfile3 -# cfa_file = tmpfile4 # cf.write(a, nc_file) -# cf.write(a, cfa_file, cfa=True) +# cf.write(a, cfa_file, cfa="field") # # n = cf.read(nc_file) # c = cf.read(cfa_file) @@ -704,108 +88,59 @@ def test_CFA_cfa(self): # self.assertTrue(n[0].equals(c[0])) # # def test_CFA_strict(self): -# """Test CFA 'strict' option to the cfa.write 'cfa' keyword.""" +# """Test 'strict' option to the cf.write 'cfa' keyword.""" # f = cf.example_field(0) # -# # By 
default, can't write as CF-netCDF those variables -# # selected for CFA treatment, but which aren't suitable. +# # By default, can't write in-memory arrays as aggregation +# # variables # with self.assertRaises(ValueError): -# cf.write(f, tmpfile1, cfa=True) +# cf.write(f, cfa_file, cfa="field") # # # The previous line should have deleted the output file -# self.assertFalse(os.path.exists(tmpfile1)) +# self.assertFalse(os.path.exists(cfa_file)) # -# cf.write(f, tmpfile1, cfa={"strict": False}) -# g = cf.read(tmpfile1) +# cf.write(f, nc_file, cfa={"constructs": "field", "strict": False}) +# g = cf.read(nc_file) # self.assertEqual(len(g), 1) # self.assertTrue(g[0].equals(f)) # -# cf.write(g, tmpfile2, cfa={"strict": True}) -# g = cf.read(tmpfile2) +# cf.write(g, cfa_file, cfa={"constructs": "field", "strict": True}) +# g = cf.read(cfa_file) # self.assertEqual(len(g), 1) # self.assertTrue(g[0].equals(f)) # -# def test_CFA_field_ancillaries(self): -# """Test creation of field ancillaries from non-standard CFA terms.""" -# f = cf.example_field(0) -# self.assertFalse(f.field_ancillaries()) -# -# a = f[:2] -# b = f[2:] -# a.set_property("foo", "bar_a") -# b.set_property("foo", "bar_b") -# cf.write(a, tmpfile1) -# cf.write(b, tmpfile2) -# -# c = cf.read( -# [tmpfile1, tmpfile2], aggregate={"field_ancillaries": "foo"} -# ) -# self.assertEqual(len(c), 1) -# c = c[0] -# self.assertEqual(len(c.field_ancillaries()), 1) -# anc = c.field_ancillary() -# self.assertTrue(anc.data.cfa_get_term()) -# self.assertFalse(anc.data.cfa_get_write()) -# -# cf.write(c, tmpfile3, cfa=False) -# c2 = cf.read(tmpfile3) -# self.assertEqual(len(c2), 1) -# self.assertFalse(c2[0].field_ancillaries()) -# -# cf.write(c, tmpfile4, cfa=True) -# d = cf.read(tmpfile4) -# self.assertEqual(len(d), 1) -# d = d[0] -# -# self.assertEqual(len(d.field_ancillaries()), 1) -# anc = d.field_ancillary() -# self.assertTrue(anc.data.cfa_get_term()) -# self.assertFalse(anc.data.cfa_get_write()) -# 
self.assertTrue(d.equals(c)) -# -# cf.write(d, tmpfile5, cfa=False) -# e = cf.read(tmpfile5) -# self.assertEqual(len(e), 1) -# self.assertFalse(e[0].field_ancillaries()) -# -# cf.write(d, tmpfile5, cfa=True) -# e = cf.read(tmpfile5) -# self.assertEqual(len(e), 1) -# self.assertTrue(e[0].equals(d)) -# # def test_CFA_substitutions_0(self): -# """Test CFA substitution URI substitutions (0).""" +# """Test aggregation substitution URI substitutions (0).""" # f = cf.example_field(0) # cf.write(f, tmpfile1) # f = cf.read(tmpfile1)[0] # # cwd = os.getcwd() -# -# f.data.cfa_update_file_substitutions({"base": cwd}) +# f.data.nc_update_aggregation_substitutions({"base": cwd}) # # cf.write( # f, -# tmpfile2, -# cfa={"absolute_paths": True}, +# cfa_file, +# cfa={"constructs": "field", "uri": "absolute"}, # ) # -# nc = netCDF4.Dataset(tmpfile2, "r") -# cfa_file = nc.variables["cfa_file"] +# nc = netCDF4.Dataset(cfa_file, "r") +# cfa_location = nc.variables["cfa_location"] # self.assertEqual( -# cfa_file.getncattr("substitutions"), +# cfa_location.getncattr("substitutions"), # f"${{base}}: {cwd}", # ) # self.assertEqual( -# cfa_file[...], f"file://${{base}}/{os.path.basename(tmpfile1)}" +# cfa_location[...], f"${{base}}/{os.path.basename(tmpfile1)}" # ) # nc.close() # -# g = cf.read(tmpfile2) +# g = cf.read(cfa_file) # self.assertEqual(len(g), 1) # self.assertTrue(f.equals(g[0])) # # def test_CFA_substitutions_1(self): -# """Test CFA substitution URI substitutions (1).""" +# """Test aggregation substitution URI substitutions (1).""" # f = cf.example_field(0) # cf.write(f, tmpfile1) # f = cf.read(tmpfile1)[0] @@ -814,136 +149,286 @@ def test_CFA_cfa(self): # for base in ("base", "${base}"): # cf.write( # f, -# tmpfile2, -# cfa={"absolute_paths": True, "substitutions": {base: cwd}}, +# cfa_file, +# cfa={ +# "constructs": "field", +# "uri": "absolute", +# "substitutions": {base: cwd}, +# }, # ) # -# nc = netCDF4.Dataset(tmpfile2, "r") -# cfa_file = nc.variables["cfa_file"] +# nc = 
netCDF4.Dataset(cfa_file, "r") +# cfa_location = nc.variables["cfa_location"] # self.assertEqual( -# cfa_file.getncattr("substitutions"), +# cfa_location.getncattr("substitutions"), # f"${{base}}: {cwd}", # ) # self.assertEqual( -# cfa_file[...], f"file://${{base}}/{os.path.basename(tmpfile1)}" +# cfa_location[...], +# f"${{base}}/{os.path.basename(tmpfile1)}", # ) # nc.close() # -# g = cf.read(tmpfile2) +# g = cf.read(cfa_file) # self.assertEqual(len(g), 1) # self.assertTrue(f.equals(g[0])) # # def test_CFA_substitutions_2(self): -# """Test CFA substitution URI substitutions (2).""" +# """Test aggregation substitution URI substitutions (2).""" +# # TODOCFA: delete +# tmpfile1 = "tmpfile1.nc" +# # f = cf.example_field(0) +# # cf.write(f, tmpfile1) # f = cf.read(tmpfile1)[0] # # cwd = os.getcwd() +# basename = os.path.basename(tmpfile1) # -# f.data.cfa_clear_file_substitutions() -# f.data.cfa_update_file_substitutions({"base": cwd}) +# # TODOCFA: delete +# cfa_file = "cfa_file.nc" # +# f.data.nc_clear_aggregation_substitutions() +# f.data.nc_update_aggregation_substitutions({"base": f"{cwd}"}) # cf.write( # f, -# tmpfile2, +# cfa_file, # cfa={ -# "absolute_paths": True, +# "constructs": "field", +# "uri": "absolute", # "substitutions": {"base2": "/bad/location"}, # }, # ) # -# nc = netCDF4.Dataset(tmpfile2, "r") -# cfa_file = nc.variables["cfa_file"] +# nc = netCDF4.Dataset(cfa_file, "r") +# cfa_location = nc.variables["cfa_location"] # self.assertEqual( -# cfa_file.getncattr("substitutions"), +# cfa_location.getncattr("substitutions"), # f"${{base2}}: /bad/location ${{base}}: {cwd}", # ) -# self.assertEqual( -# cfa_file[...], f"file://${{base}}/{os.path.basename(tmpfile1)}" -# ) +# self.assertEqual(cfa_location[...], f"file://${{base}}/{basename}") # nc.close() # -# g = cf.read(tmpfile2) +# g = cf.read(cfa_file) # self.assertEqual(len(g), 1) # self.assertTrue(f.equals(g[0])) # -# f.data.cfa_clear_file_substitutions() -# f.data.cfa_update_file_substitutions({"base": 
"/bad/location"}) +# f.data.nc_clear_aggregation_substitutions() +# f.data.nc_update_aggregation_substitutions({"base": "/bad/location"}) # # cf.write( # f, -# tmpfile2, -# cfa={"absolute_paths": True, "substitutions": {"base": cwd}}, +# cfa_file, +# cfa={ +# "constructs": "field", +# "uri": "absolute", +# "substitutions": {"base": cwd}, +# }, # ) # -# nc = netCDF4.Dataset(tmpfile2, "r") -# cfa_file = nc.variables["cfa_file"] +# nc = netCDF4.Dataset(cfa_file, "r") +# cfa_location = nc.variables["cfa_location"] # self.assertEqual( -# cfa_file.getncattr("substitutions"), +# cfa_location.getncattr("substitutions"), # f"${{base}}: {cwd}", # ) -# self.assertEqual( -# cfa_file[...], f"file://${{base}}/{os.path.basename(tmpfile1)}" -# ) +# self.assertEqual(cfa_location[...], f"file://${{base}}/{basename}") # nc.close() # -# g = cf.read(tmpfile2) +# g = cf.read(cfa_file) # self.assertEqual(len(g), 1) # self.assertTrue(f.equals(g[0])) # -# f.data.cfa_clear_file_substitutions() -# f.data.cfa_update_file_substitutions({"base2": "/bad/location"}) +# f.data.nc_clear_aggregation_substitutions() +# f.data.nc_update_aggregation_substitutions({"base2": "/bad/location"}) # # cf.write( # f, -# tmpfile2, -# cfa={"absolute_paths": True, "substitutions": {"base": cwd}}, +# cfa_file, +# cfa={ +# "constructs": "field", +# "uri": "absolute", +# "substitutions": {"base": cwd}, +# }, # ) # -# nc = netCDF4.Dataset(tmpfile2, "r") -# cfa_file = nc.variables["cfa_file"] +# nc = netCDF4.Dataset(cfa_file, "r") +# cfa_location = nc.variables["cfa_location"] # self.assertEqual( -# cfa_file.getncattr("substitutions"), +# cfa_location.getncattr("substitutions"), # f"${{base2}}: /bad/location ${{base}}: {cwd}", # ) # self.assertEqual( -# cfa_file[...], f"file://${{base}}/{os.path.basename(tmpfile1)}" +# cfa_location[...], f"file://${{base}}/{os.path.basename(tmpfile1)}" # ) # nc.close() # -# g = cf.read(tmpfile2) +# g = cf.read(cfa_file) # self.assertEqual(len(g), 1) -# self.assertTrue(f.equals(g[0])) 
+# g = g[0] +# self.assertTrue(f.equals(g)) +# +# self.assertEqual( +# g.data.get_filenames(normalise=False), +# set((f"file://${{base}}/{os.path.basename(tmpfile1)}",)), +# ) +# g.data.nc_update_aggregation_substitutions({"base": "/new/location"}) +# self.assertEqual( +# g.data.nc_aggregation_substitutions(), +# {"${base2}": "/bad/location", "${base}": "/new/location"}, +# ) +# self.assertEqual( +# g.data.get_filenames(normalise=False), +# set((f"file://${{base}}/{os.path.basename(tmpfile1)}",)), +# ) +# +# # TODOCFA: delete +# cfa_file2 = "cfa_file2.nc" +# cf.write( +# g, +# cfa_file2, +# cfa={ +# "constructs": "field", +# "uri": "absolute", +# }, +# ) +# nc = netCDF4.Dataset(cfa_file2, "r") +# cfa_location = nc.variables["cfa_location"] +# self.assertEqual( +# cfa_location.getncattr("substitutions"), +# "${base2}: /bad/location ${base}: /new/location", +# ) +# self.assertEqual( +# cfa_location[...], f"file://${{base}}/{os.path.basename(tmpfile1)}" +# ) +# nc.close() +# +# def test_CFA_substitutions_3(self): +# """Test aggregation substitution URI substitutions (2).""" +# f = cf.example_field(0) +# cf.write(f, tmpfile1) +# f = cf.read(tmpfile1)[0] +# +# cwd = os.getcwd() +# basename = os.path.basename(tmpfile1) +# +# f.data.nc_clear_aggregation_substitutions() +# f.data.nc_update_aggregation_substitutions({"base": f"{cwd}/"}) +# cf.write( +# f, +# cfa_file, +# cfa={ +# "constructs": "field", +# "uri": "absolute", +# }, +# ) +# +# nc = netCDF4.Dataset(cfa_file, "r") +# cfa_location = nc.variables["cfa_location"] +# self.assertEqual( +# cfa_location.getncattr("substitutions"), f"${{base}}: {cwd}/" +# ) +# self.assertEqual(cfa_location[...], f"${{base}}{basename}") +# nc.close() +# +# # TODOCFA: delete +# cfa_file2 = "cfa_file2.nc" +# +# g = cf.read(cfa_file)[0] +# self.assertTrue(f.equals(g)) +# cf.write( +# g, +# cfa_file2, +# cfa={ +# "constructs": "field", +# "uri": "absolute", +# }, +# ) +# +# def test_CFA_substitutions_4(self): +# """Test aggregation 
substitution URI substitutions (2).""" +# f = cf.example_field(0) # -# def test_CFA_absolute_paths(self): -# """Test CFA 'absolute_paths' option to the cfa.write 'cfa' keyword.""" +# # TODOCFA: delete +# tmpfile1 = "tmpfile1.nc" +# +# cf.write(f, tmpfile1) +# f = cf.read(tmpfile1)[0] +# +# cwd = os.getcwd() +# basename = os.path.basename(tmpfile1) +# +# # TODOCFA: delete +# cfa_file = "cfa_file.nc" +# +# replacement = f"{cwd}/" +# f.data.nc_clear_aggregation_substitutions() +# f.data.nc_update_aggregation_substitutions({"base": replacement}) +# cf.write( +# f, +# cfa_file, +# cfa={ +# "constructs": "field", +# "uri": "relative", +# }, +# ) +# +# nc = netCDF4.Dataset(cfa_file, "r") +# cfa_location = nc.variables["cfa_location"] +# self.assertEqual( +# cfa_location.getncattr("substitutions"), +# f"${{base}}: {replacement}", +# ) +# self.assertEqual(cfa_location[...], basename) +# nc.close() +# +# cf.write( +# f, +# cfa_file, +# cfa={"constructs": "field", "uri": "absolute"}, +# ) +# nc = netCDF4.Dataset(cfa_file, "r") +# cfa_location = nc.variables["cfa_location"] +# self.assertEqual( +# cfa_location.getncattr("substitutions"), +# f"${{base}}: {replacement}", +# ) +# self.assertEqual(cfa_location[...], f"file://${{base}}{basename}") +# nc.close() +# +# def test_CFA_uri(self): +# """Test aggregation 'uri' option to cf.write.""" # f = cf.example_field(0) # cf.write(f, tmpfile1) # f = cf.read(tmpfile1)[0] # -# for absolute_paths, filename in zip( -# (True, False), +# for uri, filename in zip( +# ("absolute", "relative"), # ( # PurePath(os.path.abspath(tmpfile1)).as_uri(), # os.path.basename(tmpfile1), # ), # ): -# cf.write(f, tmpfile2, cfa={"absolute_paths": absolute_paths}) +# cf.write( +# f, +# cfa_file, +# cfa={"constructs": "field", "uri": uri}, +# ) # -# nc = netCDF4.Dataset(tmpfile2, "r") -# cfa_file = nc.variables["cfa_file"] -# self.assertEqual(cfa_file[...], filename) +# nc = netCDF4.Dataset(cfa_file, "r") +# cfa_location = nc.variables["cfa_location"] +# 
self.assertEqual(cfa_location[...], filename) # nc.close() # -# g = cf.read(tmpfile2) +# g = cf.read(cfa_file) # self.assertEqual(len(g), 1) # self.assertTrue(f.equals(g[0])) # # def test_CFA_constructs(self): -# """Test choice of constructs to write as CFA-netCDF variables.""" +# """Test aggregation 'constructs' option to cf.write.""" # f = cf.example_field(1) -# f.del_construct("T") +# f.del_construct("time") # f.del_construct("long_name=Grid latitude name") # cf.write(f, tmpfile1) # f = cf.read(tmpfile1)[0] @@ -979,7 +464,6 @@ def test_CFA_cfa(self): # ["dimension_coordinate"], # {"dimension_coordinate": None}, # {"dimension_coordinate": 1}, -# {"dimension_coordinate": cf.eq(1)}, # ): # cf.write(f, tmpfile2, cfa={"constructs": constructs}) # nc = netCDF4.Dataset(tmpfile2, "r") @@ -1005,7 +489,7 @@ def test_CFA_cfa(self): # # Dimension and auxiliary constructs # for constructs in ( # ["dimension_coordinate", "auxiliary_coordinate"], -# {"dimension_coordinate": None, "auxiliary_coordinate": cf.ge(2)}, +# {"dimension_coordinate": None, "auxiliary_coordinate": 2}, # ): # cf.write(f, tmpfile2, cfa={"constructs": constructs}) # nc = netCDF4.Dataset(tmpfile2, "r") @@ -1030,38 +514,15 @@ def test_CFA_cfa(self): # # nc.close() # -# def test_CFA_PP(self): -# """Test writing CFA-netCDF with PP format fragments.""" -# f = cf.read("file1.pp")[0] -# cf.write(f, tmpfile1, cfa=True) -# -# # Check that only the fields have been aggregated -# nc = netCDF4.Dataset(tmpfile1, "r") -# for ncvar, var in nc.variables.items(): -# attrs = var.ncattrs() -# if ncvar in ("UM_m01s15i201_vn405",): -# self.assertFalse(var.ndim) -# self.assertIn("aggregated_dimensions", attrs) -# self.assertIn("aggregated_data", attrs) -# else: -# self.assertNotIn("aggregated_dimensions", attrs) -# self.assertNotIn("aggregated_data", attrs) -# -# nc.close() -# -# g = cf.read(tmpfile1) -# self.assertEqual(len(g), 1) -# self.assertTrue(f.equals(g[0])) -# # def test_CFA_multiple_files(self): -# """Test storing 
multiple CFA frgament locations.""" +# """Test storing multiple locations for the same fragment.""" # f = cf.example_field(0) # cf.write(f, tmpfile1) # f = cf.read(tmpfile1)[0] -# f.add_file_location("/new/location") +# f.add_file_directory("/new/path") # -# cf.write(f, tmpfile2, cfa=True) -# g = cf.read(tmpfile2) +# cf.write(f, cfa_file, cfa="field") +# g = cf.read(cfa_file) # self.assertEqual(len(g), 1) # g = g[0] # self.assertTrue(f.equals(g)) @@ -1070,24 +531,563 @@ def test_CFA_cfa(self): # self.assertEqual(len(g.get_filenames()), 3) # # def test_CFA_unlimited_dimension(self): -# """Test CFA with unlimited dimensions""" -# # Create a CFA file from a field that has an unlimited -# # dimension and no metadata constructs spanning that dimension +# """Test aggregation files with unlimited dimensions.""" +# # Aggregated dimensions cannot be unlimited # f = cf.example_field(0) -# d = f.domain_axis("X") -# d.nc_set_unlimited(True) -# f.del_construct("X") +# axis = f.domain_axis("longitude") +# axis.nc_set_unlimited(True) # cf.write(f, tmpfile1) # g = cf.read(tmpfile1) -# cf.write(g, tmpfile2, cfa=True) +# with self.assertRaises(ValueError): +# cf.write(g, cfa_file, cfa="field") # -# # Check that the CFA file can be read -# h = cf.read(tmpfile2) -# self.assertEqual(len(h), 1) +# def test_CFA_scalar(self): +# """Test scalar aggregation variable.""" +# f = cf.example_field(0) +# f = f[0, 0].squeeze() +# cf.write(f, tmpfile1) +# g = cf.read(tmpfile1)[0] +# cf.write(g, cfa_file, cfa="field") +# h = cf.read(cfa_file)[0] +# self.assertTrue(h.equals(f)) +# +# def test_CFA_value(self): +# """Test the value fragment array variable.""" +# write = True +# for aggregation_value_file in (self.aggregation_value, cfa_file): +# f = cf.read(aggregation_value_file) +# self.assertEqual(len(f), 1) +# f = f[0] +# fa = f.field_ancillary() +# self.assertEqual(fa.shape, (12,)) +# self.assertEqual(fa.data.chunks, ((3, 9),)) +# self.assertEqual( +# fa.data.nc_get_aggregation_fragment_type(), 
"value" +# ) +# self.assertEqual( +# fa.data.nc_get_aggregated_data(), +# {"shape": "fragment_shape_uid", "value": "fragment_value_uid"}, +# ) # +# nc = netCDF4.Dataset(aggregation_value_file, "r") +# fragment_value_uid = nc.variables["fragment_value_uid"][...] +# nc.close() # -# if __name__ == "__main__": +# self.assertTrue((fa[:3].array == fragment_value_uid[0]).all()) +# self.assertTrue((fa[3:].array == fragment_value_uid[1]).all()) +# +# if write: +# cf.write(f, cfa_file) +# write = False +# +# def test_CFA_cfa(self): +# """Test the cf.write 'cfa' keyword.""" +# f = cf.example_field(0) +# cf.write(f, tmpfile1) +# f = cf.read(tmpfile1)[0] +# cf.write(f, tmpfile2, cfa="field") +# g = cf.read(tmpfile2)[0] +# +# # Default of cfa="auto" - check that aggregation variable +# # gets written +# cf.write(g, cfa_file) +# nc = netCDF4.Dataset(cfa_file, "r") +# self.assertIsNotNone( +# getattr(nc.variables["q"], "aggregated_data", None) +# ) +# nc.close() +# +# cf.write(g, cfa_file, cfa={"constructs": {"auto": 2}}) +# nc = netCDF4.Dataset(cfa_file, "r") +# self.assertIsNotNone( +# getattr(nc.variables["q"], "aggregated_data", None) +# ) +# nc.close() +# +# cf.write( +# g, +# cfa_file, +# cfa={ +# "constructs": ["auto", "dimension_coordinate"], +# "strict": False, +# }, +# ) +# nc = netCDF4.Dataset(cfa_file, "r") +# for ncvar in ("q", "lat", "lon"): +# self.assertIsNotNone( +# getattr(nc.variables[ncvar], "aggregated_data", None) +# ) +# +# nc.close() +# +# # Check bad values of cfa +# for cfa in (False, True, (), []): +# with self.assertRaises(ValueError): +# cf.write(g, cfa_file, cfa=cfa) +# +# +#if __name__ == "__main__": # print("Run date:", datetime.datetime.now()) # cf.environment() # print() # unittest.main(verbosity=2) +# +## n_tmpfiles = 5 +## tmpfiles = [ +## tempfile.mkstemp("_test_CFA.nc", dir=os.getcwd())[1] +## for i in range(n_tmpfiles) +## ] +## ( +## tmpfile1, +## tmpfile2, +## tmpfile3, +## tmpfile4, +## tmpfile5, +## ) = tmpfiles +## +## +## def 
_remove_tmpfiles(): +## """Try to remove defined temporary files by deleting their paths.""" +## for f in tmpfiles: +## try: +## os.remove(f) +## except OSError: +## pass +## +## +## atexit.register(_remove_tmpfiles) +## +## +## class CFATest(unittest.TestCase): +## netcdf3_fmts = [ +## "NETCDF3_CLASSIC", +## "NETCDF3_64BIT", +## "NETCDF3_64BIT_OFFSET", +## "NETCDF3_64BIT_DATA", +## ] +## netcdf4_fmts = ["NETCDF4", "NETCDF4_CLASSIC"] +## netcdf_fmts = netcdf3_fmts + netcdf4_fmts +## +## def test_CFA_fmt(self): +## """Test the cf.read 'fmt' and 'cfa' keywords.""" +## f = cf.example_field(0) +## cf.write(f, tmpfile1) +## f = cf.read(tmpfile1)[0] +## +## for fmt in self.netcdf_fmts: +## cf.write(f, tmpfile2, fmt=fmt, cfa=True) +## g = cf.read(tmpfile2) +## self.assertEqual(len(g), 1) +## self.assertTrue(f.equals(g[0])) +## +## def test_CFA_multiple_fragments(self): +## """Test CFA with more than one fragment.""" +## f = cf.example_field(0) +## +## cf.write(f[:2], tmpfile1) +## cf.write(f[2:], tmpfile2) +## +## a = cf.read([tmpfile1, tmpfile2]) +## self.assertEqual(len(a), 1) +## a = a[0] +## +## nc_file = tmpfile3 +## cfa_file = tmpfile4 +## cf.write(a, nc_file) +## cf.write(a, cfa_file, cfa=True) +## +## n = cf.read(nc_file) +## c = cf.read(cfa_file) +## self.assertEqual(len(n), 1) +## self.assertEqual(len(c), 1) +## self.assertTrue(c[0].equals(f)) +## self.assertTrue(n[0].equals(c[0])) +## +## def test_CFA_strict(self): +## """Test CFA 'strict' option to the cfa.write 'cfa' keyword.""" +## f = cf.example_field(0) +## +## # By default, can't write as CF-netCDF those variables +## # selected for CFA treatment, but which aren't suitable. 
+## with self.assertRaises(ValueError): +## cf.write(f, tmpfile1, cfa=True) +## +## # The previous line should have deleted the output file +## self.assertFalse(os.path.exists(tmpfile1)) +## +## cf.write(f, tmpfile1, cfa={"strict": False}) +## g = cf.read(tmpfile1) +## self.assertEqual(len(g), 1) +## self.assertTrue(g[0].equals(f)) +## +## cf.write(g, tmpfile2, cfa={"strict": True}) +## g = cf.read(tmpfile2) +## self.assertEqual(len(g), 1) +## self.assertTrue(g[0].equals(f)) +## +## def test_CFA_field_ancillaries(self): +## """Test creation of field ancillaries from non-standard CFA terms.""" +## f = cf.example_field(0) +## self.assertFalse(f.field_ancillaries()) +## +## a = f[:2] +## b = f[2:] +## a.set_property("foo", "bar_a") +## b.set_property("foo", "bar_b") +## cf.write(a, tmpfile1) +## cf.write(b, tmpfile2) +## +## c = cf.read( +## [tmpfile1, tmpfile2], aggregate={"field_ancillaries": "foo"} +## ) +## self.assertEqual(len(c), 1) +## c = c[0] +## self.assertEqual(len(c.field_ancillaries()), 1) +## anc = c.field_ancillary() +## self.assertTrue(anc.data.cfa_get_term()) +## self.assertFalse(anc.data.cfa_get_write()) +## +## cf.write(c, tmpfile3, cfa=False) +## c2 = cf.read(tmpfile3) +## self.assertEqual(len(c2), 1) +## self.assertFalse(c2[0].field_ancillaries()) +## +## cf.write(c, tmpfile4, cfa=True) +## d = cf.read(tmpfile4) +## self.assertEqual(len(d), 1) +## d = d[0] +## +## self.assertEqual(len(d.field_ancillaries()), 1) +## anc = d.field_ancillary() +## self.assertTrue(anc.data.cfa_get_term()) +## self.assertFalse(anc.data.cfa_get_write()) +## self.assertTrue(d.equals(c)) +## +## cf.write(d, tmpfile5, cfa=False) +## e = cf.read(tmpfile5) +## self.assertEqual(len(e), 1) +## self.assertFalse(e[0].field_ancillaries()) +## +## cf.write(d, tmpfile5, cfa=True) +## e = cf.read(tmpfile5) +## self.assertEqual(len(e), 1) +## self.assertTrue(e[0].equals(d)) +## +## def test_CFA_substitutions_0(self): +## """Test CFA substitution URI substitutions (0).""" +## f = 
cf.example_field(0) +## cf.write(f, tmpfile1) +## f = cf.read(tmpfile1)[0] +## +## cwd = os.getcwd() +## +## f.data.cfa_update_file_substitutions({"base": cwd}) +## +## cf.write( +## f, +## tmpfile2, +## cfa={"absolute_paths": True}, +## ) +## +## nc = netCDF4.Dataset(tmpfile2, "r") +## cfa_file = nc.variables["cfa_file"] +## self.assertEqual( +## cfa_file.getncattr("substitutions"), +## f"${{base}}: {cwd}", +## ) +## self.assertEqual( +## cfa_file[...], f"file://${{base}}/{os.path.basename(tmpfile1)}" +## ) +## nc.close() +## +## g = cf.read(tmpfile2) +## self.assertEqual(len(g), 1) +## self.assertTrue(f.equals(g[0])) +## +## def test_CFA_substitutions_1(self): +## """Test CFA substitution URI substitutions (1).""" +## f = cf.example_field(0) +## cf.write(f, tmpfile1) +## f = cf.read(tmpfile1)[0] +## +## cwd = os.getcwd() +## for base in ("base", "${base}"): +## cf.write( +## f, +## tmpfile2, +## cfa={"absolute_paths": True, "substitutions": {base: cwd}}, +## ) +## +## nc = netCDF4.Dataset(tmpfile2, "r") +## cfa_file = nc.variables["cfa_file"] +## self.assertEqual( +## cfa_file.getncattr("substitutions"), +## f"${{base}}: {cwd}", +## ) +## self.assertEqual( +## cfa_file[...], f"file://${{base}}/{os.path.basename(tmpfile1)}" +## ) +## nc.close() +## +## g = cf.read(tmpfile2) +## self.assertEqual(len(g), 1) +## self.assertTrue(f.equals(g[0])) +## +## def test_CFA_substitutions_2(self): +## """Test CFA substitution URI substitutions (2).""" +## f = cf.example_field(0) +## cf.write(f, tmpfile1) +## f = cf.read(tmpfile1)[0] +## +## cwd = os.getcwd() +## +## f.data.cfa_clear_file_substitutions() +## f.data.cfa_update_file_substitutions({"base": cwd}) +## +## cf.write( +## f, +## tmpfile2, +## cfa={ +## "absolute_paths": True, +## "substitutions": {"base2": "/bad/location"}, +## }, +## ) +## +## nc = netCDF4.Dataset(tmpfile2, "r") +## cfa_file = nc.variables["cfa_file"] +## self.assertEqual( +## cfa_file.getncattr("substitutions"), +## f"${{base2}}: /bad/location 
${{base}}: {cwd}", +## ) +## self.assertEqual( +## cfa_file[...], f"file://${{base}}/{os.path.basename(tmpfile1)}" +## ) +## nc.close() +## +## g = cf.read(tmpfile2) +## self.assertEqual(len(g), 1) +## self.assertTrue(f.equals(g[0])) +## +## f.data.cfa_clear_file_substitutions() +## f.data.cfa_update_file_substitutions({"base": "/bad/location"}) +## +## cf.write( +## f, +## tmpfile2, +## cfa={"absolute_paths": True, "substitutions": {"base": cwd}}, +## ) +## +## nc = netCDF4.Dataset(tmpfile2, "r") +## cfa_file = nc.variables["cfa_file"] +## self.assertEqual( +## cfa_file.getncattr("substitutions"), +## f"${{base}}: {cwd}", +## ) +## self.assertEqual( +## cfa_file[...], f"file://${{base}}/{os.path.basename(tmpfile1)}" +## ) +## nc.close() +## +## g = cf.read(tmpfile2) +## self.assertEqual(len(g), 1) +## self.assertTrue(f.equals(g[0])) +## +## f.data.cfa_clear_file_substitutions() +## f.data.cfa_update_file_substitutions({"base2": "/bad/location"}) +## +## cf.write( +## f, +## tmpfile2, +## cfa={"absolute_paths": True, "substitutions": {"base": cwd}}, +## ) +## +## nc = netCDF4.Dataset(tmpfile2, "r") +## cfa_file = nc.variables["cfa_file"] +## self.assertEqual( +## cfa_file.getncattr("substitutions"), +## f"${{base2}}: /bad/location ${{base}}: {cwd}", +## ) +## self.assertEqual( +## cfa_file[...], f"file://${{base}}/{os.path.basename(tmpfile1)}" +## ) +## nc.close() +## +## g = cf.read(tmpfile2) +## self.assertEqual(len(g), 1) +## self.assertTrue(f.equals(g[0])) +## +## def test_CFA_absolute_paths(self): +## """Test CFA 'absolute_paths' option to the cfa.write 'cfa' keyword.""" +## f = cf.example_field(0) +## cf.write(f, tmpfile1) +## f = cf.read(tmpfile1)[0] +## +## for absolute_paths, filename in zip( +## (True, False), +## ( +## PurePath(os.path.abspath(tmpfile1)).as_uri(), +## os.path.basename(tmpfile1), +## ), +## ): +## cf.write(f, tmpfile2, cfa={"absolute_paths": absolute_paths}) +## +## nc = netCDF4.Dataset(tmpfile2, "r") +## cfa_file = 
nc.variables["cfa_file"] +## self.assertEqual(cfa_file[...], filename) +## nc.close() +## +## g = cf.read(tmpfile2) +## self.assertEqual(len(g), 1) +## self.assertTrue(f.equals(g[0])) +## +## def test_CFA_constructs(self): +## """Test choice of constructs to write as CFA-netCDF variables.""" +## f = cf.example_field(1) +## f.del_construct("T") +## f.del_construct("long_name=Grid latitude name") +## cf.write(f, tmpfile1) +## f = cf.read(tmpfile1)[0] +## +## # No constructs +## cf.write(f, tmpfile2, cfa={"constructs": []}) +## nc = netCDF4.Dataset(tmpfile2, "r") +## for var in nc.variables.values(): +## attrs = var.ncattrs() +## self.assertNotIn("aggregated_dimensions", attrs) +## self.assertNotIn("aggregated_data", attrs) +## +## nc.close() +## +## # Field construct +## cf.write(f, tmpfile2, cfa={"constructs": "field"}) +## nc = netCDF4.Dataset(tmpfile2, "r") +## for ncvar, var in nc.variables.items(): +## attrs = var.ncattrs() +## if ncvar in ("ta",): +## self.assertFalse(var.ndim) +## self.assertIn("aggregated_dimensions", attrs) +## self.assertIn("aggregated_data", attrs) +## else: +## self.assertNotIn("aggregated_dimensions", attrs) +## self.assertNotIn("aggregated_data", attrs) +## +## nc.close() +## +## # Dimension construct +## for constructs in ( +## "dimension_coordinate", +## ["dimension_coordinate"], +## {"dimension_coordinate": None}, +## {"dimension_coordinate": 1}, +## {"dimension_coordinate": cf.eq(1)}, +## ): +## cf.write(f, tmpfile2, cfa={"constructs": constructs}) +## nc = netCDF4.Dataset(tmpfile2, "r") +## for ncvar, var in nc.variables.items(): +## attrs = var.ncattrs() +## if ncvar in ( +## "x", +## "x_bnds", +## "y", +## "y_bnds", +## "atmosphere_hybrid_height_coordinate", +## "atmosphere_hybrid_height_coordinate_bounds", +## ): +## self.assertFalse(var.ndim) +## self.assertIn("aggregated_dimensions", attrs) +## self.assertIn("aggregated_data", attrs) +## else: +## self.assertNotIn("aggregated_dimensions", attrs) +## 
self.assertNotIn("aggregated_data", attrs) +## +## nc.close() +## +## # Dimension and auxiliary constructs +## for constructs in ( +## ["dimension_coordinate", "auxiliary_coordinate"], +## {"dimension_coordinate": None, "auxiliary_coordinate": cf.ge(2)}, +## ): +## cf.write(f, tmpfile2, cfa={"constructs": constructs}) +## nc = netCDF4.Dataset(tmpfile2, "r") +## for ncvar, var in nc.variables.items(): +## attrs = var.ncattrs() +## if ncvar in ( +## "x", +## "x_bnds", +## "y", +## "y_bnds", +## "atmosphere_hybrid_height_coordinate", +## "atmosphere_hybrid_height_coordinate_bounds", +## "latitude_1", +## "longitude_1", +## ): +## self.assertFalse(var.ndim) +## self.assertIn("aggregated_dimensions", attrs) +## self.assertIn("aggregated_data", attrs) +## else: +## self.assertNotIn("aggregated_dimensions", attrs) +## self.assertNotIn("aggregated_data", attrs) +## +## nc.close() +## +## def test_CFA_PP(self): +## """Test writing CFA-netCDF with PP format fragments.""" +## f = cf.read("file1.pp")[0] +## cf.write(f, tmpfile1, cfa=True) +## +## # Check that only the fields have been aggregated +## nc = netCDF4.Dataset(tmpfile1, "r") +## for ncvar, var in nc.variables.items(): +## attrs = var.ncattrs() +## if ncvar in ("UM_m01s15i201_vn405",): +## self.assertFalse(var.ndim) +## self.assertIn("aggregated_dimensions", attrs) +## self.assertIn("aggregated_data", attrs) +## else: +## self.assertNotIn("aggregated_dimensions", attrs) +## self.assertNotIn("aggregated_data", attrs) +## +## nc.close() +## +## g = cf.read(tmpfile1) +## self.assertEqual(len(g), 1) +## self.assertTrue(f.equals(g[0])) +## +## def test_CFA_multiple_files(self): +## """Test storing multiple CFA frgament locations.""" +## f = cf.example_field(0) +## cf.write(f, tmpfile1) +## f = cf.read(tmpfile1)[0] +## f.add_file_location("/new/location") +## +## cf.write(f, tmpfile2, cfa=True) +## g = cf.read(tmpfile2) +## self.assertEqual(len(g), 1) +## g = g[0] +## self.assertTrue(f.equals(g)) +## +## 
self.assertEqual(len(g.data.get_filenames()), 2) +## self.assertEqual(len(g.get_filenames()), 3) +## +## def test_CFA_unlimited_dimension(self): +## """Test CFA with unlimited dimensions""" +## # Create a CFA file from a field that has an unlimited +## # dimension and no metadata constructs spanning that dimension +## f = cf.example_field(0) +## d = f.domain_axis("X") +## d.nc_set_unlimited(True) +## f.del_construct("X") +## cf.write(f, tmpfile1) +## g = cf.read(tmpfile1) +## cf.write(g, tmpfile2, cfa=True) +## +## # Check that the CFA file can be read +## h = cf.read(tmpfile2) +## self.assertEqual(len(h), 1) +## +## +## if __name__ == "__main__": +## print("Run date:", datetime.datetime.now()) +## cf.environment() +## print() +## unittest.main(verbosity=2) From 2bd687af65f2608a16c0c379cb67261a0ae74bda Mon Sep 17 00:00:00 2001 From: David Hassell Date: Thu, 14 Nov 2024 10:00:13 +0000 Subject: [PATCH 10/51] dev --- cf/data/array/aggregatedarray.py | 21 ++- cf/data/array/umarray.py | 71 +++----- cf/data/fragment/__init__.py | 7 +- cf/data/fragment/fragmentfilearray.py | 25 +++ cf/data/fragment/fragmentumarray.py | 118 +++++++++++++ cf/data/fragment/umfragmentarray.py | 108 ------------ cf/domain.py | 194 +++++++++++----------- cf/functions.py | 1 - cf/mixin/propertiesdata.py | 186 ++++++++++----------- cf/mixin/propertiesdatabounds.py | 228 +++++++++++++------------- cf/read_write/read.py | 8 +- cf/test/test_CFA.py | 38 ++--- 12 files changed, 504 insertions(+), 501 deletions(-) create mode 100644 cf/data/fragment/fragmentfilearray.py create mode 100644 cf/data/fragment/fragmentumarray.py delete mode 100644 cf/data/fragment/umfragmentarray.py diff --git a/cf/data/array/aggregatedarray.py b/cf/data/array/aggregatedarray.py index 14db9edf9b..282515b6f6 100644 --- a/cf/data/array/aggregatedarray.py +++ b/cf/data/array/aggregatedarray.py @@ -1,18 +1,23 @@ import cfdm from ...mixin_container import Container +from ..fragment import FragmentFileArray -# from .mixin import 
ArrayMixin, FileArrayMixin - -class AggregatedArray( - # FileArrayMixin, - # ArrayMixin, - Container, - cfdm.AggregatedArray, -): +class AggregatedArray(Container, cfdm.AggregatedArray): """An array stored in a CF aggregation variable. .. versionadded:: NEXTVERSION """ + + def __new__(cls, *args, **kwargs): + """Store fragment array classes. + + .. versionadded:: (cfdm) NEXTVERSION + + """ + # Override the inherited FragmentFileArray class + instance = super().__new__(cls) + instance._FragmentArray["location"] = FragmentFileArray + return instance diff --git a/cf/data/array/umarray.py b/cf/data/array/umarray.py index 416bd96820..d9caa832e4 100644 --- a/cf/data/array/umarray.py +++ b/cf/data/array/umarray.py @@ -5,13 +5,9 @@ from ...umread_lib.umfile import File, Rec from .abstract import Array -# from .mixin import FileArrayMixin - class UMArray( - # FileArrayMixin, cfdm.data.mixin.IndexMixin, - # cfdm.data.mixin.FileArrayMixin, cfdm.data.abstract.FileArray, Array, ): @@ -26,7 +22,11 @@ def __init__( fmt=None, word_size=None, byte_ordering=None, + mask=True, + unpack=True, attributes=None, + storage_options=None, + min_file_versions=None, source=None, copy=True, ): @@ -100,34 +100,26 @@ def __init__( *attributes* parameter instead. 
""" - super().__init__(source=source, copy=copy) + super().__init__( + filename=filename, + address=address, + dtype=dtype, + shape=shape, + mask=mask, + unpack=unpack, + attributes=attributes, + storage_options=storage_options, + min_file_versions=min_file_versions, + source=source, + copy=copy, + ) if source is not None: - try: - shape = source._get_component("shape", None) - except AttributeError: - shape = None - - try: - filename = source._get_component("filename", None) - except AttributeError: - filename = None - - try: - address = source._get_component("address", None) - except AttributeError: - address = None - try: fmt = source._get_component("fmt", None) except AttributeError: fmt = None - try: - dtype = source._get_component("dtype", None) - except AttributeError: - dtype = None - try: word_size = source._get_component("word_size", None) except AttributeError: @@ -138,31 +130,6 @@ def __init__( except AttributeError: byte_ordering = None - try: - attributes = source._get_component("attributes", None) - except AttributeError: - attributes = None - - if filename is not None: - if isinstance(filename, str): - filename = (filename,) - else: - filename = tuple(filename) - - self._set_component("filename", filename, copy=False) - - if address is not None: - if isinstance(address, int): - address = (address,) - else: - address = tuple(address) - - self._set_component("address", address, copy=False) - - self._set_component("shape", shape, copy=False) - self._set_component("dtype", dtype, copy=False) - self._set_component("attributes", attributes, copy=False) - if fmt is not None: self._set_component("fmt", fmt, copy=False) @@ -217,8 +184,8 @@ def _get_array(self, index=None): # Get the data subspace, applying any masking and unpacking array = cfdm.netcdf_indexer( array, - mask=True, - unpack=True, + mask=self.get_mask(), + unpack=self.get_unpack(), always_masked_array=False, orthogonal_indexing=True, attributes=attributes, diff --git 
a/cf/data/fragment/__init__.py b/cf/data/fragment/__init__.py index 34903d248c..8a93ba7a1d 100644 --- a/cf/data/fragment/__init__.py +++ b/cf/data/fragment/__init__.py @@ -1,5 +1,2 @@ -# from .fullfragmentarray import FullFragmentArray -# from .h5netcdffragmentarray import H5netcdfFragmentArray -# from .netcdffragmentarray import NetCDFFragmentArray -# from .netcdf4fragmentarray import NetCDF4FragmentArray -from .umfragmentarray import UMFragmentArray +from .fragmentfilearray import FragmentFileArray +from .fragmentumarray import FragmentUMArray diff --git a/cf/data/fragment/fragmentfilearray.py b/cf/data/fragment/fragmentfilearray.py new file mode 100644 index 0000000000..71b51324ad --- /dev/null +++ b/cf/data/fragment/fragmentfilearray.py @@ -0,0 +1,25 @@ +import cfdm + + +class FragmentFileArray(cfdm.data.fragment.FragmentFileArray): + """Fragment of aggregated data in a file. + + .. versionadded:: (cfdm) NEXTVERSION + + """ + + def __new__(cls, *args, **kwargs): + """Store fragment classes. + + .. versionadded:: (cfdm) NEXTVERSION + + """ + # Import fragment classes. Do this here (as opposed to outside + # the class) to aid subclassing. + from .fragmentumarray import FragmentUMArray + + instance = super().__new__(cls) + instance._FragmentArrays = instance._FragmentArrays + ( + FragmentUMArray, + ) + return instance diff --git a/cf/data/fragment/fragmentumarray.py b/cf/data/fragment/fragmentumarray.py new file mode 100644 index 0000000000..735b83c0e6 --- /dev/null +++ b/cf/data/fragment/fragmentumarray.py @@ -0,0 +1,118 @@ +import cfdm + +from ..array.umarray import UMArray + + +class FragmentUMArray( + cfdm.data.fragment.mixin.FragmentFileArrayMixin, UMArray +): + """A fragment of aggregated data in a PP or UM file. + + .. 
versionadded:: 3.14.0 + + """ + + +# +# def __init__( +# self, +# filename=None, +# address=None, +# dtype=None, +# shape=None, +# storage_options=None, +# min_file_versions=None, +# unpack_aggregated_data=True, +# aggregated_attributes=None, +# source=None, +# copy=True, +# ): +# """**Initialisation** +# +# :Parameters: +# +# filename: (sequence of `str`), optional +# The names of the UM or PP files containing the fragment. +# +# address: (sequence of `str`), optional +# The start words in the files of the header. +# +# dtype: `numpy.dtype` +# The data type of the aggregated array. May be `None` +# if the numpy data-type is not known (which can be the +# case for netCDF string types, for example). This may +# differ from the data type of the netCDF fragment +# variable. +# +# shape: `tuple` +# The shape of the fragment within the aggregated +# array. This may differ from the shape of the netCDF +# fragment variable in that the latter may have fewer +# size 1 dimensions. +# +# {{init attributes: `dict` or `None`, optional}} +# +# During the first `__getitem__` call, any of the +# ``_FillValue``, ``add_offset``, ``scale_factor``, +# ``units``, and ``calendar`` attributes which haven't +# already been set will be inferred from the lookup +# header and cached for future use. +# +# .. versionadded:: NEXTVERSION +# +# {{aggregated_units: `str` or `None`, optional}} +# +# {{aggregated_calendar: `str` or `None`, optional}} +# +# {{init storage_options: `dict` or `None`, optional}} +# +# {{init source: optional}} +# +# {{init copy: `bool`, optional}} +# +# units: `str` or `None`, optional +# Deprecated at version NEXTVERSION. Use the +# *attributes* parameter instead. +# +# calendar: `str` or `None`, optional +# Deprecated at version NEXTVERSION. Use the +# *attributes* parameter instead. 
+# +# """ +# super().__init__( +# filename=filename, +# address=address, +# dtype=dtype, +# shape=shape, +# mask=True, +# unpack=True, +# attributes=None, +# storage_options=storage_options, +# min_file_versions=min_file_versions, +# source=source, +# copy=copy +# ) +# +# if source is not None: +# try: +# aggregated_attributes = source._get_component( +# "aggregated_attributes", None +# ) +# except AttributeError: +# aggregated_attributes = None +# +# try: +# unpack_aggregated_data = source._get_component( +# "unpack_aggregated_data", True +# ) +# except AttributeError: +# unpack_aggregated_data = True +# +# self._set_component( +# "unpack_aggregated_data", +# unpack_aggregated_data, +# copy=False, +# ) +# self._set_component( +# "aggregated_attributes", aggregated_attributes, copy=False +# ) diff --git a/cf/data/fragment/umfragmentarray.py b/cf/data/fragment/umfragmentarray.py deleted file mode 100644 index d4c141c896..0000000000 --- a/cf/data/fragment/umfragmentarray.py +++ /dev/null @@ -1,108 +0,0 @@ -import cfdm - -from ..array.umarray import UMArray - -# from .mixin import FragmentArrayMixin - - -class UMFragmentArray(cfdm.data.fragment.mixin.FragmentArrayMixin, UMArray): - """A CFA fragment array stored in a UM or PP file. - - .. versionadded:: 3.14.0 - - """ - - def __init__( - self, - filename=None, - address=None, - dtype=None, - shape=None, - aggregated_units=False, - aggregated_calendar=False, - attributes=None, - storage_options=None, - source=None, - copy=True, - ): - """**Initialisation** - - :Parameters: - - filename: (sequence of `str`), optional - The names of the UM or PP files containing the fragment. - - address: (sequence of `str`), optional - The start words in the files of the header. - - dtype: `numpy.dtype` - The data type of the aggregated array. May be `None` - if the numpy data-type is not known (which can be the - case for netCDF string types, for example). This may - differ from the data type of the netCDF fragment - variable. 
- - shape: `tuple` - The shape of the fragment within the aggregated - array. This may differ from the shape of the netCDF - fragment variable in that the latter may have fewer - size 1 dimensions. - - {{init attributes: `dict` or `None`, optional}} - - During the first `__getitem__` call, any of the - ``_FillValue``, ``add_offset``, ``scale_factor``, - ``units``, and ``calendar`` attributes which haven't - already been set will be inferred from the lookup - header and cached for future use. - - .. versionadded:: NEXTVERSION - - {{aggregated_units: `str` or `None`, optional}} - - {{aggregated_calendar: `str` or `None`, optional}} - - {{init storage_options: `dict` or `None`, optional}} - - {{init source: optional}} - - {{init copy: `bool`, optional}} - - units: `str` or `None`, optional - Deprecated at version NEXTVERSION. Use the - *attributes* parameter instead. - - calendar: `str` or `None`, optional - Deprecated at version NEXTVERSION. Use the - *attributes* parameter instead. - - """ - super().__init__( - filename=filename, - address=address, - dtype=dtype, - shape=shape, - attributes=attributes, - source=source, - copy=False, - ) - - if source is not None: - try: - aggregated_units = source._get_component( - "aggregated_units", False - ) - except AttributeError: - aggregated_units = False - - try: - aggregated_calendar = source._get_component( - "aggregated_calendar", False - ) - except AttributeError: - aggregated_calendar = False - - self._set_component("aggregated_units", aggregated_units, copy=False) - self._set_component( - "aggregated_calendar", aggregated_calendar, copy=False - ) diff --git a/cf/domain.py b/cf/domain.py index 49d7e9f8e6..32fd33b9c1 100644 --- a/cf/domain.py +++ b/cf/domain.py @@ -162,103 +162,103 @@ def add_file_location( return location -# def cfa_clear_file_substitutions( -# self, -# ): -# """Remove all of the CFA-netCDF file name substitutions. -# -# .. 
versionadded:: 3.15.0 -# -# :Returns: -# -# `dict` -# {{Returns cfa_clear_file_substitutions}} -# -# **Examples** -# -# >>> d.cfa_clear_file_substitutions() -# {} -# -# """ -# out = {} -# for c in self.constructs.filter_by_data(todict=True).values(): -# out.update(c.cfa_clear_file_substitutions()) -# -# return out -# -# def cfa_file_substitutions(self): -# """Return the CFA-netCDF file name substitutions. -# -# .. versionadded:: 3.15.0 -# -# :Returns: -# -# `dict` -# {{Returns cfa_file_substitutions}} -# -# **Examples** -# -# >>> d.cfa_file_substitutions() -# {} -# -# """ -# out = {} -# for c in self.constructs.filter_by_data(todict=True).values(): -# out.update(c.cfa_file_substitutions()) -# -# return out -# -# def cfa_del_file_substitution( -# self, -# base, -# ): -# """Remove a CFA-netCDF file name substitution. -# -# .. versionadded:: 3.15.0 -# -# :Parameters: -# -# base: `str` -# {{cfa base: `str`}} -# -# :Returns: -# -# `dict` -# {{Returns cfa_del_file_substitution}} -# -# **Examples** -# -# >>> f.cfa_del_file_substitution('base') -# -# """ -# for c in self.constructs.filter_by_data(todict=True).values(): -# c.cfa_del_file_substitution( -# base, -# ) -# -# def cfa_update_file_substitutions( -# self, -# substitutions, -# ): -# """Set CFA-netCDF file name substitutions. -# -# .. versionadded:: 3.15.0 -# -# :Parameters: -# -# {{cfa substitutions: `dict`}} -# -# :Returns: -# -# `None` -# -# **Examples** -# -# >>> d.cfa_update_file_substitutions({'base': '/data/model'}) -# -# """ -# for c in self.constructs.filter_by_data(todict=True).values(): -# c.cfa_update_file_substitutions(substitutions) + # def cfa_clear_file_substitutions( + # self, + # ): + # """Remove all of the CFA-netCDF file name substitutions. + # + # .. 
versionadded:: 3.15.0 + # + # :Returns: + # + # `dict` + # {{Returns cfa_clear_file_substitutions}} + # + # **Examples** + # + # >>> d.cfa_clear_file_substitutions() + # {} + # + # """ + # out = {} + # for c in self.constructs.filter_by_data(todict=True).values(): + # out.update(c.cfa_clear_file_substitutions()) + # + # return out + # + # def cfa_file_substitutions(self): + # """Return the CFA-netCDF file name substitutions. + # + # .. versionadded:: 3.15.0 + # + # :Returns: + # + # `dict` + # {{Returns cfa_file_substitutions}} + # + # **Examples** + # + # >>> d.cfa_file_substitutions() + # {} + # + # """ + # out = {} + # for c in self.constructs.filter_by_data(todict=True).values(): + # out.update(c.cfa_file_substitutions()) + # + # return out + # + # def cfa_del_file_substitution( + # self, + # base, + # ): + # """Remove a CFA-netCDF file name substitution. + # + # .. versionadded:: 3.15.0 + # + # :Parameters: + # + # base: `str` + # {{cfa base: `str`}} + # + # :Returns: + # + # `dict` + # {{Returns cfa_del_file_substitution}} + # + # **Examples** + # + # >>> f.cfa_del_file_substitution('base') + # + # """ + # for c in self.constructs.filter_by_data(todict=True).values(): + # c.cfa_del_file_substitution( + # base, + # ) + # + # def cfa_update_file_substitutions( + # self, + # substitutions, + # ): + # """Set CFA-netCDF file name substitutions. + # + # .. versionadded:: 3.15.0 + # + # :Parameters: + # + # {{cfa substitutions: `dict`}} + # + # :Returns: + # + # `None` + # + # **Examples** + # + # >>> d.cfa_update_file_substitutions({'base': '/data/model'}) + # + # """ + # for c in self.constructs.filter_by_data(todict=True).values(): + # c.cfa_update_file_substitutions(substitutions) def close(self): """Close all files referenced by the domain construct. 
diff --git a/cf/functions.py b/cf/functions.py index 7f420ac044..722f75d477 100644 --- a/cf/functions.py +++ b/cf/functions.py @@ -2791,7 +2791,6 @@ def dirname(path, isdir=False): dirname.__doc__ = cfdm.dirname.__doc__.replace("cfdm.", "cf.") - def pathjoin(path1, path2): """Join two file path components intelligently. diff --git a/cf/mixin/propertiesdata.py b/cf/mixin/propertiesdata.py index 100095c07d..3ec83f258a 100644 --- a/cf/mixin/propertiesdata.py +++ b/cf/mixin/propertiesdata.py @@ -2577,99 +2577,99 @@ def ceil(self, inplace=False, i=False): delete_props=True, ) -# def cfa_update_file_substitutions(self, substitutions): -# """Set CFA-netCDF file name substitutions. -# -# .. versionadded:: 3.15.0 -# -# :Parameters: -# -# {{cfa substitutions: `dict`}} -# -# :Returns: -# -# `None` -# -# **Examples** -# -# >>> f.cfa_update_file_substitutions({'base', '/data/model'}) -# -# """ -# data = self.get_data(None, _fill_value=False, _units=False) -# if data is not None: -# data.cfa_update_file_substitutions(substitutions) -# -# @_inplace_enabled(default=False) -# def cfa_clear_file_substitutions(self, inplace=False): -# """Remove all of the CFA-netCDF file name substitutions. -# -# .. versionadded:: 3.15.0 -# -# :Parameters: -# -# {{inplace: `bool`, optional}} -# -# :Returns: -# -# `dict` -# {{Returns cfa_clear_file_substitutions}} -# -# **Examples** -# -# >>> f.cfa_clear_file_substitutions() -# {} -# -# """ -# data = self.get_data(None) -# if data is None: -# return {} -# -# return data.cfa_clear_file_substitutions({}) -# -# def cfa_del_file_substitution( -# self, -# base, -# ): -# """Remove a CFA-netCDF file name substitution. -# -# .. 
versionadded:: 3.15.0 -# -# :Parameters: -# -# `dict` -# {{Returns cfa_del_file_substitution}} -# -# **Examples** -# -# >>> f.cfa_del_file_substitution('base') -# -# """ -# data = self.get_data(None, _fill_value=False, _units=False) -# if data is not None: -# data.cfa_del_file_substitution(base) -# -# def cfa_file_substitutions( -# self, -# ): -# """Return the CFA-netCDF file name substitutions. -# -# .. versionadded:: 3.15.0 -# -# :Returns: -# -# `dict` -# {{Returns cfa_file_substitutions}} -# -# **Examples** -# -# >>> g = f.cfa_file_substitutions() -# -# """ -# data = self.get_data(None) -# if data is None: -# return {} -# -# return data.cfa_file_substitutions({}) + # def cfa_update_file_substitutions(self, substitutions): + # """Set CFA-netCDF file name substitutions. + # + # .. versionadded:: 3.15.0 + # + # :Parameters: + # + # {{cfa substitutions: `dict`}} + # + # :Returns: + # + # `None` + # + # **Examples** + # + # >>> f.cfa_update_file_substitutions({'base', '/data/model'}) + # + # """ + # data = self.get_data(None, _fill_value=False, _units=False) + # if data is not None: + # data.cfa_update_file_substitutions(substitutions) + # + # @_inplace_enabled(default=False) + # def cfa_clear_file_substitutions(self, inplace=False): + # """Remove all of the CFA-netCDF file name substitutions. + # + # .. versionadded:: 3.15.0 + # + # :Parameters: + # + # {{inplace: `bool`, optional}} + # + # :Returns: + # + # `dict` + # {{Returns cfa_clear_file_substitutions}} + # + # **Examples** + # + # >>> f.cfa_clear_file_substitutions() + # {} + # + # """ + # data = self.get_data(None) + # if data is None: + # return {} + # + # return data.cfa_clear_file_substitutions({}) + # + # def cfa_del_file_substitution( + # self, + # base, + # ): + # """Remove a CFA-netCDF file name substitution. + # + # .. 
versionadded:: 3.15.0 + # + # :Parameters: + # + # `dict` + # {{Returns cfa_del_file_substitution}} + # + # **Examples** + # + # >>> f.cfa_del_file_substitution('base') + # + # """ + # data = self.get_data(None, _fill_value=False, _units=False) + # if data is not None: + # data.cfa_del_file_substitution(base) + # + # def cfa_file_substitutions( + # self, + # ): + # """Return the CFA-netCDF file name substitutions. + # + # .. versionadded:: 3.15.0 + # + # :Returns: + # + # `dict` + # {{Returns cfa_file_substitutions}} + # + # **Examples** + # + # >>> g = f.cfa_file_substitutions() + # + # """ + # data = self.get_data(None) + # if data is None: + # return {} + # + # return data.cfa_file_substitutions({}) def chunk(self, chunksize=None): """Partition the data array. diff --git a/cf/mixin/propertiesdatabounds.py b/cf/mixin/propertiesdatabounds.py index 1f41c2b464..8fd656af23 100644 --- a/cf/mixin/propertiesdatabounds.py +++ b/cf/mixin/propertiesdatabounds.py @@ -1236,120 +1236,120 @@ def ceil(self, bounds=True, inplace=False, i=False): i=i, ) -# def cfa_clear_file_substitutions( -# self, -# ): -# """Remove all of the CFA-netCDF file name substitutions. -# -# .. versionadded:: 3.15.0 -# -# :Returns: -# -# `dict` -# {{Returns cfa_clear_file_substitutions}} -# -# **Examples** -# -# >>> f.cfa_clear_file_substitutions() -# {} -# -# """ -# out = super().cfa_clear_file_substitutions() -# -# bounds = self.get_bounds(None) -# if bounds is not None: -# out.update(bounds.cfa_clear_file_substitutions()) -# -# interior_ring = self.get_interior_ring(None) -# if interior_ring is not None: -# out.update(interior_ring.cfa_clear_file_substitutions()) -# -# return out -# -# def cfa_del_file_substitution(self, base): -# """Remove a CFA-netCDF file name substitution. -# -# .. 
versionadded:: 3.15.0 -# -# :Parameters: -# -# {{cfa base: `str`}} -# -# :Returns: -# -# `dict` -# {{Returns cfa_del_file_substitution}} -# -# **Examples** -# -# >>> c.cfa_del_file_substitution('base') -# -# """ -# super().cfa_del_file_substitution(base) -# -# bounds = self.get_bounds(None) -# if bounds is not None: -# bounds.cfa_del_file_substitution(base) -# -# interior_ring = self.get_interior_ring(None) -# if interior_ring is not None: -# interior_ring.cfa_del_file_substitution(base) -# -# def cfa_file_substitutions(self): -# """Return the CFA-netCDF file name substitutions. -# -# .. versionadded:: 3.15.0 -# -# :Returns: -# -# `dict` -# {{Returns cfa_file_substitutions}} -# -# **Examples** -# -# >>> c.cfa_file_substitutions() -# {} -# -# """ -# out = super().cfa_file_substitutions() -# -# bounds = self.get_bounds(None) -# if bounds is not None: -# out.update(bounds.cfa_file_substitutions({})) -# -# interior_ring = self.get_interior_ring(None) -# if interior_ring is not None: -# out.update(interior_ring.cfa_file_substitutions({})) -# -# return out -# -# def cfa_update_file_substitutions(self, substitutions): -# """Set CFA-netCDF file name substitutions. -# -# .. versionadded:: 3.15.0 -# -# :Parameters: -# -# {{cfa substitutions: `dict`}} -# -# :Returns: -# -# `None` -# -# **Examples** -# -# >>> c.cfa_add_file_substitutions({'base', '/data/model'}) -# -# """ -# super().cfa_update_file_substitutions(substitutions) -# -# bounds = self.get_bounds(None) -# if bounds is not None: -# bounds.cfa_update_file_substitutions(substitutions) -# -# interior_ring = self.get_interior_ring(None) -# if interior_ring is not None: -# interior_ring.cfa_update_file_substitutions(substitutions) + # def cfa_clear_file_substitutions( + # self, + # ): + # """Remove all of the CFA-netCDF file name substitutions. + # + # .. 
versionadded:: 3.15.0 + # + # :Returns: + # + # `dict` + # {{Returns cfa_clear_file_substitutions}} + # + # **Examples** + # + # >>> f.cfa_clear_file_substitutions() + # {} + # + # """ + # out = super().cfa_clear_file_substitutions() + # + # bounds = self.get_bounds(None) + # if bounds is not None: + # out.update(bounds.cfa_clear_file_substitutions()) + # + # interior_ring = self.get_interior_ring(None) + # if interior_ring is not None: + # out.update(interior_ring.cfa_clear_file_substitutions()) + # + # return out + # + # def cfa_del_file_substitution(self, base): + # """Remove a CFA-netCDF file name substitution. + # + # .. versionadded:: 3.15.0 + # + # :Parameters: + # + # {{cfa base: `str`}} + # + # :Returns: + # + # `dict` + # {{Returns cfa_del_file_substitution}} + # + # **Examples** + # + # >>> c.cfa_del_file_substitution('base') + # + # """ + # super().cfa_del_file_substitution(base) + # + # bounds = self.get_bounds(None) + # if bounds is not None: + # bounds.cfa_del_file_substitution(base) + # + # interior_ring = self.get_interior_ring(None) + # if interior_ring is not None: + # interior_ring.cfa_del_file_substitution(base) + # + # def cfa_file_substitutions(self): + # """Return the CFA-netCDF file name substitutions. + # + # .. versionadded:: 3.15.0 + # + # :Returns: + # + # `dict` + # {{Returns cfa_file_substitutions}} + # + # **Examples** + # + # >>> c.cfa_file_substitutions() + # {} + # + # """ + # out = super().cfa_file_substitutions() + # + # bounds = self.get_bounds(None) + # if bounds is not None: + # out.update(bounds.cfa_file_substitutions({})) + # + # interior_ring = self.get_interior_ring(None) + # if interior_ring is not None: + # out.update(interior_ring.cfa_file_substitutions({})) + # + # return out + # + # def cfa_update_file_substitutions(self, substitutions): + # """Set CFA-netCDF file name substitutions. + # + # .. 
versionadded:: 3.15.0 + # + # :Parameters: + # + # {{cfa substitutions: `dict`}} + # + # :Returns: + # + # `None` + # + # **Examples** + # + # >>> c.cfa_add_file_substitutions({'base', '/data/model'}) + # + # """ + # super().cfa_update_file_substitutions(substitutions) + # + # bounds = self.get_bounds(None) + # if bounds is not None: + # bounds.cfa_update_file_substitutions(substitutions) + # + # interior_ring = self.get_interior_ring(None) + # if interior_ring is not None: + # interior_ring.cfa_update_file_substitutions(substitutions) def chunk(self, chunksize=None): """Partition the data array. diff --git a/cf/read_write/read.py b/cf/read_write/read.py index 158fc4d5d4..7a239d4567 100644 --- a/cf/read_write/read.py +++ b/cf/read_write/read.py @@ -64,7 +64,7 @@ def read( dask_chunks="storage-aligned", store_hdf5_chunks=True, domain=False, -# cfa=None, + # cfa=None, cfa_write=None, netcdf_backend=None, storage_options=None, @@ -1173,7 +1173,7 @@ def read( warn_valid=warn_valid, select=select, domain=domain, -# cfa=cfa, + # cfa=cfa, cfa_write=cfa_write, netcdf_backend=netcdf_backend, storage_options=storage_options, @@ -1293,7 +1293,7 @@ def _read_a_file( store_hdf5_chunks=True, select=None, domain=False, -# cfa=None, + # cfa=None, cfa_write=None, netcdf_backend=None, storage_options=None, @@ -1429,7 +1429,7 @@ def _read_a_file( dask_chunks=dask_chunks, store_hdf5_chunks=store_hdf5_chunks, cache=cache, -# cfa=cfa, + # cfa=cfa, cfa_write=cfa_write, ) except MaskError: diff --git a/cf/test/test_CFA.py b/cf/test/test_CFA.py index e13a81c77e..beab4e1509 100644 --- a/cf/test/test_CFA.py +++ b/cf/test/test_CFA.py @@ -1,32 +1,32 @@ -#import atexit -#import datetime -#import faulthandler -#import os -#import tempfile -#import unittest -#from pathlib import PurePath +# import atexit +# import datetime +# import faulthandler +# import os +# import tempfile +# import unittest +# from pathlib import PurePath # -#import netCDF4 +# import netCDF4 # -#faulthandler.enable() # to 
debug seg faults and timeouts +# faulthandler.enable() # to debug seg faults and timeouts # -#import cf +# import cf # -#n_tmpfiles = 5 -#tmpfiles = [ +# n_tmpfiles = 5 +# tmpfiles = [ # tempfile.mkstemp("_test_CFA.nc", dir=os.getcwd())[1] # for i in range(n_tmpfiles) -#] -#( +# ] +# ( # tmpfile1, # tmpfile2, # nc_file, # cfa_file, # cfa_file2, -#) = tmpfiles +# ) = tmpfiles # # -#def _remove_tmpfiles(): +# def _remove_tmpfiles(): # """Try to remove defined temporary files by deleting their paths.""" # for f in tmpfiles: # try: @@ -35,10 +35,10 @@ # pass # # -#atexit.register(_remove_tmpfiles) +# atexit.register(_remove_tmpfiles) # # -#class CFATest(unittest.TestCase): +# class CFATest(unittest.TestCase): # """Unit test for aggregation variables.""" # # netcdf3_fmts = [ @@ -626,7 +626,7 @@ # cf.write(g, cfa_file, cfa=cfa) # # -#if __name__ == "__main__": +# if __name__ == "__main__": # print("Run date:", datetime.datetime.now()) # cf.environment() # print() From eb3f63f20de77a2a6bd0275cb2552cf54738133a Mon Sep 17 00:00:00 2001 From: David Hassell Date: Fri, 15 Nov 2024 17:07:30 +0000 Subject: [PATCH 11/51] dev --- cf/data/array/abstract/array.py | 2 - cf/data/array/boundsfromnodesarray.py | 16 +- cf/data/array/cellconnectivityarray.py | 18 +- cf/data/array/fullarray.py | 503 +++++++++--------- cf/data/array/gatheredarray.py | 17 +- cf/data/array/h5netcdfarray.py | 64 +-- cf/data/array/netcdf4array.py | 64 +-- cf/data/array/pointtopologyarray.py | 14 +- cf/data/array/raggedcontiguousarray.py | 21 +- cf/data/array/raggedindexedarray.py | 22 +- cf/data/array/raggedindexedcontiguousarray.py | 25 +- cf/data/array/subsampledarray.py | 201 +------ cf/data/array/umarray.py | 4 +- cf/mixin/propertiesdata.py | 162 +++--- cf/read_write/netcdf/__init__.py | 2 +- cf/read_write/netcdf/netcdfread.py | 26 +- cf/read_write/netcdf/netcdfwrite.py | 30 +- cf/read_write/read.py | 8 +- cf/read_write/write.py | 2 +- 19 files changed, 393 insertions(+), 808 deletions(-) diff --git 
a/cf/data/array/abstract/array.py b/cf/data/array/abstract/array.py index 36f0a7ad39..307fa4a421 100644 --- a/cf/data/array/abstract/array.py +++ b/cf/data/array/abstract/array.py @@ -2,8 +2,6 @@ from ....mixin_container import Container -# from ..mixin import ArrayMixin - class Array(Container, cfdm.Array): """Abstract base class for a container of an underlying array. diff --git a/cf/data/array/boundsfromnodesarray.py b/cf/data/array/boundsfromnodesarray.py index b8a32f6c1f..6eb0952fa3 100644 --- a/cf/data/array/boundsfromnodesarray.py +++ b/cf/data/array/boundsfromnodesarray.py @@ -2,23 +2,9 @@ from ...mixin_container import Container -# from .mixin import ArrayMixin, CompressedArrayMixin - class BoundsFromNodesArray( - # CompressedArrayMixin, - # ArrayMixin, Container, cfdm.BoundsFromNodesArray, ): - """An array of cell bounds defined by UGRID node coordinates. - - The UGRID node coordinates contain the locations of the nodes of - the domain topology. In UGRID, the bounds of edge, face and volume - cells may be defined by these locations in conjunction with a - mapping from each cell boundary vertex to its corresponding - coordinate value. - - .. versionadded:: 3.16.0 - - """ + pass diff --git a/cf/data/array/cellconnectivityarray.py b/cf/data/array/cellconnectivityarray.py index 5202b3f5c7..f7585aed9a 100644 --- a/cf/data/array/cellconnectivityarray.py +++ b/cf/data/array/cellconnectivityarray.py @@ -2,25 +2,9 @@ from ...mixin_container import Container -# from .mixin import ArrayMixin, CompressedArrayMixin - class CellConnectivityArray( - # CompressedArrayMixin, - # ArrayMixin, Container, cfdm.CellConnectivityArray, ): - """A connectivity array derived from a UGRID connectivity variable. - - A UGRID connectivity variable contains indices which map each cell - to its neighbours, as found in a UGRID "face_face_connectivity" or - "volume_volume_connectivity" variable. - - The connectivity array has one more column than the corresponding - UGRID variable. 
The extra column, in the first position, contains - the identifier for each cell. - - .. versionadded:: 3.16.0 - - """ + pass diff --git a/cf/data/array/fullarray.py b/cf/data/array/fullarray.py index 559a9cb410..678b84ca48 100644 --- a/cf/data/array/fullarray.py +++ b/cf/data/array/fullarray.py @@ -1,13 +1,16 @@ -import numpy as np -from cfdm.data.mixin import IndexMixin +import cfdm -from ...functions import indices_shape, parse_indices -from .abstract import Array +from ...mixin_container import Container +#import numpy as np +#from cfdm.data.mixin import IndexMixin +# +#from ...functions import indices_shape, parse_indices +#from .abstract import Array -_FULLARRAY_HANDLED_FUNCTIONS = {} +#_FULLARRAY_HANDLED_FUNCTIONS = {} -class FullArray(IndexMixin, Array): +class FullArray(Container, cfdm.FullArray): #IndexMixin, Array): """A array filled with a given value. The array may be empty or all missing values. @@ -16,247 +19,247 @@ class FullArray(IndexMixin, Array): """ - def __init__( - self, - fill_value=None, - dtype=None, - shape=None, - attributes=None, - source=None, - copy=True, - ): - """**Initialisation** - - :Parameters: - - fill_value : scalar, optional - The fill value for the array. May be set to - `cf.masked` or `np.ma.masked`. - - dtype: `numpy.dtype` - The data type of the array. - - shape: `tuple` - The array dimension sizes. - - {{init attributes: `dict` or `None`, optional}} - - .. versionadded:: NEXTVERSION - - {{init source: optional}} - - {{init copy: `bool`, optional}} - - units: `str` or `None`, optional - Deprecated at version NEXTVERSION. Use the - *attributes* parameter instead. - - calendar: `str` or `None`, optional - Deprecated at version NEXTVERSION. Use the - *attributes* parameter instead. 
- - """ - super().__init__(source=source, copy=copy) - - if source is not None: - try: - fill_value = source._get_component("full_value", None) - except AttributeError: - fill_value = None - - try: - dtype = source._get_component("dtype", None) - except AttributeError: - dtype = None - - try: - shape = source._get_component("shape", None) - except AttributeError: - shape = None - - try: - attributes = source._get_component("attributes", False) - except AttributeError: - attributes = None - - self._set_component("full_value", fill_value, copy=False) - self._set_component("dtype", dtype, copy=False) - self._set_component("shape", shape, copy=False) - self._set_component("attributes", attributes, copy=False) - - def __array_function__(self, func, types, args, kwargs): - """The `numpy` `__array_function__` protocol. - - .. versionadded:: 3.15.0 - - """ - if func not in _FULLARRAY_HANDLED_FUNCTIONS: - return NotImplemented - - # Note: This allows subclasses that don't override - # __array_function__ to handle FullArray objects - if not all(issubclass(t, self.__class__) for t in types): - return NotImplemented - - return _FULLARRAY_HANDLED_FUNCTIONS[func](*args, **kwargs) - - def __repr__(self): - """Called by the `repr` built-in function. - - x.__repr__() <==> repr(x) - - """ - return f"" - - def __str__(self): - """Called by the `str` built-in function. - - x.__str__() <==> str(x) - - """ - fill_value = self.get_full_value() - if fill_value is None: - return "Uninitialised" - - return f"Filled with {fill_value!r}" - - def _get_array(self, index=None): - """Returns the full array. - - .. versionadded:: NEXTVERSION - - .. seealso:: `__array__`, `index` - - :Parameters: - - {{index: `tuple` or `None`, optional}} - - :Returns: - - `numpy.ndarray` - The subspace. 
- - """ - if index is None: - shape = self.shape - else: - original_shape = self.original_shape - index = parse_indices(original_shape, index, keepdims=False) - shape = indices_shape(index, original_shape, keepdims=False) - - fill_value = self.get_full_value() - if fill_value is np.ma.masked: - array = np.ma.masked_all(shape, dtype=self.dtype) - elif fill_value is not None: - array = np.full(shape, fill_value=fill_value, dtype=self.dtype) - else: - array = np.empty(shape, dtype=self.dtype) - - return array - - @property - def array(self): - """Return an independent numpy array containing the data. - - .. versionadded:: NEXTVERSION - - :Returns: - - `numpy.ndarray` - An independent numpy array of the data. - """ - return np.asanyarray(self) - - @property - def dtype(self): - """Data-type of the data elements.""" - return self._get_component("dtype") - - @property - def shape(self): - """Tuple of array dimension sizes.""" - return self._get_component("shape") - - def get_full_value(self, default=AttributeError()): - """Return the data array fill value. - - .. versionadded:: 3.14.0 - - .. seealso:: `set_full_value` - - :Parameters: - - default: optional - Return the value of the *default* parameter if the - fill value has not been set. If set to an `Exception` - instance then it will be raised instead. - - :Returns: - - The fill value. - - """ - return self._get_component("full_value", default=default) - - def set_full_value(self, fill_value): - """Set the data array fill value. - - .. versionadded:: 3.14.0 - - .. seealso:: `get_full_value` - - :Parameters: - - fill_value : scalar, optional - The fill value for the array. May be set to - `cf.masked` or `np.ma.masked`. - - :Returns: - - `None` - - """ - self._set_component("full_value", fill_value, copy=False) - - -def fullarray_implements(numpy_function): - """Register an __array_function__ implementation for FullArray objects. - - .. 
versionadded:: 3.15.0 - - """ - - def decorator(func): - _FULLARRAY_HANDLED_FUNCTIONS[numpy_function] = func - return func - - return decorator - - -@fullarray_implements(np.unique) -def unique( - a, return_index=False, return_inverse=False, return_counts=False, axis=None -): - """Version of `np.unique` that is optimised for `FullArray` objects. - - .. versionadded:: 3.15.0 - - """ - if return_index or return_inverse or return_counts or axis is not None: - # Fall back to the slow unique. (I'm sure we could probably do - # something more clever here, but there is no use case at - # present.) - return np.unique( - a[...], - return_index=return_index, - return_inverse=return_inverse, - return_counts=return_counts, - axis=axis, - ) - - # Fast unique based on the full value - x = a.get_full_value() - if x is np.ma.masked: - return np.ma.masked_all((1,), dtype=a.dtype) - - return np.array([x], dtype=a.dtype) +# def __init__( +# self, +# fill_value=None, +# dtype=None, +# shape=None, +# attributes=None, +# source=None, +# copy=True, +# ): +# """**Initialisation** +# +# :Parameters: +# +# fill_value : scalar, optional +# The fill value for the array. May be set to +# `cf.masked` or `np.ma.masked`. +# +# dtype: `numpy.dtype` +# The data type of the array. +# +# shape: `tuple` +# The array dimension sizes. +# +# {{init attributes: `dict` or `None`, optional}} +# +# .. versionadded:: NEXTVERSION +# +# {{init source: optional}} +# +# {{init copy: `bool`, optional}} +# +# units: `str` or `None`, optional +# Deprecated at version NEXTVERSION. Use the +# *attributes* parameter instead. +# +# calendar: `str` or `None`, optional +# Deprecated at version NEXTVERSION. Use the +# *attributes* parameter instead. 
+# +# """ +# super().__init__(source=source, copy=copy) +# +# if source is not None: +# try: +# fill_value = source._get_component("full_value", None) +# except AttributeError: +# fill_value = None +# +# try: +# dtype = source._get_component("dtype", None) +# except AttributeError: +# dtype = None +# +# try: +# shape = source._get_component("shape", None) +# except AttributeError: +# shape = None +# +# try: +# attributes = source._get_component("attributes", False) +# except AttributeError: +# attributes = None +# +# self._set_component("full_value", fill_value, copy=False) +# self._set_component("dtype", dtype, copy=False) +# self._set_component("shape", shape, copy=False) +# self._set_component("attributes", attributes, copy=False) +# +# def __array_function__(self, func, types, args, kwargs): +# """The `numpy` `__array_function__` protocol. +# +# .. versionadded:: 3.15.0 +# +# """ +# if func not in _FULLARRAY_HANDLED_FUNCTIONS: +# return NotImplemented +# +# # Note: This allows subclasses that don't override +# # __array_function__ to handle FullArray objects +# if not all(issubclass(t, self.__class__) for t in types): +# return NotImplemented +# +# return _FULLARRAY_HANDLED_FUNCTIONS[func](*args, **kwargs) +# +# def __repr__(self): +# """Called by the `repr` built-in function. +# +# x.__repr__() <==> repr(x) +# +# """ +# return f"" +# +# def __str__(self): +# """Called by the `str` built-in function. +# +# x.__str__() <==> str(x) +# +# """ +# fill_value = self.get_full_value() +# if fill_value is None: +# return "Uninitialised" +# +# return f"Filled with {fill_value!r}" +# +# def _get_array(self, index=None): +# """Returns the full array. +# +# .. versionadded:: NEXTVERSION +# +# .. seealso:: `__array__`, `index` +# +# :Parameters: +# +# {{index: `tuple` or `None`, optional}} +# +# :Returns: +# +# `numpy.ndarray` +# The subspace. 
+# +# """ +# if index is None: +# shape = self.shape +# else: +# original_shape = self.original_shape +# index = parse_indices(original_shape, index, keepdims=False) +# shape = indices_shape(index, original_shape, keepdims=False) +# +# fill_value = self.get_full_value() +# if fill_value is np.ma.masked: +# array = np.ma.masked_all(shape, dtype=self.dtype) +# elif fill_value is not None: +# array = np.full(shape, fill_value=fill_value, dtype=self.dtype) +# else: +# array = np.empty(shape, dtype=self.dtype) +# +# return array +# +# @property +# def array(self): +# """Return an independent numpy array containing the data. +# +# .. versionadded:: NEXTVERSION +# +# :Returns: +# +# `numpy.ndarray` +# An independent numpy array of the data. +# """ +# return np.asanyarray(self) +# +# @property +# def dtype(self): +# """Data-type of the data elements.""" +# return self._get_component("dtype") +# +# @property +# def shape(self): +# """Tuple of array dimension sizes.""" +# return self._get_component("shape") +# +# def get_full_value(self, default=AttributeError()): +# """Return the data array fill value. +# +# .. versionadded:: 3.14.0 +# +# .. seealso:: `set_full_value` +# +# :Parameters: +# +# default: optional +# Return the value of the *default* parameter if the +# fill value has not been set. If set to an `Exception` +# instance then it will be raised instead. +# +# :Returns: +# +# The fill value. +# +# """ +# return self._get_component("full_value", default=default) +# +# def set_full_value(self, fill_value): +# """Set the data array fill value. +# +# .. versionadded:: 3.14.0 +# +# .. seealso:: `get_full_value` +# +# :Parameters: +# +# fill_value : scalar, optional +# The fill value for the array. May be set to +# `cf.masked` or `np.ma.masked`. +# +# :Returns: +# +# `None` +# +# """ +# self._set_component("full_value", fill_value, copy=False) +# +# +#def fullarray_implements(numpy_function): +# """Register an __array_function__ implementation for FullArray objects. 
+# +# .. versionadded:: 3.15.0 +# +# """ +# +# def decorator(func): +# _FULLARRAY_HANDLED_FUNCTIONS[numpy_function] = func +# return func +# +# return decorator +# +# +#@fullarray_implements(np.unique) +#def unique( +# a, return_index=False, return_inverse=False, return_counts=False, axis=None +#): +# """Version of `np.unique` that is optimised for `FullArray` objects. +# +# .. versionadded:: 3.15.0 +# +# """ +# if return_index or return_inverse or return_counts or axis is not None: +# # Fall back to the slow unique. (I'm sure we could probably do +# # something more clever here, but there is no use case at +# # present.) +# return np.unique( +# a[...], +# return_index=return_index, +# return_inverse=return_inverse, +# return_counts=return_counts, +# axis=axis, +# ) +# +# # Fast unique based on the full value +# x = a.get_full_value() +# if x is np.ma.masked: +# return np.ma.masked_all((1,), dtype=a.dtype) +# +# return np.array([x], dtype=a.dtype) diff --git a/cf/data/array/gatheredarray.py b/cf/data/array/gatheredarray.py index c110d879d1..159e4da991 100644 --- a/cf/data/array/gatheredarray.py +++ b/cf/data/array/gatheredarray.py @@ -2,21 +2,6 @@ from ...mixin_container import Container -# from .mixin import CompressedArrayMixin - class GatheredArray(Container, cfdm.GatheredArray): - """An underlying gathered array. - - Compression by gathering combines axes of a multidimensional array - into a new, discrete axis whilst omitting the missing values and - thus reducing the number of values that need to be stored. - - The information needed to uncompress the data is stored in a "list - variable" that gives the indices of the required points. - - See CF section 8.2. "Lossless Compression by Gathering". - - .. 
versionadded:: 3.0.0 - - """ + pass diff --git a/cf/data/array/h5netcdfarray.py b/cf/data/array/h5netcdfarray.py index 4d083f5186..b82f054127 100644 --- a/cf/data/array/h5netcdfarray.py +++ b/cf/data/array/h5netcdfarray.py @@ -2,17 +2,11 @@ from ...mixin_container import Container -# from .locks import netcdf_lock -from .mixin import ( # , IndexMixin; ArrayMixin,; FileArrayMixin, - ActiveStorageMixin, -) +from .mixin import ActiveStorageMixin class H5netcdfArray( ActiveStorageMixin, - # IndexMixin, - # FileArrayMixin, - # ArrayMixin, Container, cfdm.H5netcdfArray, ): @@ -26,59 +20,3 @@ class H5netcdfArray( .. versionadded:: NEXTVERSION """ - - # def __dask_tokenize__(self): - # """Return a value fully representative of the object. - - # .. versionadded:: NEXTVERSION - - # """ - # return super().__dask_tokenize__() + (self.get_mask(),) - - # @property - # def _lock(self): - # """Set the lock for use in `dask.array.from_array`. - - # Returns a lock object because concurrent reads are not - # currently supported by the HDF5 library. The lock object will - # be the same for all `NetCDF4Array` and `H5netcdfArray` - # instances, regardless of the dataset they access, which means - # that access to all netCDF and HDF files coordinates around the - # same lock. - - # .. versionadded:: NEXTVERSION - - # """ - # return netcdf_lock - - # def _get_array(self, index=None): - # """Returns a subspace of the dataset variable. - - # .. versionadded:: NEXTVERSION - - # .. seealso:: `__array__`, `index` - - # :Parameters: - - # {{index: `tuple` or `None`, optional}} - - # :Returns: - - # `numpy.ndarray` - # The subspace. - - # """ - # if index is None: - # index = self.index() - - # # We need to lock because the netCDF file is about to be accessed. - # self._lock.acquire() - - # # It's cfdm.H5netcdfArray.__getitem__ that we want to - # # call here, but we use 'Container' in super because - # # that comes immediately before cfdm.H5netcdfArray in - # # the method resolution order. 
- # array = super(Container, self).__getitem__(index) - - # self._lock.release() - # return array diff --git a/cf/data/array/netcdf4array.py b/cf/data/array/netcdf4array.py index 4046305671..6e3f25d0eb 100644 --- a/cf/data/array/netcdf4array.py +++ b/cf/data/array/netcdf4array.py @@ -1,13 +1,11 @@ import cfdm from ...mixin_container import Container -from .mixin import ActiveStorageMixin # , ArrayMixin #,FileArrayMixin +from .mixin import ActiveStorageMixin class NetCDF4Array( ActiveStorageMixin, - # FileArrayMixin, - # ArrayMixin, Container, cfdm.NetCDF4Array, ): @@ -19,62 +17,4 @@ class NetCDF4Array( method. See `cf.data.collapse.Collapse` for details. """ - - # def __dask_tokenize__(self): - # """Return a value fully representative of the object. - # - # .. versionadded:: 3.15.0 - # - # """ - # return super().__dask_tokenize__() + (self.get_mask(),) - - -# -# @property -# def _lock(self): -# """Set the lock for use in `dask.array.from_array`. -# -# Returns a lock object because concurrent reads are not -# currently supported by the netCDF and HDF libraries. The lock -# object will be the same for all `NetCDF4Array` and -# `H5netcdfArray` instances, regardless of the dataset they -# access, which means that access to all netCDF and HDF files -# coordinates around the same lock. -# -# .. versionadded:: 3.14.0 -# -# """ -# return netcdf_lock -# -# def _get_array(self, index=None): -# """Returns a subspace of the dataset variable. -# -# .. versionadded:: NEXTVERSION -# -# .. seealso:: `__array__`, `index` -# -# :Parameters: -# -# {{index: `tuple` or `None`, optional}} -# -# :Returns: -# -# `numpy.ndarray` -# The subspace. -# -# """ -# if index is None: -# index = self.index() -# -# # Note: We need to lock because the netCDF file is about to be -# # accessed. 
-# self._lock.acquire() -# -# # Note: It's cfdm.NetCDFArray.__getitem__ that we want to call -# # here, but we use 'Container' in super because that -# # comes immediately before cfdm.NetCDFArray in the -# # method resolution order. -# array = super(Container, self).__getitem__(index) -# -# self._lock.release() -# return array + pass diff --git a/cf/data/array/pointtopologyarray.py b/cf/data/array/pointtopologyarray.py index f63d407ef2..be9512bdad 100644 --- a/cf/data/array/pointtopologyarray.py +++ b/cf/data/array/pointtopologyarray.py @@ -2,21 +2,9 @@ from ...mixin_container import Container -# from .mixin import ArrayMixin, CompressedArrayMixin - class PointTopologyArray( - # CompressedArrayMixin, - # ArrayMixin, Container, cfdm.PointTopologyArray, ): - """A point cell domain topology array derived from a UGRID variable. - - A point cell domain topology array derived from an underlying - UGRID "edge_node_connectivity" or UGRID "face_node_connectivity" - array. - - .. versionadded:: 3.16.0 - - """ + pass diff --git a/cf/data/array/raggedcontiguousarray.py b/cf/data/array/raggedcontiguousarray.py index 365d263423..1b33c48dea 100644 --- a/cf/data/array/raggedcontiguousarray.py +++ b/cf/data/array/raggedcontiguousarray.py @@ -2,25 +2,6 @@ from ...mixin_container import Container -# from .mixin import ArrayMixin, CompressedArrayMixin - class RaggedContiguousArray(Container, cfdm.RaggedContiguousArray): - """An underlying contiguous ragged array. - - A collection of features stored using a contiguous ragged array - combines all features along a single dimension (the "sample - dimension") such that each feature in the collection occupies a - contiguous block. - - The information needed to uncompress the data is stored in a - "count variable" that gives the size of each block. - - It is assumed that the compressed dimension is the left-most - dimension in the compressed array. - - See CF section 9 "Discrete Sampling Geometries". - - .. 
versionadded:: 3.0.0 - - """ + pass diff --git a/cf/data/array/raggedindexedarray.py b/cf/data/array/raggedindexedarray.py index 73f39e7007..69e2ee7a9a 100644 --- a/cf/data/array/raggedindexedarray.py +++ b/cf/data/array/raggedindexedarray.py @@ -2,26 +2,6 @@ from ...mixin_container import Container -# from .mixin import ArrayMixin, CompressedArrayMixin - class RaggedIndexedArray(Container, cfdm.RaggedIndexedArray): - """An underlying indexed ragged array. - - A collection of features stored using an indexed ragged array - combines all features along a single dimension (the "sample - dimension") such that the values of each feature in the collection - are interleaved. - - The information needed to uncompress the data is stored in an - "index variable" that specifies the feature that each element of - the sample dimension belongs to. - - It is assumed that the compressed dimension is the left-most - dimension in the compressed array. - - See CF section 9 "Discrete Sampling Geometries". - - .. versionadded:: 3.0.0 - - """ + pass diff --git a/cf/data/array/raggedindexedcontiguousarray.py b/cf/data/array/raggedindexedcontiguousarray.py index 3bd28f4e1f..96870fd59a 100644 --- a/cf/data/array/raggedindexedcontiguousarray.py +++ b/cf/data/array/raggedindexedcontiguousarray.py @@ -2,32 +2,9 @@ from ...mixin_container import Container -# from .mixin import ArrayMixin, CompressedArrayMixin - class RaggedIndexedContiguousArray( Container, cfdm.RaggedIndexedContiguousArray, ): - """An underlying indexed contiguous ragged array. - - A collection of features, each of which is sequence of (vertical) - profiles, stored using an indexed contiguous ragged array combines - all feature elements along a single dimension (the "sample - dimension") such that a contiguous ragged array representation is - used for each profile and the indexed ragged array representation - to organise the profiles into timeseries. 
- - The information needed to uncompress the data is stored in a - "count variable" that gives the size of each profile; and in a - "index variable" that specifies the feature that each profile - belongs to. - - It is assumed that the compressed dimensions are the two left-most - dimensions in the compressed array. - - See CF section 9 "Discrete Sampling Geometries". - - .. versionadded:: 3.0.0 - - """ + pass diff --git a/cf/data/array/subsampledarray.py b/cf/data/array/subsampledarray.py index 71fae12925..15b7e0c6ad 100644 --- a/cf/data/array/subsampledarray.py +++ b/cf/data/array/subsampledarray.py @@ -2,205 +2,6 @@ from ...mixin_container import Container -# from .mixin import ArrayMixin, CompressedArrayMixin - class SubsampledArray(Container, cfdm.SubsampledArray): - """An underlying subsampled array. - - For some structured coordinate data (e.g. coordinates describing - remote sensing products) space may be saved by storing a subsample - of the data, called tie points. The uncompressed data can be - reconstituted by interpolation, from the subsampled values. This - process will likely result in a loss in accuracy (as opposed to - precision) in the uncompressed variables, due to rounding and - approximation errors in the interpolation calculations, but it is - assumed that these errors will be small enough to not be of - concern to users of the uncompressed dataset. The creator of the - compressed dataset can control the accuracy of the reconstituted - data through the degree of subsampling and the choice of - interpolation method. - - See CF section 8.3 "Lossy Compression by Coordinate Subsampling" - and Appendix J "Coordinate Interpolation Methods". - - >>> tie_point_indices={{package}}.TiePointIndex(data=[0, 4, 7, 8, 11]) - >>> w = {{package}}.InterpolationParameter(data=[5, 10, 5]) - >>> coords = {{package}}.SubsampledArray( - ... interpolation_name='quadratic', - ... compressed_array={{package}}.Data([15, 135, 225, 255, 345]), - ... shape=(12,), - ... 
tie_point_indices={0: tie_point_indices}, - ... parameters={"w": w}, - ... parameter_dimensions={"w": (0,)}, - ... ) - >>> print(coords[...]) - [ 15. 48.75 80. 108.75 135. - 173.88888889 203.88888889 225. 255. 289.44444444 - 319.44444444 345. ] - - **Cell boundaries** - - When the tie points array represents bounds tie points then the - *shape* parameter describes the uncompressed bounds shape. See CF - section 8.3.9 "Interpolation of Cell Boundaries". - - >>> bounds = {{package}}.SubsampledArray( - ... interpolation_name='quadratic', - ... compressed_array={{package}}.Data([0, 150, 240, 240, 360]), - ... shape=(12, 2), - ... tie_point_indices={0: tie_point_indices}, - ... parameters={"w": w}, - ... parameter_dimensions={"w": (0,)}, - ... ) - >>> print(bounds[...]) - [[0.0 33.2] - [33.2 64.8] - [64.8 94.80000000000001] - [94.80000000000001 123.2] - [123.2 150.0] - [150.0 188.88888888888889] - [188.88888888888889 218.88888888888889] - [218.88888888888889 240.0] - [240.0 273.75] - [273.75 305.0] - [305.0 333.75] - [333.75 360.0]] - - .. versionadded:: 3.14.0 - - """ - - def to_dask_array(self, chunks="auto"): - """Convert the data to a `dask` array. - - .. versionadded:: 3.14.0 - - :Parameters: - - chunks: `int`, `tuple`, `dict` or `str`, optional - Specify the chunking of the returned dask array. - - Any value accepted by the *chunks* parameter of the - `dask.array.from_array` function is allowed. - - The chunk sizes implied by *chunks* for a dimension that - has been fragmented are ignored and replaced with values - that are implied by that dimensions fragment sizes. - - :Returns: - - `dask.array.Array` - The `dask` array representation. 
- - """ - from functools import partial - - import dask.array as da - from dask import config - from dask.array.core import getter, normalize_chunks - from dask.base import tokenize - - name = (f"{self.__class__.__name__}-{tokenize(self)}",) - - dtype = self.dtype - - context = partial(config.set, scheduler="synchronous") - - compressed_dimensions = self.compressed_dimensions() - conformed_data = self.conformed_data() - compressed_data = conformed_data["data"] - parameters = conformed_data["parameters"] - dependent_tie_points = conformed_data["dependent_tie_points"] - - # If possible, convert the compressed data, parameters and - # dependent tie points to dask arrays that don't support - # concurrent reads. This prevents "compute called by compute" - # failures problems at compute time. - # - # TODO: This won't be necessary if this is refactored so that - # arrays are part of the same dask graph as the - # compressed subarrays. - compressed_data = self._lock_file_read(compressed_data) - parameters = { - k: self._lock_file_read(v) for k, v in parameters.items() - } - dependent_tie_points = { - k: self._lock_file_read(v) for k, v in dependent_tie_points.items() - } - - # Get the (cfdm) subarray class - Subarray = self.get_Subarray() - subarray_name = Subarray().__class__.__name__ - - # Set the chunk sizes for the dask array - # - # Note: The chunks created here are incorrect for the - # compressed dimensions, since these chunk sizes are a - # function of the tie point indices which haven't yet - # been accessed. Therefore, the chunks for the - # compressed dimensons need to be redefined later. - chunks = normalize_chunks( - self.subarray_shapes(chunks), - shape=self.shape, - dtype=dtype, - ) - - # Re-initialise the chunks - u_dims = list(compressed_dimensions) - chunks = [[] if i in u_dims else c for i, c in enumerate(chunks)] - - # For each dimension, initialise the index of the chunk - # previously created (prior to the chunk currently being - # created). 
The value -1 is an arbitrary negative value that is - # always less than any chunk index, which is always a natural - # number. - previous_chunk_location = [-1] * len(chunks) - - dsk = {} - for ( - u_indices, - u_shape, - c_indices, - subarea_indices, - first, - chunk_location, - ) in zip(*self.subarrays(shapes=chunks)): - subarray = Subarray( - data=compressed_data, - indices=c_indices, - shape=u_shape, - compressed_dimensions=compressed_dimensions, - first=first, - subarea_indices=subarea_indices, - parameters=parameters, - dependent_tie_points=dependent_tie_points, - context_manager=context, - ) - - key = f"{subarray_name}-{tokenize(subarray)}" - dsk[key] = subarray - dsk[name + chunk_location] = ( - getter, - key, - Ellipsis, - False, - False, - ) - - # Add correct chunk sizes for compressed dimensions - for d in u_dims[:]: - previous = previous_chunk_location[d] - new = chunk_location[d] - if new > previous: - chunks[d].append(u_shape[d]) - previous_chunk_location[d] = new - elif new < previous: - # No more chunk sizes required for this compressed - # dimension - u_dims.remove(d) - - chunks = [tuple(c) for c in chunks] - - # Return the dask array - return da.Array(dsk, name[0], chunks=chunks, dtype=dtype) + pass diff --git a/cf/data/array/umarray.py b/cf/data/array/umarray.py index d9caa832e4..9fa40748be 100644 --- a/cf/data/array/umarray.py +++ b/cf/data/array/umarray.py @@ -26,7 +26,7 @@ def __init__( unpack=True, attributes=None, storage_options=None, - min_file_versions=None, +# min_file_versions=None, source=None, copy=True, ): @@ -109,7 +109,7 @@ def __init__( unpack=unpack, attributes=attributes, storage_options=storage_options, - min_file_versions=min_file_versions, +# min_file_versions=min_file_versions, source=source, copy=copy, ) diff --git a/cf/mixin/propertiesdata.py b/cf/mixin/propertiesdata.py index 3ec83f258a..3111209837 100644 --- a/cf/mixin/propertiesdata.py +++ b/cf/mixin/propertiesdata.py @@ -1626,38 +1626,38 @@ def units(self): self.Units 
= Units(None, getattr(self, "calendar", None)) - def add_file_location(self, location): - """Add a new file location in-place. - - All data definitions that reference files are additionally - referenced from the given location. - - .. versionadded:: 3.15.0 - - .. seealso:: `del_file_location`, `file_locations` - - :Parameters: - - location: `str` - The new location. - - :Returns: - - `str` - The new location as an absolute path with no trailing - path name component separator. - - **Examples** - - >>> d.add_file_location('/data/model/') - '/data/model' - - """ - data = self.get_data(None, _fill_value=False, _units=False) - if data is not None: - return data.add_file_location(location) - - return abspath(location).rstrip(sep) +# def add_file_location(self, location): +# """Add a new file location in-place. +# +# All data definitions that reference files are additionally +# referenced from the given location. +# +# .. versionadded:: 3.15.0 +# +# .. seealso:: `del_file_location`, `file_locations` +# +# :Parameters: +# +# location: `str` +# The new location. +# +# :Returns: +# +# `str` +# The new location as an absolute path with no trailing +# path name component separator. +# +# **Examples** +# +# >>> d.add_file_location('/data/model/') +# '/data/model' +# +# """ +# data = self.get_data(None, _fill_value=False, _units=False) +# if data is not None: +# return data.add_file_location(location) +# +# return abspath(location).rstrip(sep) @_deprecated_kwarg_check("i", version="3.0.0", removed_at="4.0.0") @_inplace_enabled(default=False) @@ -3066,38 +3066,38 @@ def datum(self, *index): return data.datum(*index) - def del_file_location(self, location): - """Remove a file location in-place. - - All data definitions that reference files will have references - to files in the given location removed from them. - - .. versionadded:: 3.15.0 - - .. seealso:: `add_file_location`, `file_locations` - - :Parameters: - - location: `str` - The file location to remove. 
- - :Returns: - - `str` - The removed location as an absolute path with no - trailing path name component separator. - - **Examples** - - >>> f.del_file_location('/data/model/') - '/data/model' - - """ - data = self.get_data(None, _fill_value=False, _units=False) - if data is not None: - return data.del_file_location(location) - - return abspath(location).rstrip(sep) +# def del_file_location(self, location): +# """Remove a file location in-place. +# +# All data definitions that reference files will have references +# to files in the given location removed from them. +# +# .. versionadded:: 3.15.0 +# +# .. seealso:: `add_file_location`, `file_locations` +# +# :Parameters: +# +# location: `str` +# The file location to remove. +# +# :Returns: +# +# `str` +# The removed location as an absolute path with no +# trailing path name component separator. +# +# **Examples** +# +# >>> f.del_file_location('/data/model/') +# '/data/model' +# +# """ +# data = self.get_data(None, _fill_value=False, _units=False) +# if data is not None: +# return data.del_file_location(location) +# +# return abspath(location).rstrip(sep) @_manage_log_level_via_verbosity def equals( @@ -3429,7 +3429,7 @@ def convert_reference_time( ) def file_locations(self): - """The locations of files containing parts of the data. + """TODOCFA The locations of files containing parts of the data. Returns the locations of any files that may be required to deliver the computed data array. @@ -3449,10 +3449,40 @@ def file_locations(self): >>> d.file_locations() {'/home/data1', 'file:///data2'} + """ + _DEPRECATION_ERROR_METHOD( + self, + "file_locations", + "Use method 'file_directories' instead", + version="NEXTVERSION", + removed_at="4.0.0", + ) # pragma: no cover + + def file_directories(self): + """TODOCFA The locations of files containing parts of the data. + + Returns the locations of any files that may be required to + deliver the computed data array. + + .. versionadded:: NEXTVERSION + + .. 
seealso:: `add_file_location`, `del_file_location` + + :Returns: + + `set` + The unique file locations as absolute paths with no + trailing path name component separator. + + **Examples** + + >>> d.file_locations() + {'/home/data1', 'file:///data2'} + """ data = self.get_data(None, _fill_value=False, _units=False) if data is not None: - return data.file_locations() + return data.file_directories() return set() diff --git a/cf/read_write/netcdf/__init__.py b/cf/read_write/netcdf/__init__.py index 6fcf9698b2..94adcca6b6 100644 --- a/cf/read_write/netcdf/__init__.py +++ b/cf/read_write/netcdf/__init__.py @@ -1,2 +1,2 @@ -from .netcdfread import NetCDFRead +#from .netcdfread import NetCDFRead from .netcdfwrite import NetCDFWrite diff --git a/cf/read_write/netcdf/netcdfread.py b/cf/read_write/netcdf/netcdfread.py index 95cbf8911d..8e343fd72b 100644 --- a/cf/read_write/netcdf/netcdfread.py +++ b/cf/read_write/netcdf/netcdfread.py @@ -1,16 +1,16 @@ -import cfdm - -# from packaging.version import Version - - -class NetCDFRead(cfdm.read_write.netcdf.NetCDFRead): - """A container for instantiating Fields from a netCDF dataset. - - .. versionadded:: 3.0.0 - - """ - - +#import cfdm +# +## from packaging.version import Version +# +# +#class NetCDFRead(cfdm.read_write.netcdf.NetCDFRead): +# """A container for instantiating Fields from a netCDF dataset. +# +# .. versionadded:: 3.0.0 +# +# """ +# +# # def _ncdimensions(self, ncvar, ncdimensions=None, parent_ncvar=None): # """Return a list of the netCDF dimensions corresponding to a # netCDF variable. 
diff --git a/cf/read_write/netcdf/netcdfwrite.py b/cf/read_write/netcdf/netcdfwrite.py index 81217bff10..d488ba1260 100644 --- a/cf/read_write/netcdf/netcdfwrite.py +++ b/cf/read_write/netcdf/netcdfwrite.py @@ -1,28 +1,22 @@ -# from os import remove -# import cfdm -from .netcdfread import NetCDFRead - -# import dask.array as da -# import numpy as np -# from cfdm.data.dask_utils import cfdm_asanyarray +#from .netcdfread import NetCDFRead class NetCDFWrite(cfdm.read_write.netcdf.NetCDFWrite): """A container for writing Fields to a netCDF dataset.""" - def __new__(cls, *args, **kwargs): - """Store the NetCDFRead class. - - .. note:: If a child class requires a different NetCDFRead class - than the one defined here, then it must be redefined in the - child class. - - """ - instance = super().__new__(cls) - instance._NetCDFRead = NetCDFRead - return instance + #def __new__(cls, *args, **kwargs): + # """Store the NetCDFRead class. + # + # .. note:: If a child class requires a different NetCDFRead class + # than the one defined here, then it must be redefined in the + # child class. + # + # """ + # instance = super().__new__(cls) + # instance._NetCDFRead = NetCDFRead + # return instance # def _unlimited(self, field, axis): # """Whether an axis is unlimited. 
diff --git a/cf/read_write/read.py b/cf/read_write/read.py index 7a239d4567..846c20ba01 100644 --- a/cf/read_write/read.py +++ b/cf/read_write/read.py @@ -64,7 +64,7 @@ def read( dask_chunks="storage-aligned", store_hdf5_chunks=True, domain=False, - # cfa=None, + cfa=None, cfa_write=None, netcdf_backend=None, storage_options=None, @@ -1173,7 +1173,7 @@ def read( warn_valid=warn_valid, select=select, domain=domain, - # cfa=cfa, + cfa=cfa, cfa_write=cfa_write, netcdf_backend=netcdf_backend, storage_options=storage_options, @@ -1293,7 +1293,7 @@ def _read_a_file( store_hdf5_chunks=True, select=None, domain=False, - # cfa=None, + cfa=None, cfa_write=None, netcdf_backend=None, storage_options=None, @@ -1429,7 +1429,7 @@ def _read_a_file( dask_chunks=dask_chunks, store_hdf5_chunks=store_hdf5_chunks, cache=cache, - # cfa=cfa, + cfa=cfa, cfa_write=cfa_write, ) except MaskError: diff --git a/cf/read_write/write.py b/cf/read_write/write.py index 47d05bbeee..761869d408 100644 --- a/cf/read_write/write.py +++ b/cf/read_write/write.py @@ -2,7 +2,7 @@ from ..cfimplementation import implementation from ..decorators import _manage_log_level_via_verbosity -from ..functions import ( # CFA, +from ..functions import ( _DEPRECATION_ERROR_FUNCTION_KWARG, _DEPRECATION_ERROR_FUNCTION_KWARG_VALUE, flat, From acefc450b11638e4d15d281759a9c971e14da943 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Sat, 16 Nov 2024 18:28:32 +0000 Subject: [PATCH 12/51] dev --- cf/__init__.py | 9 - cf/cfimplementation.py | 39 +- cf/data/array/__init__.py | 3 - cf/data/array/abstract/__init__.py | 2 - cf/data/array/abstract/filearray.py | 80 -- cf/data/array/cfah5netcdfarray.py | 10 - cf/data/array/cfanetcdf4array.py | 10 - cf/data/array/fullarray.py | 254 +---- cf/data/array/h5netcdfarray.py | 3 +- cf/data/array/locks.py | 4 - cf/data/array/mixin/arraymixin.py | 45 - cf/data/array/mixin/cfamixin.py | 858 --------------- cf/data/array/mixin/compressedarraymixin.py | 132 --- 
cf/data/array/mixin/filearraymixin.py | 219 ---- cf/data/array/mixin/indexmixin.py | 366 ------- cf/data/array/netcdf4array.py | 1 + cf/data/array/pointtopologyarray.py | 2 +- cf/data/array/umarray.py | 2 - cf/data/data.py | 345 ------ cf/data/fragment/fragmentumarray.py | 105 -- cf/data/fragment/fullfragmentarray.py | 91 -- cf/data/fragment/h5netcdffragmentarray.py | 97 -- cf/data/fragment/mixin/__init__.py | 1 - cf/data/fragment/mixin/fragmentarraymixin.py | 258 ----- cf/data/fragment/netcdf4fragmentarray.py | 108 -- cf/data/fragment/netcdffragmentarray.py | 239 ----- cf/domain.py | 98 -- cf/field.py | 77 -- cf/mixin/propertiesdata.py | 226 ---- cf/mixin/propertiesdatabounds.py | 227 ---- cf/mixin2/__init__.py | 1 - cf/mixin2/cfanetcdf.py | 509 --------- cf/read_write/netcdf/__init__.py | 1 - cf/read_write/netcdf/netcdfread.py | 1006 ------------------ cf/read_write/netcdf/netcdfwrite.py | 885 --------------- cf/read_write/read.py | 33 +- cf/read_write/write.py | 52 - cf/test/test_CFA.py | 918 ++++++++-------- 38 files changed, 468 insertions(+), 6848 deletions(-) delete mode 100644 cf/data/array/abstract/filearray.py delete mode 100644 cf/data/array/cfah5netcdfarray.py delete mode 100644 cf/data/array/cfanetcdf4array.py delete mode 100644 cf/data/array/locks.py delete mode 100644 cf/data/array/mixin/arraymixin.py delete mode 100644 cf/data/array/mixin/cfamixin.py delete mode 100644 cf/data/array/mixin/compressedarraymixin.py delete mode 100644 cf/data/array/mixin/filearraymixin.py delete mode 100644 cf/data/array/mixin/indexmixin.py delete mode 100644 cf/data/fragment/fullfragmentarray.py delete mode 100644 cf/data/fragment/h5netcdffragmentarray.py delete mode 100644 cf/data/fragment/mixin/__init__.py delete mode 100644 cf/data/fragment/mixin/fragmentarraymixin.py delete mode 100644 cf/data/fragment/netcdf4fragmentarray.py delete mode 100644 cf/data/fragment/netcdffragmentarray.py delete mode 100644 cf/mixin2/cfanetcdf.py delete mode 100644 
cf/read_write/netcdf/netcdfread.py diff --git a/cf/__init__.py b/cf/__init__.py index e22a94b116..79d5de82c7 100644 --- a/cf/__init__.py +++ b/cf/__init__.py @@ -105,7 +105,6 @@ raise ImportError(_error0 + str(error1)) __cf_version__ = cfdm.core.__cf_version__ -# __cfa_version__ = "0.6.2" from packaging.version import Version import importlib.util @@ -276,8 +275,6 @@ AggregatedArray, BoundsFromNodesArray, CellConnectivityArray, - # CFAH5netcdfArray, - # CFANetCDF4Array, FullArray, GatheredArray, H5netcdfArray, @@ -291,12 +288,6 @@ UMArray, ) -# from .data.fragment import ( -# FullFragmentArray, -# NetCDFFragmentArray, -# UMFragmentArray, -# ) - from .aggregate import aggregate, climatology_cells from .query import ( Query, diff --git a/cf/cfimplementation.py b/cf/cfimplementation.py index 5a059d9bb3..3de62364df 100644 --- a/cf/cfimplementation.py +++ b/cf/cfimplementation.py @@ -26,7 +26,7 @@ TiePointIndex, ) from .data import Data -from .data.array import ( # CFAH5netcdfArray,; CFANetCDF4Array, +from .data.array import ( AggregatedArray, BoundsFromNodesArray, CellConnectivityArray, @@ -114,41 +114,6 @@ def set_construct(self, parent, construct, axes=None, copy=True, **kwargs): ) -# def initialise_CFANetCDF4Array(self, **kwargs): -# """Return a `CFANetCDF4Array` instance. -# -# :Parameters: -# -# kwargs: optional -# Initialisation parameters to pass to the new instance. -# -# :Returns: -# -# `CFANetCDF4Array` -# -# """ -# cls = self.get_class("CFANetCDF4Array") -# return cls(**kwargs) -# -# def initialise_CFAH5netcdfArray(self, **kwargs): -# """Return a `CFAH5netcdfArray` instance. -# -# .. versionadded:: NEXTVERSION -# -# :Parameters: -# -# kwargs: optional -# Initialisation parameters to pass to the new instance. 
-# -# :Returns: -# -# `CFAH5netcdfArray` -# -# """ -# cls = self.get_class("CFAH5netcdfArray") -# return cls(**kwargs) - - _implementation = CFImplementation( cf_version=CF(), AggregatedArray=AggregatedArray, @@ -156,8 +121,6 @@ def set_construct(self, parent, construct, axes=None, copy=True, **kwargs): CellConnectivity=CellConnectivity, CellMeasure=CellMeasure, CellMethod=CellMethod, - # CFAH5netcdfArray=CFAH5netcdfArray, - # CFANetCDF4Array=CFANetCDF4Array, CoordinateReference=CoordinateReference, DimensionCoordinate=DimensionCoordinate, Domain=Domain, diff --git a/cf/data/array/__init__.py b/cf/data/array/__init__.py index 54edaa65ec..693fec0fb4 100644 --- a/cf/data/array/__init__.py +++ b/cf/data/array/__init__.py @@ -1,9 +1,6 @@ from .aggregatedarray import AggregatedArray from .boundsfromnodesarray import BoundsFromNodesArray from .cellconnectivityarray import CellConnectivityArray - -# from .cfah5netcdfarray import CFAH5netcdfArray -# from .cfanetcdf4array import CFANetCDF4Array from .fullarray import FullArray from .gatheredarray import GatheredArray from .h5netcdfarray import H5netcdfArray diff --git a/cf/data/array/abstract/__init__.py b/cf/data/array/abstract/__init__.py index afe3c59ad7..1dd4744403 100644 --- a/cf/data/array/abstract/__init__.py +++ b/cf/data/array/abstract/__init__.py @@ -1,3 +1 @@ from .array import Array - -# from .filearray import FileArray diff --git a/cf/data/array/abstract/filearray.py b/cf/data/array/abstract/filearray.py deleted file mode 100644 index 96e7dcbd7c..0000000000 --- a/cf/data/array/abstract/filearray.py +++ /dev/null @@ -1,80 +0,0 @@ -# from ....functions import _DEPRECATION_ERROR_ATTRIBUTE -# from ..mixin import FileArrayMixin -# from .array import Array -# -# -# class FileArray(FileArrayMixin, Array): -# """Abstract base class for an array stored in a file.""" -# -# def __getitem__(self, indices): -# """Return a subspace of the array. 
-# -# x.__getitem__(indices) <==> x[indices] -# -# Returns a subspace of the array as an independent numpy array. -# -# """ -# raise NotImplementedError( -# f"Must implement {self.__class__.__name__}.__getitem__" -# ) # pragma: no cover -# -# def __repr__(self): -# """x.__repr__() <==> repr(x)""" -# return f"" -# -# def __str__(self): -# """x.__str__() <==> str(x)""" -# return f"{self.get_filename()}, {self.get_address()}" -# -# @property -# def dtype(self): -# """Data-type of the array.""" -# return self._get_component("dtype") -# -# @property -# def filename(self): -# """The name of the file containing the array. -# -# Deprecated at version 3.14.0. Use method `get_filename` instead. -# -# """ -# _DEPRECATION_ERROR_ATTRIBUTE( -# self, -# "filename", -# message="Use method 'get_filename' instead.", -# version="3.14.0", -# removed_at="5.0.0", -# ) # pragma: no cover -# -# @property -# def shape(self): -# """Shape of the array.""" -# return self._get_component("shape") -# -# def close(self): -# """Close the dataset containing the data.""" -# raise NotImplementedError( -# f"Must implement {self.__class__.__name__}.close" -# ) # pragma: no cover -# -# def get_address(self): -# """The address in the file of the variable. -# -# .. versionadded:: 3.14.0 -# -# :Returns: -# -# `str` or `None` -# The address, or `None` if there isn't one. 
-# -# """ -# raise NotImplementedError( -# f"Must implement {self.__class__.__name__}.get_address " -# "in subclasses" -# ) # pragma: no cover -# -# def open(self): -# """Returns an open dataset containing the data array.""" -# raise NotImplementedError( -# f"Must implement {self.__class__.__name__}.open" -# ) # pragma: no cover diff --git a/cf/data/array/cfah5netcdfarray.py b/cf/data/array/cfah5netcdfarray.py deleted file mode 100644 index 950b9d7b44..0000000000 --- a/cf/data/array/cfah5netcdfarray.py +++ /dev/null @@ -1,10 +0,0 @@ -# from .h5netcdfarray import H5netcdfArray -# from .mixin import CFAMixin -# -# -# class CFAH5netcdfArray(CFAMixin, H5netcdfArray): -# """A CFA-netCDF array accessed with `h5netcdf` -# -# .. versionadded:: NEXTVERSION -# -# """ diff --git a/cf/data/array/cfanetcdf4array.py b/cf/data/array/cfanetcdf4array.py deleted file mode 100644 index c0c991e496..0000000000 --- a/cf/data/array/cfanetcdf4array.py +++ /dev/null @@ -1,10 +0,0 @@ -# from .mixin import CFAMixin -# from .netcdf4array import NetCDF4Array -# -# -# class CFANetCDF4Array(CFAMixin, NetCDF4Array): -# """A CFA-netCDF array accessed with `netCDF4`. -# -# .. versionadded:: NEXTVERSION -# -# """ diff --git a/cf/data/array/fullarray.py b/cf/data/array/fullarray.py index 678b84ca48..b7b4ff19ee 100644 --- a/cf/data/array/fullarray.py +++ b/cf/data/array/fullarray.py @@ -1,16 +1,9 @@ import cfdm from ...mixin_container import Container -#import numpy as np -#from cfdm.data.mixin import IndexMixin -# -#from ...functions import indices_shape, parse_indices -#from .abstract import Array -#_FULLARRAY_HANDLED_FUNCTIONS = {} - -class FullArray(Container, cfdm.FullArray): #IndexMixin, Array): +class FullArray(Container, cfdm.FullArray): """A array filled with a given value. The array may be empty or all missing values. @@ -18,248 +11,3 @@ class FullArray(Container, cfdm.FullArray): #IndexMixin, Array): .. 
versionadded:: 3.14.0 """ - -# def __init__( -# self, -# fill_value=None, -# dtype=None, -# shape=None, -# attributes=None, -# source=None, -# copy=True, -# ): -# """**Initialisation** -# -# :Parameters: -# -# fill_value : scalar, optional -# The fill value for the array. May be set to -# `cf.masked` or `np.ma.masked`. -# -# dtype: `numpy.dtype` -# The data type of the array. -# -# shape: `tuple` -# The array dimension sizes. -# -# {{init attributes: `dict` or `None`, optional}} -# -# .. versionadded:: NEXTVERSION -# -# {{init source: optional}} -# -# {{init copy: `bool`, optional}} -# -# units: `str` or `None`, optional -# Deprecated at version NEXTVERSION. Use the -# *attributes* parameter instead. -# -# calendar: `str` or `None`, optional -# Deprecated at version NEXTVERSION. Use the -# *attributes* parameter instead. -# -# """ -# super().__init__(source=source, copy=copy) -# -# if source is not None: -# try: -# fill_value = source._get_component("full_value", None) -# except AttributeError: -# fill_value = None -# -# try: -# dtype = source._get_component("dtype", None) -# except AttributeError: -# dtype = None -# -# try: -# shape = source._get_component("shape", None) -# except AttributeError: -# shape = None -# -# try: -# attributes = source._get_component("attributes", False) -# except AttributeError: -# attributes = None -# -# self._set_component("full_value", fill_value, copy=False) -# self._set_component("dtype", dtype, copy=False) -# self._set_component("shape", shape, copy=False) -# self._set_component("attributes", attributes, copy=False) -# -# def __array_function__(self, func, types, args, kwargs): -# """The `numpy` `__array_function__` protocol. -# -# .. 
versionadded:: 3.15.0 -# -# """ -# if func not in _FULLARRAY_HANDLED_FUNCTIONS: -# return NotImplemented -# -# # Note: This allows subclasses that don't override -# # __array_function__ to handle FullArray objects -# if not all(issubclass(t, self.__class__) for t in types): -# return NotImplemented -# -# return _FULLARRAY_HANDLED_FUNCTIONS[func](*args, **kwargs) -# -# def __repr__(self): -# """Called by the `repr` built-in function. -# -# x.__repr__() <==> repr(x) -# -# """ -# return f"" -# -# def __str__(self): -# """Called by the `str` built-in function. -# -# x.__str__() <==> str(x) -# -# """ -# fill_value = self.get_full_value() -# if fill_value is None: -# return "Uninitialised" -# -# return f"Filled with {fill_value!r}" -# -# def _get_array(self, index=None): -# """Returns the full array. -# -# .. versionadded:: NEXTVERSION -# -# .. seealso:: `__array__`, `index` -# -# :Parameters: -# -# {{index: `tuple` or `None`, optional}} -# -# :Returns: -# -# `numpy.ndarray` -# The subspace. -# -# """ -# if index is None: -# shape = self.shape -# else: -# original_shape = self.original_shape -# index = parse_indices(original_shape, index, keepdims=False) -# shape = indices_shape(index, original_shape, keepdims=False) -# -# fill_value = self.get_full_value() -# if fill_value is np.ma.masked: -# array = np.ma.masked_all(shape, dtype=self.dtype) -# elif fill_value is not None: -# array = np.full(shape, fill_value=fill_value, dtype=self.dtype) -# else: -# array = np.empty(shape, dtype=self.dtype) -# -# return array -# -# @property -# def array(self): -# """Return an independent numpy array containing the data. -# -# .. versionadded:: NEXTVERSION -# -# :Returns: -# -# `numpy.ndarray` -# An independent numpy array of the data. 
-# """ -# return np.asanyarray(self) -# -# @property -# def dtype(self): -# """Data-type of the data elements.""" -# return self._get_component("dtype") -# -# @property -# def shape(self): -# """Tuple of array dimension sizes.""" -# return self._get_component("shape") -# -# def get_full_value(self, default=AttributeError()): -# """Return the data array fill value. -# -# .. versionadded:: 3.14.0 -# -# .. seealso:: `set_full_value` -# -# :Parameters: -# -# default: optional -# Return the value of the *default* parameter if the -# fill value has not been set. If set to an `Exception` -# instance then it will be raised instead. -# -# :Returns: -# -# The fill value. -# -# """ -# return self._get_component("full_value", default=default) -# -# def set_full_value(self, fill_value): -# """Set the data array fill value. -# -# .. versionadded:: 3.14.0 -# -# .. seealso:: `get_full_value` -# -# :Parameters: -# -# fill_value : scalar, optional -# The fill value for the array. May be set to -# `cf.masked` or `np.ma.masked`. -# -# :Returns: -# -# `None` -# -# """ -# self._set_component("full_value", fill_value, copy=False) -# -# -#def fullarray_implements(numpy_function): -# """Register an __array_function__ implementation for FullArray objects. -# -# .. versionadded:: 3.15.0 -# -# """ -# -# def decorator(func): -# _FULLARRAY_HANDLED_FUNCTIONS[numpy_function] = func -# return func -# -# return decorator -# -# -#@fullarray_implements(np.unique) -#def unique( -# a, return_index=False, return_inverse=False, return_counts=False, axis=None -#): -# """Version of `np.unique` that is optimised for `FullArray` objects. -# -# .. versionadded:: 3.15.0 -# -# """ -# if return_index or return_inverse or return_counts or axis is not None: -# # Fall back to the slow unique. (I'm sure we could probably do -# # something more clever here, but there is no use case at -# # present.) 
-# return np.unique( -# a[...], -# return_index=return_index, -# return_inverse=return_inverse, -# return_counts=return_counts, -# axis=axis, -# ) -# -# # Fast unique based on the full value -# x = a.get_full_value() -# if x is np.ma.masked: -# return np.ma.masked_all((1,), dtype=a.dtype) -# -# return np.array([x], dtype=a.dtype) diff --git a/cf/data/array/h5netcdfarray.py b/cf/data/array/h5netcdfarray.py index b82f054127..578e0b90ae 100644 --- a/cf/data/array/h5netcdfarray.py +++ b/cf/data/array/h5netcdfarray.py @@ -1,8 +1,7 @@ import cfdm from ...mixin_container import Container - -from .mixin import ActiveStorageMixin +from .mixin import ActiveStorageMixin class H5netcdfArray( diff --git a/cf/data/array/locks.py b/cf/data/array/locks.py deleted file mode 100644 index a52ce7a28e..0000000000 --- a/cf/data/array/locks.py +++ /dev/null @@ -1,4 +0,0 @@ -# from dask.utils import SerializableLock -# -# Global lock for netCDF file access -# netcdf_lock = SerializableLock() diff --git a/cf/data/array/mixin/arraymixin.py b/cf/data/array/mixin/arraymixin.py deleted file mode 100644 index 5225723d80..0000000000 --- a/cf/data/array/mixin/arraymixin.py +++ /dev/null @@ -1,45 +0,0 @@ -# import numpy as np -# -# from ....units import Units -# -# -# class ArrayMixin: -# """Mixin class for a container of an array. -# -# .. versionadded:: 3.14.0 -# -# """ -# -# -# def __array_function__(self, func, types, args, kwargs): -# """Implement the `numpy` ``__array_function__`` protocol. -# -# .. versionadded:: 3.14.0 -# -# """ -# return NotImplemented -# -# @property -# def _meta(self): -# """Normalise the array to an appropriate Dask meta object. -# -# The Dask meta can be thought of as a suggestion to Dask. Dask -# uses this meta to generate the task graph until it can infer -# the actual metadata from the values. It does not force the -# output to have the structure or dtype of the specified meta. -# -# .. versionadded:: NEXTVERSION -# -# .. 
seealso:: `dask.utils.meta_from_array` -# -# """ -# # return np.array((), dtype=self.dtype) -# -# @property -# def Units(self): -# """The `cf.Units` object containing the units of the array. -# -# .. versionadded:: 3.14.0 -# -# """ -# return Units(self.get_units(None), self.get_calendar(None)) diff --git a/cf/data/array/mixin/cfamixin.py b/cf/data/array/mixin/cfamixin.py deleted file mode 100644 index 56682cd94f..0000000000 --- a/cf/data/array/mixin/cfamixin.py +++ /dev/null @@ -1,858 +0,0 @@ -from copy import deepcopy -from functools import partial -from itertools import accumulate, product - -import numpy as np -from cfdm.data.utils import chunk_locations, chunk_positions - - -class CFAMixin: - """Mixin class for a CFA array. - - .. versionadded:: NEXTVERSION - - """ - - def __new__(cls, *args, **kwargs): - """Store fragment array classes. - - .. versionadded:: NEXTVERSION - - """ - # Import fragment array classes. Do this here (as opposed to - # outside the class) to avoid a circular import. - from ...fragment import ( - FullFragmentArray, - NetCDFFragmentArray, - UMFragmentArray, - ) - - instance = super().__new__(cls) - instance._FragmentArray = { - "nc": NetCDFFragmentArray, - "um": UMFragmentArray, - "full": FullFragmentArray, - } - return instance - - def __init__( - self, - filename=None, - address=None, - dtype=None, - mask=True, - unpack=True, - instructions=None, - substitutions=None, - term=None, - attributes=None, - storage_options=None, - source=None, - copy=True, - x=None, - ): - """**Initialisation** - - :Parameters: - - filename: (sequence of) `str`, optional - The name of the CFA file containing the array. If a - sequence then it must contain one element. - - address: (sequence of) `str`, optional - The name of the CFA aggregation variable for the - array. If a sequence then it must contain one element. - - dtype: `numpy.dtype` - The data type of the aggregated data array. 
May be - `None` if the numpy data-type is not known (which can - be the case for some string types, for example). - - mask: `bool` - If True (the default) then mask by convention when - reading data from disk. - - A array is masked depending on the values of any of - the variable attributes ``valid_min``, ``valid_max``, - ``valid_range``, ``_FillValue`` and ``missing_value``. - - {{init unpack: `bool`, optional}} - - .. versionadded:: NEXTVERSION - - instructions: `str`, optional - The ``aggregated_data`` attribute value as found on - the CFA variable. If set then this will be used to - improve the performance of `__dask_tokenize__`. - - substitutions: `dict`, optional - A dictionary whose key/value pairs define text - substitutions to be applied to the fragment file - names. Each key must be specified with the ``${...}`` - syntax, for instance ``{'${base}': 'sub'}``. - - .. versionadded:: 3.15.0 - - term: `str`, optional - The name of a non-standard aggregation instruction - term from which the array is to be created, instead of - creating the aggregated data in the standard terms. If - set then *address* must be the name of the term's - aggregation instruction variable, which must be - defined on the fragment dimensions and no others. Each - value of the aggregation instruction variable will be - broadcast across the shape of the corresponding - fragment. - - *Parameter example:* - ``address='cfa_tracking_id', term='tracking_id'`` - - .. versionadded:: 3.15.0 - - storage_options: `dict` or `None`, optional - Key/value pairs to be passed on to the creation of - `s3fs.S3FileSystem` file systems to control the - opening of fragment files in S3 object stores. Ignored - for files not in an S3 object store, i.e. those whose - names do not start with ``s3:``. - - By default, or if `None`, then *storage_options* is - taken as ``{}``. 
- - If the ``'endpoint_url'`` key is not in - *storage_options* or is not in a dictionary defined by - the ``'client_kwargs`` key (which is always the case - when *storage_options* is `None`), then one will be - automatically inserted for accessing a fragment S3 - file. For example, for a file name of - ``'s3://store/data/file.nc'``, an ``'endpoint_url'`` - key with value ``'https://store'`` would be created. - - *Parameter example:* - ``{'key: 'scaleway-api-key...', 'secret': - 'scaleway-secretkey...', 'endpoint_url': - 'https://s3.fr-par.scw.cloud', 'client_kwargs': - {'region_name': 'fr-par'}}`` - - .. versionadded:: NEXTVERSION - - {{init attributes: `dict` or `None`, optional}} - - If *attributes* is `None`, the default, then the - attributes will be set from the netCDF variable during - the first `__getitem__` call. - - .. versionaddedd:: NEXTVERSION - - {{init source: optional}} - - {{init copy: `bool`, optional}} - - units: `str` or `None`, optional - Deprecated at version NEXTVERSION. Use the - *attributes* parameter instead. - - calendar: `str` or `None`, optional - Deprecated at version NEXTVERSION. Use the - *attributes* parameter instead. 
- - """ - if source is not None: - super().__init__(source=source, copy=copy) - - try: - fragment_shape = source.get_fragment_shape() - except AttributeError: - fragment_shape = None - - try: - instructions = source._get_component("instructions") - except AttributeError: - instructions = None - - try: - aggregated_data = source.get_aggregated_data(copy=False) - except AttributeError: - aggregated_data = {} - - try: - substitutions = source.get_substitutions() - except AttributeError: - substitutions = None - - try: - term = source.get_term() - except AttributeError: - term = None - - elif filename is not None: - shape, fragment_shape, aggregated_data = self._parse_cfa( - x, term, substitutions - ) - super().__init__( - filename=filename, - address=address, - shape=shape, - dtype=dtype, - mask=mask, - attributes=attributes, - copy=copy, - ) - else: - super().__init__( - filename=filename, - address=address, - dtype=dtype, - mask=mask, - attributes=attributes, - copy=copy, - ) - - fragment_shape = None - aggregated_data = None - instructions = None - term = None - - self._set_component("fragment_shape", fragment_shape, copy=False) - self._set_component("aggregated_data", aggregated_data, copy=False) - self._set_component("instructions", instructions, copy=False) - self._set_component("term", term, copy=False) - - if substitutions is not None: - self._set_component( - "substitutions", substitutions.copy(), copy=False - ) - - def _parse_cfa(self, x, term, substitutions): - """Parse the CFA aggregation instructions. - - .. versionadded:: NEXTVERSION - - :Parameters: - - x: `dict` - - term: `str` or `None` - The name of a non-standard aggregation instruction - term from which the array is to be created, instead of - creating the aggregated data in the standard - terms. Each value of the aggregation instruction - variable will be broadcast across the shape of the - corresponding fragment. 
- - substitutions: `dict` or `None` - A dictionary whose key/value pairs define text - substitutions to be applied to the fragment file - names. Each key must be specified with the ``${...}`` - syntax, for instance ``{'${base}': 'sub'}``. - - :Returns: - - 3-`tuple` - 1. The shape of the aggregated data. - 2. The shape of the array of fragments. - 3. The parsed aggregation instructions. - - """ - aggregated_data = {} - - location = x["location"] - ndim = location.shape[0] - compressed = np.ma.compressed - chunks = [compressed(i).tolist() for i in location] - shape = [sum(c) for c in chunks] - positions = chunk_positions(chunks) - locations = chunk_locations(chunks) - - if term is not None: - # -------------------------------------------------------- - # Each fragment contains a constant value, not file - # locations. - # -------------------------------------------------------- - term = x[term] - fragment_shape = term.shape - aggregated_data = { - frag_loc: { - "location": loc, - "fill_value": term[frag_loc].item(), - "format": "full", - } - for frag_loc, loc in zip(positions, locations) - } - else: - # -------------------------------------------------------- - # Each fragment contains file locations - # -------------------------------------------------------- - a = x["address"] - f = x["file"] - file_fmt = x["format"] - - extra_dimension = f.ndim > ndim - if extra_dimension: - # There is an extra non-fragment dimension - fragment_shape = f.shape[:-1] - else: - fragment_shape = f.shape - - if not a.ndim: - a = (a.item(),) - scalar_address = True - else: - scalar_address = False - - if not file_fmt.ndim: - file_fmt = file_fmt.item() - scalar_fmt = True - else: - scalar_fmt = False - - for frag_loc, location in zip(positions, locations): - if extra_dimension: - filename = compressed(f[frag_loc]).tolist() - if scalar_address: - address = a * len(filename) - else: - address = compressed(a[frag_loc].tolist()) - - if scalar_fmt: - fmt = file_fmt - else: - fmt = 
compressed(file_fmt[frag_loc]).tolist() - else: - filename = (f[frag_loc].item(),) - if scalar_address: - address = a - else: - address = (a[frag_loc].item(),) - - if scalar_fmt: - fmt = file_fmt - else: - fmt = file_fmt[frag_loc].item() - - aggregated_data[frag_loc] = { - "location": location, - "filename": filename, - "address": address, - "format": fmt, - } - - # Apply string substitutions to the fragment filenames - if substitutions: - for value in aggregated_data.values(): - filenames2 = [] - for filename in value["filename"]: - for base, sub in substitutions.items(): - filename = filename.replace(base, sub) - - filenames2.append(filename) - - value["filename"] = filenames2 - - return shape, fragment_shape, aggregated_data - - def __dask_tokenize__(self): - """Used by `dask.base.tokenize`. - - .. versionadded:: 3.14.0 - - """ - out = super().__dask_tokenize__() - aggregated_data = self._get_component("instructions", None) - if aggregated_data is None: - aggregated_data = self.get_aggregated_data(copy=False) - - return out + (aggregated_data,) - - def __getitem__(self, indices): - """x.__getitem__(indices) <==> x[indices]""" - return NotImplemented # pragma: no cover - - def get_aggregated_data(self, copy=True): - """Get the aggregation data dictionary. - - The aggregation data dictionary contains the definitions of - the fragments and the instructions on how to aggregate them. - The keys are indices of the CFA fragment dimensions, - e.g. ``(1, 0, 0 ,0)``. - - .. versionadded:: 3.14.0 - - :Parameters: - - copy: `bool`, optional - Whether or not to return a copy of the aggregation - dictionary. By default a deep copy is returned. - - .. warning:: If False then changing the returned - dictionary in-place will change the - aggregation dictionary stored in the - {{class}} instance, **as well as in any - copies of it**. - - :Returns: - - `dict` - The aggregation data dictionary. 
- - **Examples** - - >>> a.shape - (12, 1, 73, 144) - >>> a.get_fragment_shape() - (2, 1, 1, 1) - >>> a.get_aggregated_data() - {(0, 0, 0, 0): { - 'file': ('January-June.nc',), - 'address': ('temp',), - 'format': 'nc', - 'location': [(0, 6), (0, 1), (0, 73), (0, 144)]}, - (1, 0, 0, 0): { - 'file': ('July-December.nc',), - 'address': ('temp',), - 'format': 'nc', - 'location': [(6, 12), (0, 1), (0, 73), (0, 144)]}} - - """ - aggregated_data = self._get_component("aggregated_data") - if copy: - aggregated_data = deepcopy(aggregated_data) - - return aggregated_data - - def get_fragmented_dimensions(self): - """Get the positions of dimensions that have two or more fragments. - - .. versionadded:: 3.14.0 - - :Returns: - - `list` - The dimension positions. - - **Examples** - - >>> a.get_fragment_shape() - (20, 1, 40, 1) - >>> a.get_fragmented_dimensions() - [0, 2] - - >>> a.get_fragment_shape() - (1, 1, 1) - >>> a.get_fragmented_dimensions() - [] - - """ - return [ - i for i, size in enumerate(self.get_fragment_shape()) if size > 1 - ] - - def get_fragment_shape(self): - """Get the sizes of the fragment dimensions. - - The fragment dimension sizes are given in the same order as - the aggregated dimension sizes given by `shape`. - - .. versionadded:: 3.14.0 - - :Returns: - - `tuple` - The shape of the fragment dimensions. - - """ - return self._get_component("fragment_shape") - - def get_storage_options(self): - """Return `s3fs.S3FileSystem` options for accessing S3 fragment files. - - .. versionadded:: NEXTVERSION - - :Returns: - - `dict` or `None` - The `s3fs.S3FileSystem` options. 
- - **Examples** - - >>> f.get_storage_options() - {} - - >>> f.get_storage_options() - {'anon': True} - - >>> f.get_storage_options() - {'key: 'scaleway-api-key...', - 'secret': 'scaleway-secretkey...', - 'endpoint_url': 'https://s3.fr-par.scw.cloud', - 'client_kwargs': {'region_name': 'fr-par'}} - - """ - return super().get_storage_options(create_endpoint_url=False) - - def get_term(self, default=ValueError()): - """The CFA aggregation instruction term for the data, if set. - - .. versionadded:: 3.15.0 - - :Parameters: - - default: optional - Return the value of the *default* parameter if the - term has not been set. If set to an `Exception` - instance then it will be raised instead. - - :Returns: - - `str` - The CFA aggregation instruction term name. - - """ - return self._get_component("term", default=default) - - def subarray_shapes(self, shapes): - """Create the subarray shapes. - - A fragmented dimension (i.e. one spanned by two or more - fragments) will always have a subarray size equal to the - size of each of its fragments, overriding any other size - implied by the *shapes* parameter. - - .. versionadded:: 3.14.0 - - .. seealso:: `subarrays` - - :Parameters: - - shapes: `int`, sequence, `dict` or `str`, optional - Define the subarray shapes. - - Any value accepted by the *chunks* parameter of the - `dask.array.from_array` function is allowed. - - The subarray sizes implied by *chunks* for a dimension - that has been fragmented are ignored, so their - specification is arbitrary. - - :Returns: - - `tuple` - The subarray sizes along each dimension. 
- - **Examples** - - >>> a.shape - (12, 1, 73, 144) - >>> a.get_fragment_shape() - (2, 1, 1, 1) - >>> a.fragmented_dimensions() - [0] - >>> a.subarray_shapes(-1) - ((6, 6), (1,), (73,), (144,)) - >>> a.subarray_shapes(None) - ((6, 6), (1,), (73,), (144,)) - >>> a.subarray_shapes("auto") - ((6, 6), (1,), (73,), (144,)) - >>> a.subarray_shapes((None, 1, 40, 50)) - ((6, 6), (1,), (40, 33), (50, 50, 44)) - >>> a.subarray_shapes((None, None, "auto", 50)) - ((6, 6), (1,), (73,), (50, 50, 44)) - >>> a.subarray_shapes({2: 40}) - ((6, 6), (1,), (40, 33), (144,)) - - """ - from numbers import Number - - from dask.array.core import normalize_chunks - - # Positions of fragmented dimensions (i.e. those spanned by - # two or more fragments) - f_dims = self.get_fragmented_dimensions() - - shape = self.shape - aggregated_data = self.get_aggregated_data(copy=False) - - # Create the base chunks. - chunks = [] - ndim = self.ndim - for dim, (n_fragments, size) in enumerate( - zip(self.get_fragment_shape(), self.shape) - ): - if dim in f_dims: - # This aggregated dimension is spanned by two or more - # fragments => set the chunks to be the same size as - # each fragment. - c = [] - index = [0] * ndim - for j in range(n_fragments): - index[dim] = j - loc = aggregated_data[tuple(index)]["location"][dim] - chunk_size = loc[1] - loc[0] - c.append(chunk_size) - - chunks.append(tuple(c)) - else: - # This aggregated dimension is spanned by exactly one - # fragment => store `None` for now. This will get - # overwritten from 'shapes'. 
- chunks.append(None) - - if isinstance(shapes, (str, Number)) or shapes is None: - chunks = [ - c if i in f_dims else shapes for i, c in enumerate(chunks) - ] - elif isinstance(shapes, dict): - chunks = [ - chunks[i] if i in f_dims else shapes.get(i, "auto") - for i, c in enumerate(chunks) - ] - else: - # chunks is a sequence - if len(shapes) != ndim: - raise ValueError( - f"Wrong number of 'shapes' elements in {shapes}: " - f"Got {len(shapes)}, expected {self.ndim}" - ) - - chunks = [ - c if i in f_dims else shapes[i] for i, c in enumerate(chunks) - ] - - return normalize_chunks(chunks, shape=shape, dtype=self.dtype) - - def subarrays(self, subarray_shapes): - """Return descriptors for every subarray. - - .. versionadded:: 3.14.0 - - .. seealso:: `subarray_shapes` - - :Parameters: - - subarray_shapes: `tuple` - The subarray sizes along each dimension, as returned - by a prior call to `subarray_shapes`. - - :Returns: - - 6-`tuple` of iterators - Each iterator iterates over a particular descriptor - from each subarray. - - 1. The indices of the aggregated array that correspond - to each subarray. - - 2. The shape of each subarray. - - 3. The indices of the fragment that corresponds to each - subarray (some subarrays may be represented by a - part of a fragment). - - 4. The location of each subarray. - - 5. The location on the fragment dimensions of the - fragment that corresponds to each subarray. - - 6. The shape of each fragment that overlaps each chunk. - - **Examples** - - An aggregated array with shape (12, 73, 144) has two - fragments, both with with shape (6, 73, 144). - - >>> a.shape - (12, 73, 144) - >>> a.get_fragment_shape() - (2, 1, 1) - >>> a.fragmented_dimensions() - [0] - >>> subarray_shapes = a.subarray_shapes({1: 40}) - >>> print(subarray_shapes) - ((6, 6), (40, 33), (144,)) - >>> ( - ... u_indices, - ... u_shapes, - ... f_indices, - ... s_locations, - ... f_locations, - ... f_shapes, - ... 
) = a.subarrays(subarray_shapes) - >>> for i in u_indices: - ... print(i) - ... - (slice(0, 6, None), slice(0, 40, None), slice(0, 144, None)) - (slice(0, 6, None), slice(40, 73, None), slice(0, 144, None)) - (slice(6, 12, None), slice(0, 40, None), slice(0, 144, None)) - (slice(6, 12, None), slice(40, 73, None), slice(0, 144, None)) - - >>> for i in u_shapes - ... print(i) - ... - (6, 40, 144) - (6, 33, 144) - (6, 40, 144) - (6, 33, 144) - >>> for i in f_indices: - ... print(i) - ... - (slice(None, None, None), slice(0, 40, None), slice(0, 144, None)) - (slice(None, None, None), slice(40, 73, None), slice(0, 144, None)) - (slice(None, None, None), slice(0, 40, None), slice(0, 144, None)) - (slice(None, None, None), slice(40, 73, None), slice(0, 144, None)) - >>> for i in s_locations: - ... print(i) - ... - (0, 0, 0) - (0, 1, 0) - (1, 0, 0) - (1, 1, 0) - >>> for i in f_locations: - ... print(i) - ... - (0, 0, 0) - (0, 0, 0) - (1, 0, 0) - (1, 0, 0) - >>> for i in f_shapes: - ... print(i) - ... - (6, 73, 144) - (6, 73, 144) - (6, 73, 144) - (6, 73, 144) - - """ - f_dims = self.get_fragmented_dimensions() - - # The indices of the uncompressed array that correspond to - # each subarray, the shape of each uncompressed subarray, and - # the location of each subarray - s_locations = [] - u_shapes = [] - u_indices = [] - f_locations = [] - for dim, c in enumerate(subarray_shapes): - nc = len(c) - s_locations.append(tuple(range(nc))) - u_shapes.append(c) - - if dim in f_dims: - f_locations.append(tuple(range(nc))) - else: - # No fragmentation along this dimension - f_locations.append((0,) * nc) - - c = tuple(accumulate((0,) + c)) - u_indices.append([slice(i, j) for i, j in zip(c[:-1], c[1:])]) - - # For each subarray, the part of the fragment that corresponds - # to it. - f_indices = [ - (slice(None),) * len(u) if dim in f_dims else u - for dim, u in enumerate(u_indices) - ] - - # For each subarray, the shape of the fragment that - # corresponds to it. 
- f_shapes = [ - u_shape if dim in f_dims else (size,) * len(u_shape) - for dim, (u_shape, size) in enumerate(zip(u_shapes, self.shape)) - ] - - return ( - product(*u_indices), - product(*u_shapes), - product(*f_indices), - product(*s_locations), - product(*f_locations), - product(*f_shapes), - ) - - def to_dask_array(self, chunks="auto"): - """Create a dask array with `FragmentArray` chunks. - - .. versionadded:: 3.14.0 - - :Parameters: - - chunks: `int`, `tuple`, `dict` or `str`, optional - Specify the chunking of the returned dask array. - - Any value accepted by the *chunks* parameter of the - `dask.array.from_array` function is allowed. - - The chunk sizes implied by *chunks* for a dimension that - has been fragmented are ignored and replaced with values - that are implied by that dimensions fragment sizes. - - :Returns: - - `dask.array.Array` - - """ - import dask.array as da - from dask.array.core import getter - from dask.base import tokenize - - name = (f"{self.__class__.__name__}-{tokenize(self)}",) - - dtype = self.dtype - units = self.get_units(None) - calendar = self.get_calendar(None) - aggregated_data = self.get_aggregated_data(copy=False) - - # Set the chunk sizes for the dask array - chunks = self.subarray_shapes(chunks) - - fragment_arrays = self._FragmentArray - if not self.get_mask(): - fragment_arrays = fragment_arrays.copy() - fragment_arrays["nc"] = partial(fragment_arrays["nc"], mask=False) - - storage_options = self.get_storage_options() - - dsk = {} - for ( - u_indices, - u_shape, - f_indices, - chunk_location, - fragment_location, - fragment_shape, - ) in zip(*self.subarrays(chunks)): - kwargs = aggregated_data[fragment_location].copy() - kwargs.pop("location", None) - - fragment_format = kwargs.pop("format", None) - try: - FragmentArray = fragment_arrays[fragment_format] - except KeyError: - raise ValueError( - "Can't get FragmentArray class for unknown " - f"fragment dataset format: {fragment_format!r}" - ) - - if storage_options and 
kwargs["address"] == "nc": - # Pass on any file system options - kwargs["storage_options"] = storage_options - - fragment = FragmentArray( - dtype=dtype, - shape=fragment_shape, - aggregated_units=units, - aggregated_calendar=calendar, - **kwargs, - ) - - key = f"{fragment.__class__.__name__}-{tokenize(fragment)}" - dsk[key] = fragment - dsk[name + chunk_location] = ( - getter, - key, - f_indices, - False, - False, - ) - - # Return the dask array - return da.Array(dsk, name[0], chunks=chunks, dtype=dtype) diff --git a/cf/data/array/mixin/compressedarraymixin.py b/cf/data/array/mixin/compressedarraymixin.py deleted file mode 100644 index 103c61199c..0000000000 --- a/cf/data/array/mixin/compressedarraymixin.py +++ /dev/null @@ -1,132 +0,0 @@ -# import dask.array as da -# -# -# class CompressedArrayMixin: -# """Mixin class for compressed arrays. -# -# .. versionadded:: 3.14.0 -# -# """ -# -# -# def _lock_file_read(self, array): -# """Try to return a dask array that does not support concurrent -# reads. -# -# .. versionadded:: 3.14.0 -# -# :Parameters: -# -# array: array_like -# The array to process. -# -# :Returns" -# -# `dask.array.Array` or array_like -# The new `dask` array, or the orginal array if it -# couldn't be ascertained how to form the `dask` array. -# -# """ -# try: -# return array.to_dask_array() -# except AttributeError: -# pass -# -# try: -# chunks = array.chunks -# except AttributeError: -# chunks = "auto" -# -# try: -# array = array.source() -# except (ValueError, AttributeError): -# pass -# -# try: -# array.get_filenames() -# except AttributeError: -# pass -# else: -# array = da.from_array(array, chunks=chunks, lock=True) -# -# return array -# -# def to_dask_array(self, chunks="auto"): -# """Convert the data to a `dask` array. -# -# .. versionadded:: 3.14.0 -# -# :Parameters: -# -# chunks: `int`, `tuple`, `dict` or `str`, optional -# Specify the chunking of the returned dask array. 
-# -# Any value accepted by the *chunks* parameter of the -# `dask.array.from_array` function is allowed. -# -# The chunk sizes implied by *chunks* for a dimension that -# has been fragmented are ignored and replaced with values -# that are implied by that dimensions fragment sizes. -# -# :Returns: -# -# `dask.array.Array` -# The `dask` array representation. -# -# """ -# from functools import partial -# -# import dask.array as da -# from cfdm.data.utils import normalize_chunks -# from dask import config -# from dask.array.core import getter -# from dask.base import tokenize -# -# name = (f"{self.__class__.__name__}-{tokenize(self)}",) -# -# dtype = self.dtype -# -# context = partial(config.set, scheduler="synchronous") -# -# # If possible, convert the compressed data to a dask array -# # that doesn't support concurrent reads. This prevents -# # "compute called by compute" failures problems at compute -# # time. -# # -# # TODO: This won't be necessary if this is refactored so that -# # the compressed data is part of the same dask graph as -# # the compressed subarrays. 
-# conformed_data = self.conformed_data() -# conformed_data = { -# k: self._lock_file_read(v) for k, v in conformed_data.items() -# } -# subarray_kwargs = {**conformed_data, **self.subarray_parameters()} -# -# # Get the (cfdm) subarray class -# Subarray = self.get_Subarray() -# subarray_name = Subarray().__class__.__name__ -# -# # Set the chunk sizes for the dask array -# chunks = normalize_chunks( -# self.subarray_shapes(chunks), -# shape=self.shape, -# dtype=dtype, -# ) -# -# dsk = {} -# for u_indices, u_shape, c_indices, chunk_location in zip( -# *self.subarrays(chunks) -# ): -# subarray = Subarray( -# indices=c_indices, -# shape=u_shape, -# context_manager=context, -# **subarray_kwargs, -# ) -# -# key = f"{subarray_name}-{tokenize(subarray)}" -# dsk[key] = subarray -# dsk[name + chunk_location] = (getter, key, Ellipsis, False, False) -# -# # Return the dask array -# return da.Array(dsk, name[0], chunks=chunks, dtype=dtype) diff --git a/cf/data/array/mixin/filearraymixin.py b/cf/data/array/mixin/filearraymixin.py deleted file mode 100644 index a4092262f0..0000000000 --- a/cf/data/array/mixin/filearraymixin.py +++ /dev/null @@ -1,219 +0,0 @@ -# from os import sep -# from os.path import basename, dirname, join -# -# from ....functions import _DEPRECATION_ERROR_ATTRIBUTE, abspath -# -# -# class FileArrayMixin: -# """Mixin class for an array stored in a file. -# -# .. versionadded:: 3.14.0 -# -# """ -# -# -# def __dask_tokenize__(self): -# """Return a value fully representative of the object. -# -# .. versionadded:: 3.15.0 -# -# """ -# return ( -# self.__class__, -# self.shape, -# self.get_filenames(), -# self.get_addresses(), -# ) -# -# @property -# def filename(self): -# """The name of the file containing the array. -# -# Deprecated at version 3.14.0. Use method `get_filename` instead. 
-# -# """ -# _DEPRECATION_ERROR_ATTRIBUTE( -# self, -# "filename", -# message="Use method 'get_filename' instead.", -# version="3.14.0", -# removed_at="5.0.0", -# ) # pragma: no cover -# -# def del_file_location(self, location): -# """Remove reference to files in the given location. -# -# .. versionadded:: 3.15.0 -# -# :Parameters: -# -# location: `str` -# The file location to remove. -# -# :Returns: -# -# `{{class}}` -# A new {{class}} with reference to files in *location* -# removed. -# -# **Examples** -# -# >>> a.get_filenames() -# ('/data1/file1', '/data2/file2') -# >>> a.get_addresses() -# ('tas1', 'tas2') -# >>> b = a.del_file_location('/data1') -# >>> b = get_filenames() -# ('/data2/file2',) -# >>> b.get_addresses() -# ('tas2',) -# -# >>> a.get_filenames() -# ('/data1/file1', '/data2/file1', '/data2/file2') -# >>> a.get_addresses() -# ('tas1', 'tas1', 'tas2') -# >>> b = a.del_file_location('/data2') -# >>> b.get_filenames() -# ('/data1/file1',) -# >>> b.get_addresses() -# ('tas1',) -# -# """ -# location = abspath(location).rstrip(sep) -# -# new_filenames = [] -# new_addresses = [] -# for filename, address in zip( -# self.get_filenames(), self.get_addresses() -# ): -# if dirname(filename) != location: -# new_filenames.append(filename) -# new_addresses.append(address) -# -# if not new_filenames: -# raise ValueError( -# "Can't delete a file location when it results in there " -# "being no files" -# ) -# -# a = self.copy() -# a._set_component("filename", tuple(new_filenames), copy=False) -# a._set_component("address", tuple(new_addresses), copy=False) -# return a -# -# def file_locations(self): -# """The locations of the files, any of which may contain the data. -# -# .. versionadded:: 3.15.0 -# -# :Returns: -# -# `tuple` -# The file locations, one for each file, as absolute -# paths with no trailing path name component separator. 
-# -# **Examples** -# -# >>> a.get_filenames() -# ('/data1/file1',) -# >>> a.file_locations() -# ('/data1,) -# -# >>> a.get_filenames() -# ('/data1/file1', '/data2/file2') -# >>> a.file_locations() -# ('/data1', '/data2') -# -# >>> a.get_filenames() -# ('/data1/file1', '/data2/file2', '/data1/file2') -# >>> a.file_locations() -# ('/data1', '/data2', '/data1') -# -# """ -# return tuple(map(dirname, self.get_filenames())) -# -# def add_file_location(self, location): -# """Add a new file location. -# -# All existing files are additionally referenced from the given -# location. -# -# .. versionadded:: 3.15.0 -# -# :Parameters: -# -# location: `str` -# The new location. -# -# :Returns: -# -# `{{class}}` -# A new {{class}} with all previous files additionally -# referenced from *location*. -# -# **Examples** -# -# >>> a.get_filenames() -# ('/data1/file1',) -# >>> a.get_addresses() -# ('tas',) -# >>> b = a.add_file_location('/home') -# >>> b.get_filenames() -# ('/data1/file1', '/home/file1') -# >>> b.get_addresses() -# ('tas', 'tas') -# -# >>> a.get_filenames() -# ('/data1/file1', '/data2/file2',) -# >>> a.get_addresses() -# ('tas', 'tas') -# >>> b = a.add_file_location('/home/') -# >>> b = get_filenames() -# ('/data1/file1', '/data2/file2', '/home/file1', '/home/file2') -# >>> b.get_addresses() -# ('tas', 'tas', 'tas', 'tas') -# -# >>> a.get_filenames() -# ('/data1/file1', '/data2/file1',) -# >>> a.get_addresses() -# ('tas1', 'tas2') -# >>> b = a.add_file_location('/home/') -# >>> b.get_filenames() -# ('/data1/file1', '/data2/file1', '/home/file1') -# >>> b.get_addresses() -# ('tas1', 'tas2', 'tas1') -# -# >>> a.get_filenames() -# ('/data1/file1', '/data2/file1',) -# >>> a.get_addresses() -# ('tas1', 'tas2') -# >>> b = a.add_file_location('/data1') -# >>> b.get_filenames() -# ('/data1/file1', '/data2/file1') -# >>> b.get_addresses() -# ('tas1', 'tas2') -# -# """ -# location = abspath(location).rstrip(sep) -# -# filenames = self.get_filenames() -# addresses = 
self.get_addresses() -# -# # Note: It is assumed that each existing file name is either -# # an absolute path or a fully qualified URI. -# new_filenames = list(filenames) -# new_addresses = list(addresses) -# for filename, address in zip(filenames, addresses): -# new_filename = join(location, basename(filename)) -# if new_filename not in new_filenames: -# new_filenames.append(new_filename) -# new_addresses.append(address) -# -# a = self.copy() -# a._set_component("filename", tuple(new_filenames), copy=False) -# a._set_component( -# "address", -# tuple(new_addresses), -# copy=False, -# ) -# return a diff --git a/cf/data/array/mixin/indexmixin.py b/cf/data/array/mixin/indexmixin.py deleted file mode 100644 index 3e0ccc7afb..0000000000 --- a/cf/data/array/mixin/indexmixin.py +++ /dev/null @@ -1,366 +0,0 @@ -# from numbers import Integral -# -# import numpy as np -# from dask.array.slicing import normalize_index -# from dask.base import is_dask_collection -# -# from ....functions import indices_shape, parse_indices -# -# -# class IndexMixin: -# """Mixin class for lazy indexing of a data array. -# -# A data for a subspace is retrieved by casting the object as a -# `numpy` array. See `__getitem__` for more details. -# -# **Examples** -# -# >>> a = cf.{{class}}(...) -# >>> a.shape -# (6, 5) -# >>> print(np.asanyarray(a)) -# [[ 0 1 2 3 4]) -# [ 5 6 7 8 9] -# [10 11 12 13 14] -# [15 16 17 18 19] -# [20 21 22 23 24] -# [25 26 27 28 29]] -# >>> a = a[::2, [1, 2, 4]] -# >>> a = a[[True, False, True], :] -# >>> a.shape -# (2, 3) -# >>> print(np.asanyarray(a)) -# [[ 1, 2, 4], -# [21, 22, 24]] -# -# .. versionadded:: NEXTVERSION -# -# """ -# -# -# -# def __array__(self, *dtype): -# """Convert the `{{class}}` into a `numpy` array. -# -# .. versionadded:: NEXTVERSION -# -# :Parameters: -# -# dtype: optional -# Typecode or data-type to which the array is cast. 
-# -# :Returns: -# -# `numpy.ndarray` -# An independent `numpy` array of the subspace of the -# data defined by the `indices` attribute. -# -# """ -# array = self._get_array() -# if dtype: -# return array.astype(dtype[0], copy=False) -# -# return array -# -# def __getitem__(self, index): -# """Returns a subspace of the data as a new `{{class}}`. -# -# x.__getitem__(indices) <==> x[indices] -# -# Subspaces created by indexing are lazy and are not applied -# until the `{{class}}` object is converted to a `numpy` array, -# by which time all lazily-defined subspaces will have been -# converted to a single combined index which defines only the -# actual elements that need to be retrieved from the original -# data. -# -# The combined index is orthogonal, meaning that the index for -# each dimension is to be applied independently, regardless of -# how that index was defined. For instance, the indices ``[[0, -# 1], [1, 3], 0]`` and ``[:2, 1::2, 0]`` will give identical -# results. -# -# For example, if the original data has shape ``(12, 145, 192)`` -# and consecutive subspaces of ``[::2, [1, 3, 4], 96:]`` and -# ``[[0, 5], [True, False, True], 0]`` are applied, then only -# the elements defined by the combined index``[[0, 10], [1, 4], -# 96]`` will be retrieved from the data when `__array__` is -# called. -# -# .. versionadded:: NEXTVERSION -# -# .. seealso:: `index`, `original_shape`, `__array__`, -# `__getitem__` -# -# :Returns: -# -# `{{class}}` -# The subspaced data. -# -# """ -# shape0 = self.shape -# index0 = self.index(conform=False) -# original_shape = self.original_shape -# -# index1 = parse_indices(shape0, index, keepdims=False) -# -# new = self.copy() -# new_indices = [] -# new_shape = [] -# -# i = 0 -# for ind0, original_size in zip(index0, original_shape): -# if isinstance(ind0, Integral): -# # The previous call to __getitem__ resulted in a -# # dimension being removed (i.e. 'ind0' is -# # integer-valued). 
Therefore 'index1' must have fewer -# # elements than 'index0', so we need to "carry -# # forward" the integer-valued index so that it is -# # available at evaluation time. -# new_indices.append(ind0) -# continue -# -# ind1 = index1[i] -# size0 = shape0[i] -# i += 1 -# -# # If this dimension is not subspaced by the new index then -# # we don't need to update the old index. -# if isinstance(ind1, slice) and ind1 == slice(None): -# new_indices.append(ind0) -# continue -# -# # Still here? Then we have to work out the index of the -# # full array that is equivalent to applying -# # 'ind0' followed by 'ind1'. -# if is_dask_collection(ind1): -# # Note: This will never occur when this __getitem__ is -# # being called from within a Dask graph, because -# # any lazy indices will have already been -# # computed as part of the whole graph execution; -# # i.e. we don't have to worry about a -# # compute-within-a-compute situation. (If this -# # were not the case then we could add -# # `scheduler="synchronous"` to the compute -# # call.) -# ind1 = ind1.compute() -# -# if isinstance(ind0, slice): -# if isinstance(ind1, slice): -# # ind0: slice -# # ind1: slice -# start, stop, step = ind0.indices(original_size) -# start1, stop1, step1 = ind1.indices(size0) -# size1, mod1 = divmod(stop1 - start1, step1) -# -# if mod1 != 0: -# size1 += 1 -# -# start += start1 * step -# step *= step1 -# stop = start + (size1 - 1) * step -# -# if step > 0: -# stop += 1 -# else: -# stop -= 1 -# -# if stop < 0: -# stop = None -# -# new_index = slice(start, stop, step) -# else: -# # ind0: slice -# # ind1: int, or array of int/bool -# new_index = np.arange(*ind0.indices(original_size))[ind1] -# else: -# # ind0: array of int. If we made it to here then it -# # can't be anything else. 
This is -# # because we've dealt with ind0 -# # being a slice or an int, the -# # very first ind0 is always -# # slice(None), and a previous ind1 -# # that was an array of bool will -# # have resulted in this ind0 being -# # an array of int. -# # -# # ind1: anything -# new_index = np.asanyarray(ind0)[ind1] -# -# new_indices.append(new_index) -# -# new._custom["index"] = tuple(new_indices) -# -# # Find the shape defined by the new index -# new_shape = indices_shape(new_indices, original_shape, keepdims=False) -# new._set_component("shape", tuple(new_shape), copy=False) -# -# return new -# -# def __repr__(self): -# """Called by the `repr` built-in function. -# -# x.__repr__() <==> repr(x) -# -# """ -# return ( -# f"" -# ) -# -# @property -# def __asanyarray__(self): -# """Whether the array is accessed by conversion to a `numpy` array. -# -# .. versionadded:: NEXTVERSION -# -# :Returns: -# -# `True` -# -# """ -# return True -# -# def _get_array(self, index=None): -# """Returns a subspace of the data as a `numpy` array. -# -# .. versionadded:: NEXTVERSION -# -# .. seealso:: `__array__`, `index` -# -# :Parameters: -# -# index: `tuple` or `None`, optional -# Provide the indices that define the subspace. If -# `None` then the `index` attribute is used. -# -# :Returns: -# -# `numpy.ndarray` -# The subspace. -# -# """ -# return NotImplementedError( -# f"Must implement {self.__class__.__name__}._get_array" -# ) -# -# def index(self, conform=True): -# """The index to be applied when converting to a `numpy` array. -# -# The `shape` is defined by the `index` applied to the -# `original_shape`. -# -# .. versionadded:: NEXTVERSION -# -# .. seealso:: `shape`, `original_shape` -# -# :Parameters: -# -# conform: `bool`, optional -# If True, the default, then -# -# * Convert a decreasing size 1 slice to an increasing -# one. -# -# * Convert, where possible, a sequence of integers to a -# slice. 
-# -# These transformations are to allow subspacing on data -# objects that have restricted indexing functionality, -# such as `h5py.Variable` objects. -# -# If False then these transformations are not done. -# -# :Returns: -# -# `tuple` -# -# **Examples** -# -# >>> x.shape -# (12, 145, 192) -# >>> x.index() -# (slice(None), slice(None), slice(None)) -# >>> x = x[8:7:-1, 10:19:3, [15, 1, 4, 12]] -# >>> x = x[[0], [True, False, True], ::-2] -# >>> x.shape -# (1, 2, 2) -# >>> x.index() -# (slice(8, 9, None), slice(10, 17, 6), slice(12, -1, -11)) -# >>> x.index(conform=False) -# (array([8]), array([10, 16]), array([12, 1])) -# -# """ -# ind = self._custom.get("index") -# if ind is None: -# # No indices have been applied yet, so define indices that -# # are equivalent to Ellipsis, and set the original shape. -# ind = (slice(None),) * self.ndim -# self._custom["index"] = ind -# self._custom["original_shape"] = self.shape -# return ind -# -# if not conform: -# return ind -# -# # Still here? Then conform the indices by: -# # -# # 1) Converting decreasing size 1 slices to increasing -# # ones. This helps when the parent class can't cope with -# # decreasing slices. -# # -# # 2) Converting, where possible, sequences of integers to -# # slices. This helps when the parent class can't cope with -# # indices that are sequences of integers. -# ind = list(ind) -# for n, (i, size) in enumerate(zip(ind[:], self.original_shape)): -# if isinstance(i, slice): -# if size == 1: -# start, _, step = i.indices(size) -# if step and step < 0: -# # Decreasing slices are not universally -# # accepted (e.g. `h5py` doesn't like them), -# # but we can convert them to increasing ones. -# ind[n] = slice(start, start + 1) -# elif np.iterable(i): -# i = normalize_index((i,), (size,))[0] -# if i.size == 1: -# # Convert a sequence of one integer into a slice -# start = i.item() -# ind[n] = slice(start, start + 1) -# else: -# # Convert a sequence of two or more evenly spaced -# # integers into a slice. 
-# step = np.unique(np.diff(i)) -# if step.size == 1: -# start, stop = i[[0, -1]] -# if stop >= start: -# stop += 1 -# elif stop: -# stop = -1 -# else: -# stop = None -# -# ind[n] = slice(start, stop, step.item()) -# -# return tuple(ind) -# -# @property -# def original_shape(self): -# """The original shape of the data, before any subspacing. -# -# The `shape` is defined by the result of subspacing the data in -# its original shape with the indices given by `index`. -# -# .. versionadded:: NEXTVERSION -# -# .. seealso:: `index`, `shape` -# -# """ -# out = self._custom.get("original_shape") -# if out is None: -# # No subspace has been defined yet -# out = self.shape -# self._custom["original_shape"] = out -# -# return out diff --git a/cf/data/array/netcdf4array.py b/cf/data/array/netcdf4array.py index 6e3f25d0eb..3bb3871b0f 100644 --- a/cf/data/array/netcdf4array.py +++ b/cf/data/array/netcdf4array.py @@ -17,4 +17,5 @@ class NetCDF4Array( method. See `cf.data.collapse.Collapse` for details. 
""" + pass diff --git a/cf/data/array/pointtopologyarray.py b/cf/data/array/pointtopologyarray.py index be9512bdad..ce8c2107a2 100644 --- a/cf/data/array/pointtopologyarray.py +++ b/cf/data/array/pointtopologyarray.py @@ -7,4 +7,4 @@ class PointTopologyArray( Container, cfdm.PointTopologyArray, ): - pass + pass diff --git a/cf/data/array/umarray.py b/cf/data/array/umarray.py index 9fa40748be..0bc35e1839 100644 --- a/cf/data/array/umarray.py +++ b/cf/data/array/umarray.py @@ -26,7 +26,6 @@ def __init__( unpack=True, attributes=None, storage_options=None, -# min_file_versions=None, source=None, copy=True, ): @@ -109,7 +108,6 @@ def __init__( unpack=unpack, attributes=attributes, storage_options=storage_options, -# min_file_versions=min_file_versions, source=source, copy=copy, ) diff --git a/cf/data/data.py b/cf/data/data.py index 4ecc5e8fb1..734e65cbf8 100644 --- a/cf/data/data.py +++ b/cf/data/data.py @@ -686,43 +686,6 @@ def __setitem__(self, indices, value): return - # def _cfa_del_write(self): - # """Set the CFA write status of the data to `False`. - # - # .. versionadded:: 3.15.0 - # - # .. seealso:: `cfa_get_write`, `_cfa_set_write` - # - # :Returns: - # - # `bool` - # The CFA status prior to deletion. - # - # """ - # return self._custom.pop("cfa_write", False) - # - # def _cfa_set_term(self, value): - # """Set the CFA aggregation instruction term status. - # - # .. versionadded:: 3.15.0 - # - # .. seealso:: `cfa_get_term`, `cfa_set_term` - # - # :Parameters: - # - # status: `bool` - # The new CFA aggregation instruction term status. - # - # :Returns: - # - # `None` - # - # """ - # if not value: - # self._custom.pop("cfa_term", None) - # - # self._custom["cfa_term"] = bool(value) - def _is_abstract_Array_subclass(self, array): """Whether or not an array is a type of Array. @@ -737,30 +700,6 @@ def _is_abstract_Array_subclass(self, array): """ return isinstance(array, cfdm.Array) - # def _cfa_set_write(self, status): - # """Set the CFA write status of the data. 
- # - # If and only if the CFA write status is True then it may be - # possible to write the data as an aggregation variable to a - # CFA-netCDF file. - # - # .. versionadded:: 3.15.0 - # - # .. seealso:: `cfa_get_write`, `cfa_set_write`, - # `_cfa_del_write`, `cf.read`, `cf.write`, - # - # :Parameters: - # - # status: `bool` - # The new CFA write status. - # - # :Returns: - # - # `None` - # - # """ - # self._custom["cfa_write"] = bool(status) - def _update_deterministic(self, other): """Update the deterministic name status. @@ -1669,110 +1608,6 @@ def ceil(self, inplace=False, i=False): d._set_dask(dx) return d - # def cfa_get_term(self): - # """The CFA aggregation instruction term status. - # - # If True then the data represents that of a non-standard CFA - # aggregation instruction variable. - # - # .. versionadded:: 3.15.0 - # - # .. seealso:: `cfa_set_term` - # - # :Returns: - # - # `bool` - # - # **Examples** - # - # >>> d = cf.Data([1, 2]) - # >>> d.cfa_get_term() - # False - # - # """ - # return bool(self._custom.get("cfa_term", False)) - # - # def cfa_get_write(self): - # """The CFA write status of the data. - # - # If and only if the CFA write status is True then it may be - # possible to write the data as an aggregation variable to a - # CFA-netCDF file. - # - # .. versionadded:: 3.15.0 - # - # .. seealso:: `cfa_set_write`, `cf.read`, `cf.write` - # - # :Returns: - # - # `bool` - # - # **Examples** - # - # >>> d = cf.Data([1, 2]) - # >>> d.cfa_get_write() - # False - # - # """ - # return bool(self._custom.get("cfa_write", False)) - # - # def cfa_set_term(self, status): - # """Set the CFA aggregation instruction term status. - # - # If True then the data represents that of a non-standard CFA - # aggregation instruction variable. - # - # .. versionadded:: 3.15.0 - # - # .. seealso:: `cfa_get_term` - # - # :Parameters: - # - # status: `bool` - # The new CFA aggregation instruction term status. 
- # - # :Returns: - # - # `None` - # - # """ - # if status: - # raise ValueError( - # "'cfa_set_term' only allows the CFA aggregation instruction " - # "term write status to be set to False" - # ) - # - # self._custom.pop("cfa_term", False) - # - # def cfa_set_write(self, status): - # """Set the CFA write status of the data. - # - # If and only if the CFA write status is True then it may be - # possible to write the data as an aggregation variable to a - # CFA-netCDF file. - # - # .. versionadded:: 3.15.0 - # - # .. seealso:: `cfa_get_write`, `cf.read`, `cf.write` - # - # :Parameters: - # - # status: `bool` - # The new CFA write status. - # - # :Returns: - # - # `None` - # - # """ - # if status: - # raise ValueError( - # "'cfa_set_write' only allows the CFA write status to be " - # "set to False" - # ) - # - # self._cfa_del_write() - @_inplace_enabled(default=False) def convolution_filter( self, @@ -2177,47 +2012,6 @@ def _asreftime(self, inplace=False): return d - # def _clear_after_dask_update(self, clear=None): - # """Remove components invalidated by updating the `dask` array. - # - # Removes or modifies components that can't be guaranteed to be - # consistent with an updated `dask` array. See the *clear* - # parameter for details. - # - # .. versionadded:: NEXTVERSION - # - # .. seealso:: `_del_Array`, `_del_cached_elements`, - # `_set_dask`, `_cfa_del_write` - # - # :Parameters: - # - # clear: `int` or `None`, optional - # Specify which components to remove, determined by - # sequentially combining an integer value of *clear* - # with the relevant class-level constants (such as - # ``{{class}}._ARRAY``), using the bitwise AND (&) - # operator. If ``clear & `` is - # True then the corresponding component is cleared. The - # default value of `None` is equivalent to *clear* being - # set to ``{{class}}._ALL``. - # - # The bitwise OR (^) operator can be used to retain a - # component (or components) but remove all others. 
For - # instance, if *clear* is ``{{class}}._ALL ^ - # {{class}}._CACHE`` then all components except the - # cached array values will be removed. - # - # :Returns: - # - # `int` TODODASK - # - # """ - # clear = super()._clear_after_dask_update(clear) - # - # if clear & self._CFA: - # # Set the CFA write status to False - # self._cfa_del_write() - def _combined_units(self, data1, method, inplace): """Combines by given method the data's units with other units. @@ -4153,58 +3947,6 @@ def get_deterministic_name(self): units._canonical_calendar, ) - # def add_file_location(self, location): - # """Add a new file location in-place. - # - # All data definitions that reference files are additionally - # referenced from the given location. - # - # .. versionadded:: 3.15.0 - # - # .. seealso:: `del_file_location`, `file_locations` - # - # :Parameters: - # - # location: `str` - # The new location. - # - # :Returns: - # - # `str` - # The new location as an absolute path with no trailing - # path name component separator. - # - # **Examples** - # - # >>> d.add_file_location('/data/model/') - # '/data/model' - # - # """ - # location = abspath(location).rstrip(sep) - # - # updated = False - # - # # The dask graph is never going to be computed, so we can set - # # '_asanyarray=False'. - # dsk = self.todict(_asanyarray=False) - # for key, a in dsk.items(): - # try: - # dsk[key] = a.add_file_location(location) - # except AttributeError: - # # This chunk doesn't contain a file array - # continue - # - # # This chunk contains a file array and the dask graph has - # # been updated - # updated = True - # - # if updated: - # dx = self.to_dask_array(_asanyarray=False) - # dx = da.Array(dsk, dx.name, dx.chunks, dx.dtype, dx._meta) - # self._set_dask(dx, clear=self._NONE, asanyarray=None) - # - # return location - def set_units(self, value): """Set the units. 
@@ -6076,41 +5818,6 @@ def has_deterministic_name(self): """ return self._custom.get("has_deterministic_name", False) - # def file_locations(self): - # """The locations of files containing parts of the data. - # - # Returns the locations of any files that may be required to - # deliver the computed data array. - # - # .. versionadded:: 3.15.0 - # - # .. seealso:: `add_file_location`, `del_file_location` - # - # :Returns: - # - # `set` - # The unique file locations as absolute paths with no - # trailing path name component separator. - # - # **Examples** - # - # >>> d.file_locations() - # {'/home/data1', 'file:///data2'} - # - # """ - # out = set() - # - # # The dask graph is never going to be computed, so we can set - # # '_asanyarray=False'. - # for key, a in self.todict(_asanyarray=False).items(): - # try: - # out.update(a.file_locations()) - # except AttributeError: - # # This chunk doesn't contain a file array - # pass - # - # return out - def flat(self, ignore_masked=True): """Return a flat iterator over elements of the data array. @@ -6630,58 +6337,6 @@ def masked_invalid(self, inplace=False): d._set_dask(dx) return d - # def del_file_location(self, location): - # """Remove a file location in-place. - # - # All data definitions that reference files will have references - # to files in the given location removed from them. - # - # .. versionadded:: 3.15.0 - # - # .. seealso:: `add_file_location`, `file_locations` - # - # :Parameters: - # - # location: `str` - # The file location to remove. - # - # :Returns: - # - # `str` - # The removed location as an absolute path with no - # trailing path name component separator. - # - # **Examples** - # - # >>> d.del_file_location('/data/model/') - # '/data/model' - # - # """ - # location = abspath(location).rstrip(sep) - # - # updated = False - # - # # The dask graph is never going to be computed, so we can set - # # '_asanyarray=False'. 
- # dsk = self.todict(_asanyarray=False) - # for key, a in dsk.items(): - # try: - # dsk[key] = a.del_file_location(location) - # except AttributeError: - # # This chunk doesn't contain a file array - # continue - # - # # This chunk contains a file array and the dask graph has - # # been updated - # updated = True - # - # if updated: - # dx = self.to_dask_array(_asanyarray=False) - # dx = da.Array(dsk, dx.name, dx.chunks, dx.dtype, dx._meta) - # self._set_dask(dx, clear=self._NONE, asanyarray=None) - # - # return location - @classmethod def masked_all( cls, shape, dtype=None, units=None, calendar=None, chunks="auto" diff --git a/cf/data/fragment/fragmentumarray.py b/cf/data/fragment/fragmentumarray.py index 735b83c0e6..6cfa8bbde2 100644 --- a/cf/data/fragment/fragmentumarray.py +++ b/cf/data/fragment/fragmentumarray.py @@ -11,108 +11,3 @@ class FragmentUMArray( .. versionadded:: 3.14.0 """ - - -# -# def __init__( -# self, -# filename=None, -# address=None, -# dtype=None, -# shape=None, -# storage_options=None, -# min_file_versions=None, -# unpack_aggregated_data=True, -# aggregated_attributes=None, -# source=None, -# copy=True, -# ): -# """**Initialisation** -# -# :Parameters: -# -# filename: (sequence of `str`), optional -# The names of the UM or PP files containing the fragment. -# -# address: (sequence of `str`), optional -# The start words in the files of the header. -# -# dtype: `numpy.dtype` -# The data type of the aggregated array. May be `None` -# if the numpy data-type is not known (which can be the -# case for netCDF string types, for example). This may -# differ from the data type of the netCDF fragment -# variable. -# -# shape: `tuple` -# The shape of the fragment within the aggregated -# array. This may differ from the shape of the netCDF -# fragment variable in that the latter may have fewer -# size 1 dimensions. 
-# -# {{init attributes: `dict` or `None`, optional}} -# -# During the first `__getitem__` call, any of the -# ``_FillValue``, ``add_offset``, ``scale_factor``, -# ``units``, and ``calendar`` attributes which haven't -# already been set will be inferred from the lookup -# header and cached for future use. -# -# .. versionadded:: NEXTVERSION -# -# {{aggregated_units: `str` or `None`, optional}} -# -# {{aggregated_calendar: `str` or `None`, optional}} -# -# {{init storage_options: `dict` or `None`, optional}} -# -# {{init source: optional}} -# -# {{init copy: `bool`, optional}} -# -# units: `str` or `None`, optional -# Deprecated at version NEXTVERSION. Use the -# *attributes* parameter instead. -# -# calendar: `str` or `None`, optional -# Deprecated at version NEXTVERSION. Use the -# *attributes* parameter instead. -# -# """ -# super().__init__( -# filename=filename, -# address=address, -# dtype=dtype, -# shape=shape, -# mask=True, -# unpack=True, -# attributes=None, -# storage_options=storage_options, -# min_file_versions=min_file_versions, -# source=source, -# copy=copy -# ) -# -# if source is not None: -# try: -# aggregated_attributes = source._get_component( -# "aggregated_attributes", None -# ) -# except AttributeError: -# aggregated_attributes = None -# -# try: -# unpack_aggregated_data = source._get_component( -# "unpack_aggregated_data", True -# ) -# except AttributeError: -# unpack_aggregated_data = True -# -# self._set_component( -# "unpack_aggregated_data", -# unpack_aggregated_data, -# copy=False, -# ) -# self._set_component( -# "aggregated_attributes", aggregated_attributes, copy=False -# ) diff --git a/cf/data/fragment/fullfragmentarray.py b/cf/data/fragment/fullfragmentarray.py deleted file mode 100644 index 56d6aead43..0000000000 --- a/cf/data/fragment/fullfragmentarray.py +++ /dev/null @@ -1,91 +0,0 @@ -# from ..array.fullarray import FullArray -# from .mixin import FragmentArrayMixin -# -# -# class FullFragmentArray(FragmentArrayMixin, FullArray): 
-# """A CFA fragment array that is filled with a value. -# -# .. versionadded:: 3.15.0 -# -# """ -# -# def __init__( -# self, -# fill_value=None, -# dtype=None, -# shape=None, -# aggregated_units=False, -# aggregated_calendar=False, -# attributes=None, -# source=None, -# copy=True, -# ): -# """**Initialisation** -# -# :Parameters: -# -# fill_value: scalar -# The fill value. -# -# dtype: `numpy.dtype` -# The data type of the aggregated array. May be `None` -# if the numpy data-type is not known (which can be the -# case for netCDF string types, for example). This may -# differ from the data type of the netCDF fragment -# variable. -# -# shape: `tuple` -# The shape of the fragment within the aggregated -# array. This may differ from the shape of the netCDF -# fragment variable in that the latter may have fewer -# size 1 dimensions. -# -# {{init attributes: `dict` or `None`, optional}} -# -# .. versionadded:: NEXTVERSION -# -# {{aggregated_units: `str` or `None`, optional}} -# -# {{aggregated_calendar: `str` or `None`, optional}} -# -# {{init source: optional}} -# -# {{init copy: `bool`, optional}} -# -# units: `str` or `None`, optional -# Deprecated at version NEXTVERSION. Use the -# *attributes* parameter instead. -# -# calendar: `str` or `None`, optional -# Deprecated at version NEXTVERSION. Use the -# *attributes* parameter instead. 
-# -# """ -# super().__init__( -# fill_value=fill_value, -# dtype=dtype, -# shape=shape, -# attributes=attributes, -# source=source, -# copy=False, -# ) -# -# if source is not None: -# try: -# aggregated_units = source._get_component( -# "aggregated_units", False -# ) -# except AttributeError: -# aggregated_units = False -# -# try: -# aggregated_calendar = source._get_component( -# "aggregated_calendar", False -# ) -# except AttributeError: -# aggregated_calendar = False -# -# self._set_component("aggregated_units", aggregated_units, copy=False) -# self._set_component( -# "aggregated_calendar", aggregated_calendar, copy=False -# ) diff --git a/cf/data/fragment/h5netcdffragmentarray.py b/cf/data/fragment/h5netcdffragmentarray.py deleted file mode 100644 index 0f4caf4210..0000000000 --- a/cf/data/fragment/h5netcdffragmentarray.py +++ /dev/null @@ -1,97 +0,0 @@ -# from ..array.h5netcdfarray import H5netcdfArray -# from .mixin import FragmentArrayMixin -# -# -# class H5netcdfFragmentArray(FragmentArrayMixin, H5netcdfArray): -# """A netCDF fragment array accessed with `h5netcdf`. -# -# .. versionadded:: NEXTVERSION -# -# """ -# -# def __init__( -# self, -# filename=None, -# address=None, -# dtype=None, -# shape=None, -# aggregated_units=False, -# aggregated_calendar=False, -# attributes=None, -# storage_options=None, -# source=None, -# copy=True, -# ): -# """**Initialisation** -# -# :Parameters: -# -# filename: (sequence of `str`), optional -# The names of the netCDF fragment files containing the -# array. -# -# address: (sequence of `str`), optional -# The name of the netCDF variable containing the -# fragment array. Required unless *varid* is set. -# -# dtype: `numpy.dtype`, optional -# The data type of the aggregated array. May be `None` -# if the numpy data-type is not known (which can be the -# case for netCDF string types, for example). This may -# differ from the data type of the netCDF fragment -# variable. 
-# -# shape: `tuple`, optional -# The shape of the fragment within the aggregated -# array. This may differ from the shape of the netCDF -# fragment variable in that the latter may have fewer -# size 1 dimensions. -# -# {{init attributes: `dict` or `None`, optional}} -# -# If *attributes* is `None`, the default, then the -# attributes will be set from the netCDF variable during -# the first `__getitem__` call. -# -# {{aggregated_units: `str` or `None`, optional}} -# -# {{aggregated_calendar: `str` or `None`, optional}} -# -# {{init storage_options: `dict` or `None`, optional}} -# -# {{init source: optional}} -# -# {{init copy: `bool`, optional}} -# -# """ -# super().__init__( -# filename=filename, -# address=address, -# dtype=dtype, -# shape=shape, -# mask=True, -# attributes=attributes, -# storage_options=storage_options, -# source=source, -# copy=copy, -# ) -# -# if source is not None: -# try: -# aggregated_units = source._get_component( -# "aggregated_units", False -# ) -# except AttributeError: -# aggregated_units = False -# -# try: -# aggregated_calendar = source._get_component( -# "aggregated_calendar", False -# ) -# except AttributeError: -# aggregated_calendar = False -# -# self._set_component("aggregated_units", aggregated_units, copy=False) -# self._set_component( -# "aggregated_calendar", aggregated_calendar, copy=False -# ) diff --git a/cf/data/fragment/mixin/__init__.py b/cf/data/fragment/mixin/__init__.py deleted file mode 100644 index a4a35a1129..0000000000 --- a/cf/data/fragment/mixin/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .fragmentarraymixin import FragmentArrayMixin diff --git a/cf/data/fragment/mixin/fragmentarraymixin.py b/cf/data/fragment/mixin/fragmentarraymixin.py deleted file mode 100644 index 7567d7f667..0000000000 --- a/cf/data/fragment/mixin/fragmentarraymixin.py +++ /dev/null @@ -1,258 +0,0 @@ -# rom math import prod -# -# mport numpy as np -# -# rom ....units import Units -# -# -# lass FragmentArrayMixin: -# """Mixin class for a 
CFA fragment array. -# -# .. versionadded:: 3.15.0 -# -# """ -# -# def _get_array(self, index=None): -# """Returns a subspace of the dataset variable. -# -# .. versionadded:: NEXTVERSION -# -# .. seealso:: `__array__`, `index` -# -# :Parameters: -# -# {{index: `tuple` or `None`, optional}} -# -# It is important that there is a distinct value for each -# fragment dimension, which is guaranteed when the -# default of the `index` attribute is being used. -# -# :Returns: -# -# `numpy.ndarray` -# The subspace. -# -# """ -# if index is None: -# index = self.index() -# -# try: -# array = super()._get_array(index) -# except ValueError: -# # A ValueError is expected to be raised when the fragment -# # variable has fewer than 'self.ndim' dimensions (we know -# # that this is the case because 'index' has 'self.ndim' -# # elements). -# axis = self._size_1_axis(index) -# if axis is not None: -# # There is a unique size 1 index that must correspond -# # to the missing dimension => Remove it from the -# # indices, get the fragment array with the new -# # indices; and then insert the missing size one -# # dimension. -# index = list(index) -# index.pop(axis) -# array = super()._get_array(tuple(index)) -# array = np.expand_dims(array, axis) -# else: -# # There are multiple size 1 indices so we don't know -# # how many missing dimensions the fragment has, nor -# # their positions => Get the full fragment array and -# # then reshape it to the shape of the dask compute -# # chunk; and then apply the index. -# array = super()._get_array(Ellipsis) -# if array.size > prod(self.original_shape): -# raise ValueError( -# f"Can't get CFA fragment data from ({self}) when " -# "the fragment has two or more missing size 1 " -# "dimensions, whilst also spanning two or more " -# "Dask compute chunks." -# "\n\n" -# "Consider re-creating the data with exactly one " -# "Dask compute chunk per fragment (e.g. by setting " -# "'chunks=None' as a keyword to cf.read)." 
-# ) -# -# array = array.reshape(self.original_shape) -# array = array[index] -# -# array = self._conform_to_aggregated_units(array) -# return array -# -# def _conform_to_aggregated_units(self, array): -# """Conform the array to have the aggregated units. -# -# .. versionadded:: 3.15.0 -# -# :Parameters: -# -# array: `numpy.ndarray` or `dict` -# The array to be conformed. If *array* is a `dict` with -# `numpy` array values then selected values are -# conformed. -# -# :Returns: -# -# `numpy.ndarray` or `dict` -# The conformed array. The returned array may or may not -# be the input array updated in-place, depending on its -# data type and the nature of its units and the -# aggregated units. -# -# If *array* is a `dict` then a dictionary of conformed -# arrays is returned. -# -# """ -# units = self.Units -# if units: -# aggregated_units = self.aggregated_Units -# if not units.equivalent(aggregated_units): -# raise ValueError( -# f"Can't convert fragment data with units {units!r} to " -# f"have aggregated units {aggregated_units!r}" -# ) -# -# if units != aggregated_units: -# if isinstance(array, dict): -# # 'array' is a dictionary. -# raise ValueError( -# "TODOACTIVE. Placeholder notification that " -# "we can't yet deal with active " -# "storage reductions on CFA fragments." -# ) -# else: -# # 'array' is a numpy array -# array = Units.conform( -# array, units, aggregated_units, inplace=True -# ) -# -# return array -# -# def _size_1_axis(self, indices): -# """Find the position of a unique size 1 index. -# -# .. versionadded:: 3.15.0 -# -# .. seealso:: `_parse_indices`, `__getitem__` -# -# :Paramealso:: `_parse_indices`, `__getitem__` -# -# :Parameters: -# -# indices: sequence of index -# The array indices to be parsed, as returned by -# `_parse_indices`. -# -# :Returns: -# -# `int` or `None` -# The position of the unique size 1 index, or `None` if -# there are zero or at least two of them. 
-# -# **Examples** -# -# >>> a._size_1_axis(([2, 4, 5], slice(0, 1), slice(0, 73))) -# 1 -# >>> a._size_1_axis(([2, 4, 5], slice(3, 4), slice(0, 73))) -# 1 -# >>> a._size_1_axis(([2, 4, 5], [0], slice(0, 73))) -# 1 -# >>> a._size_1_axis(([2, 4, 5], slice(0, 144), slice(0, 73))) -# None -# >>> a._size_1_axis(([2, 4, 5], slice(3, 7), [0, 1])) -# None -# >>> a._size_1_axis(([2, 4, 5], slice(0, 1), [0])) -# None -# -# """ -# original_shape = self.original_shape -# if original_shape.count(1): -# return original_shape.index(1) -# -# return -# -# @property -# def aggregated_Units(self): -# """The units of the aggregated data. -# -# .. versionadded:: 3.15.0 -# -# :Returns: -# -# `Units` -# The units of the aggregated data. -# -# """ -# return Units( -# self.get_aggregated_units(), self.get_aggregated_calendar(None) -# ) -# -# def get_aggregated_calendar(self, default=ValueError()): -# """The calendar of the aggregated array. -# -# If the calendar is `None` then the CF default calendar is -# assumed, if applicable. -# -# .. versionadded:: 3.15.0 -# -# :Parameters: -# -# default: optional -# Return the value of the *default* parameter if the -# aggregated calendar has not been set. If set to an -# `Exception` instance then it will be raised instead. -# -# :Returns: -# -# `str` or `None` -# The calendar value. -# -# """ -# calendar = self._get_component("aggregated_calendar", False) -# if calendar is False: -# if default is None: -# return -# -# return self._default( -# default, -# f"{self.__class__.__name__} 'aggregated_calendar' has not " -# "been set", -# ) -# -# return calendar -# -# def get_aggregated_units(self, default=ValueError()): -# """The units of the aggregated array. -# -# If the units are `None` then the aggregated array has no -# defined units. -# -# .. versionadded:: 3.15.0 -# -# .. seealso:: `get_aggregated_calendar` -# -# :Parameters: -# -# default: optional -# Return the value of the *default* parameter if the -# aggregated units have not been set. 
If set to an -# `Exception` instance then it will be raised instead. -# -# :Returns: -# -# `str` or `None` -# The units value. -# -# """ -# units = self._get_component("aggregated_units", False) -# if units is False: -# if default is None: -# return -# -# return self._default( -# default, -# f"{self.__class__.__name__} 'aggregated_units' have not " -# "been set", -# ) -# -# return units diff --git a/cf/data/fragment/netcdf4fragmentarray.py b/cf/data/fragment/netcdf4fragmentarray.py deleted file mode 100644 index ab1b761c47..0000000000 --- a/cf/data/fragment/netcdf4fragmentarray.py +++ /dev/null @@ -1,108 +0,0 @@ -# from ..array.netcdf4array import NetCDF4Array -# from .mixin import FragmentArrayMixin -# -# -# class NetCDF4FragmentArray(FragmentArrayMixin, NetCDF4Array): -# """A netCDF fragment array accessed with `netCDF4`. -# -# .. versionadded:: NEXTVERSION -# -# """ -# -# def __init__( -# self, -# filename=None, -# address=None, -# dtype=None, -# shape=None, -# aggregated_units=False, -# aggregated_calendar=False, -# attributes=None, -# storage_options=None, -# source=None, -# copy=True, -# ): -# """**Initialisation** -# -# :Parameters: -# -# filename: (sequence of `str`), optional -# The names of the netCDF fragment files containing the -# array. -# -# address: (sequence of `str`), optional -# The name of the netCDF variable containing the -# fragment array. Required unless *varid* is set. -# -# dtype: `numpy.dtype`, optional -# The data type of the aggregated array. May be `None` -# if the numpy data-type is not known (which can be the -# case for netCDF string types, for example). This may -# differ from the data type of the netCDF fragment -# variable. -# -# shape: `tuple`, optional -# The shape of the fragment within the aggregated -# array. This may differ from the shape of the netCDF -# fragment variable in that the latter may have fewer -# size 1 dimensions. -# -# units: `str` or `None`, optional -# The units of the fragment data. 
Set to `None` to -# indicate that there are no units. If unset then the -# units will be set during the first `__getitem__` call. -# -# calendar: `str` or `None`, optional -# The calendar of the fragment data. Set to `None` to -# indicate the CF default calendar, if applicable. If -# unset then the calendar will be set during the first -# `__getitem__` call. -# -# {{init attributes: `dict` or `None`, optional}} -# -# If *attributes* is `None`, the default, then the -# attributes will be set from the netCDF variable during -# the first `__getitem__` call. -# -# {{aggregated_units: `str` or `None`, optional}} -# -# {{aggregated_calendar: `str` or `None`, optional}} -# -# {{init storage_options: `dict` or `None`, optional}} -# -# {{init source: optional}} -# -# {{init copy: `bool`, optional}} -# -# """ -# super().__init__( -# filename=filename, -# address=address, -# dtype=dtype, -# shape=shape, -# mask=True, -# attributes=attributes, -# storage_options=storage_options, -# source=source, -# copy=copy, -# ) -# -# if source is not None: -# try: -# aggregated_units = source._get_component( -# "aggregated_units", False -# ) -# except AttributeError: -# aggregated_units = False -# -# try: -# aggregated_calendar = source._get_component( -# "aggregated_calendar", False -# ) -# except AttributeError: -# aggregated_calendar = False -# -# self._set_component("aggregated_units", aggregated_units, copy=False) -# self._set_component( -# "aggregated_calendar", aggregated_calendar, copy=False -# ) diff --git a/cf/data/fragment/netcdffragmentarray.py b/cf/data/fragment/netcdffragmentarray.py deleted file mode 100644 index e701f6b862..0000000000 --- a/cf/data/fragment/netcdffragmentarray.py +++ /dev/null @@ -1,239 +0,0 @@ -# import cfdm -# -# from ..array.abstract import Array -# from ..array.mixin import FileArrayMixin -# from .h5netcdffragmentarray import H5netcdfFragmentArray -# from .mixin import FragmentArrayMixin -# from .netcdf4fragmentarray import NetCDF4FragmentArray -# -# -# 
class NetCDFFragmentArray( -# FragmentArrayMixin, -# cfdm.data.mixin.NetCDFFileMixin, -# FileArrayMixin, -# cfdm.data.mixin.IndexMixin, -# cfdm.data.mixin.FileArrayMixin, -# Array, -# ): -# """A netCDF fragment array. -# -# Access will be with either `netCDF4` or `h5netcdf`. -# -# .. versionadded:: 3.15.0 -# -# """ -# -# def __init__( -# self, -# filename=None, -# address=None, -# dtype=None, -# shape=None, -# aggregated_units=False, -# aggregated_calendar=False, -# attributes=None, -# storage_options=None, -# source=None, -# copy=True, -# ): -# """**Initialisation** -# -# :Parameters: -# -# filename: (sequence of `str`), optional -# The names of the netCDF fragment files containing the -# array. -# -# address: (sequence of `str`), optional -# The name of the netCDF variable containing the -# fragment array. Required unless *varid* is set. -# -# dtype: `numpy.dtype`, optional -# The data type of the aggregated array. May be `None` -# if the numpy data-type is not known (which can be the -# case for netCDF string types, for example). This may -# differ from the data type of the netCDF fragment -# variable. -# -# shape: `tuple`, optional -# The shape of the fragment within the aggregated -# array. This may differ from the shape of the netCDF -# fragment variable in that the latter may have fewer -# size 1 dimensions. -# -# {{init attributes: `dict` or `None`, optional}} -# -# If *attributes* is `None`, the default, then the -# attributes will be set from the netCDF variable during -# the first `__getitem__` call. -# -# .. versionadded:: NEXTVERSION -# -# {{aggregated_units: `str` or `None`, optional}} -# -# {{aggregated_calendar: `str` or `None`, optional}} -# -# {{init storage_options: `dict` or `None`, optional}} -# -# .. versionadded:: NEXTVERSION -# -# {{init source: optional}} -# -# {{init copy: `bool`, optional}} -# -# units: `str` or `None`, optional -# Deprecated at version NEXTVERSION. Use the -# *attributes* parameter instead. 
-# -# calendar: `str` or `None`, optional -# Deprecated at version NEXTVERSION. Use the -# *attributes* parameter instead. -# -# """ -# super().__init__( -# source=source, -# copy=copy, -# ) -# -# if source is not None: -# try: -# shape = source._get_component("shape", None) -# except AttributeError: -# shape = None -# -# try: -# filename = source._get_component("filename", None) -# except AttributeError: -# filename = None -# -# try: -# address = source._get_component("address", None) -# except AttributeError: -# address = None -# -# try: -# dtype = source._get_component("dtype", None) -# except AttributeError: -# dtype = None -# -# try: -# attributes = source._get_component("attributes", None) -# except AttributeError: -# attributes = None -# -# try: -# aggregated_units = source._get_component( -# "aggregated_units", False -# ) -# except AttributeError: -# aggregated_units = False -# -# try: -# aggregated_calendar = source._get_component( -# "aggregated_calendar", False -# ) -# except AttributeError: -# aggregated_calendar = False -# -# try: -# storage_options = source._get_component( -# "storage_options", None -# ) -# except AttributeError: -# storage_options = None -# -# if filename is not None: -# if isinstance(filename, str): -# filename = (filename,) -# else: -# filename = tuple(filename) -# -# self._set_component("filename", filename, copy=False) -# -# if address is not None: -# if isinstance(address, int): -# address = (address,) -# else: -# address = tuple(address) -# -# self._set_component("address", address, copy=False) -# -# if storage_options is not None: -# self._set_component("storage_options", storage_options, copy=False) -# -# self._set_component("shape", shape, copy=False) -# self._set_component("dtype", dtype, copy=False) -# self._set_component("attributes", attributes, copy=False) -# self._set_component("mask", True, copy=False) -# -# self._set_component("aggregated_units", aggregated_units, copy=False) -# self._set_component( -# 
"aggregated_calendar", aggregated_calendar, copy=False -# ) -# -# # By default, close the file after data array access -# self._set_component("close", True, copy=False) -# -# def _get_array(self, index=None): -# """Returns a subspace of the dataset variable. -# -# The method acts as a factory for either a -# `NetCDF4FragmentArray` or a `H5netcdfFragmentArray` class, and -# it is the result of calling `!_get_array` on the newly created -# instance that is returned. -# -# `H5netcdfFragmentArray` will only be used if -# `NetCDF4FragmentArray` returns a `FileNotFoundError` exception. -# -# .. versionadded:: NEXTVERSION -# -# .. seealso:: `__array__`, `index` -# -# :Parameters: -# -# {{index: `tuple` or `None`, optional}} -# -# It is important that there is a distinct value for each -# fragment dimension, which is guaranteed when the -# default of the `index` attribute is being used. -# -# :Returns: -# -# `numpy.ndarray` -# The subspace. -# -# """ -# kwargs = { -# "dtype": self.dtype, -# "shape": self.shape, -# "aggregated_units": self.get_aggregated_units(None), -# "aggregated_calendar": self.get_aggregated_calendar(None), -# "attributes": self.get_attributes(None), -# "copy": False, -# } -# -# # Loop round the files, returning as soon as we find one that -# # is accessible. -# filenames = self.get_filenames() -# for filename, address in zip(filenames, self.get_addresses()): -# kwargs["filename"] = filename -# kwargs["address"] = address -# kwargs["storage_options"] = self.get_storage_options( -# create_endpoint_url=False -# ) -# -# try: -# return NetCDF4FragmentArray(**kwargs)._get_array(index) -# except FileNotFoundError: -# pass -# except Exception: -# return H5netcdfFragmentArray(**kwargs)._get_array(index) -# -# # Still here? 
-# if not filenames: -# raise FileNotFoundError("No fragment files") -# -# if len(filenames) == 1: -# raise FileNotFoundError(f"No such fragment file: {filenames[0]}") -# -# raise FileNotFoundError(f"No such fragment files: {filenames}") diff --git a/cf/domain.py b/cf/domain.py index 32fd33b9c1..5efa6fc541 100644 --- a/cf/domain.py +++ b/cf/domain.py @@ -162,104 +162,6 @@ def add_file_location( return location - # def cfa_clear_file_substitutions( - # self, - # ): - # """Remove all of the CFA-netCDF file name substitutions. - # - # .. versionadded:: 3.15.0 - # - # :Returns: - # - # `dict` - # {{Returns cfa_clear_file_substitutions}} - # - # **Examples** - # - # >>> d.cfa_clear_file_substitutions() - # {} - # - # """ - # out = {} - # for c in self.constructs.filter_by_data(todict=True).values(): - # out.update(c.cfa_clear_file_substitutions()) - # - # return out - # - # def cfa_file_substitutions(self): - # """Return the CFA-netCDF file name substitutions. - # - # .. versionadded:: 3.15.0 - # - # :Returns: - # - # `dict` - # {{Returns cfa_file_substitutions}} - # - # **Examples** - # - # >>> d.cfa_file_substitutions() - # {} - # - # """ - # out = {} - # for c in self.constructs.filter_by_data(todict=True).values(): - # out.update(c.cfa_file_substitutions()) - # - # return out - # - # def cfa_del_file_substitution( - # self, - # base, - # ): - # """Remove a CFA-netCDF file name substitution. - # - # .. versionadded:: 3.15.0 - # - # :Parameters: - # - # base: `str` - # {{cfa base: `str`}} - # - # :Returns: - # - # `dict` - # {{Returns cfa_del_file_substitution}} - # - # **Examples** - # - # >>> f.cfa_del_file_substitution('base') - # - # """ - # for c in self.constructs.filter_by_data(todict=True).values(): - # c.cfa_del_file_substitution( - # base, - # ) - # - # def cfa_update_file_substitutions( - # self, - # substitutions, - # ): - # """Set CFA-netCDF file name substitutions. - # - # .. 
versionadded:: 3.15.0 - # - # :Parameters: - # - # {{cfa substitutions: `dict`}} - # - # :Returns: - # - # `None` - # - # **Examples** - # - # >>> d.cfa_update_file_substitutions({'base': '/data/model'}) - # - # """ - # for c in self.constructs.filter_by_data(todict=True).values(): - # c.cfa_update_file_substitutions(substitutions) - def close(self): """Close all files referenced by the domain construct. diff --git a/cf/field.py b/cf/field.py index b25bc5b1ef..f118e29275 100644 --- a/cf/field.py +++ b/cf/field.py @@ -10695,41 +10695,6 @@ def cumsum( return f - # def file_locations(self, constructs=True): - # """The locations of files containing parts of the data. - # - # Returns the locations of any files that may be required to - # deliver the computed data array. - # - # .. versionadded:: 3.15.0 - # - # .. seealso:: `add_file_location`, `del_file_location` - # - # :Parameters: - # - # constructs: `bool`, optional - # If True (the default) then the file locations from - # metadata constructs are also returned. - # - # :Returns: - # - # `set` - # The unique file locations as absolute paths with no - # trailing path name component separator. - # - # **Examples** - # - # >>> f.file_locations() - # {'/home/data1', 'file:///data2'} - # - # """ - # out = super().file_locations() - # if constructs: - # for c in self.constructs.filter_by_data(todict=True).values(): - # out.update(c.file_locations()) - # - # return out - @_deprecated_kwarg_check("i", version="3.0.0", removed_at="4.0.0") @_inplace_enabled(default=False) def flip(self, axes=None, inplace=False, i=False, **kwargs): @@ -13345,48 +13310,6 @@ def subspace(self): """ return SubspaceField(self) - # def add_file_location( - # self, - # location, - # constructs=True, - # ): - # """Add a new file location in-place. - # - # All data definitions that reference files are additionally - # referenced from the given location. - # - # .. versionadded:: 3.15.0 - # - # .. 
seealso:: `del_file_location`, `file_locations` - # - # :Parameters: - # - # location: `str` - # The new location. - # - # constructs: `bool`, optional - # If True (the default) then metadata constructs also - # have the new file location added to them. - # - # :Returns: - # - # `str` - # The new location as an absolute path with no trailing - # path name component separator. - # - # **Examples** - # - # >>> f.add_file_location('/data/model/') - # '/data/model' - # - # """ - # location = super().add_file_location(location) - # if constructs: - # for c in self.constructs.filter_by_data(todict=True).values(): - # c.add_file_location(location) - # - # return location - def section(self, axes=None, stop=None, min_step=1, **kwargs): """Return a FieldList of m dimensional sections of a Field of n dimensions, where M <= N. diff --git a/cf/mixin/propertiesdata.py b/cf/mixin/propertiesdata.py index 3111209837..9144c6d70a 100644 --- a/cf/mixin/propertiesdata.py +++ b/cf/mixin/propertiesdata.py @@ -1604,14 +1604,6 @@ def units(self): f"{self.__class__.__name__} doesn't have CF property 'units'" ) - # value = getattr(self.Units, "units", None) - # if value is None: - # raise AttributeError( - # f"{self.__class__.__name__} doesn't have CF property 'units'" - # ) - # - # return value - @units.setter def units(self, value): self.Units = Units(value, getattr(self, "calendar", None)) @@ -1626,39 +1618,6 @@ def units(self): self.Units = Units(None, getattr(self, "calendar", None)) -# def add_file_location(self, location): -# """Add a new file location in-place. -# -# All data definitions that reference files are additionally -# referenced from the given location. -# -# .. versionadded:: 3.15.0 -# -# .. seealso:: `del_file_location`, `file_locations` -# -# :Parameters: -# -# location: `str` -# The new location. -# -# :Returns: -# -# `str` -# The new location as an absolute path with no trailing -# path name component separator. 
-# -# **Examples** -# -# >>> d.add_file_location('/data/model/') -# '/data/model' -# -# """ -# data = self.get_data(None, _fill_value=False, _units=False) -# if data is not None: -# return data.add_file_location(location) -# -# return abspath(location).rstrip(sep) - @_deprecated_kwarg_check("i", version="3.0.0", removed_at="4.0.0") @_inplace_enabled(default=False) def mask_invalid(self, inplace=False, i=False): @@ -2577,100 +2536,6 @@ def ceil(self, inplace=False, i=False): delete_props=True, ) - # def cfa_update_file_substitutions(self, substitutions): - # """Set CFA-netCDF file name substitutions. - # - # .. versionadded:: 3.15.0 - # - # :Parameters: - # - # {{cfa substitutions: `dict`}} - # - # :Returns: - # - # `None` - # - # **Examples** - # - # >>> f.cfa_update_file_substitutions({'base', '/data/model'}) - # - # """ - # data = self.get_data(None, _fill_value=False, _units=False) - # if data is not None: - # data.cfa_update_file_substitutions(substitutions) - # - # @_inplace_enabled(default=False) - # def cfa_clear_file_substitutions(self, inplace=False): - # """Remove all of the CFA-netCDF file name substitutions. - # - # .. versionadded:: 3.15.0 - # - # :Parameters: - # - # {{inplace: `bool`, optional}} - # - # :Returns: - # - # `dict` - # {{Returns cfa_clear_file_substitutions}} - # - # **Examples** - # - # >>> f.cfa_clear_file_substitutions() - # {} - # - # """ - # data = self.get_data(None) - # if data is None: - # return {} - # - # return data.cfa_clear_file_substitutions({}) - # - # def cfa_del_file_substitution( - # self, - # base, - # ): - # """Remove a CFA-netCDF file name substitution. - # - # .. 
versionadded:: 3.15.0 - # - # :Parameters: - # - # `dict` - # {{Returns cfa_del_file_substitution}} - # - # **Examples** - # - # >>> f.cfa_del_file_substitution('base') - # - # """ - # data = self.get_data(None, _fill_value=False, _units=False) - # if data is not None: - # data.cfa_del_file_substitution(base) - # - # def cfa_file_substitutions( - # self, - # ): - # """Return the CFA-netCDF file name substitutions. - # - # .. versionadded:: 3.15.0 - # - # :Returns: - # - # `dict` - # {{Returns cfa_file_substitutions}} - # - # **Examples** - # - # >>> g = f.cfa_file_substitutions() - # - # """ - # data = self.get_data(None) - # if data is None: - # return {} - # - # return data.cfa_file_substitutions({}) - def chunk(self, chunksize=None): """Partition the data array. @@ -3066,39 +2931,6 @@ def datum(self, *index): return data.datum(*index) -# def del_file_location(self, location): -# """Remove a file location in-place. -# -# All data definitions that reference files will have references -# to files in the given location removed from them. -# -# .. versionadded:: 3.15.0 -# -# .. seealso:: `add_file_location`, `file_locations` -# -# :Parameters: -# -# location: `str` -# The file location to remove. -# -# :Returns: -# -# `str` -# The removed location as an absolute path with no -# trailing path name component separator. -# -# **Examples** -# -# >>> f.del_file_location('/data/model/') -# '/data/model' -# -# """ -# data = self.get_data(None, _fill_value=False, _units=False) -# if data is not None: -# return data.del_file_location(location) -# -# return abspath(location).rstrip(sep) - @_manage_log_level_via_verbosity def equals( self, @@ -3428,64 +3260,6 @@ def convert_reference_time( calendar_years=calendar_years, ) - def file_locations(self): - """TODOCFA The locations of files containing parts of the data. - - Returns the locations of any files that may be required to - deliver the computed data array. - - .. versionadded:: 3.15.0 - - .. 
seealso:: `add_file_location`, `del_file_location` - - :Returns: - - `set` - The unique file locations as absolute paths with no - trailing path name component separator. - - **Examples** - - >>> d.file_locations() - {'/home/data1', 'file:///data2'} - - """ - _DEPRECATION_ERROR_METHOD( - self, - "file_locations", - "Use method 'file_directories' instead", - version="NEXTVERSION", - removed_at="4.0.0", - ) # pragma: no cover - - def file_directories(self): - """TODOCFA The locations of files containing parts of the data. - - Returns the locations of any files that may be required to - deliver the computed data array. - - .. versionadded:: NEXTVERSION - - .. seealso:: `add_file_location`, `del_file_location` - - :Returns: - - `set` - The unique file locations as absolute paths with no - trailing path name component separator. - - **Examples** - - >>> d.file_locations() - {'/home/data1', 'file:///data2'} - - """ - data = self.get_data(None, _fill_value=False, _units=False) - if data is not None: - return data.file_directories() - - return set() - @_inplace_enabled(default=False) def filled(self, fill_value=None, inplace=False): """Replace masked elements with a fill value. diff --git a/cf/mixin/propertiesdatabounds.py b/cf/mixin/propertiesdatabounds.py index 8fd656af23..124c1b5239 100644 --- a/cf/mixin/propertiesdatabounds.py +++ b/cf/mixin/propertiesdatabounds.py @@ -1150,45 +1150,6 @@ def dtype(self): if data is not None: del data.dtype - def add_file_location(self, location): - """Add a new file location in-place. - - All data definitions that reference files are additionally - referenced from the given location. - - .. versionadded:: 3.15.0 - - .. seealso:: `del_file_location`, `file_locations` - - :Parameters: - - location: `str` - The new location. - - :Returns: - - `str` - The new location as an absolute path with no trailing - path name component separator. 
- - **Examples** - - >>> d.add_file_location('/data/model/') - '/data/model' - - """ - location = super().add_file_location(location) - - bounds = self.get_bounds(None) - if bounds is not None: - bounds.add_file_location(location) - - interior_ring = self.get_interior_ring(None) - if interior_ring is not None: - interior_ring.add_file_location(location) - - return location - @_deprecated_kwarg_check("i", version="3.0.0", removed_at="4.0.0") @_inplace_enabled(default=False) def ceil(self, bounds=True, inplace=False, i=False): @@ -1236,121 +1197,6 @@ def ceil(self, bounds=True, inplace=False, i=False): i=i, ) - # def cfa_clear_file_substitutions( - # self, - # ): - # """Remove all of the CFA-netCDF file name substitutions. - # - # .. versionadded:: 3.15.0 - # - # :Returns: - # - # `dict` - # {{Returns cfa_clear_file_substitutions}} - # - # **Examples** - # - # >>> f.cfa_clear_file_substitutions() - # {} - # - # """ - # out = super().cfa_clear_file_substitutions() - # - # bounds = self.get_bounds(None) - # if bounds is not None: - # out.update(bounds.cfa_clear_file_substitutions()) - # - # interior_ring = self.get_interior_ring(None) - # if interior_ring is not None: - # out.update(interior_ring.cfa_clear_file_substitutions()) - # - # return out - # - # def cfa_del_file_substitution(self, base): - # """Remove a CFA-netCDF file name substitution. - # - # .. versionadded:: 3.15.0 - # - # :Parameters: - # - # {{cfa base: `str`}} - # - # :Returns: - # - # `dict` - # {{Returns cfa_del_file_substitution}} - # - # **Examples** - # - # >>> c.cfa_del_file_substitution('base') - # - # """ - # super().cfa_del_file_substitution(base) - # - # bounds = self.get_bounds(None) - # if bounds is not None: - # bounds.cfa_del_file_substitution(base) - # - # interior_ring = self.get_interior_ring(None) - # if interior_ring is not None: - # interior_ring.cfa_del_file_substitution(base) - # - # def cfa_file_substitutions(self): - # """Return the CFA-netCDF file name substitutions. - # - # .. 
versionadded:: 3.15.0 - # - # :Returns: - # - # `dict` - # {{Returns cfa_file_substitutions}} - # - # **Examples** - # - # >>> c.cfa_file_substitutions() - # {} - # - # """ - # out = super().cfa_file_substitutions() - # - # bounds = self.get_bounds(None) - # if bounds is not None: - # out.update(bounds.cfa_file_substitutions({})) - # - # interior_ring = self.get_interior_ring(None) - # if interior_ring is not None: - # out.update(interior_ring.cfa_file_substitutions({})) - # - # return out - # - # def cfa_update_file_substitutions(self, substitutions): - # """Set CFA-netCDF file name substitutions. - # - # .. versionadded:: 3.15.0 - # - # :Parameters: - # - # {{cfa substitutions: `dict`}} - # - # :Returns: - # - # `None` - # - # **Examples** - # - # >>> c.cfa_add_file_substitutions({'base', '/data/model'}) - # - # """ - # super().cfa_update_file_substitutions(substitutions) - # - # bounds = self.get_bounds(None) - # if bounds is not None: - # bounds.cfa_update_file_substitutions(substitutions) - # - # interior_ring = self.get_interior_ring(None) - # if interior_ring is not None: - # interior_ring.cfa_update_file_substitutions(substitutions) - def chunk(self, chunksize=None): """Partition the data array. @@ -2044,40 +1890,6 @@ def get_property(self, prop, default=ValueError(), bounds=False): return super().get_property(prop, default) - def file_locations(self): - """The locations of files containing parts of the data. - - Returns the locations of any files that may be required to - deliver the computed data array. - - .. versionadded:: 3.15.0 - - .. seealso:: `add_file_location`, `del_file_location` - - :Returns: - - `set` - The unique file locations as absolute paths with no - trailing path name component separator. 
- - **Examples** - - >>> d.file_locations() - {'/home/data1', 'file:///data2'} - - """ - out = super().file_locations() - - bounds = self.get_bounds(None) - if bounds is not None: - out.update(bounds.file_locations()) - - interior_ring = self.get_interior_ring(None) - if interior_ring is not None: - out.update(interior_ring.file_locations()) - - return out - @_inplace_enabled(default=False) def filled(self, fill_value=None, bounds=True, inplace=False): """Replace masked elements with a fill value. @@ -2184,45 +1996,6 @@ def flatten(self, axes=None, inplace=False): return v - def del_file_location(self, location): - """Remove a file location in-place. - - All data definitions that reference files will have references - to files in the given location removed from them. - - .. versionadded:: 3.15.0 - - .. seealso:: `add_file_location`, `file_locations` - - :Parameters: - - location: `str` - The file location to remove. - - :Returns: - - `str` - The removed location as an absolute path with no - trailing path name component separator. 
- - **Examples** - - >>> c.del_file_location('/data/model/') - '/data/model' - - """ - location = super().del_file_location(location) - - bounds = self.get_bounds(None) - if bounds is not None: - bounds.del_file_location(location) - - interior_ring = self.get_interior_ring(None) - if interior_ring is not None: - interior_ring.del_file_location(location) - - return location - @_deprecated_kwarg_check("i", version="3.0.0", removed_at="4.0.0") @_inplace_enabled(default=False) def floor(self, bounds=True, inplace=False, i=False): diff --git a/cf/mixin2/__init__.py b/cf/mixin2/__init__.py index 4531d515c3..0aca8bbb13 100644 --- a/cf/mixin2/__init__.py +++ b/cf/mixin2/__init__.py @@ -1,2 +1 @@ -# from .cfanetcdf import CFANetCDF from .container import Container diff --git a/cf/mixin2/cfanetcdf.py b/cf/mixin2/cfanetcdf.py deleted file mode 100644 index 80575ae8cd..0000000000 --- a/cf/mixin2/cfanetcdf.py +++ /dev/null @@ -1,509 +0,0 @@ -# """This class is not in the cf.mixin package because it needs to be -# imported by cf.Data, and some of the other mixin classes in cf.mixin -# themsleves import cf.Data, which would lead to a circular import -# situation. -# -# """ -# -# from re import split -# -# from cfdm.mixin import NetCDFMixin -# -# -# class CFANetCDF(NetCDFMixin): -# """Mixin class for CFA-netCDF. -# -# .. versionadded:: 3.15.0 -# -# """ -# -# -# -# def cfa_del_aggregated_data(self): -# """Remove the CFA-netCDF aggregation instruction terms. -# -# The aggregation instructions are stored in the -# ``aggregation_data`` attribute of a CFA-netCDF aggregation -# variable. -# -# .. versionadded:: 3.15.0 -# -# .. seealso:: `cfa_get_aggregated_data`, -# `cfa_has_aggregated_data`, -# `cfa_set_aggregated_data` -# -# :Returns: -# -# `dict` -# The removed CFA-netCDF aggregation instruction terms. -# -# **Examples** -# -# >>> f.cfa_set_aggregated_data( -# ... {'location': 'cfa_location', -# ... 'file': 'cfa_file', -# ... 'address': 'cfa_address', -# ... 
'format': 'cfa_format', -# ... 'tracking_id': 'tracking_id'} -# ... ) -# >>> f.cfa_has_aggregated_data() -# True -# >>> f.cfa_get_aggregated_data() -# {'location': 'cfa_location', -# 'file': 'cfa_file', -# 'address': 'cfa_address', -# 'format': 'c ', -# 'tracking_id': 'tracking_id'} -# >>> f.cfa_del_aggregated_data() -# {'location': 'cfa_location', -# 'file': 'cfa_file', -# 'address': 'cfa_address', -# 'format': 'cfa_format', -# 'tracking_id': 'tracking_id'} -# >>> f.cfa_has_aggregated_data() -# False -# >>> f.cfa_del_aggregated_data() -# {} -# >>> f.cfa_get_aggregated_data() -# {} -# -# """ -# return self._nc_del("cfa_aggregated_data", {}).copy() -# -# def cfa_get_aggregated_data(self): -# """Return the CFA-netCDF aggregation instruction terms. -# -# The aggregation instructions are stored in the -# ``aggregation_data`` attribute of a CFA-netCDF aggregation -# variable. -# -# .. versionadded:: 3.15.0 -# -# .. seealso:: `cfa_del_aggregated_data`, -# `cfa_has_aggregated_data`, -# `cfa_set_aggregated_data` -# -# :Returns: -# -# `dict` -# The aggregation instruction terms and their -# corresponding netCDF variable names in a dictionary -# whose key/value pairs are the aggregation instruction -# terms and their corresponding variable names. -# -# **Examples** -# -# >>> f.cfa_set_aggregated_data( -# ... {'location': 'cfa_location', -# ... 'file': 'cfa_file', -# ... 'address': 'cfa_address', -# ... 'format': 'cfa_format', -# ... 'tracking_id': 'tracking_id'} -# ... 
) -# >>> f.cfa_has_aggregated_data() -# True -# >>> f.cfa_get_aggregated_data() -# {'location': 'cfa_location', -# 'file': 'cfa_file', -# 'address': 'cfa_address', -# 'format': 'cfa_format', -# 'tracking_id': 'tracking_id'} -# >>> f.cfa_del_aggregated_data() -# {'location': 'cfa_location', -# 'file': 'cfa_file', -# 'address': 'cfa_address', -# 'format': 'cfa_format', -# 'tracking_id': 'tracking_id'} -# >>> f.cfa_has_aggregated_data() -# False -# >>> f.cfa_del_aggregated_data() -# {} -# >>> f.cfa_get_aggregated_data() -# {} -# -# """ -# out = self._nc_get("cfa_aggregated_data", default=None) -# if out is not None: -# return out.copy() -# -# return {} -# -# def cfa_has_aggregated_data(self): -# """Whether any CFA-netCDF aggregation instruction terms have been set. -# -# The aggregation instructions are stored in the -# ``aggregation_data`` attribute of a CFA-netCDF aggregation -# variable. -# -# .. versionadded:: 3.15.0 -# -# .. seealso:: `cfa_del_aggregated_data`, -# `cfa_get_aggregated_data`, -# `cfa_set_aggregated_data` -# -# :Returns: -# -# `bool` -# `True` if the CFA-netCDF aggregation instruction terms -# have been set, otherwise `False`. -# -# **Examples** -# -# >>> f.cfa_set_aggregated_data( -# ... {'location': 'cfa_location', -# ... 'file': 'cfa_file', -# ... 'address': 'cfa_address', -# ... 'format': 'cfa_format', -# ... 'tracking_id': 'tracking_id'} -# ... 
) -# >>> f.cfa_has_aggregated_data() -# True -# >>> f.cfa_get_aggregated_data() -# {'location': 'cfa_location', -# 'file': 'cfa_file', -# 'address': 'cfa_address', -# 'format': 'cfa_format', -# 'tracking_id': 'tracking_id'} -# >>> f.cfa_del_aggregated_data() -# {'location': 'cfa_location', -# 'file': 'cfa_file', -# 'address': 'cfa_address', -# 'format': 'cfa_format', -# 'tracking_id': 'tracking_id'} -# >>> f.cfa_has_aggregated_data() -# False -# >>> f.cfa_del_aggregated_data() -# {} -# >>> f.cfa_get_aggregated_data() -# {} -# -# """ -# return self._nc_has("cfa_aggregated_data") -# -# def cfa_set_aggregated_data(self, value): -# """Set the CFA-netCDF aggregation instruction terms. -# -# The aggregation instructions are stored in the -# ``aggregation_data`` attribute of a CFA-netCDF aggregation -# variable. -# -# If there are any ``/`` (slash) characters in the netCDF -# variable names then these act as delimiters for a group -# hierarchy. By default, or if the name starts with a ``/`` -# character and contains no others, the name is assumed to be in -# the root group. -# -# .. versionadded:: 3.15.0 -# -# .. seealso:: `cfa_del_aggregated_data`, -# `cfa_get_aggregated_data`, -# `cfa_has_aggregated_data` -# -# :Parameters: -# -# value: `str` or `dict` -# The aggregation instruction terms and their -# corresponding netCDF variable names. Either a -# CFA-netCDF-compliant string value of an -# ``aggregated_data`` attribute, or a dictionary whose -# key/value pairs are the aggregation instruction terms -# and their corresponding variable names. -# -# :Returns: -# -# `None` -# -# **Examples** -# -# >>> f.cfa_set_aggregated_data( -# ... {'location': 'cfa_location', -# ... 'file': 'cfa_file', -# ... 'address': 'cfa_address', -# ... 'format': 'cfa_format', -# ... 'tracking_id': 'tracking_id'} -# ... 
) -# >>> f.cfa_has_aggregated_data() -# True -# >>> f.cfa_get_aggregated_data() -# {'location': 'cfa_location', -# 'file': 'cfa_file', -# 'address': 'cfa_address', -# 'format': 'cfa_format', -# 'tracking_id': 'tracking_id'} -# >>> f.cfa_del_aggregated_data() -# {'location': 'cfa_location', -# 'file': 'cfa_file', -# 'address': 'cfa_address', -# 'format': 'cfa_format', -# 'tracking_id': 'tracking_id'} -# >>> f.cfa_has_aggregated_data() -# False -# >>> f.cfa_del_aggregated_data() -# {} -# >>> f.cfa_get_aggregated_data() -# {} -# -# """ -# if value: -# if isinstance(value, str): -# v = split("\s+", value) -# value = {term[:-1]: var for term, var in zip(v[::2], v[1::2])} -# else: -# # 'value' is a dictionary -# value = value.copy() -# -# self._nc_set("cfa_aggregated_data", value) -# -# def cfa_clear_file_substitutions(self): -# """Remove all of the CFA-netCDF file name substitutions. -# -# .. versionadded:: 3.15.0 -# -# .. seealso:: `cfa_del_file_substitution`, -# `cfa_file_substitutions`, -# `cfa_has_file_substitutions`, -# `cfa_update_file_substitutions` -# -# :Returns: -# -# `dict` -# {{Returns cfa_clear_file_substitutions}} -# -# **Examples** -# -# >>> f.cfa_update_file_substitutions({'base': 'file:///data/'}) -# >>> f.cfa_has_file_substitutions() -# True -# >>> f.cfa_file_substitutions() -# {'${base}': 'file:///data/'} -# >>> f.cfa_update_file_substitutions({'${base2}': '/home/data/'}) -# >>> f.cfa_file_substitutions() -# {'${base}': 'file:///data/', '${base2}': '/home/data/'} -# >>> f.cfa_update_file_substitutions({'${base}': '/new/location/'}) -# >>> f.cfa_file_substitutions() -# {'${base}': '/new/location/', '${base2}': '/home/data/'} -# >>> f.cfa_del_file_substitution('${base}') -# {'${base}': '/new/location/'} -# >>> f.cfa_clear_file_substitutions() -# {'${base2}': '/home/data/'} -# >>> f.cfa_has_file_substitutions() -# False -# >>> f.cfa_file_substitutions() -# {} -# >>> f.cfa_clear_file_substitutions() -# {} -# >>> print(f.cfa_del_file_substitution('base', 
None)) -# None -# -# """ -# return self._nc_del("cfa_file_substitutions", {}).copy() -# -# def cfa_del_file_substitution(self, base): -# """Remove a CFA-netCDF file name substitution. -# -# .. versionadded:: 3.15.0 -# -# .. seealso:: `cfa_clear_file_substitutions`, -# `cfa_file_substitutions`, -# `cfa_has_file_substitutions`, -# `cfa_update_file_substitutions` -# -# :Parameters: -# -# {{cfa base: `str`}} -# -# :Returns: -# -# `dict` -# {{Returns cfa_del_file_substitution}} -# -# **Examples** -# -# >>> f.cfa_update_file_substitutions({'base': 'file:///data/'}) -# >>> f.cfa_has_file_substitutions() -# True -# >>> f.cfa_file_substitutions() -# {'${base}': 'file:///data/'} -# >>> f.cfa_update_file_substitutions({'${base2}': '/home/data/'}) -# >>> f.cfa_file_substitutions() -# {'${base}': 'file:///data/', '${base2}': '/home/data/'} -# >>> f.cfa_update_file_substitutions({'${base}': '/new/location/'}) -# >>> f.cfa_file_substitutions() -# {'${base}': '/new/location/', '${base2}': '/home/data/'} -# >>> f.cfa_del_file_substitution('${base}') -# {'${base}': '/new/location/'} -# >>> f.cfa_clear_file_substitutions() -# {'${base2}': '/home/data/'} -# >>> f.cfa_has_file_substitutions() -# False -# >>> f.cfa_file_substitutions() -# {} -# >>> f.cfa_clear_file_substitutions() -# {} -# >>> print(f.cfa_del_file_substitution('base')) -# {} -# -# """ -# if not (base.startswith("${") and base.endswith("}")): -# base = f"${{{base}}}" -# -# subs = self.cfa_file_substitutions() -# if base not in subs: -# return {} -# -# out = {base: subs.pop(base)} -# if subs: -# self._nc_set("cfa_file_substitutions", subs) -# else: -# self._nc_del("cfa_file_substitutions", None) -# -# return out -# -# def cfa_file_substitutions(self): -# """Return the CFA-netCDF file name substitutions. -# -# .. versionadded:: 3.15.0 -# -# .. 
seealso:: `cfa_clear_file_substitutions`, -# `cfa_del_file_substitution`, -# `cfa_has_file_substitutions`, -# `cfa_update_file_substitutions` -# :Returns: -# -# `dict` -# The CFA-netCDF file name substitutions. -# -# **Examples** -# -# >>> f.cfa_update_file_substitutions({'base': 'file:///data/'}) -# >>> f.cfa_has_file_substitutions() -# True -# >>> f.cfa_file_substitutions() -# {'${base}': 'file:///data/'} -# >>> f.cfa_update_file_substitutions({'${base2}': '/home/data/'}) -# >>> f.cfa_file_substitutions() -# {'${base}': 'file:///data/', '${base2}': '/home/data/'} -# >>> f.cfa_update_file_substitutions({'${base}': '/new/location/'}) -# >>> f.cfa_file_substitutions() -# {'${base}': '/new/location/', '${base2}': '/home/data/'} -# >>> f.cfa_del_file_substitution('${base}') -# {'${base}': '/new/location/'} -# >>> f.cfa_clear_file_substitutions() -# {'${base2}': '/home/data/'} -# >>> f.cfa_has_file_substitutions() -# False -# >>> f.cfa_file_substitutions() -# {} -# >>> f.cfa_clear_file_substitutions() -# {} -# >>> print(f.cfa_del_file_substitution('base', None)) -# None -# -# """ -# out = self._nc_get("cfa_file_substitutions", default=None) -# if out is not None: -# return out.copy() -# -# return {} -# -# def cfa_has_file_substitutions(self): -# """Whether any CFA-netCDF file name substitutions have been set. -# -# .. versionadded:: 3.15.0 -# -# .. seealso:: `cfa_clear_file_substitutions`, -# `cfa_del_file_substitution`, -# `cfa_file_substitutions`, -# `cfa_update_file_substitutions` -# -# :Returns: -# -# `bool` -# `True` if any CFA-netCDF file name substitutions have -# been set, otherwise `False`. 
-# -# **Examples** -# -# >>> f.cfa_update_file_substitutions({'base': 'file:///data/'}) -# >>> f.cfa_has_file_substitutions() -# True -# >>> f.cfa_file_substitutions() -# {'${base}': 'file:///data/'} -# >>> f.cfa_update_file_substitutions({'${base2}': '/home/data/'}) -# >>> f.cfa_file_substitutions() -# {'${base}': 'file:///data/', '${base2}': '/home/data/'} -# >>> f.cfa_update_file_substitutions({'${base}': '/new/location/'}) -# >>> f.cfa_file_substitutions() -# {'${base}': '/new/location/', '${base2}': '/home/data/'} -# >>> f.cfa_del_file_substitution('${base}') -# {'${base}': '/new/location/'} -# >>> f.cfa_clear_file_substitutions() -# {'${base2}': '/home/data/'} -# >>> f.cfa_has_file_substitutions() -# False -# >>> f.cfa_file_substitutions() -# {} -# >>> f.cfa_clear_file_substitutions() -# {} -# >>> print(f.cfa_del_file_substitution('base', None)) -# None -# -# """ -# return self._nc_has("cfa_file_substitutions") -# -# def cfa_update_file_substitutions(self, substitutions): -# """Set CFA-netCDF file name substitutions. -# -# .. versionadded:: 3.15.0 -# -# .. 
seealso:: `cfa_clear_file_substitutions`, -# `cfa_del_file_substitution`, -# `cfa_file_substitutions`, -# `cfa_has_file_substitutions` -# -# :Parameters: -# -# {{cfa substitutions: `dict`}} -# -# :Returns: -# -# `None` -# -# **Examples** -# -# >>> f.cfa_update_file_substitutions({'base': 'file:///data/'}) -# >>> f.cfa_has_file_substitutions() -# True -# >>> f.cfa_file_substitutions() -# {'${base}': 'file:///data/'} -# >>> f.cfa_update_file_substitutions({'${base2}': '/home/data/'}) -# >>> f.cfa_file_substitutions() -# {'${base}': 'file:///data/', '${base2}': '/home/data/'} -# >>> f.cfa_update_file_substitutions({'${base}': '/new/location/'}) -# >>> f.cfa_file_substitutions() -# {'${base}': '/new/location/', '${base2}': '/home/data/'} -# >>> f.cfa_del_file_substitution('${base}') -# {'${base}': '/new/location/'} -# >>> f.cfa_clear_file_substitutions() -# {'${base2}': '/home/data/'} -# >>> f.cfa_has_file_substitutions() -# False -# >>> f.cfa_file_substitutions() -# {} -# >>> f.cfa_clear_file_substitutions() -# {} -# >>> print(f.cfa_del_file_substitution('base', None)) -# None -# -# """ -# if not substitutions: -# return -# -# substitutions = substitutions.copy() -# for base, sub in tuple(substitutions.items()): -# if not (base.startswith("${") and base.endswith("}")): -# substitutions[f"${{{base}}}"] = substitutions.pop(base) -# -# subs = self.cfa_file_substitutions() -# subs.update(substitutions) -# self._nc_set("cfa_file_substitutions", subs) diff --git a/cf/read_write/netcdf/__init__.py b/cf/read_write/netcdf/__init__.py index 94adcca6b6..b443940c43 100644 --- a/cf/read_write/netcdf/__init__.py +++ b/cf/read_write/netcdf/__init__.py @@ -1,2 +1 @@ -#from .netcdfread import NetCDFRead from .netcdfwrite import NetCDFWrite diff --git a/cf/read_write/netcdf/netcdfread.py b/cf/read_write/netcdf/netcdfread.py deleted file mode 100644 index 8e343fd72b..0000000000 --- a/cf/read_write/netcdf/netcdfread.py +++ /dev/null @@ -1,1006 +0,0 @@ -#import cfdm -# -## from 
packaging.version import Version -# -# -#class NetCDFRead(cfdm.read_write.netcdf.NetCDFRead): -# """A container for instantiating Fields from a netCDF dataset. -# -# .. versionadded:: 3.0.0 -# -# """ -# -# -# def _ncdimensions(self, ncvar, ncdimensions=None, parent_ncvar=None): -# """Return a list of the netCDF dimensions corresponding to a -# netCDF variable. -# -# If the variable has been compressed then the *implied -# uncompressed* dimensions are returned. -# -# For a CFA variable, the netCDF dimensions are taken from the -# 'aggregated_dimensions' netCDF attribute. -# -# .. versionadded:: 3.0.0 -# -# :Parameters: -# -# ncvar: `str` -# The netCDF variable name. -# -# ncdimensions: sequence of `str`, optional -# Use these netCDF dimensions, rather than retrieving them -# from the netCDF variable itself. This allows the -# dimensions of a domain variable to be parsed. Note that -# this only parameter only needs to be used once because the -# parsed domain dimensions are automatically stored in -# `self.read_var['domain_ncdimensions'][ncvar]`. -# -# .. versionadded:: 3.11.0 -# -# parent_ncvar: `str`, optional -# TODO -# -# .. versionadded:: TODO -# -# :Returns: -# -# `list` -# The netCDF dimension names spanned by the netCDF variable. -# -# **Examples** -# -# >>> n._ncdimensions('humidity') -# ['time', 'lat', 'lon'] -# -# For a variable compressed by gathering: -# -# dimensions: -# lat=73; -# lon=96; -# landpoint=2381; -# depth=4; -# variables: -# int landpoint(landpoint); -# landpoint:compress="lat lon"; -# float landsoilt(depth,landpoint); -# landsoilt:long_name="soil temperature"; -# landsoilt:units="K"; -# -# we would have -# -# >>> n._ncdimensions('landsoilt') -# ['depth', 'lat', 'lon'] -# -# """ -# -# if not self._is_cfa_variable(ncvar): -# return super()._ncdimensions( -# ncvar, ncdimensions=ncdimensions, parent_ncvar=parent_ncvar -# ) -# -# # Still here? Then we have a CFA variable. 
-# ncdimensions = self.read_vars["variable_attributes"][ncvar][ -# "aggregated_dimensions" -# ].split() -# -# return list(map(str, ncdimensions)) -# -# def _get_domain_axes(self, ncvar, allow_external=False, parent_ncvar=None): -# """Return the domain axis identifiers that correspond to a -# netCDF variable's netCDF dimensions. -# -# For a CFA variable, the netCDF dimensions are taken from the -# 'aggregated_dimensions' netCDF attribute. -# -# :Parameter: -# -# ncvar: `str` -# The netCDF variable name. -# -# allow_external: `bool` -# If `True` and *ncvar* is an external variable then return an -# empty list. -# -# parent_ncvar: `str`, optional -# TODO -# -# .. versionadded:: TODO -# -# :Returns: -# -# `list` -# -# **Examples** -# -# >>> r._get_domain_axes('areacello') -# ['domainaxis0', 'domainaxis1'] -# -# >>> r._get_domain_axes('areacello', allow_external=True) -# [] -# -# """ -# if not self._is_cfa_variable(ncvar): -# return super()._get_domain_axes( -# ncvar=ncvar, -# allow_external=allow_external, -# parent_ncvar=parent_ncvar, -# ) -# -# # ------------------------------------------------------------ -# # Still here? Then we have a CFA-netCDF variable. -# # ------------------------------------------------------------ -# g = self.read_vars -# -# ncdimensions = g["variable_attributes"][ncvar][ -# "aggregated_dimensions" -# ].split() -# -# ncdim_to_axis = g["ncdim_to_axis"] -# axes = [ -# ncdim_to_axis[ncdim] -# for ncdim in ncdimensions -# if ncdim in ncdim_to_axis -# ] -# -# return axes -# -# def _create_data( -# self, -# ncvar, -# construct=None, -# unpacked_dtype=False, -# uncompress_override=None, -# parent_ncvar=None, -# coord_ncvar=None, -# cfa_term=None, -# compression_index=False, -# ): -# """Create data for a netCDF or CFA-netCDF variable. -# -# .. versionadded:: 3.0.0 -# -# :Parameters: -# -# ncvar: `str` -# The name of the netCDF variable that contains the -# data. See the *cfa_term* parameter. 
-# -# construct: optional -# -# unpacked_dtype: `False` or `numpy.dtype`, optional -# -# uncompress_override: `bool`, optional -# -# coord_ncvar: `str`, optional -# -# cfa_term: `dict`, optional -# The name of a non-standard aggregation instruction -# term from which to create the data. If set then -# *ncvar* must be the value of the term in the -# ``aggregation_data`` attribute. -# -# .. versionadded:: 3.15.0 -# -# compression_index: `bool`, optional -# True if the data being created are compression -# indices. -# -# .. versionadded:: 3.15.2 -# -# :Returns: -# -# `Data` -# -# """ -# if not cfa_term and not self._is_cfa_variable(ncvar): -# # Create data for a normal netCDF variable -# data = super()._create_data( -# ncvar=ncvar, -# construct=construct, -# unpacked_dtype=unpacked_dtype, -# uncompress_override=uncompress_override, -# parent_ncvar=parent_ncvar, -# coord_ncvar=coord_ncvar, -# ) -# -# # Set the CFA write status to True when there is exactly -# # one dask chunk -# if data.npartitions == 1: -# data._cfa_set_write(True) -# -# # if ( -# # not compression_index -# # and self.read_vars.get("cache") -# # and self.implementation.get_construct_type(construct) -# # != "field" -# # ): -# # # Only cache values from non-field data and -# # # non-compression-index data, on the assumptions that: -# # # -# # # a) Field data is, in general, so large that finding -# # # the cached values takes too long. -# # # -# # # b) Cached values are never really required for -# # # compression index data. -# # self._cache_data_elements(data, ncvar) -# -# return data -# -# # ------------------------------------------------------------ -# # Still here? 
Create data for a CFA variable -# # ------------------------------------------------------------ -# if construct is not None: -# # Remove the aggregation attributes from the construct -# self.implementation.del_property( -# construct, "aggregated_dimensions", None -# ) -# aggregated_data = self.implementation.del_property( -# construct, "aggregated_data", None -# ) -# else: -# aggregated_data = None -# -# if cfa_term: -# term, term_ncvar = tuple(cfa_term.items())[0] -# cfa_array, kwargs = self._create_cfanetcdfarray_term( -# ncvar, term, term_ncvar -# ) -# else: -# cfa_array, kwargs = self._create_cfanetcdfarray( -# ncvar, -# unpacked_dtype=unpacked_dtype, -# coord_ncvar=coord_ncvar, -# ) -# -# attributes = kwargs["attributes"] -# data = self._create_Data( -# cfa_array, -# ncvar, -# units=attributes.get("units"), -# calendar=attributes.get("calendar"), -# ) -# -# # Note: We don't cache elements from CFA variables, because -# # the data are in fragment files which have not been -# # opened and may not not even be openable (such as could -# # be the case if a fragment file was on tape storage). -# -# # Set the CFA write status to True iff each non-aggregated -# # axis has exactly one dask storage chunk -# if cfa_term: -# data._cfa_set_term(True) -# else: -# cfa_write = True -# for n, numblocks in zip( -# cfa_array.get_fragment_shape(), data.numblocks -# ): -# if n == 1 and numblocks > 1: -# # Note: 'n == 1' is True for non-aggregated axes -# cfa_write = False -# break -# -# data._cfa_set_write(cfa_write) -# -# # Store the 'aggregated_data' attribute -# if aggregated_data: -# data.cfa_set_aggregated_data(aggregated_data) -# -# # Store the file substitutions -# data.cfa_update_file_substitutions(kwargs.get("substitutions")) -# -# return data -# -# def _is_cfa_variable(self, ncvar): -# """Return True if *ncvar* is a CFA aggregated variable. -# -# .. versionadded:: 3.14.0 -# -# :Parameters: -# -# ncvar: `str` -# The name of the netCDF variable. 
-# -# :Returns: -# -# `bool` -# Whether or not *ncvar* is a CFA variable. -# -# """ -# g = self.read_vars -# return ( -# g["cfa"] -# and ncvar in g["cfa_aggregated_data"] -# and ncvar not in g["external_variables"] -# ) -# -# def _create_Data( -# self, -# array, -# ncvar, -# units=None, -# calendar=None, -# ncdimensions=(), -# **kwargs, -# ): -# """Create a Data object from a netCDF variable. -# -# .. versionadded:: 3.0.0 -# -# :Parameters: -# -# array: `Array` -# The file array. -# -# ncvar: `str` -# The netCDF variable containing the array. -# -# units: `str`, optional -# The units of *array*. By default, or if `None`, it is -# assumed that there are no units. -# -# calendar: `str`, optional -# The calendar of *array*. By default, or if `None`, it is -# assumed that there is no calendar. -# -# ncdimensions: sequence of `str`, optional -# The netCDF dimensions spanned by the array. -# -# .. versionadded:: 3.14.0 -# -# kwargs: optional -# Extra parameters to pass to the initialisation of the -# returned `Data` object. -# -# :Returns: -# -# `Data` -# -# """ -# if array.dtype is None: -# # The array is based on a netCDF VLEN variable, and -# # therefore has unknown data type. To find the correct -# # data type (e.g. "=1) netCDF string type variable comes out -# # as a numpy object array, so convert it to numpy -# # string array. -# array = array.astype("U", copy=False) -# # NetCDF4 doesn't auto-mask VLEN variables -# array = np.ma.where(array == "", np.ma.masked, array) -# -# # Parse dask chunks -# chunks = self._dask_chunks(array, ncvar, compressed) -# -# data = super()._create_Data( -# array, -# ncvar, -# units=units, -# calendar=calendar, -# chunks=chunks, -# **kwargs, -# ) -# -# return data -# -# def _customise_read_vars(self): -# """Customise the read parameters. -# -# Take the opportunity to apply CFA updates to -# `read_vars['variable_dimensions']` and -# `read_vars['do_not_create_field']`. -# -# .. 
versionadded:: 3.0.0 -# -# """ -# super()._customise_read_vars() -# g = self.read_vars -# -# if not g["cfa"]: -# return -# -# g["cfa_aggregated_data"] = {} -# g["cfa_aggregation_instructions"] = {} -# g["cfa_file_substitutions"] = {} -# -# # ------------------------------------------------------------ -# # Still here? Then this is a CFA-netCDF file -# # ------------------------------------------------------------ -# if g["CFA_version"] < Version("0.6.2"): -# raise ValueError( -# f"Can't read file {g['filename']} that uses obsolete " -# f"CFA conventions version CFA-{g['CFA_version']}. " -# "(Note that cf version 3.13.1 can be used to read and " -# "write CFA-0.4 files.)" -# ) -# -# # Get the directory of the CFA-netCDF file being read -# from os.path import abspath -# from pathlib import PurePath -# -# g["cfa_dir"] = PurePath(abspath(g["filename"])).parent -# -# # Process the aggregation instruction variables, and the -# # aggregated dimensions. -# dimensions = g["variable_dimensions"] -# attributes = g["variable_attributes"] -# -# for ncvar, attributes in attributes.items(): -# if "aggregated_dimensions" not in attributes: -# # This is not an aggregated variable -# continue -# -# # Set the aggregated variable's dimensions as its -# # aggregated dimensions -# ncdimensions = attributes["aggregated_dimensions"].split() -# dimensions[ncvar] = tuple(map(str, ncdimensions)) -# -# # Do not create fields/domains from aggregation -# # instruction variables -# parsed_aggregated_data = self._cfa_parse_aggregated_data( -# ncvar, attributes.get("aggregated_data") -# ) -# for term_ncvar in parsed_aggregated_data.values(): -# g["do_not_create_field"].add(term_ncvar) -# -# def _cache_data_elements(self, data, ncvar): -# """Cache selected element values. -# -# Updates *data* in-place to store its first, second, -# penultimate, and last element values (as appropriate). -# -# These values are used by, amongst other things, -# `cf.Data.equals`, `cf.aggregate` and for inspection. 
-# -# Doing this here is quite cheap because only the individual -# elements are read from the already-open file, as opposed to -# being retrieved from *data* (which would require a whole dask -# chunk to be read to get each single value). -# -# However, empirical evidence shows that using netCDF4 to access -# the first and last elements of a large array on disk -# (e.g. shape (1, 75, 1207, 1442)) is slow (e.g. ~2 seconds) and -# doesn't scale well with array size (i.e. it takes -# disproportionally longer for larger arrays). Such arrays are -# usually in field constructs, for which `cf.aggregate` does not -# need to know any array values, so this method should be used -# with caution, if at all, on field construct data. -# -# .. versionadded:: 3.14.0 -# -# :Parameters: -# -# data: `Data` -# The data to be updated with its cached values. -# -# ncvar: `str` -# The name of the netCDF variable that contains the -# data. -# -# :Returns: -# -# `None` -# -# """ -# -# if data.data.get_compression_type(): -# # Don't get cached elements from arrays compressed by -# # convention, as they'll likely be wrong. -# return -# -# g = self.read_vars -# -# # Get the netCDF4.Variable for the data -# if g["has_groups"]: -# group, name = self._netCDF4_group( -# g["variable_grouped_dataset"][ncvar], ncvar -# ) -# variable = group.variables.get(name) -# else: -# variable = g["variables"].get(ncvar) -# -# # Get the required element values -# size = data.size -# ndim = data.ndim -# -# char = False -# if variable.ndim == ndim + 1: -# dtype = variable.dtype -# if dtype is not str and dtype.kind in "SU": -# # This variable is a netCDF classic style char array -# # with a trailing dimension that needs to be collapsed -# char = True -# -# if ndim == 1: -# # Also cache the second element for 1-d data, on the -# # assumption that they may well be dimension coordinate -# # data. -# if size == 1: -# indices = (0, -1) -# value = variable[...] 
-# values = (value, value) -# elif size == 2: -# indices = (0, 1, -1) -# value = variable[-1:] -# values = (variable[:1], value, value) -# else: -# indices = (0, 1, -1) -# values = (variable[:1], variable[1:2], variable[-1:]) -# elif ndim == 2 and data.shape[-1] == 2: -# # Assume that 2-d data with a last dimension of size 2 -# # contains coordinate bounds, for which it is useful to -# # cache the upper and lower bounds of the the first and -# # last cells. -# indices = (0, 1, -2, -1) -# ndim1 = ndim - 1 -# values = ( -# variable[(slice(0, 1),) * ndim1 + (slice(0, 1),)], -# variable[(slice(0, 1),) * ndim1 + (slice(1, 2),)], -# ) -# if data.size == 2: -# values = values + values -# else: -# values += ( -# variable[(slice(-1, None, 1),) * ndim1 + (slice(0, 1),)], -# variable[(slice(-1, None, 1),) * ndim1 + (slice(1, 2),)], -# ) -# elif size == 1: -# indices = (0, -1) -# value = variable[...] -# values = (value, value) -# elif size == 3: -# indices = (0, 1, -1) -# if char: -# values = variable[...].reshape(3, variable.shape[-1]) -# else: -# values = variable[...].flatten() -# else: -# indices = (0, -1) -# values = ( -# variable[(slice(0, 1),) * ndim], -# variable[(slice(-1, None, 1),) * ndim], -# ) -# -# # Create a dictionary of the element values -# elements = {} -# for index, value in zip(indices, values): -# if char: -# # Variable is a netCDF classic style char array, so -# # collapse (by concatenation) the outermost (fastest -# # varying) dimension. E.g. 
[['a','b','c']] becomes -# # ['abc'] -# if value.dtype.kind == "U": -# value = value.astype("S") -# -# a = netCDF4.chartostring(value) -# shape = a.shape -# a = np.array([x.rstrip() for x in a.flat]) -# a = np.reshape(a, shape) -# value = np.ma.masked_where(a == "", a) -# -# if np.ma.is_masked(value): -# value = np.ma.masked -# else: -# try: -# value = value.item() -# except (AttributeError, ValueError): -# # AttributeError: A netCDF string type scalar -# # variable comes out as Python str object, which -# # has no 'item' method. -# # -# # ValueError: A size-0 array can't be converted to -# # a Python scalar. -# pass -# -# elements[index] = value -# -# # Store the elements in the data object -# data._set_cached_elements(elements) -# -# def _create_cfanetcdfarray( -# self, -# ncvar, -# unpacked_dtype=False, -# coord_ncvar=None, -# term=None, -# ): -# """Create a CFA-netCDF variable array. -# -# .. versionadded:: 3.14.0 -# -# :Parameters: -# -# ncvar: `str` -# The name of the CFA-netCDF aggregated variable. See -# the *term* parameter. -# -# unpacked_dtype: `False` or `numpy.dtype`, optional -# -# coord_ncvar: `str`, optional -# -# term: `str`, optional -# The name of a non-standard aggregation instruction -# term from which to create the array. If set then -# *ncvar* must be the value of the non-standard term in -# the ``aggregation_data`` attribute. -# -# .. versionadded:: 3.15.0 -# -# :Returns: -# -# (`CFANetCDFArray`, `dict`) -# The new `CFANetCDFArray` instance and dictionary of -# the kwargs used to create it. -# -# """ -# g = self.read_vars -# -# # Get the kwargs needed to instantiate a general netCDF array -# # instance -# kwargs = self._create_netcdfarray( -# ncvar, -# unpacked_dtype=unpacked_dtype, -# coord_ncvar=coord_ncvar, -# return_kwargs_only=True, -# ) -# -# # Get rid of the incorrect shape. This will end up getting set -# # correctly by the CFANetCDFArray instance. 
-# kwargs.pop("shape", None) -# aggregated_data = g["cfa_aggregated_data"][ncvar] -# -# standardised_terms = ("location", "file", "address", "format") -# -# instructions = [] -# aggregation_instructions = {} -# for t, term_ncvar in aggregated_data.items(): -# if t not in standardised_terms: -# continue -# -# aggregation_instructions[t] = g["cfa_aggregation_instructions"][ -# term_ncvar -# ] -# instructions.append(f"{t}: {term_ncvar}") -# -# if t == "file": -# kwargs["substitutions"] = g["cfa_file_substitutions"].get( -# term_ncvar -# ) -# -# kwargs["x"] = aggregation_instructions -# kwargs["instructions"] = " ".join(sorted(instructions)) -# -# # Use the kwargs to create a CFANetCDFArray instance -# if g["original_netCDF4"]: -# array = self.implementation.initialise_CFANetCDF4Array(**kwargs) -# else: -# # h5netcdf -# array = self.implementation.initialise_CFAH5netcdfArray(**kwargs) -# -# return array, kwargs -# -# def _create_cfanetcdfarray_term( -# self, -# parent_ncvar, -# term, -# ncvar, -# ): -# """Create a CFA-netCDF variable array. -# -# .. versionadded:: 3.14.0 -# -# :Parameters: -# -# parent_ncvar: `str` -# The name of the CFA-netCDF aggregated variable. See -# the *term* parameter. -# -# term: `str`, optional -# The name of a non-standard aggregation instruction -# term from which to create the array. If set then -# *ncvar* must be the value of the non-standard term in -# the ``aggregation_data`` attribute. -# -# .. versionadded:: 3.15.0 -# -# ncvar: `str` -# The name of the CFA-netCDF aggregated variable. See -# the *term* parameter. -# -# :Returns: -# -# (`CFANetCDFArray`, `dict`) -# The new `CFANetCDFArray` instance and dictionary of -# the kwargs used to create it. -# -# """ -# g = self.read_vars -# -# # Get the kwargs needed to instantiate a general netCDF array -# # instance -# kwargs = self._create_netcdfarray( -# ncvar, -# return_kwargs_only=True, -# ) -# -# # Get rid of the incorrect shape. 
This will end up getting set -# # correctly by the CFANetCDFArray instance. -# kwargs.pop("shape", None) -# -# instructions = [] -# aggregation_instructions = {} -# for t, term_ncvar in g["cfa_aggregated_data"][parent_ncvar].items(): -# if t in ("location", term): -# aggregation_instructions[t] = g[ -# "cfa_aggregation_instructions" -# ][term_ncvar] -# instructions.append(f"{t}: {ncvar}") -# -# kwargs["term"] = term -# kwargs["dtype"] = aggregation_instructions[term].dtype -# kwargs["x"] = aggregation_instructions -# kwargs["instructions"] = " ".join(sorted(instructions)) -# -# if g["original_netCDF4"]: -# array = self.implementation.initialise_CFANetCDF4Array(**kwargs) -# else: -# # h5netcdf -# array = self.implementation.initialise_CFAH5netcdfArray(**kwargs) -# -# return array, kwargs -# -# -# def _parse_chunks(self, ncvar): -# """Parse the dask chunks. -# -# .. versionadded:: 3.14.0 -# -# :Parameters: -# -# ncvar: `str` -# The name of the netCDF variable containing the array. -# -# :Returns: -# -# `str`, `int` or `dict` -# The parsed chunks that are suitable for passing to a -# `Data` object containing the variable's array. 
-# -# """ -# g = self.read_vars -# -# default_chunks = "auto" -# chunks = g.get("chunks", default_chunks) -# -# if chunks is None: -# return -1 -# -# if isinstance(chunks, dict): -# if not chunks: -# return default_chunks -# -# # For ncdimensions = ('time', 'lat'): -# # -# # chunks={} -> ["auto", "auto"] -# # chunks={'ncdim%time': 12} -> [12, "auto"] -# # chunks={'ncdim%time': 12, 'ncdim%lat': 10000} -> [12, 10000] -# # chunks={'ncdim%time': 12, 'ncdim%lat': "20MB"} -> [12, "20MB"] -# # chunks={'ncdim%time': 12, 'latitude': -1} -> [12, -1] -# # chunks={'ncdim%time': 12, 'Y': None} -> [12, None] -# # chunks={'ncdim%time': 12, 'ncdim%lat': (30, 90)} -> [12, (30, 90)] -# # chunks={'ncdim%time': 12, 'ncdim%lat': None, 'X': 5} -> [12, None] -# attributes = g["variable_attributes"] -# chunks2 = [] -# for ncdim in g["variable_dimensions"][ncvar]: -# key = f"ncdim%{ncdim}" -# if key in chunks: -# chunks2.append(chunks[key]) -# continue -# -# found_coord_attr = False -# dim_coord_attrs = attributes.get(ncdim) -# if dim_coord_attrs is not None: -# for attr in ("standard_name", "axis"): -# key = dim_coord_attrs.get(attr) -# if key in chunks: -# found_coord_attr = True -# chunks2.append(chunks[key]) -# break -# -# if not found_coord_attr: -# # Use default chunks for this dimension -# chunks2.append(default_chunks) -# -# chunks = chunks2 -# -# return chunks -# -# def _customise_field_ancillaries(self, parent_ncvar, f): -# """Create customised field ancillary constructs. -# -# This method currently creates: -# -# * Field ancillary constructs derived from non-standardised -# terms in CFA aggregation instructions. Each construct spans -# the same domain axes as the parent field construct. -# Constructs are never created for `Domain` instances. -# -# .. versionadded:: 3.15.0 -# -# :Parameters: -# -# parent_ncvar: `str` -# The netCDF variable name of the parent variable. -# -# f: `Field` -# The parent field construct. 
-# -# :Returns: -# -# `dict` -# A mapping of netCDF variable names to newly-created -# construct identifiers. -# -# **Examples** -# -# >>> n._customise_field_ancillaries('tas', f) -# {} -# -# >>> n._customise_field_ancillaries('pr', f) -# {'tracking_id': 'fieldancillary1'} -# -# """ -# if not self._is_cfa_variable(parent_ncvar): -# return {} -# -# # ------------------------------------------------------------ -# # Still here? Then we have a CFA-netCDF variable: Loop round -# # the aggregation instruction terms and convert each -# # non-standard term into a field ancillary construct that -# # spans the same domain axes as the parent field. -# # ------------------------------------------------------------ -# g = self.read_vars -# -# standardised_terms = ("location", "file", "address", "format") -# -# out = {} -# for term, term_ncvar in g["cfa_aggregated_data"][parent_ncvar].items(): -# if term in standardised_terms: -# continue -# -# if g["variables"][term_ncvar].ndim != f.ndim: -# # Can only create field ancillaries with the same rank -# # as the field -# continue -# -# # Still here? Then we've got a non-standard aggregation -# # term from which we can create a field -# # ancillary construct. -# anc = self.implementation.initialise_FieldAncillary() -# -# self.implementation.set_properties( -# anc, g["variable_attributes"][term_ncvar] -# ) -# anc.set_property("long_name", term) -# -# # Store the term name as the 'id' attribute. This will be -# # used as the term name if the field field ancillary is -# # written to disk as a non-standard CFA term. 
-# anc.id = term -# -# data = self._create_data( -# parent_ncvar, anc, cfa_term={term: term_ncvar} -# ) -# -# self.implementation.set_data(anc, data, copy=False) -# self.implementation.nc_set_variable(anc, term_ncvar) -# -# key = self.implementation.set_field_ancillary( -# f, -# anc, -# axes=self.implementation.get_field_data_axes(f), -# copy=False, -# ) -# out[term_ncvar] = key -# -# return out -# -# def _cfa_parse_aggregated_data(self, ncvar, aggregated_data): -# """Parse a CFA-netCDF ``aggregated_data`` attribute. -# -# .. versionadded:: 3.15.0 -# -# :Parameters: -# -# ncvar: `str` -# The netCDF variable name. -# -# aggregated_data: `str` or `None` -# The CFA-netCDF ``aggregated_data`` attribute. -# -# :Returns: -# -# `dict` -# The parsed attribute. -# -# """ -# if not aggregated_data: -# return {} -# -# g = self.read_vars -# aggregation_instructions = g["cfa_aggregation_instructions"] -# variable_attributes = g["variable_attributes"] -# -# # Loop round aggregation instruction terms -# out = {} -# for x in self._parse_x( -# ncvar, -# aggregated_data, -# keys_are_variables=True, -# ): -# term, term_ncvar = tuple(x.items())[0] -# term_ncvar = term_ncvar[0] -# out[term] = term_ncvar -# -# if term_ncvar in aggregation_instructions: -# # Already processed this term -# continue -# -# variable = g["variables"][term_ncvar] -# array = cfdm.netcdf_indexer( -# variable, -# mask=True, -# unpack=True, -# always_masked_array=False, -# orthogonal_indexing=False, -# copy=False, -# ) -# aggregation_instructions[term_ncvar] = array[...] 
-# -# if term == "file": -# # Find URI substitutions that may be stored in the -# # CFA file instruction variable's "substitutions" -# # attribute -# subs = variable_attributes[term_ncvar].get( -# "substitutions", -# ) -# if subs: -# # Convert the string "${base}: value" to the -# # dictionary {"${base}": "value"} -# s = subs.split() -# subs = { -# base[:-1]: sub for base, sub in zip(s[::2], s[1::2]) -# } -# -# # Apply user-defined substitutions, which take -# # precedence over those defined in the file. -# subs.update(g["cfa_options"].get("substitutions", {})) -# g["cfa_file_substitutions"][term_ncvar] = subs -# -# g["cfa_aggregated_data"][ncvar] = out -# return out -# diff --git a/cf/read_write/netcdf/netcdfwrite.py b/cf/read_write/netcdf/netcdfwrite.py index d488ba1260..dbba9488dd 100644 --- a/cf/read_write/netcdf/netcdfwrite.py +++ b/cf/read_write/netcdf/netcdfwrite.py @@ -1,261 +1,9 @@ import cfdm -#from .netcdfread import NetCDFRead - class NetCDFWrite(cfdm.read_write.netcdf.NetCDFWrite): """A container for writing Fields to a netCDF dataset.""" - #def __new__(cls, *args, **kwargs): - # """Store the NetCDFRead class. - # - # .. note:: If a child class requires a different NetCDFRead class - # than the one defined here, then it must be redefined in the - # child class. - # - # """ - # instance = super().__new__(cls) - # instance._NetCDFRead = NetCDFRead - # return instance - - # def _unlimited(self, field, axis): - # """Whether an axis is unlimited. - # - # If a CFA-netCDF file is being written then no axis can be - # unlimited, i.e. `False` is always returned. - # - # .. versionadded:: 3.15.3 - # - # :Parameters: - # - # field: `Field` or `Domain` - # - # axis: `str` - # Domain axis construct identifier, - # e.g. ``'domainaxis1'``. 
- # - # :Returns: - # - # `bool` - # - # """ - # if self.write_vars["cfa"]: - # return False - # - # return super()._unlimited(field, axis) - # - # def _write_as_cfa(self, cfvar, construct_type, domain_axes): - # """Whether or not to write as a CFA variable. - # - # .. versionadded:: 3.0.0 - # - # :Parameters: - # - # cfvar: cf instance that contains data - # - # construct_type: `str` - # The construct type of the *cfvar*, or its parent if - # *cfvar* is not a construct. - # - # .. versionadded:: 3.15.0 - # - # domain_axes: `None`, or `tuple` of `str` - # The domain axis construct identifiers for *cfvar*. - # - # .. versionadded:: 3.15.0 - # - # :Returns: - # - # `bool` - # True if the variable is to be written as a CFA - # variable. - # - # """ - # if construct_type is None: - # # This prevents recursion whilst writing CFA-netCDF term - # # variables. - # return False - # - # g = self.write_vars - # if not g["cfa"]: - # return False - # - # data = self.implementation.get_data(cfvar, None) - # if data is None: - # return False - # - # cfa_options = g["cfa_options"] - # for ctype, ndim in cfa_options.get("constructs", {}).items(): - # # Write as CFA if it has an appropriate construct type ... - # if ctype in ("all", construct_type): - # # ... and then only if it satisfies the - # # number-of-dimenions criterion and the data is - # # flagged as OK. - # if ndim is None or ndim == len(domain_axes): - # cfa_get_write = data.cfa_get_write() - # if not cfa_get_write and cfa_options["strict"]: - # if g["mode"] == "w": - # remove(g["filename"]) - # - # raise ValueError( - # f"Can't write {cfvar!r} as a CFA-netCDF " - # "aggregation variable. Possible reasons for this " - # "include 1) there is more than one Dask chunk " - # "per fragment, and 2) data values have been " - # "changed relative to those in the fragments." 
- # ) - # - # return cfa_get_write - # - # break - # - # return False - # - # def _customise_createVariable( - # self, cfvar, construct_type, domain_axes, kwargs - # ): - # """Customise keyword arguments for - # `netCDF4.Dataset.createVariable`. - # - # .. versionadded:: 3.0.0 - # - # :Parameters: - # - # cfvar: cf instance that contains data - # - # construct_type: `str` - # The construct type of the *cfvar*, or its parent if - # *cfvar* is not a construct. - # - # .. versionadded:: 3.15.0 - # - # domain_axes: `None`, or `tuple` of `str` - # The domain axis construct identifiers for *cfvar*. - # - # .. versionadded:: 3.15.0 - # - # kwargs: `dict` - # - # :Returns: - # - # `dict` - # Dictionary of keyword arguments to be passed to - # `netCDF4.Dataset.createVariable`. - # - # """ - # kwargs = super()._customise_createVariable( - # cfvar, construct_type, domain_axes, kwargs - # ) - # - # if self._write_as_cfa(cfvar, construct_type, domain_axes): - # kwargs["dimensions"] = () - # kwargs["chunksizes"] = None - # - # return kwargs - # - # def _write_data( - # self, - # data, - # cfvar, - # ncvar, - # ncdimensions, - # domain_axes=None, - # unset_values=(), - # compressed=False, - # attributes={}, - # construct_type=None, - # ): - # """Write a Data object. - # - # .. versionadded:: 3.0.0 - # - # :Parameters: - # - # data: `Data` - # - # cfvar: cf instance - # - # ncvar: `str` - # - # ncdimensions: `tuple` of `str` - # - # domain_axes: `None`, or `tuple` of `str` - # The domain axis construct identifiers for *cfvar*. - # - # .. versionadded:: 3.15.0 - # - # unset_values: sequence of numbers - # - # attributes: `dict`, optional - # The netCDF attributes for the constructs that have been - # written to the file. - # - # construct_type: `str`, optional - # The construct type of the *cfvar*, or its parent if - # *cfvar* is not a construct. - # - # .. 
versionadded:: 3.15.0 - # - # :Returns: - # - # `None` - # - # """ - # g = self.write_vars - # - # if self._write_as_cfa(cfvar, construct_type, domain_axes): - # # -------------------------------------------------------- - # # Write the data as CFA aggregated data - # # -------------------------------------------------------- - # self._create_cfa_data( - # ncvar, - # ncdimensions, - # data, - # cfvar, - # ) - # return - # - # # ------------------------------------------------------------ - # # Still here? The write a normal (non-CFA) variable - # # ------------------------------------------------------------ - # if compressed: - # # Write data in its compressed form - # data = data.source().source() - # - # # Get the dask array - # dx = da.asanyarray(data) - # - # # Convert the data type - # new_dtype = g["datatype"].get(dx.dtype) - # if new_dtype is not None: - # dx = dx.astype(new_dtype) - # - # # VLEN variables can not be assigned to by masked arrays - # # (https://github.com/Unidata/netcdf4-python/pull/465), so - # # fill missing data in string (as opposed to char) data types. - # if g["fmt"] == "NETCDF4" and dx.dtype.kind in "SU": - # dx = dx.map_blocks( - # self._filled_string_array, - # fill_value="", - # meta=np.array((), dx.dtype), - # ) - # - # # Check for out-of-range values - # if g["warn_valid"]: - # if construct_type: - # var = cfvar - # else: - # var = None - # - # dx = dx.map_blocks( - # self._check_valid, - # cfvar=var, - # attributes=attributes, - # meta=np.array((), dx.dtype), - # ) - # - # da.store(dx, g["nc"][ncvar], compute=True, return_stored=False) - def _write_dimension_coordinate( self, f, key, coord, ncdim=None, coordinates=None ): @@ -404,636 +152,3 @@ def _change_reference_datetime(self, coord): ) else: return coord2 - - -# def _create_cfa_data(self, ncvar, ncdimensions, data, cfvar): -# """Write a CFA variable to the netCDF file. -# -# Any CFA private variables required will be autmatically created -# and written to the file. -# -# .. 
versionadded:: 3.0.0 -# -# :Parameters: -# -# ncvar: `str` -# The netCDF name for the variable. -# -# ncdimensions: sequence of `str` -# -# netcdf_attrs: `dict` -# -# data: `Data` -# -# :Returns: -# -# `None` -# -# """ -# g = self.write_vars -# -# ndim = data.ndim -# -# cfa = self._cfa_aggregation_instructions(data, cfvar) -# -# # ------------------------------------------------------------ -# # Get the location netCDF dimensions. These always start with -# # "f_{size}_loc". -# # ------------------------------------------------------------ -# location_ncdimensions = [] -# for size in cfa["location"].shape: -# l_ncdim = f"f_{size}_loc" -# if l_ncdim not in g["dimensions"]: -# # Create a new location dimension -# self._write_dimension(l_ncdim, None, size=size) -# -# location_ncdimensions.append(l_ncdim) -# -# location_ncdimensions = tuple(location_ncdimensions) -# -# # ------------------------------------------------------------ -# # Get the fragment netCDF dimensions. These always start with -# # "f_". 
-# # ------------------------------------------------------------ -# aggregation_address = cfa["address"] -# fragment_ncdimensions = [] -# for ncdim, size in zip( -# ncdimensions + ("extra",) * (aggregation_address.ndim - ndim), -# aggregation_address.shape, -# ): -# f_ncdim = f"f_{ncdim}" -# if f_ncdim not in g["dimensions"]: -# # Create a new fragment dimension -# self._write_dimension(f_ncdim, None, size=size) -# -# fragment_ncdimensions.append(f_ncdim) -# -# fragment_ncdimensions = tuple(fragment_ncdimensions) -# -# # ------------------------------------------------------------ -# # Write the standardised aggregation instruction variables to -# # the CFA-netCDF file -# # ------------------------------------------------------------ -# substitutions = data.cfa_file_substitutions() -# substitutions.update(g["cfa_options"].get("substitutions", {})) -# -# aggregated_data = data.cfa_get_aggregated_data() -# aggregated_data_attr = [] -# -# # Location -# term = "location" -# data = cfa[term] -# self.implementation.nc_set_hdf5_chunksizes(data, data.shape) -# term_ncvar = self._cfa_write_term_variable( -# data, -# aggregated_data.get(term, f"cfa_{term}"), -# location_ncdimensions, -# ) -# aggregated_data_attr.append(f"{term}: {term_ncvar}") -# -# # File -# term = "file" -# if substitutions: -# # Create the "substitutions" netCDF attribute -# subs = [] -# for base, sub in substitutions.items(): -# subs.append(f"{base}: {sub}") -# -# attributes = {"substitutions": " ".join(sorted(subs))} -# else: -# attributes = None -# -# data = cfa[term] -# self.implementation.nc_set_hdf5_chunksizes(data, data.shape) -# term_ncvar = self._cfa_write_term_variable( -# data, -# aggregated_data.get(term, f"cfa_{term}"), -# fragment_ncdimensions, -# attributes=attributes, -# ) -# aggregated_data_attr.append(f"{term}: {term_ncvar}") -# -# # Address -# term = "address" -# -# # Attempt to reduce addresses to a common scalar value -# u = cfa[term].unique().compressed().persist() -# if u.size == 
1: -# cfa[term] = u.squeeze() -# dimensions = () -# else: -# dimensions = fragment_ncdimensions -# -# data = cfa[term] -# self.implementation.nc_set_hdf5_chunksizes(data, data.shape) -# term_ncvar = self._cfa_write_term_variable( -# data, -# aggregated_data.get(term, f"cfa_{term}"), -# dimensions, -# ) -# aggregated_data_attr.append(f"{term}: {term_ncvar}") -# -# # Format -# term = "format" -# -# # Attempt to reduce addresses to a common scalar value -# u = cfa[term].unique().compressed().persist() -# if u.size == 1: -# cfa[term] = u.squeeze() -# dimensions = () -# else: -# dimensions = fragment_ncdimensions -# -# data = cfa[term] -# self.implementation.nc_set_hdf5_chunksizes(data, data.shape) -# term_ncvar = self._cfa_write_term_variable( -# data, -# aggregated_data.get(term, f"cfa_{term}"), -# dimensions, -# ) -# aggregated_data_attr.append(f"{term}: {term_ncvar}") -# -# # ------------------------------------------------------------ -# # Look for non-standard CFA terms stored as field ancillaries -# # on a field and write them to the CFA-netCDF file -# # ------------------------------------------------------------ -# if self.implementation.is_field(cfvar): -# non_standard_terms = self._cfa_write_non_standard_terms( -# cfvar, fragment_ncdimensions[:ndim], aggregated_data -# ) -# aggregated_data_attr.extend(non_standard_terms) -# -# # ------------------------------------------------------------ -# # Add the CFA aggregation variable attributes -# # ------------------------------------------------------------ -# self._write_attributes( -# None, -# ncvar, -# extra={ -# "aggregated_dimensions": " ".join(ncdimensions), -# "aggregated_data": " ".join(sorted(aggregated_data_attr)), -# }, -# ) -# -# def _check_valid(self, array, cfvar=None, attributes=None): -# """Checks for array values outside of the valid range. -# -# Specifically, checks array for out-of-range values, as -# defined by the valid_[min|max|range] attributes. -# -# .. 
versionadded:: 3.14.0 -# -# :Parameters: -# -# array: `numpy.ndarray` -# The array to be checked. -# -# cfvar: construct -# The CF construct containing the array. -# -# attributes: `dict` -# The variable's CF properties. -# -# :Returns: -# -# `numpy.ndarray` -# The input array, unchanged. -# -# """ -# super()._check_valid(cfvar, array, attributes) -# return array -# -# def _filled_string_array(self, array, fill_value=""): -# """Fill a string array. -# -# .. versionadded:: 3.14.0 -# -# :Parameters: -# -# array: `numpy.ndarray` -# The `numpy` array with string (byte or unicode) data -# type. -# -# :Returns: -# -# `numpy.ndarray` -# The string array array with any missing data replaced -# by the fill value. -# -# """ -# if np.ma.isMA(array): -# return array.filled(fill_value) -# -# return array -# -# def _write_field_ancillary(self, f, key, anc): -# """Write a field ancillary to the netCDF file. -# -# If an equal field ancillary has already been written to the file -# then it is not re-written. -# -# .. versionadded:: 3.15.0 -# -# :Parameters: -# -# f: `Field` -# -# key: `str` -# -# anc: `FieldAncillary` -# -# :Returns: -# -# `str` -# The netCDF variable name of the field ancillary -# object. If no ancillary variable was written then an -# empty string is returned. -# -# """ -# if anc.data.cfa_get_term(): -# # This field ancillary construct is to be written as a -# # non-standard CFA term belonging to the parent field, or -# # else not at all. -# return "" -# -# return super()._write_field_ancillary(f, key, anc) -# -# def _cfa_write_term_variable( -# self, data, ncvar, ncdimensions, attributes=None -# ): -# """Write a CFA aggregation instruction term variable -# -# .. versionadded:: 3.15.0 -# -# :Parameters: -# -# data `Data` -# The data to write. -# -# ncvar: `str` -# The netCDF variable name. -# -# ncdimensions: `tuple` of `str` -# The variable's netCDF dimensions. -# -# attributes: `dict`, optional -# Any attributes to attach to the variable. 
-# -# :Returns: -# -# `str` -# The netCDF variable name of the CFA term variable. -# -# """ -# create = not self._already_in_file(data, ncdimensions) -# -# if create: -# # Create a new CFA term variable in the file -# ncvar = self._netcdf_name(ncvar) -# self._write_netcdf_variable( -# ncvar, ncdimensions, data, None, extra=attributes -# ) -# else: -# # This CFA term variable has already been written to the -# # file -# ncvar = self.write_vars["seen"][id(data)]["ncvar"] -# -# return ncvar -# -# def _cfa_write_non_standard_terms( -# self, field, fragment_ncdimensions, aggregated_data -# ): -# """Write a non-standard CFA aggregation instruction term variable. -# -# Writes non-standard CFA terms stored as field ancillaries. -# -# .. versionadded:: 3.15.0 -# -# :Parameters: -# -# field: `Field` -# -# fragment_ncdimensions: `list` of `str` -# -# aggregated_data: `dict` -# -# """ -# aggregated_data_attr = [] -# terms = ["location", "file", "address", "format"] -# for key, field_anc in self.implementation.get_field_ancillaries( -# field -# ).items(): -# if not field_anc.data.cfa_get_term(): -# continue -# -# data = self.implementation.get_data(field_anc, None) -# if data is None: -# continue -# -# # Check that the field ancillary has the same axes as its -# # parent field, and in the same order. -# if field.get_data_axes(key) != field.get_data_axes(): -# continue -# -# # Still here? Then this field ancillary can be represented -# # by a non-standard aggregation term. -# -# # Then transform the data so that it spans the fragment -# # dimensions, with one value per fragment. If a chunk has -# # more than one unique value then the fragment's value is -# # missing data. -# # -# # '_cfa_unique' has its own call to 'cfdm_asanyarray', so -# # we can set '_asanyarray=False'. 
-# dx = data.to_dask_array(_asanyarray=False) -# dx_ind = tuple(range(dx.ndim)) -# out_ind = dx_ind -# dx = da.blockwise( -# self._cfa_unique, -# out_ind, -# dx, -# dx_ind, -# adjust_chunks={i: 1 for i in out_ind}, -# dtype=dx.dtype, -# ) -# -# # Get the non-standard term name from the field -# # ancillary's 'id' attribute -# term = getattr(field_anc, "id", "term") -# term = term.replace(" ", "_") -# name = term -# n = 0 -# while term in terms: -# n += 1 -# term = f"{name}_{n}" -# -# terms.append(term) -# -# # Create the new CFA term variable -# data = type(data)(dx) -# self.implementation.nc_set_hdf5_chunksizes(data, data.shape) -# term_ncvar = self._cfa_write_term_variable( -# data=data, -# ncvar=aggregated_data.get(term, f"cfa_{term}"), -# ncdimensions=fragment_ncdimensions, -# ) -# -# aggregated_data_attr.append(f"{term}: {term_ncvar}") -# -# return aggregated_data_attr -# -# @classmethod -# def _cfa_unique(cls, a): -# """Return the unique value of an array. -# -# If there are multiple unique vales then missing data is -# returned. -# -# .. versionadded:: 3.15.0 -# -# :Parameters: -# -# a: `numpy.ndarray` -# The array. -# -# :Returns: -# -# `numpy.ndarray` -# A size 1 array containing the unique value, or missing -# data if there is not a unique value. -# -# """ -# a = cfdm_asanyarray(a) -# -# out_shape = (1,) * a.ndim -# a = np.unique(a) -# if np.ma.isMA(a): -# # Remove a masked element -# a = a.compressed() -# -# if a.size == 1: -# return a.reshape(out_shape) -# -# return np.ma.masked_all(out_shape, dtype=a.dtype) -# -# def _cfa_aggregation_instructions(self, data, cfvar): -# """Convert data to standardised CFA aggregation instruction terms. -# -# .. versionadded:: 3.15.0 -# -# :Parameters: -# -# data: `Data` -# The data to be converted to standardised CFA -# aggregation instruction terms. -# -# cfvar: construct -# The construct that contains the *data*. 
-# -# :Returns: -# -# `dict` -# A dictionary whose keys are the standardised CFA -# aggregation instruction terms, with values of `Data` -# instances containing the corresponding variables. -# -# **Examples** -# -# >>> n._cfa_aggregation_instructions(data, cfvar) -# {'location': , -# 'file': , -# 'format': , -# 'address': } -# -# """ -# from os.path import abspath, join, relpath -# from pathlib import PurePath -# from urllib.parse import urlparse -# -# g = self.write_vars -# -# # Define the CFA file susbstitutions, giving precedence over -# # those set on the Data object to those provided by the CFA -# # options. -# substitutions = data.cfa_file_substitutions() -# substitutions.update(g["cfa_options"].get("substitutions", {})) -# -# absolute_paths = g["cfa_options"].get("absolute_paths") -# cfa_dir = g["cfa_dir"] -# -# # Size of the trailing dimension -# n_trailing = 0 -# -# aggregation_file = [] -# aggregation_address = [] -# aggregation_format = [] -# for indices in data.chunk_indices(): -# file_details = self._cfa_get_file_details(data[indices]) -# -# if len(file_details) != 1: -# if file_details: -# raise ValueError( -# f"Can't write {cfvar!r} as a CFA-netCDF " -# "aggregation variable: Dask chunk defined by index " -# f"{indices} spans two or more fragments. " -# "A possible fix for this is to set chunks=None as " -# "an argument of a prior call to cf.read" -# ) -# -# raise ValueError( -# f"Can't write {cfvar!r} as a CFA-netCDF " -# "aggregation variable: Dask chunk defined by index " -# f"{indices} spans zero fragments." 
-# ) -# -# filenames, addresses, formats = file_details.pop() -# -# if len(filenames) > n_trailing: -# n_trailing = len(filenames) -# -# filenames2 = [] -# for filename in filenames: -# uri = urlparse(filename) -# uri_scheme = uri.scheme -# if not uri_scheme: -# filename = abspath(join(cfa_dir, filename)) -# if absolute_paths: -# filename = PurePath(filename).as_uri() -# else: -# filename = relpath(filename, start=cfa_dir) -# elif not absolute_paths and uri_scheme == "file": -# filename = relpath(uri.path, start=cfa_dir) -# -# if substitutions: -# # Apply the CFA file susbstitutions -# for base, sub in substitutions.items(): -# filename = filename.replace(sub, base) -# -# filenames2.append(filename) -# -# aggregation_file.append(tuple(filenames2)) -# aggregation_address.append(addresses) -# aggregation_format.append(formats) -# -# # Pad each value of the aggregation instruction arrays so that -# # it has 'n_trailing' elements -# a_shape = data.numblocks -# pad = None -# if n_trailing > 1: -# a_shape += (n_trailing,) -# -# # Pad the ... -# for i, (filenames, addresses, formats) in enumerate( -# zip(aggregation_file, aggregation_address, aggregation_format) -# ): -# n = n_trailing - len(filenames) -# if n: -# # This chunk has fewer fragment files than some -# # others, so some padding is required. -# pad = ("",) * n -# aggregation_file[i] = filenames + pad -# aggregation_format[i] = formats + pad -# if isinstance(addresses[0], int): -# pad = (-1,) * n -# -# aggregation_address[i] = addresses + pad -# -# # Reshape the 1-d aggregation instruction arrays to span the -# # data dimensions, plus the extra trailing dimension if there -# # is one. 
-# aggregation_file = np.array(aggregation_file).reshape(a_shape) -# aggregation_address = np.array(aggregation_address).reshape(a_shape) -# aggregation_format = np.array(aggregation_format).reshape(a_shape) -# -# # Mask any padded elements -# if pad: -# aggregation_file = np.ma.where( -# aggregation_file == "", np.ma.masked, aggregation_file -# ) -# mask = aggregation_file.mask -# aggregation_address = np.ma.array(aggregation_address, mask=mask) -# aggregation_format = np.ma.array(aggregation_format, mask=mask) -# -# # ------------------------------------------------------------ -# # Create the location array -# # ------------------------------------------------------------ -# dtype = np.dtype(np.int32) -# if ( -# max(data.to_dask_array(_asanyarray=False).chunksize) -# > np.iinfo(dtype).max -# ): -# dtype = np.dtype(np.int64) -# -# ndim = data.ndim -# aggregation_location = np.ma.masked_all( -# (ndim, max(a_shape[:ndim])), dtype=dtype -# ) -# -# for i, chunks in enumerate(data.chunks): -# aggregation_location[i, : len(chunks)] = chunks -# -# # ------------------------------------------------------------ -# # Return Data objects -# # ------------------------------------------------------------ -# data = type(data) -# return { -# "location": data(aggregation_location), -# "file": data(aggregation_file), -# "format": data(aggregation_format), -# "address": data(aggregation_address), -# } -# -# def _customise_write_vars(self): -# """Customise the write parameters. -# -# .. versionadded:: 3.15.0 -# -# """ -# g = self.write_vars -# -# if g.get("cfa"): -# from os.path import abspath -# from pathlib import PurePath -# -# # Find the absolute directory path of the output -# # CFA-netCDF file URI -# g["cfa_dir"] = PurePath(abspath(g["filename"])).parent -# -# def _cfa_get_file_details(self, data): -# """Get the details of all files referenced by the data. -# -# .. 
versionadded:: 3.15.0 -# -# :Parameters: -# -# data: `Data` -# The data -# -# :Returns: -# -# `set` of 3-tuples -# A set containing 3-tuples giving the file names, -# the addresses in the files, and the file formats. If -# no files are required to compute the data then -# an empty `set` is returned. -# -# **Examples** -# -# >>> n._cfa_get_file_details(data): -# {(('/home/file.nc',), ('tas',), ('nc',))} -# -# >>> n._cfa_get_file_details(data): -# {(('/home/file.pp',), (34556,), ('um',))} -# -# """ -# out = [] -# out_append = out.append -# for a in data.todict().values(): -# try: -# out_append( -# (a.get_filenames(), a.get_addresses(), a.get_formats()) -# ) -# except AttributeError: -# pass -# -# return set(out) diff --git a/cf/read_write/read.py b/cf/read_write/read.py index 846c20ba01..020d4c254a 100644 --- a/cf/read_write/read.py +++ b/cf/read_write/read.py @@ -18,7 +18,6 @@ from ..functions import _DEPRECATION_ERROR_FUNCTION_KWARGS, flat from ..query import Query -# from .netcdf import NetCDFRead from .um import UMRead _cached_temporary_files = {} @@ -64,7 +63,7 @@ def read( dask_chunks="storage-aligned", store_hdf5_chunks=True, domain=False, - cfa=None, + cfa=None, cfa_write=None, netcdf_backend=None, storage_options=None, @@ -1017,29 +1016,6 @@ def read( info = is_log_level_info(logger) - # # Parse the 'cfa' parameter - # if cfa is None: - # cfa_options = {} - # else: - # cfa_options = cfa.copy() - # keys = ("substitutions",) - # if not set(cfa_options).issubset(keys): - # raise ValueError( - # "Invalid dictionary key to the 'cfa' parameter." - # f"Valid keys are {keys}. 
Got: {cfa_options}" - # ) - # - # if "substitutions" in cfa_options: - # substitutions = cfa_options["substitutions"].copy() - # for base, sub in tuple(substitutions.items()): - # if not (base.startswith("${") and base.endswith("}")): - # # Add missing ${...} - # substitutions[f"${{{base}}}"] = substitutions.pop(base) - # else: - # substitutions = {} - # - # cfa_options["substitutions"] = substitutions - # Initialise the output list of fields/domains if domain: out = DomainList() @@ -1173,7 +1149,7 @@ def read( warn_valid=warn_valid, select=select, domain=domain, - cfa=cfa, + cfa=cfa, cfa_write=cfa_write, netcdf_backend=netcdf_backend, storage_options=storage_options, @@ -1293,7 +1269,7 @@ def _read_a_file( store_hdf5_chunks=True, select=None, domain=False, - cfa=None, + cfa=None, cfa_write=None, netcdf_backend=None, storage_options=None, @@ -1383,7 +1359,6 @@ def _read_a_file( extra_read_vars = { "fmt": selected_fmt, "ignore_read_error": ignore_read_error, - # "cfa_options": cfa_options, } # ---------------------------------------------------------------- @@ -1429,7 +1404,7 @@ def _read_a_file( dask_chunks=dask_chunks, store_hdf5_chunks=store_hdf5_chunks, cache=cache, - cfa=cfa, + cfa=cfa, cfa_write=cfa_write, ) except MaskError: diff --git a/cf/read_write/write.py b/cf/read_write/write.py index 761869d408..aadaff6273 100644 --- a/cf/read_write/write.py +++ b/cf/read_write/write.py @@ -752,58 +752,6 @@ def write( # Extra write variables extra_write_vars = {"reference_datetime": reference_datetime} - # # ------------------------------------------------------------ - # # CFA - # # ------------------------------------------------------------ - # if isinstance(cfa, dict): - # cfa_options = cfa.copy() - # cfa = True - # else: - # cfa_options = {} - # cfa = bool(cfa) - # - # if cfa: - # # Add CFA to the Conventions - # cfa_conventions = f"CFA-{CFA()}" - # if not Conventions: - # Conventions = cfa_conventions - # elif isinstance(Conventions, str): - # Conventions = 
(Conventions, cfa_conventions) - # else: - # Conventions = tuple(Conventions) + (cfa_conventions,) - # - # keys = ("constructs", "absolute_paths", "strict", "substitutions") - # if not set(cfa_options).issubset(keys): - # raise ValueError( - # "Invalid dictionary key to the 'cfa_options' " - # f"parameter. Valid keys are {keys}. Got: {cfa_options}" - # ) - # - # cfa_options.setdefault("constructs", "field") - # cfa_options.setdefault("absolute_paths", True) - # cfa_options.setdefault("strict", True) - # cfa_options.setdefault("substitutions", {}) - # - # constructs = cfa_options["constructs"] - # if isinstance(constructs, dict): - # cfa_options["constructs"] = constructs.copy() - # else: - # if isinstance(constructs, str): - # constructs = (constructs,) - # - # cfa_options["constructs"] = {c: None for c in constructs} - # - # substitutions = cfa_options["substitutions"].copy() - # for base, sub in tuple(substitutions.items()): - # if not (base.startswith("${") and base.endswith("}")): - # # Add missing ${...} - # substitutions[f"${{{base}}}"] = substitutions.pop(base) - # - # cfa_options["substitutions"] = substitutions - # - # extra_write_vars["cfa"] = cfa - # extra_write_vars["cfa_options"] = cfa_options - netcdf.write( fields, filename, diff --git a/cf/test/test_CFA.py b/cf/test/test_CFA.py index beab4e1509..bf3beccb61 100644 --- a/cf/test/test_CFA.py +++ b/cf/test/test_CFA.py @@ -632,462 +632,462 @@ # print() # unittest.main(verbosity=2) # -## n_tmpfiles = 5 -## tmpfiles = [ -## tempfile.mkstemp("_test_CFA.nc", dir=os.getcwd())[1] -## for i in range(n_tmpfiles) -## ] -## ( -## tmpfile1, -## tmpfile2, -## tmpfile3, -## tmpfile4, -## tmpfile5, -## ) = tmpfiles -## -## -## def _remove_tmpfiles(): -## """Try to remove defined temporary files by deleting their paths.""" -## for f in tmpfiles: -## try: -## os.remove(f) -## except OSError: -## pass -## -## -## atexit.register(_remove_tmpfiles) -## -## -## class CFATest(unittest.TestCase): -## netcdf3_fmts = [ -## 
"NETCDF3_CLASSIC", -## "NETCDF3_64BIT", -## "NETCDF3_64BIT_OFFSET", -## "NETCDF3_64BIT_DATA", -## ] -## netcdf4_fmts = ["NETCDF4", "NETCDF4_CLASSIC"] -## netcdf_fmts = netcdf3_fmts + netcdf4_fmts -## -## def test_CFA_fmt(self): -## """Test the cf.read 'fmt' and 'cfa' keywords.""" -## f = cf.example_field(0) -## cf.write(f, tmpfile1) -## f = cf.read(tmpfile1)[0] -## -## for fmt in self.netcdf_fmts: -## cf.write(f, tmpfile2, fmt=fmt, cfa=True) -## g = cf.read(tmpfile2) -## self.assertEqual(len(g), 1) -## self.assertTrue(f.equals(g[0])) -## -## def test_CFA_multiple_fragments(self): -## """Test CFA with more than one fragment.""" -## f = cf.example_field(0) -## -## cf.write(f[:2], tmpfile1) -## cf.write(f[2:], tmpfile2) -## -## a = cf.read([tmpfile1, tmpfile2]) -## self.assertEqual(len(a), 1) -## a = a[0] -## -## nc_file = tmpfile3 -## cfa_file = tmpfile4 -## cf.write(a, nc_file) -## cf.write(a, cfa_file, cfa=True) -## -## n = cf.read(nc_file) -## c = cf.read(cfa_file) -## self.assertEqual(len(n), 1) -## self.assertEqual(len(c), 1) -## self.assertTrue(c[0].equals(f)) -## self.assertTrue(n[0].equals(c[0])) -## -## def test_CFA_strict(self): -## """Test CFA 'strict' option to the cfa.write 'cfa' keyword.""" -## f = cf.example_field(0) -## -## # By default, can't write as CF-netCDF those variables -## # selected for CFA treatment, but which aren't suitable. 
-## with self.assertRaises(ValueError): -## cf.write(f, tmpfile1, cfa=True) -## -## # The previous line should have deleted the output file -## self.assertFalse(os.path.exists(tmpfile1)) -## -## cf.write(f, tmpfile1, cfa={"strict": False}) -## g = cf.read(tmpfile1) -## self.assertEqual(len(g), 1) -## self.assertTrue(g[0].equals(f)) -## -## cf.write(g, tmpfile2, cfa={"strict": True}) -## g = cf.read(tmpfile2) -## self.assertEqual(len(g), 1) -## self.assertTrue(g[0].equals(f)) -## -## def test_CFA_field_ancillaries(self): -## """Test creation of field ancillaries from non-standard CFA terms.""" -## f = cf.example_field(0) -## self.assertFalse(f.field_ancillaries()) -## -## a = f[:2] -## b = f[2:] -## a.set_property("foo", "bar_a") -## b.set_property("foo", "bar_b") -## cf.write(a, tmpfile1) -## cf.write(b, tmpfile2) -## -## c = cf.read( -## [tmpfile1, tmpfile2], aggregate={"field_ancillaries": "foo"} -## ) -## self.assertEqual(len(c), 1) -## c = c[0] -## self.assertEqual(len(c.field_ancillaries()), 1) -## anc = c.field_ancillary() -## self.assertTrue(anc.data.cfa_get_term()) -## self.assertFalse(anc.data.cfa_get_write()) -## -## cf.write(c, tmpfile3, cfa=False) -## c2 = cf.read(tmpfile3) -## self.assertEqual(len(c2), 1) -## self.assertFalse(c2[0].field_ancillaries()) -## -## cf.write(c, tmpfile4, cfa=True) -## d = cf.read(tmpfile4) -## self.assertEqual(len(d), 1) -## d = d[0] -## -## self.assertEqual(len(d.field_ancillaries()), 1) -## anc = d.field_ancillary() -## self.assertTrue(anc.data.cfa_get_term()) -## self.assertFalse(anc.data.cfa_get_write()) -## self.assertTrue(d.equals(c)) -## -## cf.write(d, tmpfile5, cfa=False) -## e = cf.read(tmpfile5) -## self.assertEqual(len(e), 1) -## self.assertFalse(e[0].field_ancillaries()) -## -## cf.write(d, tmpfile5, cfa=True) -## e = cf.read(tmpfile5) -## self.assertEqual(len(e), 1) -## self.assertTrue(e[0].equals(d)) -## -## def test_CFA_substitutions_0(self): -## """Test CFA substitution URI substitutions (0).""" -## f = 
cf.example_field(0) -## cf.write(f, tmpfile1) -## f = cf.read(tmpfile1)[0] -## -## cwd = os.getcwd() -## -## f.data.cfa_update_file_substitutions({"base": cwd}) -## -## cf.write( -## f, -## tmpfile2, -## cfa={"absolute_paths": True}, -## ) -## -## nc = netCDF4.Dataset(tmpfile2, "r") -## cfa_file = nc.variables["cfa_file"] -## self.assertEqual( -## cfa_file.getncattr("substitutions"), -## f"${{base}}: {cwd}", -## ) -## self.assertEqual( -## cfa_file[...], f"file://${{base}}/{os.path.basename(tmpfile1)}" -## ) -## nc.close() -## -## g = cf.read(tmpfile2) -## self.assertEqual(len(g), 1) -## self.assertTrue(f.equals(g[0])) -## -## def test_CFA_substitutions_1(self): -## """Test CFA substitution URI substitutions (1).""" -## f = cf.example_field(0) -## cf.write(f, tmpfile1) -## f = cf.read(tmpfile1)[0] -## -## cwd = os.getcwd() -## for base in ("base", "${base}"): -## cf.write( -## f, -## tmpfile2, -## cfa={"absolute_paths": True, "substitutions": {base: cwd}}, -## ) -## -## nc = netCDF4.Dataset(tmpfile2, "r") -## cfa_file = nc.variables["cfa_file"] -## self.assertEqual( -## cfa_file.getncattr("substitutions"), -## f"${{base}}: {cwd}", -## ) -## self.assertEqual( -## cfa_file[...], f"file://${{base}}/{os.path.basename(tmpfile1)}" -## ) -## nc.close() -## -## g = cf.read(tmpfile2) -## self.assertEqual(len(g), 1) -## self.assertTrue(f.equals(g[0])) -## -## def test_CFA_substitutions_2(self): -## """Test CFA substitution URI substitutions (2).""" -## f = cf.example_field(0) -## cf.write(f, tmpfile1) -## f = cf.read(tmpfile1)[0] -## -## cwd = os.getcwd() -## -## f.data.cfa_clear_file_substitutions() -## f.data.cfa_update_file_substitutions({"base": cwd}) -## -## cf.write( -## f, -## tmpfile2, -## cfa={ -## "absolute_paths": True, -## "substitutions": {"base2": "/bad/location"}, -## }, -## ) -## -## nc = netCDF4.Dataset(tmpfile2, "r") -## cfa_file = nc.variables["cfa_file"] -## self.assertEqual( -## cfa_file.getncattr("substitutions"), -## f"${{base2}}: /bad/location 
${{base}}: {cwd}", -## ) -## self.assertEqual( -## cfa_file[...], f"file://${{base}}/{os.path.basename(tmpfile1)}" -## ) -## nc.close() -## -## g = cf.read(tmpfile2) -## self.assertEqual(len(g), 1) -## self.assertTrue(f.equals(g[0])) -## -## f.data.cfa_clear_file_substitutions() -## f.data.cfa_update_file_substitutions({"base": "/bad/location"}) -## -## cf.write( -## f, -## tmpfile2, -## cfa={"absolute_paths": True, "substitutions": {"base": cwd}}, -## ) -## -## nc = netCDF4.Dataset(tmpfile2, "r") -## cfa_file = nc.variables["cfa_file"] -## self.assertEqual( -## cfa_file.getncattr("substitutions"), -## f"${{base}}: {cwd}", -## ) -## self.assertEqual( -## cfa_file[...], f"file://${{base}}/{os.path.basename(tmpfile1)}" -## ) -## nc.close() -## -## g = cf.read(tmpfile2) -## self.assertEqual(len(g), 1) -## self.assertTrue(f.equals(g[0])) -## -## f.data.cfa_clear_file_substitutions() -## f.data.cfa_update_file_substitutions({"base2": "/bad/location"}) -## -## cf.write( -## f, -## tmpfile2, -## cfa={"absolute_paths": True, "substitutions": {"base": cwd}}, -## ) -## -## nc = netCDF4.Dataset(tmpfile2, "r") -## cfa_file = nc.variables["cfa_file"] -## self.assertEqual( -## cfa_file.getncattr("substitutions"), -## f"${{base2}}: /bad/location ${{base}}: {cwd}", -## ) -## self.assertEqual( -## cfa_file[...], f"file://${{base}}/{os.path.basename(tmpfile1)}" -## ) -## nc.close() -## -## g = cf.read(tmpfile2) -## self.assertEqual(len(g), 1) -## self.assertTrue(f.equals(g[0])) -## -## def test_CFA_absolute_paths(self): -## """Test CFA 'absolute_paths' option to the cfa.write 'cfa' keyword.""" -## f = cf.example_field(0) -## cf.write(f, tmpfile1) -## f = cf.read(tmpfile1)[0] -## -## for absolute_paths, filename in zip( -## (True, False), -## ( -## PurePath(os.path.abspath(tmpfile1)).as_uri(), -## os.path.basename(tmpfile1), -## ), -## ): -## cf.write(f, tmpfile2, cfa={"absolute_paths": absolute_paths}) -## -## nc = netCDF4.Dataset(tmpfile2, "r") -## cfa_file = 
nc.variables["cfa_file"] -## self.assertEqual(cfa_file[...], filename) -## nc.close() -## -## g = cf.read(tmpfile2) -## self.assertEqual(len(g), 1) -## self.assertTrue(f.equals(g[0])) -## -## def test_CFA_constructs(self): -## """Test choice of constructs to write as CFA-netCDF variables.""" -## f = cf.example_field(1) -## f.del_construct("T") -## f.del_construct("long_name=Grid latitude name") -## cf.write(f, tmpfile1) -## f = cf.read(tmpfile1)[0] -## -## # No constructs -## cf.write(f, tmpfile2, cfa={"constructs": []}) -## nc = netCDF4.Dataset(tmpfile2, "r") -## for var in nc.variables.values(): -## attrs = var.ncattrs() -## self.assertNotIn("aggregated_dimensions", attrs) -## self.assertNotIn("aggregated_data", attrs) -## -## nc.close() -## -## # Field construct -## cf.write(f, tmpfile2, cfa={"constructs": "field"}) -## nc = netCDF4.Dataset(tmpfile2, "r") -## for ncvar, var in nc.variables.items(): -## attrs = var.ncattrs() -## if ncvar in ("ta",): -## self.assertFalse(var.ndim) -## self.assertIn("aggregated_dimensions", attrs) -## self.assertIn("aggregated_data", attrs) -## else: -## self.assertNotIn("aggregated_dimensions", attrs) -## self.assertNotIn("aggregated_data", attrs) -## -## nc.close() -## -## # Dimension construct -## for constructs in ( -## "dimension_coordinate", -## ["dimension_coordinate"], -## {"dimension_coordinate": None}, -## {"dimension_coordinate": 1}, -## {"dimension_coordinate": cf.eq(1)}, -## ): -## cf.write(f, tmpfile2, cfa={"constructs": constructs}) -## nc = netCDF4.Dataset(tmpfile2, "r") -## for ncvar, var in nc.variables.items(): -## attrs = var.ncattrs() -## if ncvar in ( -## "x", -## "x_bnds", -## "y", -## "y_bnds", -## "atmosphere_hybrid_height_coordinate", -## "atmosphere_hybrid_height_coordinate_bounds", -## ): -## self.assertFalse(var.ndim) -## self.assertIn("aggregated_dimensions", attrs) -## self.assertIn("aggregated_data", attrs) -## else: -## self.assertNotIn("aggregated_dimensions", attrs) -## 
self.assertNotIn("aggregated_data", attrs) -## -## nc.close() -## -## # Dimension and auxiliary constructs -## for constructs in ( -## ["dimension_coordinate", "auxiliary_coordinate"], -## {"dimension_coordinate": None, "auxiliary_coordinate": cf.ge(2)}, -## ): -## cf.write(f, tmpfile2, cfa={"constructs": constructs}) -## nc = netCDF4.Dataset(tmpfile2, "r") -## for ncvar, var in nc.variables.items(): -## attrs = var.ncattrs() -## if ncvar in ( -## "x", -## "x_bnds", -## "y", -## "y_bnds", -## "atmosphere_hybrid_height_coordinate", -## "atmosphere_hybrid_height_coordinate_bounds", -## "latitude_1", -## "longitude_1", -## ): -## self.assertFalse(var.ndim) -## self.assertIn("aggregated_dimensions", attrs) -## self.assertIn("aggregated_data", attrs) -## else: -## self.assertNotIn("aggregated_dimensions", attrs) -## self.assertNotIn("aggregated_data", attrs) -## -## nc.close() -## -## def test_CFA_PP(self): -## """Test writing CFA-netCDF with PP format fragments.""" -## f = cf.read("file1.pp")[0] -## cf.write(f, tmpfile1, cfa=True) -## -## # Check that only the fields have been aggregated -## nc = netCDF4.Dataset(tmpfile1, "r") -## for ncvar, var in nc.variables.items(): -## attrs = var.ncattrs() -## if ncvar in ("UM_m01s15i201_vn405",): -## self.assertFalse(var.ndim) -## self.assertIn("aggregated_dimensions", attrs) -## self.assertIn("aggregated_data", attrs) -## else: -## self.assertNotIn("aggregated_dimensions", attrs) -## self.assertNotIn("aggregated_data", attrs) -## -## nc.close() -## -## g = cf.read(tmpfile1) -## self.assertEqual(len(g), 1) -## self.assertTrue(f.equals(g[0])) -## -## def test_CFA_multiple_files(self): -## """Test storing multiple CFA frgament locations.""" -## f = cf.example_field(0) -## cf.write(f, tmpfile1) -## f = cf.read(tmpfile1)[0] -## f.add_file_location("/new/location") -## -## cf.write(f, tmpfile2, cfa=True) -## g = cf.read(tmpfile2) -## self.assertEqual(len(g), 1) -## g = g[0] -## self.assertTrue(f.equals(g)) -## -## 
self.assertEqual(len(g.data.get_filenames()), 2) -## self.assertEqual(len(g.get_filenames()), 3) -## -## def test_CFA_unlimited_dimension(self): -## """Test CFA with unlimited dimensions""" -## # Create a CFA file from a field that has an unlimited -## # dimension and no metadata constructs spanning that dimension -## f = cf.example_field(0) -## d = f.domain_axis("X") -## d.nc_set_unlimited(True) -## f.del_construct("X") -## cf.write(f, tmpfile1) -## g = cf.read(tmpfile1) -## cf.write(g, tmpfile2, cfa=True) -## -## # Check that the CFA file can be read -## h = cf.read(tmpfile2) -## self.assertEqual(len(h), 1) -## -## -## if __name__ == "__main__": -## print("Run date:", datetime.datetime.now()) -## cf.environment() -## print() -## unittest.main(verbosity=2) +# n_tmpfiles = 5 +# tmpfiles = [ +# tempfile.mkstemp("_test_CFA.nc", dir=os.getcwd())[1] +# for i in range(n_tmpfiles) +# ] +# ( +# tmpfile1, +# tmpfile2, +# tmpfile3, +# tmpfile4, +# tmpfile5, +# ) = tmpfiles +# +# +# def _remove_tmpfiles(): +# """Try to remove defined temporary files by deleting their paths.""" +# for f in tmpfiles: +# try: +# os.remove(f) +# except OSError: +# pass +# +# +# atexit.register(_remove_tmpfiles) +# +# +# class CFATest(unittest.TestCase): +# netcdf3_fmts = [ +# "NETCDF3_CLASSIC", +# "NETCDF3_64BIT", +# "NETCDF3_64BIT_OFFSET", +# "NETCDF3_64BIT_DATA", +# ] +# netcdf4_fmts = ["NETCDF4", "NETCDF4_CLASSIC"] +# netcdf_fmts = netcdf3_fmts + netcdf4_fmts +# +# def test_CFA_fmt(self): +# """Test the cf.read 'fmt' and 'cfa' keywords.""" +# f = cf.example_field(0) +# cf.write(f, tmpfile1) +# f = cf.read(tmpfile1)[0] +# +# for fmt in self.netcdf_fmts: +# cf.write(f, tmpfile2, fmt=fmt, cfa=True) +# g = cf.read(tmpfile2) +# self.assertEqual(len(g), 1) +# self.assertTrue(f.equals(g[0])) +# +# def test_CFA_multiple_fragments(self): +# """Test CFA with more than one fragment.""" +# f = cf.example_field(0) +# +# cf.write(f[:2], tmpfile1) +# cf.write(f[2:], tmpfile2) +# +# a = cf.read([tmpfile1, 
tmpfile2]) +# self.assertEqual(len(a), 1) +# a = a[0] +# +# nc_file = tmpfile3 +# cfa_file = tmpfile4 +# cf.write(a, nc_file) +# cf.write(a, cfa_file, cfa=True) +# +# n = cf.read(nc_file) +# c = cf.read(cfa_file) +# self.assertEqual(len(n), 1) +# self.assertEqual(len(c), 1) +# self.assertTrue(c[0].equals(f)) +# self.assertTrue(n[0].equals(c[0])) +# +# def test_CFA_strict(self): +# """Test CFA 'strict' option to the cfa.write 'cfa' keyword.""" +# f = cf.example_field(0) +# +# # By default, can't write as CF-netCDF those variables +# # selected for CFA treatment, but which aren't suitable. +# with self.assertRaises(ValueError): +# cf.write(f, tmpfile1, cfa=True) +# +# # The previous line should have deleted the output file +# self.assertFalse(os.path.exists(tmpfile1)) +# +# cf.write(f, tmpfile1, cfa={"strict": False}) +# g = cf.read(tmpfile1) +# self.assertEqual(len(g), 1) +# self.assertTrue(g[0].equals(f)) +# +# cf.write(g, tmpfile2, cfa={"strict": True}) +# g = cf.read(tmpfile2) +# self.assertEqual(len(g), 1) +# self.assertTrue(g[0].equals(f)) +# +# def test_CFA_field_ancillaries(self): +# """Test creation of field ancillaries from non-standard CFA terms.""" +# f = cf.example_field(0) +# self.assertFalse(f.field_ancillaries()) +# +# a = f[:2] +# b = f[2:] +# a.set_property("foo", "bar_a") +# b.set_property("foo", "bar_b") +# cf.write(a, tmpfile1) +# cf.write(b, tmpfile2) +# +# c = cf.read( +# [tmpfile1, tmpfile2], aggregate={"field_ancillaries": "foo"} +# ) +# self.assertEqual(len(c), 1) +# c = c[0] +# self.assertEqual(len(c.field_ancillaries()), 1) +# anc = c.field_ancillary() +# self.assertTrue(anc.data.cfa_get_term()) +# self.assertFalse(anc.data.cfa_get_write()) +# +# cf.write(c, tmpfile3, cfa=False) +# c2 = cf.read(tmpfile3) +# self.assertEqual(len(c2), 1) +# self.assertFalse(c2[0].field_ancillaries()) +# +# cf.write(c, tmpfile4, cfa=True) +# d = cf.read(tmpfile4) +# self.assertEqual(len(d), 1) +# d = d[0] +# +# self.assertEqual(len(d.field_ancillaries()), 1) 
+# anc = d.field_ancillary() +# self.assertTrue(anc.data.cfa_get_term()) +# self.assertFalse(anc.data.cfa_get_write()) +# self.assertTrue(d.equals(c)) +# +# cf.write(d, tmpfile5, cfa=False) +# e = cf.read(tmpfile5) +# self.assertEqual(len(e), 1) +# self.assertFalse(e[0].field_ancillaries()) +# +# cf.write(d, tmpfile5, cfa=True) +# e = cf.read(tmpfile5) +# self.assertEqual(len(e), 1) +# self.assertTrue(e[0].equals(d)) +# +# def test_CFA_substitutions_0(self): +# """Test CFA substitution URI substitutions (0).""" +# f = cf.example_field(0) +# cf.write(f, tmpfile1) +# f = cf.read(tmpfile1)[0] +# +# cwd = os.getcwd() +# +# f.data.cfa_update_file_substitutions({"base": cwd}) +# +# cf.write( +# f, +# tmpfile2, +# cfa={"absolute_paths": True}, +# ) +# +# nc = netCDF4.Dataset(tmpfile2, "r") +# cfa_file = nc.variables["cfa_file"] +# self.assertEqual( +# cfa_file.getncattr("substitutions"), +# f"${{base}}: {cwd}", +# ) +# self.assertEqual( +# cfa_file[...], f"file://${{base}}/{os.path.basename(tmpfile1)}" +# ) +# nc.close() +# +# g = cf.read(tmpfile2) +# self.assertEqual(len(g), 1) +# self.assertTrue(f.equals(g[0])) +# +# def test_CFA_substitutions_1(self): +# """Test CFA substitution URI substitutions (1).""" +# f = cf.example_field(0) +# cf.write(f, tmpfile1) +# f = cf.read(tmpfile1)[0] +# +# cwd = os.getcwd() +# for base in ("base", "${base}"): +# cf.write( +# f, +# tmpfile2, +# cfa={"absolute_paths": True, "substitutions": {base: cwd}}, +# ) +# +# nc = netCDF4.Dataset(tmpfile2, "r") +# cfa_file = nc.variables["cfa_file"] +# self.assertEqual( +# cfa_file.getncattr("substitutions"), +# f"${{base}}: {cwd}", +# ) +# self.assertEqual( +# cfa_file[...], f"file://${{base}}/{os.path.basename(tmpfile1)}" +# ) +# nc.close() +# +# g = cf.read(tmpfile2) +# self.assertEqual(len(g), 1) +# self.assertTrue(f.equals(g[0])) +# +# def test_CFA_substitutions_2(self): +# """Test CFA substitution URI substitutions (2).""" +# f = cf.example_field(0) +# cf.write(f, tmpfile1) +# f = 
cf.read(tmpfile1)[0] +# +# cwd = os.getcwd() +# +# f.data.cfa_clear_file_substitutions() +# f.data.cfa_update_file_substitutions({"base": cwd}) +# +# cf.write( +# f, +# tmpfile2, +# cfa={ +# "absolute_paths": True, +# "substitutions": {"base2": "/bad/location"}, +# }, +# ) +# +# nc = netCDF4.Dataset(tmpfile2, "r") +# cfa_file = nc.variables["cfa_file"] +# self.assertEqual( +# cfa_file.getncattr("substitutions"), +# f"${{base2}}: /bad/location ${{base}}: {cwd}", +# ) +# self.assertEqual( +# cfa_file[...], f"file://${{base}}/{os.path.basename(tmpfile1)}" +# ) +# nc.close() +# +# g = cf.read(tmpfile2) +# self.assertEqual(len(g), 1) +# self.assertTrue(f.equals(g[0])) +# +# f.data.cfa_clear_file_substitutions() +# f.data.cfa_update_file_substitutions({"base": "/bad/location"}) +# +# cf.write( +# f, +# tmpfile2, +# cfa={"absolute_paths": True, "substitutions": {"base": cwd}}, +# ) +# +# nc = netCDF4.Dataset(tmpfile2, "r") +# cfa_file = nc.variables["cfa_file"] +# self.assertEqual( +# cfa_file.getncattr("substitutions"), +# f"${{base}}: {cwd}", +# ) +# self.assertEqual( +# cfa_file[...], f"file://${{base}}/{os.path.basename(tmpfile1)}" +# ) +# nc.close() +# +# g = cf.read(tmpfile2) +# self.assertEqual(len(g), 1) +# self.assertTrue(f.equals(g[0])) +# +# f.data.cfa_clear_file_substitutions() +# f.data.cfa_update_file_substitutions({"base2": "/bad/location"}) +# +# cf.write( +# f, +# tmpfile2, +# cfa={"absolute_paths": True, "substitutions": {"base": cwd}}, +# ) +# +# nc = netCDF4.Dataset(tmpfile2, "r") +# cfa_file = nc.variables["cfa_file"] +# self.assertEqual( +# cfa_file.getncattr("substitutions"), +# f"${{base2}}: /bad/location ${{base}}: {cwd}", +# ) +# self.assertEqual( +# cfa_file[...], f"file://${{base}}/{os.path.basename(tmpfile1)}" +# ) +# nc.close() +# +# g = cf.read(tmpfile2) +# self.assertEqual(len(g), 1) +# self.assertTrue(f.equals(g[0])) +# +# def test_CFA_absolute_paths(self): +# """Test CFA 'absolute_paths' option to the cfa.write 'cfa' keyword.""" +# f = 
cf.example_field(0) +# cf.write(f, tmpfile1) +# f = cf.read(tmpfile1)[0] +# +# for absolute_paths, filename in zip( +# (True, False), +# ( +# PurePath(os.path.abspath(tmpfile1)).as_uri(), +# os.path.basename(tmpfile1), +# ), +# ): +# cf.write(f, tmpfile2, cfa={"absolute_paths": absolute_paths}) +# +# nc = netCDF4.Dataset(tmpfile2, "r") +# cfa_file = nc.variables["cfa_file"] +# self.assertEqual(cfa_file[...], filename) +# nc.close() +# +# g = cf.read(tmpfile2) +# self.assertEqual(len(g), 1) +# self.assertTrue(f.equals(g[0])) +# +# def test_CFA_constructs(self): +# """Test choice of constructs to write as CFA-netCDF variables.""" +# f = cf.example_field(1) +# f.del_construct("T") +# f.del_construct("long_name=Grid latitude name") +# cf.write(f, tmpfile1) +# f = cf.read(tmpfile1)[0] +# +# # No constructs +# cf.write(f, tmpfile2, cfa={"constructs": []}) +# nc = netCDF4.Dataset(tmpfile2, "r") +# for var in nc.variables.values(): +# attrs = var.ncattrs() +# self.assertNotIn("aggregated_dimensions", attrs) +# self.assertNotIn("aggregated_data", attrs) +# +# nc.close() +# +# # Field construct +# cf.write(f, tmpfile2, cfa={"constructs": "field"}) +# nc = netCDF4.Dataset(tmpfile2, "r") +# for ncvar, var in nc.variables.items(): +# attrs = var.ncattrs() +# if ncvar in ("ta",): +# self.assertFalse(var.ndim) +# self.assertIn("aggregated_dimensions", attrs) +# self.assertIn("aggregated_data", attrs) +# else: +# self.assertNotIn("aggregated_dimensions", attrs) +# self.assertNotIn("aggregated_data", attrs) +# +# nc.close() +# +# # Dimension construct +# for constructs in ( +# "dimension_coordinate", +# ["dimension_coordinate"], +# {"dimension_coordinate": None}, +# {"dimension_coordinate": 1}, +# {"dimension_coordinate": cf.eq(1)}, +# ): +# cf.write(f, tmpfile2, cfa={"constructs": constructs}) +# nc = netCDF4.Dataset(tmpfile2, "r") +# for ncvar, var in nc.variables.items(): +# attrs = var.ncattrs() +# if ncvar in ( +# "x", +# "x_bnds", +# "y", +# "y_bnds", +# 
"atmosphere_hybrid_height_coordinate", +# "atmosphere_hybrid_height_coordinate_bounds", +# ): +# self.assertFalse(var.ndim) +# self.assertIn("aggregated_dimensions", attrs) +# self.assertIn("aggregated_data", attrs) +# else: +# self.assertNotIn("aggregated_dimensions", attrs) +# self.assertNotIn("aggregated_data", attrs) +# +# nc.close() +# +# # Dimension and auxiliary constructs +# for constructs in ( +# ["dimension_coordinate", "auxiliary_coordinate"], +# {"dimension_coordinate": None, "auxiliary_coordinate": cf.ge(2)}, +# ): +# cf.write(f, tmpfile2, cfa={"constructs": constructs}) +# nc = netCDF4.Dataset(tmpfile2, "r") +# for ncvar, var in nc.variables.items(): +# attrs = var.ncattrs() +# if ncvar in ( +# "x", +# "x_bnds", +# "y", +# "y_bnds", +# "atmosphere_hybrid_height_coordinate", +# "atmosphere_hybrid_height_coordinate_bounds", +# "latitude_1", +# "longitude_1", +# ): +# self.assertFalse(var.ndim) +# self.assertIn("aggregated_dimensions", attrs) +# self.assertIn("aggregated_data", attrs) +# else: +# self.assertNotIn("aggregated_dimensions", attrs) +# self.assertNotIn("aggregated_data", attrs) +# +# nc.close() +# +# def test_CFA_PP(self): +# """Test writing CFA-netCDF with PP format fragments.""" +# f = cf.read("file1.pp")[0] +# cf.write(f, tmpfile1, cfa=True) +# +# # Check that only the fields have been aggregated +# nc = netCDF4.Dataset(tmpfile1, "r") +# for ncvar, var in nc.variables.items(): +# attrs = var.ncattrs() +# if ncvar in ("UM_m01s15i201_vn405",): +# self.assertFalse(var.ndim) +# self.assertIn("aggregated_dimensions", attrs) +# self.assertIn("aggregated_data", attrs) +# else: +# self.assertNotIn("aggregated_dimensions", attrs) +# self.assertNotIn("aggregated_data", attrs) +# +# nc.close() +# +# g = cf.read(tmpfile1) +# self.assertEqual(len(g), 1) +# self.assertTrue(f.equals(g[0])) +# +# def test_CFA_multiple_files(self): +# """Test storing multiple CFA frgament locations.""" +# f = cf.example_field(0) +# cf.write(f, tmpfile1) +# f = 
cf.read(tmpfile1)[0] +# f.add_file_location("/new/location") +# +# cf.write(f, tmpfile2, cfa=True) +# g = cf.read(tmpfile2) +# self.assertEqual(len(g), 1) +# g = g[0] +# self.assertTrue(f.equals(g)) +# +# self.assertEqual(len(g.data.get_filenames()), 2) +# self.assertEqual(len(g.get_filenames()), 3) +# +# def test_CFA_unlimited_dimension(self): +# """Test CFA with unlimited dimensions""" +# # Create a CFA file from a field that has an unlimited +# # dimension and no metadata constructs spanning that dimension +# f = cf.example_field(0) +# d = f.domain_axis("X") +# d.nc_set_unlimited(True) +# f.del_construct("X") +# cf.write(f, tmpfile1) +# g = cf.read(tmpfile1) +# cf.write(g, tmpfile2, cfa=True) +# +# # Check that the CFA file can be read +# h = cf.read(tmpfile2) +# self.assertEqual(len(h), 1) +# +# +# if __name__ == "__main__": +# print("Run date:", datetime.datetime.now()) +# cf.environment() +# print() +# unittest.main(verbosity=2) From c594c7571706d068faa51cdb064822c0825fef12 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Sun, 17 Nov 2024 21:44:19 +0000 Subject: [PATCH 13/51] dev --- cf/read_write/read.py | 3183 +++++++++++++++++++++++++++-------------- 1 file changed, 2126 insertions(+), 1057 deletions(-) diff --git a/cf/read_write/read.py b/cf/read_write/read.py index 020d4c254a..03760c40c6 100644 --- a/cf/read_write/read.py +++ b/cf/read_write/read.py @@ -6,7 +6,7 @@ from re import Pattern from urllib.parse import urlparse -from cfdm import is_log_level_info +import cfdm from cfdm.read_write.netcdf import NetCDFRead from numpy.ma.core import MaskError @@ -17,7 +17,6 @@ from ..fieldlist import FieldList from ..functions import _DEPRECATION_ERROR_FUNCTION_KWARGS, flat from ..query import Query - from .um import UMRead _cached_temporary_files = {} @@ -34,42 +33,7 @@ logger = logging.getLogger(__name__) -@_manage_log_level_via_verbosity -def read( - files, - external=None, - verbose=None, - warnings=False, - ignore_read_error=False, - aggregate=True, - 
nfields=None, - squeeze=False, - unsqueeze=False, - fmt=None, - cdl_string=False, - select=None, - extra=None, - recursive=False, - followlinks=False, - um=None, - chunk=True, - field=None, - height_at_top_of_model=None, - select_options=None, - follow_symlinks=False, - mask=True, - unpack=True, - warn_valid=False, - dask_chunks="storage-aligned", - store_hdf5_chunks=True, - domain=False, - cfa=None, - cfa_write=None, - netcdf_backend=None, - storage_options=None, - cache=True, - chunks="auto", -): +class read(cfdm.read): """Read field or domain constructs from files. The following file formats are supported: netCDF, CFA-netCDF, CDL, @@ -239,98 +203,13 @@ def read( value is assumed to be a string of CDL input rather than the above. - external: (sequence of) `str`, optional - Read external variables (i.e. variables which are named by - attributes, but are not present, in the parent file given - by the *filename* parameter) from the given external - files. Ignored if the parent file does not contain a - global "external_variables" attribute. Multiple external - files may be provided, which are searched in random order - for the required external variables. - - If an external variable is not found in any external - files, or is found in multiple external files, then the - relevant metadata construct is still created, but without - any metadata or data. In this case the construct's - `!is_external` method will return `True`. + {{read external: (sequence of) `str`, optional}} - *Parameter example:* - ``external='cell_measure.nc'`` + {{read extra: (sequence of) `str`, optional}} - *Parameter example:* - ``external=['cell_measure.nc']`` + {{read verbose: `int` or `str` or `None`, optional}} - *Parameter example:* - ``external=('cell_measure_A.nc', 'cell_measure_O.nc')`` - - extra: (sequence of) `str`, optional - Create extra, independent field constructs from netCDF - variables that correspond to particular types metadata - constructs. 
The *extra* parameter may be one, or a - sequence, of: - - ========================== =============================== - *extra* Metadata constructs - ========================== =============================== - ``'field_ancillary'`` Field ancillary constructs - ``'domain_ancillary'`` Domain ancillary constructs - ``'dimension_coordinate'`` Dimension coordinate constructs - ``'auxiliary_coordinate'`` Auxiliary coordinate constructs - ``'cell_measure'`` Cell measure constructs - ========================== =============================== - - This parameter replaces the deprecated *field* parameter. - - *Parameter example:* - To create field constructs from auxiliary coordinate - constructs: ``extra='auxiliary_coordinate'`` or - ``extra=['auxiliary_coordinate']``. - - *Parameter example:* - To create field constructs from domain ancillary and - cell measure constructs: ``extra=['domain_ancillary', - 'cell_measure']``. - - An extra field construct created via the *extra* parameter - will have a domain limited to that which can be inferred - from the corresponding netCDF variable, but without the - connections that are defined by the parent netCDF data - variable. It is possible to create independent fields from - metadata constructs that do incorporate as much of the - parent field construct's domain as possible by using the - `~cf.Field.convert` method of a returned field construct, - instead of setting the *extra* parameter. - - verbose: `int` or `str` or `None`, optional - If an integer from ``-1`` to ``3``, or an equivalent string - equal ignoring case to one of: - - * ``'DISABLE'`` (``0``) - * ``'WARNING'`` (``1``) - * ``'INFO'`` (``2``) - * ``'DETAIL'`` (``3``) - * ``'DEBUG'`` (``-1``) - - set for the duration of the method call only as the minimum - cut-off for the verboseness level of displayed output (log) - messages, regardless of the globally-configured `cf.log_level`. 
- Note that increasing numerical value corresponds to increasing - verbosity, with the exception of ``-1`` as a special case of - maximal and extreme verbosity. - - Otherwise, if `None` (the default value), output messages will - be shown according to the value of the `cf.log_level` setting. - - Overall, the higher a non-negative integer or equivalent string - that is set (up to a maximum of ``3``/``'DETAIL'``) for - increasing verbosity, the more description that is printed to - convey how the contents of the netCDF file were parsed and - mapped to CF data model constructs. - - warnings: `bool`, optional - If True then print warnings when an output field construct - is incomplete due to structural non-compliance of the - dataset. By default such warnings are not displayed. + {{read warnings: `bool`, optional}} ignore_read_error: `bool`, optional If True then ignore any file which raises an IOError @@ -361,6 +240,79 @@ def read( ignored as the format is assumed to be CDL, so in that case it is not necessary to also specify ``fmt='CDL'``. + um: `dict`, optional + For Met Office (UK) PP files and Met Office (UK) fields + files only, provide extra decoding instructions. This + option is ignored for input files which are not PP or + fields files. In most cases, how to decode a file is + inferrable from the file's contents, but if not then each + key/value pair in the dictionary sets a decoding option as + follows: + + * ``'fmt'``: `str` + + The file format (``'PP'`` or ``'FF'``) + + * ``'word_size'``: `int` + + The word size in bytes (``4`` or ``8``). + + * ``'endian'``: `str` + + The byte order (``'big'`` or ``'little'``). + + * ``'version'``: `int` or `str` + + The UM version to be used when decoding the + header. Valid versions are, for example, ``4.2``, + ``'6.6.3'`` and ``'8.2'``. In general, a given version + is ignored if it can be inferred from the header (which + is usually the case for files created by the UM at + versions 5.3 and later). 
The exception to this is when + the given version has a third element (such as the 3 in + 6.6.3), in which case any version in the header is + ignored. The default version is ``4.5``. + + * ``'height_at_top_of_model'``: `float` + + The height in metres of the upper bound of the top model + level. By default the height at the top of the model is taken from + the top level's upper bound defined by BRSVD1 in the + lookup header. If the height can't be determined from + the header, or the given height is less than or equal to + 0, then a coordinate reference system will still be + created that contains the 'a' and 'b' formula term + values, but without an atmosphere hybrid height + dimension coordinate construct. + + .. note:: A current limitation is that if pseudolevels + and atmosphere hybrid height coordinates are + defined by the same lookup headers then the + height **can't be determined + automatically**. In this case the height may + be found after reading as the maximum value of + the bounds of the domain ancillary construct + containing the 'a' formula term. The file can + then be re-read with this height as a *um* + parameter. + + If format is specified as ``'PP'`` then the word size and + byte order default to ``4`` and ``'big'`` respectively. + + This parameter replaces the deprecated *umversion* and + *height_at_top_of_model* parameters. + + *Parameter example:* + To specify that the input files are 32-bit, big-endian + PP files: ``um={'fmt': 'PP'}`` + + *Parameter example:* + To specify that the input files are 32-bit, + little-endian PP files from version 5.1 of the UM: + ``um={'fmt': 'PP', 'endian': 'little', 'version': 5.1}`` + + .. versionadded:: 1.5 + aggregate: `bool` or `dict`, optional If True (the default) or a dictionary (possibly empty) then aggregate the field constructs read in from all input @@ -415,463 +367,47 @@ def read( This parameter replaces the deprecated *follow_symlinks* parameter.
- mask: `bool`, optional - If True (the default) then mask by convention the data of - field and metadata constructs. - - A netCDF array is masked depending on the values of any of - the netCDF attributes ``_FillValue``, ``missing_value``, - ``_Unsigned``, ``valid_min``, ``valid_max``, and - ``valid_range``. - - The masking by convention of a PP or UM array depends on - the value of BMDI in the lookup header. A value other than - ``-1.0e30`` indicates the data value to be masked. - - See - https://ncas-cms.github.io/cf-python/tutorial.html#data-mask - for details. + {{read warn_valid: `bool`, optional}} .. versionadded:: 3.4.0 - unpack: `bool`, optional - If True, the default, then unpack arrays by convention - when the data is read from disk. - - Unpacking is determined by netCDF conventions for the - following variable attributes: ``add_offset``, - ``scale_factor``, and ``_Unsigned``. - - .. versionadded:: NEXTVERSION - - warn_valid: `bool`, optional - If True then print a warning for the presence of - ``valid_min``, ``valid_max`` or ``valid_range`` properties - on field constructs and metadata constructs that have - data. By default no such warning is issued. - - "Out-of-range" data values in the file, as defined by any - of these properties, are automatically masked by default, - which may not be as intended. See the *mask* parameter for - turning off all automatic masking. - - See - https://ncas-cms.github.io/cf-python/tutorial.html#data-mask - for details. + {{read mask: `bool`, optional}} .. versionadded:: 3.4.0 - um: `dict`, optional - For Met Office (UK) PP files and Met Office (UK) fields - files only, provide extra decoding instructions. This - option is ignored for input files which are not PP or - fields files. 
In most cases, how to decode a file is - inferrable from the file's contents, but if not then each - key/value pair in the dictionary sets a decoding option as - follows: - - ============================ ===================================== - Key Value - ============================ ===================================== - ``'fmt'`` The file format (``'PP'`` or - ``'FF'``) - - ``'word_size'`` The word size in bytes - (``4`` or ``8``). - - ``'endian'`` The byte order (``'big'`` or - ``'little'``). - - ``'version'`` The UM version to be used - when decoding the - header. Valid versions are, - for example, ``4.2``, - ``'6.6.3'`` and - ``'8.2'``. In general, a - given version is ignored if - it can be inferred from the - header (which is usually the - case for files created by - the UM at versions 5.3 and - later). The exception to - this is when the given - version has a third element - (such as the 3 in 6.6.3), in - which case any version in - the header is ignored. - - The default version is - ``4.5``. - - ``'height_at_top_of_model'`` The height (in metres) of - the upper bound of the top - model level. By default the - height at top model is taken - from the top level's upper - bound defined by BRSVD1 in - the lookup header. If the - height can't be determined - from the header, or the - given height is less than or - equal to 0, then a - coordinate reference system - will still be created that - contains the 'a' and 'b' - formula term values, but - without an atmosphere hybrid - height dimension coordinate - construct. - - .. note:: A current - limitation is that if - pseudolevels and - atmosphere hybrid height - coordinates are defined - by same the lookup - headers then the height - **can't be determined - automatically**. In this - case the height may be - found after reading as - the maximum value of the - bounds of the domain - ancillary construct - containing the 'a' - formula term. The file - can then be re-read with - this height as a *um* - parameter. 
- ============================ ===================================== - - If format is specified as ``'PP'`` then the word size and - byte order default to ``4`` and ``'big'`` respectively. - - This parameter replaces the deprecated *umversion* and - *height_at_top_of_model* parameters. - - *Parameter example:* - To specify that the input files are 32-bit, big-endian - PP files: ``um={'fmt': 'PP'}`` - - *Parameter example:* - To specify that the input files are 32-bit, - little-endian PP files from version 5.1 of the UM: - ``um={'fmt': 'PP', 'endian': 'little', 'version': 5.1}`` - - .. versionadded:: 1.5 - - dask_chunks: `str`, `int`, `None`, or `dict`, optional - Specify the Dask chunking for data. May be one of the - following: - - * ``'storage-aligned'`` - - This is the default. The Dask chunk size in bytes will - be as close as possible the size given by - `cf.chunksize`, favouring square-like chunk shapes, - with the added restriction that the entirety of each - storage chunk must also lie within exactly one Dask - chunk. - - When reading the data from disk, an entire storage chunk - will be read once per Dask storage chunk that contains - any part of it, so ensuring that a storage chunk lies - within only one Dask chunk can increase performance by - reducing the amount of disk access (particularly when - the data are stored remotely to the client). - - For instance, consider a file variable that has an array - of 64-bit floats with shape (400, 300, 60) and a storage - chunk shape of (100, 5, 60), giving 240 storage chunks - each of size 100*5*60*8 bytes = 0.23 MiB. Then: - - * If `cf.chunksize` returned 134217728 (i.e. 128 MiB), - then the storage-aligned Dask chunks will have shape - (400, 300, 60), giving 1 Dask chunk with size of 54.93 - MiB (compare with a Dask chunk shape of (400, 300, 60) - and size 54.93 MiB, if *dask_chunks* were ``'auto'``.) - - * If `cf.chunksize` returned 33554432 (i.e. 
32 MiB), - then the storage-aligned Dask chunks will have shape - (200, 260, 60), giving 4 Dask chunks with a maximum - size of 23.80 MiB (compare with a Dask chunk shape of - (264, 264, 60) and maximum size 31.90 MiB, if - *dask_chunks* were ``'auto'``.) - - * If `cf.chunksize` returned 4194304 (i.e. 4 MiB), - then the storage-aligned Dask chunks will have shape - (100, 85, 60), giving 16 Dask chunks with a maximum - size of 3.89 MiB (compare with a Dask chunk shape of - (93, 93, 60) and maximum size 3.96 MiB, if - *dask_chunks* were ``'auto'``.) - - There are, however, some occasions when, for particular - data arrays in the file, the ``'auto'`` option will - automatically be used instead of storage-aligned Dask - chunks. This occurs when: - - * The data array in the file is stored contiguously. - - * The data array in the file is compressed by convention - (e.g. ragged array representations, compression by - gathering, subsampled coordinates, etc.). In this case - the Dask chunks are for the uncompressed data, and so - cannot be aligned with the storage chunks of the - compressed array in the file. - - * ``'storage-exact'`` - - Each Dask chunk will contain exactly one storage chunk - and each storage chunk will lie within exactly one Dask - chunk. - - For instance, consider a file variable that has an array - of 64-bit floats with shape (400, 300, 60) and a storage - chunk shape of (100, 5, 60) (i.e. there are 240 storage - chunks, each of size 0.23 MiB). Then the storage-exact - Dask chunks will also have shape (100, 5, 60) giving 240 - Dask chunks with a maximum size of 0.23 MiB. - - There are, however, some occasions when, for particular - data arrays in the file, the ``'auto'`` option will - automatically be used instead of storage-exact Dask - chunks. This occurs when: - - * The data array in the file is stored contiguously. - - * The data array in the file is compressed by convention - (e.g. 
ragged array representations, compression by - gathering, subsampled coordinates, etc.). In this case - the Dask chunks are for the uncompressed data, and so - cannot be aligned with the storage chunks of the - compressed array in the file. - - * ``auto`` - - The Dask chunk size in bytes will be as close as - possible to the size given by `cf.chunksize`, - favouring square-like chunk shapes. This may give - similar Dask chunk shapes as the ``'storage-aligned'`` - option, but without the guarantee that each storage - chunk will lie within exactly one Dask chunk. - - * A byte-size given by a `str` - - The Dask chunk size in bytes will be as close as - possible to the given byte-size, favouring square-like - chunk shapes. Any string value, accepted by the *chunks* - parameter of the `dask.array.from_array` function is - permitted. - - *Example:* - A Dask chunksize of 2 MiB may be specified as - ``'2097152'`` or ``'2 MiB'``. - - * `-1` or `None` - - There is no Dask chunking, i.e. every data array has one - Dask chunk regardless of its size. - - * Positive `int` - - Every dimension of all Dask chunks has this number of - elements. - - *Example:* - For 3-dimensional data, *dask_chunks* of `10` will - give Dask chunks with shape (10, 10, 10). - - * `dict` - - Each of dictionary key identifies a file dimension, with - a value that defines the Dask chunking for that - dimension whenever it is spanned by a data array. A file - dimension is identified in one of three ways: - - 1. the netCDF dimension name, preceded by ``ncdim%`` - (e.g. ``'ncdim%lat'``); - - 2. the value of the "standard name" attribute of a - CF-netCDF coordinate variable that spans the - dimension (e.g. ``'latitude'``); - - 3. the value of the "axis" attribute of a CF-netCDF - coordinate variable that spans the dimension - (e.g. ``'Y'``). 
- - The dictionary values may be a byte-size string, - ``'auto'``, `int` or `None`, with the same meanings as - those types for the *dask_chunks* parameter itself, but - applying only to the specified dimension. In addition, a - dictionary value may be a `tuple` or `list` of integers - that sum to the dimension size. - - Not specifying a file dimension in the dictionary is - equivalent to it being defined with a value of - ``'auto'``. - - *Example:* - ``{'T': '0.5 MiB', 'Z': 'auto', 'Y': [36, 37], 'X': - None}`` - - *Example:* - If a netCDF file contains dimensions ``time``, ``z``, - ``lat`` and ``lon``, then ``{'ncdim%time': 12, - 'ncdim%lat', None, 'ncdim%lon': None}`` will ensure - that, for all applicable data arrays, all ``time`` - axes have a `dask` chunksize of 12; all ``lat`` and - ``lon`` axes are not `dask` chunked; and all ``z`` - axes are `dask` chunked to comply as closely as - possible with the default `dask` chunk size. - - If the netCDF file also contains a ``time`` coordinate - variable with a "standard_name" attribute of - ``'time'`` and an "axis" attribute of ``'T'``, then - the same `dask` chunking could be specified with - either ``{'time': 12, 'ncdim%lat', None, 'ncdim%lon': - None}`` or ``{'T': 12, 'ncdim%lat', None, 'ncdim%lon': - None}``. - - .. versionadded:: NEXTVERSION - - store_hdf5_chunks: `bool`, optional - If True (the default) then store the HDF5 chunking - strategy for each returned data array. The HDF5 chunking - strategy is then accessible via an object's - `nc_hdf5_chunksizes` method. When the HDF5 chunking - strategy is stored, it will be used when the data is - written to a new netCDF4 file with `cf.write` (unless - the strategy was modified prior to writing). - - If False, or if the file being read is not in netCDF4 - format, then no HDF5 chunking strategy is stored. - (i.e. an `nc_hdf5_chunksizes` method will return `None` - for all `Data` objects). 
In this case, when the data is - written to a new netCDF4 file, the HDF5 chunking strategy - will be determined by `cf.write`. - - See the `cf.write` *hdf5_chunks* parameter for details - on how the HDF5 chunking strategy is determined at the - time of writing. + {{read unpack: `bool`}} .. versionadded:: NEXTVERSION - domain: `bool`, optional - If True then return only the domain constructs that are - explicitly defined by CF-netCDF domain variables, ignoring - all CF-netCDF data variables. By default only the field - constructs defined by CF-netCDF data variables are - returned. - - CF-netCDF domain variables are only defined from CF-1.9, - so older datasets automatically contain no CF-netCDF - domain variables. - - The unique domain constructs of the dataset are easily - found with the `cf.unique_constructs` function. For - example:: - - >>> d = cf.read('file.nc', domain=True) - >>> ud = cf.unique_constructs(d) - >>> f = cf.read('file.nc') - >>> ufd = cf.unique_constructs(x.domain for x in f) - - Domain constructs can not be read from UM or PP datasets. + {{read domain: `bool`, optional}} .. versionadded:: 3.11.0 - cfa: `dict`, optional - Configure the reading of CFA-netCDF files. The dictionary - may have any subset of the following key/value pairs to - override the information read from the file: - - * ``'substitutions'``: `dict` - - A dictionary whose key/value pairs define text - substitutions to be applied to the fragment file - names. Each key may be specified with or without the - ``${*}`` syntax (where `*` represents any amount of any - characters). For instance, ``{'substitution': - 'replacement'}`` and ``{'${substitution}': 'replacement'}``' - are equivalent. The substitutions are used in - conjunction with, and take precedence over, any that are - stored in the CFA-netCDF file by the ``substitutions`` - attribute of the ``file`` fragement array variable. - - *Example:* - ``{'replacement': 'file:///data/'}`` - - .. 
versionadded:: 3.15.0 - - netcdf_backend: `None` or `str`, optional - Specify which library to use for reading netCDF files. By - default, or if `None`, then the first one of `netCDF4` and - `h5netcdf` to successfully open the file netCDF file is - used. Setting *netcdf_backend* to one of ``'netCDF4'`` and - ``'h5netcdf'`` will force the use of that library. - - .. note:: The *netcdf_backend* parameter does not affect - the opening of netCDF fragment files that define - the data of aggregation variables. For these, it - is always the case that the first one of - `netCDF4` and `h5netcdf` to successfully open - the file is used. + {{read netcdf_engine: `None` or `str`, optional}} .. versionadded:: NEXTVERSION - storage_options: `dict` or `None`, optional - Pass parameters to the backend file system driver, such as - username, password, server, port, etc. How the storage - options are interpreted depends on the location of the - file: - - **Local File System** + {{read storage_options: `dict` or `None`, optional}} - Storage options are ignored for local files. + .. versionadded:: NEXTVERSION - **HTTP(S)** + {{read cache: `bool`, optional}} - Storage options are ignored for files available across the - network via OPeNDAP. + .. versionadded:: NEXTVERSION - **S3-compatible services** + {{read dask_chunks: `str`, `int`, `None`, or `dict`, optional}} - The backend used is `s3fs`, and the storage options are - used to initialise an `s3fs.S3FileSystem` file system - object. By default, or if `None`, then *storage_options* - is taken as ``{}``. + .. versionadded:: NEXTVERSION - If the ``'endpoint_url'`` key is not in *storage_options*, - nor in a dictionary defined by the ``'client_kwargs'`` key - (both of which are the case when *storage_options* is - `None`), then one will be automatically inserted for - accessing an S3 file. For example, for a file name of - ``'s3://store/data/file.nc'``, an ``'endpoint_url'`` key - with value ``'https://store'`` would be created. 
To - disable this, set ``'endpoint_url'`` to `None`. + {{read store_hdf5_chunks: `bool`, optional}} - *Parameter example:* - For a file name of ``'s3://store/data/file.nc'``, the - following are equivalent: ``None``, ``{}``, - ``{'endpoint_url': 'https://store'}``, and - ``{'client_kwargs': {'endpoint_url': 'https://store'}}`` + .. versionadded:: NEXTVERSION - *Parameter example:* - ``{'key': 'scaleway-api-key...', 'secret': - 'scaleway-secretkey...', 'endpoint_url': - 'https://s3.fr-par.scw.cloud', 'client_kwargs': - {'region_name': 'fr-par'}}`` + {{read cfa: `dict`, optional}} - .. versionadded:: NEXTVERSION + .. versionadded:: 3.15.0 - cache: `bool`, optional - If True, the default, then cache the first and last array - elements of metadata constructs (not field constructs) for - fast future access. In addition, the second and - penultimate array elements will be cached from coordinate - bounds when there are two bounds per cell. For remote - data, setting *cache* to False may speed up the parsing of - the file. + {{read cfa_write: sequence of `str`, optional}} .. versionadded:: NEXTVERSION @@ -901,7 +437,6 @@ def read( `FieldList` or `DomainList` The field or domain constructs found in the input dataset(s). The list may be empty. 
- **Examples** >>> x = cf.read('file.nc') @@ -942,453 +477,501 @@ def read( """ - if field: - _DEPRECATION_ERROR_FUNCTION_KWARGS( - "cf.read", - {"field": field}, - "Use keyword 'extra' instead", - removed_at="4.0.0", - ) # pragma: no cover - - if select_options: - _DEPRECATION_ERROR_FUNCTION_KWARGS( - "cf.read", {"select_options": select_options}, removed_at="4.0.0" - ) # pragma: no cover - - if follow_symlinks: - _DEPRECATION_ERROR_FUNCTION_KWARGS( - "cf.read", - {"follow_symlinks": follow_symlinks}, - "Use keyword 'followlink' instead.", - removed_at="4.0.0", - ) # pragma: no cover - - if height_at_top_of_model is not None: - _DEPRECATION_ERROR_FUNCTION_KWARGS( - "cf.read", - {"height_at_top_of_model": height_at_top_of_model}, - "Use keyword 'um' instead.", - removed_at="4.0.0", - ) # pragma: no cover - - if chunk is not True: - _DEPRECATION_ERROR_FUNCTION_KWARGS( - "cf.read", - {"chunk": chunk}, - "Use keyword 'dask_chunks' instead.", - version="3.14.0", - removed_at="5.0.0", - ) # pragma: no cover - - if chunks != "auto": - _DEPRECATION_ERROR_FUNCTION_KWARGS( - "cf.read", - {"chunk": chunk}, - "Use keyword 'dask_chunks' instead.", - version="3.14.0", - removed_at="5.0.0", - ) # pragma: no cover - - # Parse select - if isinstance(select, (str, Query, Pattern)): - select = (select,) - - # Manage input parameters where contradictions are possible: - if cdl_string and fmt: - if fmt == "CDL": - if is_log_level_info(logger): - logger.info( - "It is not necessary to set the cf.read fmt as 'CDL' when " - "cdl_string is True, since that implies CDL is the format." - ) # pragma: no cover - else: - raise ValueError( - "cdl_string can only be True when the format is CDL, though " - "fmt is ignored in that case so there is no need to set it." 
- ) - if squeeze and unsqueeze: - raise ValueError("squeeze and unsqueeze can not both be True") - if follow_symlinks and not recursive: - raise ValueError( - f"Can't set follow_symlinks={follow_symlinks!r} " - f"when recursive={recursive!r}" - ) - - info = is_log_level_info(logger) - - # Initialise the output list of fields/domains - if domain: - out = DomainList() - else: - out = FieldList() - - if isinstance(aggregate, dict): - aggregate_options = aggregate.copy() - aggregate = True - else: - aggregate_options = {} - - aggregate_options["copy"] = False - - # Parse the extra parameter - if extra is None: - extra = () - elif isinstance(extra, str): - extra = (extra,) - - ftypes = set() - - # Count the number of fields (in all files) and the number of - # files - field_counter = -1 - file_counter = 0 - - if cdl_string: - files2 = [] - - # 'files' input may be a single string or a sequence of them and to - # handle both cases it is easiest to convert former to a one-item seq. - if isinstance(files, str): - files = [files] - - for cdl_file in files: - c = tempfile.NamedTemporaryFile( - mode="w", - dir=tempfile.gettempdir(), - prefix="cf_", - suffix=".cdl", - ) - c_name = c.name - with open(c_name, "w") as f: - f.write(cdl_file) + implementation = implementation() + + @_manage_log_level_via_verbosity + def __new__( + cls, + files, + external=None, + verbose=None, + warnings=False, + ignore_read_error=False, + aggregate=True, + nfields=None, + squeeze=False, + unsqueeze=False, + fmt=None, + cdl_string=False, + select=None, + extra=None, + recursive=False, + followlinks=False, + um=None, + chunk=True, + field=None, + height_at_top_of_model=None, + select_options=None, + follow_symlinks=False, + mask=True, + unpack=True, + warn_valid=False, + dask_chunks="storage-aligned", + store_hdf5_chunks=True, + domain=False, + cfa=None, + cfa_write=None, + netcdf_backend=None, + storage_options=None, + cache=True, + chunks="auto", + ): + """TODOCFA""" + if field: + 
_DEPRECATION_ERROR_FUNCTION_KWARGS( + "cf.read", + {"field": field}, + "Use keyword 'extra' instead", + removed_at="4.0.0", + ) # pragma: no cover - # ---------------------------------------------------------------- - # Need to cache the TemporaryFile object so that it doesn't get - # deleted too soon - # ---------------------------------------------------------------- - _cached_temporary_files[c_name] = c + if select_options: + _DEPRECATION_ERROR_FUNCTION_KWARGS( + "cf.read", + {"select_options": select_options}, + removed_at="4.0.0", + ) # pragma: no cover - files2.append(c.name) + if follow_symlinks: + _DEPRECATION_ERROR_FUNCTION_KWARGS( + "cf.read", + {"follow_symlinks": follow_symlinks}, + "Use keyword 'followlink' instead.", + removed_at="4.0.0", + ) # pragma: no cover - files = files2 + if height_at_top_of_model is not None: + _DEPRECATION_ERROR_FUNCTION_KWARGS( + "cf.read", + {"height_at_top_of_model": height_at_top_of_model}, + "Use keyword 'um' instead.", + removed_at="4.0.0", + ) # pragma: no cover - for file_glob in flat(files): - # Expand variables - file_glob = os.path.expanduser(os.path.expandvars(file_glob)) + if chunk is not True: + _DEPRECATION_ERROR_FUNCTION_KWARGS( + "cf.read", + {"chunk": chunk}, + "Use keyword 'dask_chunks' instead.", + version="3.14.0", + removed_at="5.0.0", + ) # pragma: no cover - scheme = urlparse(file_glob).scheme - if scheme in ("https", "http", "s3"): - # Do not glob a remote URL - files2 = (file_glob,) - else: - # Glob files on disk - files2 = glob(file_glob) - - if not files2 and not ignore_read_error: - open(file_glob, "rb") - - files3 = [] - for x in files2: - if isdir(x): - # Walk through directories, possibly recursively - for path, subdirs, filenames in os.walk( - x, followlinks=followlinks - ): - files3.extend(os.path.join(path, f) for f in filenames) - if not recursive: - break - else: - files3.append(x) + if chunks != "auto": + _DEPRECATION_ERROR_FUNCTION_KWARGS( + "cf.read", + {"chunk": chunk}, + "Use keyword 
'dask_chunks' instead.", + version="3.14.0", + removed_at="5.0.0", + ) # pragma: no cover - files2 = files3 + # Parse select + if isinstance(select, (str, Query, Pattern)): + select = (select,) - for filename in files2: - if info: - logger.info(f"File: {filename}") # pragma: no cover + info = cfdm.is_log_level_info(logger) - if um: - ftype = "UM" + # Manage input parameters where contradictions are possible: + if cdl_string and fmt: + if fmt == "CDL": + if info: + logger.info( + "It is not necessary to set the cf.read fmt as 'CDL' when " + "cdl_string is True, since that implies CDL is the format." + ) # pragma: no cover else: - try: - ftype = file_type(filename) - except Exception as error: - if not ignore_read_error: - raise ValueError(error) - - logger.warning(f"WARNING: {error}") # pragma: no cover - continue - - if domain and ftype == "UM": raise ValueError( - f"Can't read PP/UM file {filename} into domain constructs" + "cdl_string can only be True when the format is CDL, though " + "fmt is ignored in that case so there is no need to set it." 
) - - ftypes.add(ftype) - - # -------------------------------------------------------- - # Read the file - # -------------------------------------------------------- - file_contents = _read_a_file( - filename, - ftype=ftype, - external=external, - ignore_read_error=ignore_read_error, - verbose=verbose, - warnings=warnings, - aggregate=aggregate, - aggregate_options=aggregate_options, - selected_fmt=fmt, - um=um, - extra=extra, - height_at_top_of_model=height_at_top_of_model, - dask_chunks=dask_chunks, - store_hdf5_chunks=store_hdf5_chunks, - mask=mask, - unpack=unpack, - warn_valid=warn_valid, - select=select, - domain=domain, - cfa=cfa, - cfa_write=cfa_write, - netcdf_backend=netcdf_backend, - storage_options=storage_options, - cache=cache, + if squeeze and unsqueeze: + raise ValueError("squeeze and unsqueeze can not both be True") + if follow_symlinks and not recursive: + raise ValueError( + f"Can't set follow_symlinks={follow_symlinks!r} " + f"when recursive={recursive!r}" ) - # -------------------------------------------------------- - # Select matching fields (not from UM files, yet) - # -------------------------------------------------------- - if select and ftype != "UM": - file_contents = file_contents.select_by_identity(*select) - - # -------------------------------------------------------- - # Add this file's contents to that already read from other - # files - # -------------------------------------------------------- - out.extend(file_contents) - - field_counter = len(out) - file_counter += 1 - - if info: - logger.info( - f"Read {field_counter} field{_plural(field_counter)} from " - f"{file_counter} file{_plural(file_counter)}" - ) # pragma: no cover - - # ---------------------------------------------------------------- - # Aggregate the output fields/domains - # ---------------------------------------------------------------- - if aggregate and len(out) > 1: - org_len = len(out) # pragma: no cover - - out = cf_aggregate(out, **aggregate_options) - - n 
= len(out) # pragma: no cover - if info: - logger.info( - f"{org_len} input field{_plural(org_len)} aggregated into " - f"{n} field{_plural(n)}" - ) # pragma: no cover - - # ---------------------------------------------------------------- - # Sort by netCDF variable name - # ---------------------------------------------------------------- - if len(out) > 1: - out.sort(key=lambda f: f.nc_get_variable("")) - - # ---------------------------------------------------------------- - # Add standard names to UM/PP fields (post aggregation) - # ---------------------------------------------------------------- - for f in out: - standard_name = f._custom.get("standard_name", None) - if standard_name is not None: - f.set_property("standard_name", standard_name, copy=False) - del f._custom["standard_name"] - - # ---------------------------------------------------------------- - # Select matching fields from UM/PP fields (post setting of - # standard names) - # ---------------------------------------------------------------- - if select and "UM" in ftypes: - out = out.select_by_identity(*select) - - # ---------------------------------------------------------------- - # Squeeze size one dimensions from the data arrays. Do one of: - # - # 1) Squeeze the fields, i.e. remove all size one dimensions from - # all field data arrays - # - # 2) Unsqueeze the fields, i.e. Include all size 1 domain - # dimensions in the data array. 
- # - # 3) Nothing - # ---------------------------------------------------------------- - if not domain: - if squeeze: - for f in out: - f.squeeze(inplace=True) - elif unsqueeze: - for f in out: - f.unsqueeze(inplace=True) - - if nfields is not None and len(out) != nfields: - raise ValueError( - f"{nfields} field{_plural(nfields)} requested but " - f"{len(out)} field/domain constucts found in " - f"file{_plural(file_counter)}" - ) - - return out - - -def _plural(n): # pragma: no cover - """Return a suffix which reflects a word's plural.""" - return "s" if n != 1 else "" # pragma: no cover - - -@_manage_log_level_via_verbosity -def _read_a_file( - filename, - ftype=None, - aggregate=True, - aggregate_options=None, - ignore_read_error=False, - verbose=None, - warnings=False, - external=None, - selected_fmt=None, - um=None, - extra=None, - height_at_top_of_model=None, - mask=True, - unpack=True, - warn_valid=False, - dask_chunks="storage-aligned", - store_hdf5_chunks=True, - select=None, - domain=False, - cfa=None, - cfa_write=None, - netcdf_backend=None, - storage_options=None, - cache=True, -): - """Read the contents of a single file into a field list. + netcdf = NetCDFRead(cls.implementation) - :Parameters: - - filename: `str` - See `cf.read` for details. + # Initialise the output list of fields/domains + if domain: + out = DomainList() + else: + out = FieldList() - ftype: `str` - The file format to interpret the file. Recognised formats are - ``'netCDF'``, ``'CDL'``, ``'UM'`` and ``'PP'``. 
+ if isinstance(aggregate, dict): + aggregate_options = aggregate.copy() + aggregate = True + else: + aggregate_options = {} + + aggregate_options["copy"] = False + + # Parse the extra parameter + if extra is None: + extra = () + elif isinstance(extra, str): + extra = (extra,) + + ftypes = set() + + # Count the number of fields (in all files) and the number of + # files + field_counter = -1 + file_counter = 0 + + if cdl_string: + files2 = [] + + # 'files' input may be a single string or a sequence of + # them and to handle both cases it is easiest to convert + # former to a one-item seq. + if isinstance(files, str): + files = [files] + + for cdl_file in files: + c = tempfile.NamedTemporaryFile( + mode="w", + dir=tempfile.gettempdir(), + prefix="cf_", + suffix=".cdl", + ) - aggregate_options: `dict`, optional - See `cf.read` for details. + c_name = c.name + with open(c_name, "w") as f: + f.write(cdl_file) - ignore_read_error: `bool`, optional - See `cf.read` for details. + # Need to cache the TemporaryFile object so that it + # doesn't get deleted too soon + _cached_temporary_files[c_name] = c - mask: `bool`, optional - See `cf.read` for details. + files2.append(c.name) - unpack: `bool`, optional - See `cf.read` for details. + files = files2 - verbose: `int` or `str` or `None`, optional - See `cf.read` for details. + for file_glob in flat(files): + # Expand variables + file_glob = os.path.expanduser(os.path.expandvars(file_glob)) - select: optional - For `read. Ignored for a netCDF file. - - domain: `bool`, optional - See `cf.read` for details. 
+ scheme = urlparse(file_glob).scheme + if scheme in ("https", "http", "s3"): + # Do not glob a remote URL + files2 = (file_glob,) + else: + # Glob files on disk + files2 = glob(file_glob) + + if not files2 and not ignore_read_error: + open(file_glob, "rb") + + files3 = [] + for x in files2: + if isdir(x): + # Walk through directories, possibly recursively + for path, subdirs, filenames in os.walk( + x, followlinks=followlinks + ): + files3.extend( + os.path.join(path, f) for f in filenames + ) + if not recursive: + break + else: + files3.append(x) + + files2 = files3 + + for filename in files2: + if info: + logger.info(f"File: {filename}") # pragma: no cover + + if um: + ftype = "UM" + else: + try: + ftype = cls.file_type(filename) + except Exception as error: + if not ignore_read_error: + raise ValueError(error) + + logger.warning(f"WARNING: {error}") # pragma: no cover + continue + + if domain and ftype == "UM": + raise ValueError( + f"Can't read PP/UM file {filename} into domain constructs" + ) + + ftypes.add(ftype) + + # -------------------------------------------------------- + # Read the file + # -------------------------------------------------------- + file_contents = cls._read_a_file( + filename, + ftype=ftype, + external=external, + ignore_read_error=ignore_read_error, + verbose=verbose, + warnings=warnings, + aggregate=aggregate, + aggregate_options=aggregate_options, + selected_fmt=fmt, + um=um, + extra=extra, + height_at_top_of_model=height_at_top_of_model, + dask_chunks=dask_chunks, + store_hdf5_chunks=store_hdf5_chunks, + mask=mask, + unpack=unpack, + warn_valid=warn_valid, + select=select, + domain=domain, + cfa=cfa, + cfa_write=cfa_write, + netcdf_backend=netcdf_backend, + storage_options=storage_options, + cache=cache, + ) - cfa: `dict`, optional - See `cf.read` for details. 
+ # -------------------------------------------------------- + # Select matching fields (not from UM files, yet) + # -------------------------------------------------------- + if select and ftype != "UM": + file_contents = file_contents.select_by_identity(*select) - .. versionadded:: 3.15.0 + # -------------------------------------------------------- + # Add this file's contents to that already read from other + # files + # -------------------------------------------------------- + out.extend(file_contents) - storage_options: `dict` or `None`, optional - See `cf.read` for details. + field_counter = len(out) + file_counter += 1 - .. versionadded:: NEXTVERSION + if info: + logger.info( + f"Read {field_counter} field{cls._plural(field_counter)} from " + f"{file_counter} file{cls._plural(file_counter)}" + ) # pragma: no cover - netcdf_backend: `str` or `None`, optional - See `cf.read` for details. + # ---------------------------------------------------------------- + # Aggregate the output fields/domains + # ---------------------------------------------------------------- + if aggregate and len(out) > 1: + org_len = len(out) # pragma: no cover - .. versionadded:: NEXTVERSION + out = cf_aggregate(out, **aggregate_options) - cache: `bool`, optional - See `cf.read` for details. + n = len(out) # pragma: no cover + if info: + logger.info( + f"{org_len} input field{cls._plural(org_len)} aggregated into " + f"{n} field{cls._plural(n)}" + ) # pragma: no cover - .. 
versionadded:: NEXTVERSION + # ---------------------------------------------------------------- + # Sort by netCDF variable name + # ---------------------------------------------------------------- + if len(out) > 1: + out.sort(key=lambda f: f.nc_get_variable("")) + + # ---------------------------------------------------------------- + # Add standard names to UM/PP fields (post aggregation) + # ---------------------------------------------------------------- + for f in out: + standard_name = f._custom.get("standard_name", None) + if standard_name is not None: + f.set_property("standard_name", standard_name, copy=False) + del f._custom["standard_name"] + + # ---------------------------------------------------------------- + # Select matching fields from UM/PP fields (post setting of + # standard names) + # ---------------------------------------------------------------- + if select and "UM" in ftypes: + out = out.select_by_identity(*select) + + # ---------------------------------------------------------------- + # Squeeze size one dimensions from the data arrays. Do one of: + # + # 1) Squeeze the fields, i.e. remove all size one dimensions from + # all field data arrays + # + # 2) Unsqueeze the fields, i.e. Include all size 1 domain + # dimensions in the data array. + # + # 3) Nothing + # ---------------------------------------------------------------- + if not domain: + if squeeze: + for f in out: + f.squeeze(inplace=True) + elif unsqueeze: + for f in out: + f.unsqueeze(inplace=True) + + if nfields is not None and len(out) != nfields: + raise ValueError( + f"{nfields} field{cls._plural(nfields)} requested but " + f"{len(out)} field/domain constucts found in " + f"file{cls._plural(file_counter)}" + ) - :Returns: + return out - `FieldList` or `DomainList` - The field or domain constructs in the dataset. 
+ @staticmethod + def _plural(n): # pragma: no cover + """Return a suffix which reflects a word's plural.""" + return "s" if n != 1 else "" # pragma: no cover + + @classmethod + @_manage_log_level_via_verbosity + def _read_a_file( + cls, + filename, + ftype=None, + aggregate=True, + aggregate_options=None, + ignore_read_error=False, + verbose=None, + warnings=False, + external=None, + selected_fmt=None, + um=None, + extra=None, + height_at_top_of_model=None, + mask=True, + unpack=True, + warn_valid=False, + dask_chunks="storage-aligned", + store_hdf5_chunks=True, + select=None, + domain=False, + cfa=None, + cfa_write=None, + netcdf_backend=None, + storage_options=None, + cache=True, + ): + """Read the contents of a single file into a field list. + + :Parameters: + + filename: `str` + See `cf.read` for details. + + ftype: `str` + The file format to interpret the file. Recognised formats are + ``'netCDF'``, ``'CDL'``, ``'UM'`` and ``'PP'``. + + aggregate_options: `dict`, optional + See `cf.read` for details. + + ignore_read_error: `bool`, optional + See `cf.read` for details. + + mask: `bool`, optional + See `cf.read` for details. + + unpack: `bool`, optional + See `cf.read` for details. + + verbose: `int` or `str` or `None`, optional + See `cf.read` for details. + + select: optional + For `read. Ignored for a netCDF file. + + domain: `bool`, optional + See `cf.read` for details. + + cfa: `dict`, optional + See `cf.read` for details. + + .. versionadded:: 3.15.0 + + storage_options: `dict` or `None`, optional + See `cf.read` for details. + + .. versionadded:: NEXTVERSION + + netcdf_backend: `str` or `None`, optional + See `cf.read` for details. + + .. versionadded:: NEXTVERSION + + cache: `bool`, optional + See `cf.read` for details. + + .. versionadded:: NEXTVERSION + + :Returns: + + `FieldList` or `DomainList` + The field or domain constructs in the dataset. 
+ + """ + if aggregate_options is None: + aggregate_options = {} + + # Find this file's type + fmt = None + word_size = None + endian = None + height_at_top_of_model = None + umversion = 405 + + if um: + fmt = um.get("fmt") + word_size = um.get("word_size") + endian = um.get("endian") + umversion = um.get("version", umversion) + height_at_top_of_model = um.get("height_at_top_of_model") + + if fmt is not None: + fmt = fmt.upper() + + if umversion is not None: + umversion = float(str(umversion).replace(".", "0", 1)) + + extra_read_vars = { + "fmt": selected_fmt, + "ignore_read_error": ignore_read_error, + } + + # ---------------------------------------------------------------- + # Still here? Read the file into fields or domains. + # ---------------------------------------------------------------- + originally_cdl = ftype == "CDL" + if originally_cdl: + # Create a temporary netCDF file from input CDL + ftype = "netCDF" + cdl_filename = filename + filename = netcdf.cdl_to_netcdf(filename) + extra_read_vars["fmt"] = "NETCDF" - """ - if aggregate_options is None: - aggregate_options = {} - - # Find this file's type - fmt = None - word_size = None - endian = None - height_at_top_of_model = None - umversion = 405 - - if um: - fmt = um.get("fmt") - word_size = um.get("word_size") - endian = um.get("endian") - umversion = um.get("version", umversion) - height_at_top_of_model = um.get("height_at_top_of_model") - - if fmt is not None: - fmt = fmt.upper() - - if umversion is not None: - umversion = float(str(umversion).replace(".", "0", 1)) - - extra_read_vars = { - "fmt": selected_fmt, - "ignore_read_error": ignore_read_error, - } - - # ---------------------------------------------------------------- - # Still here? Read the file into fields or domains. 
- # ---------------------------------------------------------------- - originally_cdl = ftype == "CDL" - if originally_cdl: - # Create a temporary netCDF file from input CDL - ftype = "netCDF" - cdl_filename = filename - filename = netcdf.cdl_to_netcdf(filename) - extra_read_vars["fmt"] = "NETCDF" - - if not netcdf.is_netcdf_file(filename): - error_msg = ( - f"Can't determine format of file {filename} generated " - f"from CDL file {cdl_filename}" - ) - if ignore_read_error: - logger.warning(error_msg) # pragma: no cover - return FieldList() - else: - raise IOError(error_msg) - - if ftype == "netCDF" and extra_read_vars["fmt"] in (None, "NETCDF", "CFA"): - # See https://github.com/NCAS-CMS/cfdm/issues/128 for context on the - # try/except here, which acts as a temporary fix pending decisions on - # the best way to handle CDL with only header or coordinate info. - try: - out = netcdf.read( + if not netcdf.is_netcdf_file(filename): + error_msg = ( + f"Can't determine format of file {filename} generated " + f"from CDL file {cdl_filename}" + ) + if ignore_read_error: + logger.warning(error_msg) # pragma: no cover + return FieldList() + else: + raise IOError(error_msg) + + if ftype == "netCDF" and extra_read_vars["fmt"] in ( + None, + "NETCDF", + "CFA", + ): + out = super().__new__( + cls, filename, external=external, extra=extra, @@ -1407,92 +990,1578 @@ def _read_a_file( cfa=cfa, cfa_write=cfa_write, ) - except MaskError: - # Some data required for field interpretation is missing, - # manifesting downstream as a NumPy MaskError. - if originally_cdl: + # # See https://github.com/NCAS-CMS/cfdm/issues/128 for context on the + # # try/except here, which acts as a temporary fix pending decisions on + # # the best way to handle CDL with only header or coordinate info. 
+ # try: + # out = netcdf.read( + # filename, + # external=external, + # extra=extra, + # verbose=verbose, + # warnings=warnings, + # extra_read_vars=extra_read_vars, + # mask=mask, + # unpack=unpack, + # warn_valid=warn_valid, + # domain=domain, + # storage_options=storage_options, + # netcdf_backend=netcdf_backend, + # dask_chunks=dask_chunks, + # store_hdf5_chunks=store_hdf5_chunks, + # cache=cache, + # cfa=cfa, + # cfa_write=cfa_write, + # ) + # except MaskError: + # # Some data required for field interpretation is missing, + # # manifesting downstream as a NumPy MaskError. + # if originally_cdl: + # raise ValueError( + # "Unable to convert CDL without data to field construct(s) " + # "because there is insufficient information provided by " + # "the header and/or coordinates alone in this case." + # ) + # else: + # raise ValueError( + # "Unable to convert netCDF to field or domain construct " + # "because there is missing data." + # ) + + elif ftype == "UM" and extra_read_vars["fmt"] in (None, "UM"): + if domain: raise ValueError( - "Unable to convert CDL without data to field construct(s) " - "because there is insufficient information provided by " - "the header and/or coordinates alone in this case." - ) - else: - raise ValueError( - "Unable to convert netCDF to field or domain construct " - "because there is missing data." 
+ "Can't set domain=True when reading UM or PP datasets" ) - elif ftype == "UM" and extra_read_vars["fmt"] in (None, "UM"): - if domain: - raise ValueError( - "Can't set domain=True when reading UM or PP datasets" + out = UM.read( + filename, + um_version=umversion, + verbose=verbose, + set_standard_name=False, + height_at_top_of_model=height_at_top_of_model, + fmt=fmt, + word_size=word_size, + endian=endian, + select=select, ) - out = UM.read( - filename, - um_version=umversion, - verbose=verbose, - set_standard_name=False, - height_at_top_of_model=height_at_top_of_model, - fmt=fmt, - word_size=word_size, - endian=endian, - select=select, - ) - - # PP fields are aggregated intrafile prior to interfile - # aggregation - if aggregate: - # For PP fields, the default is strict_units=False - if "strict_units" not in aggregate_options: - aggregate_options["relaxed_units"] = True - - # ---------------------------------------------------------------- - # Return the fields - # ---------------------------------------------------------------- - if domain: - return DomainList(out) - - return FieldList(out) - - -def file_type(filename): - """Return the file format. - - :Parameters: - - filename: `str` - The file name. - - :Returns: - - `str` - The format type of the file. One of ``'netCDF'``, ``'UM'`` - or ``'CDL'``. 
- - **Examples** - - >>> file_type(filename) - 'netCDF' + # PP fields are aggregated intrafile prior to interfile + # aggregation + if aggregate: + # For PP fields, the default is strict_units=False + if "strict_units" not in aggregate_options: + aggregate_options["relaxed_units"] = True - """ - # ---------------------------------------------------------------- - # NetCDF - # ---------------------------------------------------------------- - if netcdf.is_netcdf_file(filename): - return "netCDF" - - # ---------------------------------------------------------------- - # PP or FF - # ---------------------------------------------------------------- - if UM.is_um_file(filename): - return "UM" - - # ---------------------------------------------------------------- - # CDL - # ---------------------------------------------------------------- - if netcdf.is_cdl_file(filename): - return "CDL" - - # Still here? - raise IOError(f"Can't determine format of file {filename}") + # Return the fields/domains + if domain: + return DomainList(out) + + return FieldList(out) + + @classmethod + def file_type(cls, filename): + """Return the file format. + + :Parameters: + + filename: `str` + The file name. + + :Returns: + + `str` + The format type of the file. One of ``'netCDF'``, ``'UM'`` + or ``'CDL'``. + + **Examples** + + >>> file_type(filename) + 'netCDF' + + """ + # ---------------------------------------------------------------- + # NetCDF + # ---------------------------------------------------------------- + if netcdf.is_netcdf_file(filename): + return "netCDF" + + # ---------------------------------------------------------------- + # PP or FF + # ---------------------------------------------------------------- + if UM.is_um_file(filename): + return "UM" + + # ---------------------------------------------------------------- + # CDL + # ---------------------------------------------------------------- + if netcdf.is_cdl_file(filename): + return "CDL" + + # Still here? 
+ raise IOError(f"Can't determine format of file {filename}") + + +# @_manage_log_level_via_verbosity +# def read_old( +# files, +# external=None, +# verbose=None, +# warnings=False, +# ignore_read_error=False, +# aggregate=True, +# nfields=None, +# squeeze=False, +# unsqueeze=False, +# fmt=None, +# cdl_string=False, +# select=None, +# extra=None, +# recursive=False, +# followlinks=False, +# um=None, +# chunk=True, +# field=None, +# height_at_top_of_model=None, +# select_options=None, +# follow_symlinks=False, +# mask=True, +# unpack=True, +# warn_valid=False, +# dask_chunks="storage-aligned", +# store_hdf5_chunks=True, +# domain=False, +# cfa=None, +# cfa_write=None, +# netcdf_backend=None, +# storage_options=None, +# cache=True, +# chunks="auto", +# ): +# """Read field or domain constructs from files. +# +# The following file formats are supported: netCDF, CFA-netCDF, CDL, +# UM fields file, and PP. +# +# Input datasets are mapped to constructs in memory which are +# returned as elements of a `FieldList` or if the *domain* parameter +# is True, a `DomainList`. +# +# NetCDF files may be on disk, on an OPeNDAP server, or in an S3 +# object store. +# +# Any amount of files of any combination of file types may be read. +# +# **NetCDF unlimited dimensions** +# +# Domain axis constructs that correspond to NetCDF unlimited +# dimensions may be accessed with the +# `~cf.DomainAxis.nc_is_unlimited` and +# `~cf.DomainAxis.nc_set_unlimited` methods of a domain axis +# construct. +# +# **NetCDF hierarchical groups** +# +# Hierarchical groups in CF provide a mechanism to structure +# variables within netCDF4 datasets. Field constructs are +# constructed from grouped datasets by applying the well defined +# rules in the CF conventions for resolving references to +# out-of-group netCDF variables and dimensions. The group structure +# is preserved in the field construct's netCDF interface. Groups +# were incorporated into CF-1.8. 
For files with groups that state +# compliance to earlier versions of the CF conventions, the groups +# will be interpreted as per the latest release of CF. +# +# **CF-compliance** +# +# If the dataset is partially CF-compliant to the extent that it is +# not possible to unambiguously map an element of the netCDF dataset +# to an element of the CF data model, then a field construct is +# still returned, but may be incomplete. This is so that datasets +# which are partially conformant may nonetheless be modified in +# memory and written to new datasets. +# +# Such "structural" non-compliance would occur, for example, if the +# "coordinates" attribute of a CF-netCDF data variable refers to +# another variable that does not exist, or refers to a variable that +# spans a netCDF dimension that does not apply to the data +# variable. Other types of non-compliance are not checked, such +# whether or not controlled vocabularies have been adhered to. The +# structural compliance of the dataset may be checked with the +# `~cf.Field.dataset_compliance` method of the field construct, as +# well as optionally displayed when the dataset is read by setting +# the warnings parameter. +# +# **CDL files** +# +# A file is considered to be a CDL representation of a netCDF +# dataset if it is a text file whose first non-comment line starts +# with the seven characters "netcdf " (six letters followed by a +# space). A comment line is identified as one which starts with any +# amount white space (including none) followed by "//" (two +# slashes). It is converted to a temporary netCDF4 file using the +# external ``ncgen`` command, and the temporary file persists until +# the end of the Python session, at which time it is automatically +# deleted. 
The CDL file may omit data array values (as would be the +# case, for example, if the file was created with the ``-h`` or +# ``-c`` option to ``ncdump``), in which case the the relevant +# constructs in memory will be created with data with all missing +# values. +# +# **PP and UM fields files** +# +# 32-bit and 64-bit PP and UM fields files of any endian-ness can be +# read. In nearly all cases the file format is auto-detected from +# the first 64 bits in the file, but for the few occasions when this +# is not possible, the *um* keyword allows the format to be +# specified, as well as the UM version (if the latter is not +# inferrable from the PP or lookup header information). +# +# 2-d "slices" within a single file are always combined, where +# possible, into field constructs with 3-d, 4-d or 5-d data. This is +# done prior to any field construct aggregation (see the *aggregate* +# parameter). +# +# When reading PP and UM fields files, the *relaxed_units* aggregate +# option is set to `True` by default, because units are not always +# available to field constructs derived from UM fields files or PP +# files. +# +# **Performance** +# +# Descriptive properties are always read into memory, but lazy +# loading is employed for all data arrays which means that, in +# general, data is not read into memory until the data is required +# for inspection or to modify the array contents. This maximises the +# number of field constructs that may be read within a session, and +# makes the read operation fast. The exceptions to the lazy reading +# of data arrays are: +# +# * Data that define purely structural elements of other data arrays +# that are compressed by convention (such as a count variable for +# a ragged contiguous array). These are always read from disk. 
+# +# * If field or domain aggregation is in use (as it is by default, +# see the *aggregate* parameter), then the data of metadata +# constructs may have to be read to determine how the contents of +# the input files may be aggregated. This won't happen for a +# particular field or domain's metadata, though, if it can be +# ascertained from descriptive properties alone that it can't be +# aggregated with anything else (as would be the case, for +# instance, when a field has a unique standard name). +# +# However, when two or more field or domain constructs are +# aggregated to form a single construct then the data arrays of some +# metadata constructs (coordinates, cell measures, etc.) must be +# compared non-lazily to ascertain if aggregation is possible. +# +# .. seealso:: `cf.aggregate`, `cf.write`, `cf.Field`, `cf.Domain`, +# `cf.load_stash2standard_name`, `cf.unique_constructs` +# +# :Parameters: +# +# files: (arbitrarily nested sequence of) `str` +# A string or arbitrarily nested sequence of strings giving +# the file names, directory names, or OPenDAP URLs from +# which to read field constructs. Various type of expansion +# are applied to the names: +# +# ==================== ====================================== +# Expansion Description +# ==================== ====================================== +# Tilde An initial component of ``~`` or +# ``~user`` is replaced by that *user*'s +# home directory. +# +# Environment variable Substrings of the form ``$name`` or +# ``${name}`` are replaced by the value +# of environment variable *name*. +# +# Pathname A string containing UNIX file name +# metacharacters as understood by the +# Python `glob` module is replaced by +# the list of matching file names. This +# type of expansion is ignored for +# OPenDAP URLs. +# ==================== ====================================== +# +# Where more than one type of expansion is used in the same +# string, they are applied in the order given in the above +# table. 
+# +# *Parameter example:* +# The file ``file.nc`` in the user's home directory could +# be described by any of the following: +# ``'$HOME/file.nc'``, ``'${HOME}/file.nc'``, +# ``'~/file.nc'``, ``'~/tmp/../file.nc'``. +# +# When a directory is specified, all files in that directory +# are read. Sub-directories are not read unless the +# *recursive* parameter is True. If any directories contain +# files that are not valid datasets then an exception will +# be raised, unless the *ignore_read_error* parameter is +# True. +# +# As a special case, if the `cdl_string` parameter is set to +# True, the interpretation of `files` changes so that each +# value is assumed to be a string of CDL input rather +# than the above. +# +# external: (sequence of) `str`, optional +# Read external variables (i.e. variables which are named by +# attributes, but are not present, in the parent file given +# by the *filename* parameter) from the given external +# files. Ignored if the parent file does not contain a +# global "external_variables" attribute. Multiple external +# files may be provided, which are searched in random order +# for the required external variables. +# +# If an external variable is not found in any external +# files, or is found in multiple external files, then the +# relevant metadata construct is still created, but without +# any metadata or data. In this case the construct's +# `!is_external` method will return `True`. +# +# *Parameter example:* +# ``external='cell_measure.nc'`` +# +# *Parameter example:* +# ``external=['cell_measure.nc']`` +# +# *Parameter example:* +# ``external=('cell_measure_A.nc', 'cell_measure_O.nc')`` +# +# extra: (sequence of) `str`, optional +# Create extra, independent field constructs from netCDF +# variables that correspond to particular types metadata +# constructs. 
The *extra* parameter may be one, or a +# sequence, of: +# +# ========================== =============================== +# *extra* Metadata constructs +# ========================== =============================== +# ``'field_ancillary'`` Field ancillary constructs +# ``'domain_ancillary'`` Domain ancillary constructs +# ``'dimension_coordinate'`` Dimension coordinate constructs +# ``'auxiliary_coordinate'`` Auxiliary coordinate constructs +# ``'cell_measure'`` Cell measure constructs +# ========================== =============================== +# +# This parameter replaces the deprecated *field* parameter. +# +# *Parameter example:* +# To create field constructs from auxiliary coordinate +# constructs: ``extra='auxiliary_coordinate'`` or +# ``extra=['auxiliary_coordinate']``. +# +# *Parameter example:* +# To create field constructs from domain ancillary and +# cell measure constructs: ``extra=['domain_ancillary', +# 'cell_measure']``. +# +# An extra field construct created via the *extra* parameter +# will have a domain limited to that which can be inferred +# from the corresponding netCDF variable, but without the +# connections that are defined by the parent netCDF data +# variable. It is possible to create independent fields from +# metadata constructs that do incorporate as much of the +# parent field construct's domain as possible by using the +# `~cf.Field.convert` method of a returned field construct, +# instead of setting the *extra* parameter. +# +# verbose: `int` or `str` or `None`, optional +# If an integer from ``-1`` to ``3``, or an equivalent string +# equal ignoring case to one of: +# +# * ``'DISABLE'`` (``0``) +# * ``'WARNING'`` (``1``) +# * ``'INFO'`` (``2``) +# * ``'DETAIL'`` (``3``) +# * ``'DEBUG'`` (``-1``) +# +# set for the duration of the method call only as the minimum +# cut-off for the verboseness level of displayed output (log) +# messages, regardless of the globally-configured `cf.log_level`. 
+# Note that increasing numerical value corresponds to increasing +# verbosity, with the exception of ``-1`` as a special case of +# maximal and extreme verbosity. +# +# Otherwise, if `None` (the default value), output messages will +# be shown according to the value of the `cf.log_level` setting. +# +# Overall, the higher a non-negative integer or equivalent string +# that is set (up to a maximum of ``3``/``'DETAIL'``) for +# increasing verbosity, the more description that is printed to +# convey how the contents of the netCDF file were parsed and +# mapped to CF data model constructs. +# +# warnings: `bool`, optional +# If True then print warnings when an output field construct +# is incomplete due to structural non-compliance of the +# dataset. By default such warnings are not displayed. +# +# ignore_read_error: `bool`, optional +# If True then ignore any file which raises an IOError +# whilst being read, as would be the case for an empty file, +# unknown file format, etc. By default the IOError is +# raised. +# +# fmt: `str`, optional +# Only read files of the given format, ignoring all other +# files. Valid formats are ``'NETCDF'`` for CF-netCDF files, +# ``'CFA'`` for CFA-netCDF files, ``'UM'`` for PP or UM +# fields files, and ``'CDL'`` for CDL text files. By default +# files of any of these formats are read. +# +# cdl_string: `bool`, optional +# If True and the format to read is CDL, read a string +# input, or sequence of string inputs, each being interpreted +# as a string of CDL rather than names of locations from +# which field constructs can be read from, as standard. +# +# By default, each string input or string element in the input +# sequence is taken to be a file or directory name or an +# OPenDAP URL from which to read field constructs, rather +# than a string of CDL input, including when the `fmt` +# parameter is set as CDL. 
+# +# Note that when `cdl_string` is True, the `fmt` parameter is +# ignored as the format is assumed to be CDL, so in that case +# it is not necessary to also specify ``fmt='CDL'``. +# +# aggregate: `bool` or `dict`, optional +# If True (the default) or a dictionary (possibly empty) +# then aggregate the field constructs read in from all input +# files into as few field constructs as possible by passing +# all of the field constructs found in the input files to +# `cf.aggregate`, and returning the output of this function +# call. +# +# If *aggregate* is a dictionary then it is used to +# configure the aggregation process by passing its contents as +# keyword arguments to the `cf.aggregate` function. +# +# If *aggregate* is False then the field constructs are not +# aggregated. +# +# squeeze: `bool`, optional +# If True then remove size 1 axes from each field construct's +# data array. +# +# unsqueeze: `bool`, optional +# If True then insert size 1 axes from each field +# construct's domain into its data array. +# +# select: (sequence of) `str` or `Query` or `re.Pattern`, optional +# Only return field constructs whose identities match the +# given value(s), i.e. those fields ``f`` for which +# ``f.match_by_identity(*select)`` is `True`. See +# `cf.Field.match_by_identity` for details. +# +# This is equivalent to, but faster than, not using the +# *select* parameter but applying its value to the returned +# field list with its `cf.FieldList.select_by_identity` +# method. For example, ``fl = cf.read(file, +# select='air_temperature')`` is equivalent to ``fl = +# cf.read(file).select_by_identity('air_temperature')``. +# +# recursive: `bool`, optional +# If True then recursively read sub-directories of any +# directories specified with the *files* parameter. +# +# followlinks: `bool`, optional +# If True, and *recursive* is True, then also search for +# files in sub-directories which resolve to symbolic +# links. 
By default directories which resolve to symbolic +# links are ignored. Ignored if *recursive* is False. Files +# which are symbolic links are always followed. +# +# Note that setting ``recursive=True, followlinks=True`` can +# lead to infinite recursion if a symbolic link points to a +# parent directory of itself. +# +# This parameter replaces the deprecated *follow_symlinks* +# parameter. +# +# mask: `bool`, optional +# If True (the default) then mask by convention the data of +# field and metadata constructs. +# +# A netCDF array is masked depending on the values of any of +# the netCDF attributes ``_FillValue``, ``missing_value``, +# ``_Unsigned``, ``valid_min``, ``valid_max``, and +# ``valid_range``. +# +# The masking by convention of a PP or UM array depends on +# the value of BMDI in the lookup header. A value other than +# ``-1.0e30`` indicates the data value to be masked. +# +# See +# https://ncas-cms.github.io/cf-python/tutorial.html#data-mask +# for details. +# +# .. versionadded:: 3.4.0 +# +# unpack: `bool`, optional +# If True, the default, then unpack arrays by convention +# when the data is read from disk. +# +# Unpacking is determined by netCDF conventions for the +# following variable attributes: ``add_offset``, +# ``scale_factor``, and ``_Unsigned``. +# +# .. versionadded:: NEXTVERSION +# +# warn_valid: `bool`, optional +# If True then print a warning for the presence of +# ``valid_min``, ``valid_max`` or ``valid_range`` properties +# on field constructs and metadata constructs that have +# data. By default no such warning is issued. +# +# "Out-of-range" data values in the file, as defined by any +# of these properties, are automatically masked by default, +# which may not be as intended. See the *mask* parameter for +# turning off all automatic masking. +# +# See +# https://ncas-cms.github.io/cf-python/tutorial.html#data-mask +# for details. +# +# .. 
versionadded:: 3.4.0 +# +# um: `dict`, optional +# For Met Office (UK) PP files and Met Office (UK) fields +# files only, provide extra decoding instructions. This +# option is ignored for input files which are not PP or +# fields files. In most cases, how to decode a file is +# inferrable from the file's contents, but if not then each +# key/value pair in the dictionary sets a decoding option as +# follows: +# +# ============================ ===================================== +# Key Value +# ============================ ===================================== +# ``'fmt'`` The file format (``'PP'`` or +# ``'FF'``) +# +# ``'word_size'`` The word size in bytes +# (``4`` or ``8``). +# +# ``'endian'`` The byte order (``'big'`` or +# ``'little'``). +# +# ``'version'`` The UM version to be used +# when decoding the +# header. Valid versions are, +# for example, ``4.2``, +# ``'6.6.3'`` and +# ``'8.2'``. In general, a +# given version is ignored if +# it can be inferred from the +# header (which is usually the +# case for files created by +# the UM at versions 5.3 and +# later). The exception to +# this is when the given +# version has a third element +# (such as the 3 in 6.6.3), in +# which case any version in +# the header is ignored. +# +# The default version is +# ``4.5``. +# +# ``'height_at_top_of_model'`` The height (in metres) of +# the upper bound of the top +# model level. By default the +# height at top model is taken +# from the top level's upper +# bound defined by BRSVD1 in +# the lookup header. If the +# height can't be determined +# from the header, or the +# given height is less than or +# equal to 0, then a +# coordinate reference system +# will still be created that +# contains the 'a' and 'b' +# formula term values, but +# without an atmosphere hybrid +# height dimension coordinate +# construct. +# +# .. 
note:: A current +# limitation is that if +# pseudolevels and +# atmosphere hybrid height +# coordinates are defined +# by the same lookup +# headers then the height +# **can't be determined +# automatically**. In this +# case the height may be +# found after reading as +# the maximum value of the +# bounds of the domain +# ancillary construct +# containing the 'a' +# formula term. The file +# can then be re-read with +# this height as a *um* +# parameter. +# ============================ ===================================== +# +# If format is specified as ``'PP'`` then the word size and +# byte order default to ``4`` and ``'big'`` respectively. +# +# This parameter replaces the deprecated *umversion* and +# *height_at_top_of_model* parameters. +# +# *Parameter example:* +# To specify that the input files are 32-bit, big-endian +# PP files: ``um={'fmt': 'PP'}`` +# +# *Parameter example:* +# To specify that the input files are 32-bit, +# little-endian PP files from version 5.1 of the UM: +# ``um={'fmt': 'PP', 'endian': 'little', 'version': 5.1}`` +# +# .. versionadded:: 1.5 +# +# dask_chunks: `str`, `int`, `None`, or `dict`, optional +# Specify the Dask chunking for data. May be one of the +# following: +# +# * ``'storage-aligned'`` +# +# This is the default. The Dask chunk size in bytes will +# be as close as possible to the size given by +# `cf.chunksize`, favouring square-like chunk shapes, +# with the added restriction that the entirety of each +# storage chunk must also lie within exactly one Dask +# chunk. +# +# When reading the data from disk, an entire storage chunk +# will be read once per Dask chunk that contains +# any part of it, so ensuring that a storage chunk lies +# within only one Dask chunk can increase performance by +# reducing the amount of disk access (particularly when +# the data are stored remotely to the client). 
+# +# For instance, consider a file variable that has an array +# of 64-bit floats with shape (400, 300, 60) and a storage +# chunk shape of (100, 5, 60), giving 240 storage chunks +# each of size 100*5*60*8 bytes = 0.23 MiB. Then: +# +# * If `cf.chunksize` returned 134217728 (i.e. 128 MiB), +# then the storage-aligned Dask chunks will have shape +# (400, 300, 60), giving 1 Dask chunk with size of 54.93 +# MiB (compare with a Dask chunk shape of (400, 300, 60) +# and size 54.93 MiB, if *dask_chunks* were ``'auto'``.) +# +# * If `cf.chunksize` returned 33554432 (i.e. 32 MiB), +# then the storage-aligned Dask chunks will have shape +# (200, 260, 60), giving 4 Dask chunks with a maximum +# size of 23.80 MiB (compare with a Dask chunk shape of +# (264, 264, 60) and maximum size 31.90 MiB, if +# *dask_chunks* were ``'auto'``.) +# +# * If `cf.chunksize` returned 4194304 (i.e. 4 MiB), +# then the storage-aligned Dask chunks will have shape +# (100, 85, 60), giving 16 Dask chunks with a maximum +# size of 3.89 MiB (compare with a Dask chunk shape of +# (93, 93, 60) and maximum size 3.96 MiB, if +# *dask_chunks* were ``'auto'``.) +# +# There are, however, some occasions when, for particular +# data arrays in the file, the ``'auto'`` option will +# automatically be used instead of storage-aligned Dask +# chunks. This occurs when: +# +# * The data array in the file is stored contiguously. +# +# * The data array in the file is compressed by convention +# (e.g. ragged array representations, compression by +# gathering, subsampled coordinates, etc.). In this case +# the Dask chunks are for the uncompressed data, and so +# cannot be aligned with the storage chunks of the +# compressed array in the file. +# +# * ``'storage-exact'`` +# +# Each Dask chunk will contain exactly one storage chunk +# and each storage chunk will lie within exactly one Dask +# chunk. 
+# +# For instance, consider a file variable that has an array +# of 64-bit floats with shape (400, 300, 60) and a storage +# chunk shape of (100, 5, 60) (i.e. there are 240 storage +# chunks, each of size 0.23 MiB). Then the storage-exact +# Dask chunks will also have shape (100, 5, 60) giving 240 +# Dask chunks with a maximum size of 0.23 MiB. +# +# There are, however, some occasions when, for particular +# data arrays in the file, the ``'auto'`` option will +# automatically be used instead of storage-exact Dask +# chunks. This occurs when: +# +# * The data array in the file is stored contiguously. +# +# * The data array in the file is compressed by convention +# (e.g. ragged array representations, compression by +# gathering, subsampled coordinates, etc.). In this case +# the Dask chunks are for the uncompressed data, and so +# cannot be aligned with the storage chunks of the +# compressed array in the file. +# +# * ``auto`` +# +# The Dask chunk size in bytes will be as close as +# possible to the size given by `cf.chunksize`, +# favouring square-like chunk shapes. This may give +# similar Dask chunk shapes as the ``'storage-aligned'`` +# option, but without the guarantee that each storage +# chunk will lie within exactly one Dask chunk. +# +# * A byte-size given by a `str` +# +# The Dask chunk size in bytes will be as close as +# possible to the given byte-size, favouring square-like +# chunk shapes. Any string value, accepted by the *chunks* +# parameter of the `dask.array.from_array` function is +# permitted. +# +# *Example:* +# A Dask chunksize of 2 MiB may be specified as +# ``'2097152'`` or ``'2 MiB'``. +# +# * `-1` or `None` +# +# There is no Dask chunking, i.e. every data array has one +# Dask chunk regardless of its size. +# +# * Positive `int` +# +# Every dimension of all Dask chunks has this number of +# elements. +# +# *Example:* +# For 3-dimensional data, *dask_chunks* of `10` will +# give Dask chunks with shape (10, 10, 10). 
+# * `dict` +# +# Each dictionary key identifies a file dimension, with +# a value that defines the Dask chunking for that +# dimension whenever it is spanned by a data array. A file +# dimension is identified in one of three ways: +# +# 1. the netCDF dimension name, preceded by ``ncdim%`` +# (e.g. ``'ncdim%lat'``); +# +# 2. the value of the "standard_name" attribute of a +# CF-netCDF coordinate variable that spans the +# dimension (e.g. ``'latitude'``); +# +# 3. the value of the "axis" attribute of a CF-netCDF +# coordinate variable that spans the dimension +# (e.g. ``'Y'``). +# +# The dictionary values may be a byte-size string, +# ``'auto'``, `int` or `None`, with the same meanings as +# those types for the *dask_chunks* parameter itself, but +# applying only to the specified dimension. In addition, a +# dictionary value may be a `tuple` or `list` of integers +# that sum to the dimension size. +# +# Not specifying a file dimension in the dictionary is +# equivalent to it being defined with a value of +# ``'auto'``. +# +# *Example:* +# ``{'T': '0.5 MiB', 'Z': 'auto', 'Y': [36, 37], 'X': +# None}`` +# +# *Example:* +# If a netCDF file contains dimensions ``time``, ``z``, +# ``lat`` and ``lon``, then ``{'ncdim%time': 12, +# 'ncdim%lat': None, 'ncdim%lon': None}`` will ensure +# that, for all applicable data arrays, all ``time`` +# axes have a `dask` chunksize of 12; all ``lat`` and +# ``lon`` axes are not `dask` chunked; and all ``z`` +# axes are `dask` chunked to comply as closely as +# possible with the default `dask` chunk size. +# +# If the netCDF file also contains a ``time`` coordinate +# variable with a "standard_name" attribute of +# ``'time'`` and an "axis" attribute of ``'T'``, then +# the same `dask` chunking could be specified with +# either ``{'time': 12, 'ncdim%lat': None, 'ncdim%lon': +# None}`` or ``{'T': 12, 'ncdim%lat': None, 'ncdim%lon': +# None}``. +# +# .. 
versionadded:: NEXTVERSION +# +# store_hdf5_chunks: `bool`, optional +# If True (the default) then store the HDF5 chunking +# strategy for each returned data array. The HDF5 chunking +# strategy is then accessible via an object's +# `nc_hdf5_chunksizes` method. When the HDF5 chunking +# strategy is stored, it will be used when the data is +# written to a new netCDF4 file with `cf.write` (unless +# the strategy was modified prior to writing). +# +# If False, or if the file being read is not in netCDF4 +# format, then no HDF5 chunking strategy is stored. +# (i.e. an `nc_hdf5_chunksizes` method will return `None` +# for all `Data` objects). In this case, when the data is +# written to a new netCDF4 file, the HDF5 chunking strategy +# will be determined by `cf.write`. +# +# See the `cf.write` *hdf5_chunks* parameter for details +# on how the HDF5 chunking strategy is determined at the +# time of writing. +# +# .. versionadded:: NEXTVERSION +# +# domain: `bool`, optional +# If True then return only the domain constructs that are +# explicitly defined by CF-netCDF domain variables, ignoring +# all CF-netCDF data variables. By default only the field +# constructs defined by CF-netCDF data variables are +# returned. +# +# CF-netCDF domain variables are only defined from CF-1.9, +# so older datasets automatically contain no CF-netCDF +# domain variables. +# +# The unique domain constructs of the dataset are easily +# found with the `cf.unique_constructs` function. For +# example:: +# +# >>> d = cf.read('file.nc', domain=True) +# >>> ud = cf.unique_constructs(d) +# >>> f = cf.read('file.nc') +# >>> ufd = cf.unique_constructs(x.domain for x in f) +# +# Domain constructs can not be read from UM or PP datasets. +# +# .. versionadded:: 3.11.0 +# +# cfa: `dict`, optional +# Configure the reading of CFA-netCDF files. 
The dictionary +# may have any subset of the following key/value pairs to +# override the information read from the file: +# +# * ``'substitutions'``: `dict` +# +# A dictionary whose key/value pairs define text +# substitutions to be applied to the fragment file +# names. Each key may be specified with or without the +# ``${*}`` syntax (where `*` represents any amount of any +# characters). For instance, ``{'substitution': +# 'replacement'}`` and ``{'${substitution}': 'replacement'}`` +# are equivalent. The substitutions are used in +# conjunction with, and take precedence over, any that are +# stored in the CFA-netCDF file by the ``substitutions`` +# attribute of the ``file`` fragment array variable. +# +# *Example:* +# ``{'replacement': 'file:///data/'}`` +# +# .. versionadded:: 3.15.0 +# +# netcdf_backend: `None` or `str`, optional +# Specify which library to use for reading netCDF files. By +# default, or if `None`, then the first one of `netCDF4` and +# `h5netcdf` to successfully open the netCDF file is +# used. Setting *netcdf_backend* to one of ``'netCDF4'`` and +# ``'h5netcdf'`` will force the use of that library. +# +# .. note:: The *netcdf_backend* parameter does not affect +# the opening of netCDF fragment files that define +# the data of aggregation variables. For these, it +# is always the case that the first one of +# `netCDF4` and `h5netcdf` to successfully open +# the file is used. +# +# .. versionadded:: NEXTVERSION +# +# storage_options: `dict` or `None`, optional +# Pass parameters to the backend file system driver, such as +# username, password, server, port, etc. How the storage +# options are interpreted depends on the location of the +# file: +# +# **Local File System** +# +# Storage options are ignored for local files. +# +# **HTTP(S)** +# +# Storage options are ignored for files available across the +# network via OPeNDAP. 
+# +# **S3-compatible services** +# +# The backend used is `s3fs`, and the storage options are +# used to initialise an `s3fs.S3FileSystem` file system +# object. By default, or if `None`, then *storage_options* +# is taken as ``{}``. +# +# If the ``'endpoint_url'`` key is not in *storage_options*, +# nor in a dictionary defined by the ``'client_kwargs'`` key +# (both of which are the case when *storage_options* is +# `None`), then one will be automatically inserted for +# accessing an S3 file. For example, for a file name of +# ``'s3://store/data/file.nc'``, an ``'endpoint_url'`` key +# with value ``'https://store'`` would be created. To +# disable this, set ``'endpoint_url'`` to `None`. +# +# *Parameter example:* +# For a file name of ``'s3://store/data/file.nc'``, the +# following are equivalent: ``None``, ``{}``, +# ``{'endpoint_url': 'https://store'}``, and +# ``{'client_kwargs': {'endpoint_url': 'https://store'}}`` +# +# *Parameter example:* +# ``{'key': 'scaleway-api-key...', 'secret': +# 'scaleway-secretkey...', 'endpoint_url': +# 'https://s3.fr-par.scw.cloud', 'client_kwargs': +# {'region_name': 'fr-par'}}`` +# +# .. versionadded:: NEXTVERSION +# +# cache: `bool`, optional +# If True, the default, then cache the first and last array +# elements of metadata constructs (not field constructs) for +# fast future access. In addition, the second and +# penultimate array elements will be cached from coordinate +# bounds when there are two bounds per cell. For remote +# data, setting *cache* to False may speed up the parsing of +# the file. +# +# .. versionadded:: NEXTVERSION +# +# umversion: deprecated at version 3.0.0 +# Use the *um* parameter instead. +# +# height_at_top_of_model: deprecated at version 3.0.0 +# Use the *um* parameter instead. +# +# field: deprecated at version 3.0.0 +# Use the *extra* parameter instead. +# +# follow_symlinks: deprecated at version 3.0.0 +# Use the *followlinks* parameter instead. 
+# +# select_options: deprecated at version 3.0.0 +# Use methods on the returned `FieldList` instead. +# +# chunk: deprecated at version 3.14.0 +# Use the *dask_chunks* parameter instead. +# +# chunks: deprecated at version NEXTVERSION +# Use the *dask_chunks* parameter instead. +# +# :Returns: +# +# `FieldList` or `DomainList` +# The field or domain constructs found in the input +# dataset(s). The list may be empty. +# +# **Examples** +# +# >>> x = cf.read('file.nc') +# +# Read a file and create field constructs from CF-netCDF data +# variables as well as from the netCDF variables that correspond to +# particular types metadata constructs: +# +# >>> f = cf.read('file.nc', extra='domain_ancillary') +# >>> g = cf.read('file.nc', extra=['dimension_coordinate', +# ... 'auxiliary_coordinate']) +# +# Read a file that contains external variables: +# +# >>> h = cf.read('parent.nc') +# >>> i = cf.read('parent.nc', external='external.nc') +# >>> j = cf.read('parent.nc', external=['external1.nc', 'external2.nc']) +# +# >>> f = cf.read('file*.nc') +# >>> f +# [, +# , +# , +# ] +# +# >>> cf.read('file*.nc')[0:2] +# [, +# ] +# +# >>> cf.read('file*.nc')[-1] +# +# +# >>> cf.read('file*.nc', select='units=K') +# [, +# ] +# +# >>> cf.read('file*.nc', select='ncvar%ta') +# +# +# """ +# if field: +# _DEPRECATION_ERROR_FUNCTION_KWARGS( +# "cf.read", +# {"field": field}, +# "Use keyword 'extra' instead", +# removed_at="4.0.0", +# ) # pragma: no cover +# +# if select_options: +# _DEPRECATION_ERROR_FUNCTION_KWARGS( +# "cf.read", {"select_options": select_options}, removed_at="4.0.0" +# ) # pragma: no cover +# +# if follow_symlinks: +# _DEPRECATION_ERROR_FUNCTION_KWARGS( +# "cf.read", +# {"follow_symlinks": follow_symlinks}, +# "Use keyword 'followlink' instead.", +# removed_at="4.0.0", +# ) # pragma: no cover +# +# if height_at_top_of_model is not None: +# _DEPRECATION_ERROR_FUNCTION_KWARGS( +# "cf.read", +# {"height_at_top_of_model": height_at_top_of_model}, +# "Use keyword 'um' 
instead.", +# removed_at="4.0.0", +# ) # pragma: no cover +# +# if chunk is not True: +# _DEPRECATION_ERROR_FUNCTION_KWARGS( +# "cf.read", +# {"chunk": chunk}, +# "Use keyword 'dask_chunks' instead.", +# version="3.14.0", +# removed_at="5.0.0", +# ) # pragma: no cover +# +# if chunks != "auto": +# _DEPRECATION_ERROR_FUNCTION_KWARGS( +# "cf.read", +# {"chunk": chunk}, +# "Use keyword 'dask_chunks' instead.", +# version="3.14.0", +# removed_at="5.0.0", +# ) # pragma: no cover +# +# # Parse select +# if isinstance(select, (str, Query, Pattern)): +# select = (select,) +# +# # Manage input parameters where contradictions are possible: +# if cdl_string and fmt: +# if fmt == "CDL": +# if is_log_level_info(logger): +# logger.info( +# "It is not necessary to set the cf.read fmt as 'CDL' when " +# "cdl_string is True, since that implies CDL is the format." +# ) # pragma: no cover +# else: +# raise ValueError( +# "cdl_string can only be True when the format is CDL, though " +# "fmt is ignored in that case so there is no need to set it." 
+# ) +# if squeeze and unsqueeze: +# raise ValueError("squeeze and unsqueeze can not both be True") +# if follow_symlinks and not recursive: +# raise ValueError( +# f"Can't set follow_symlinks={follow_symlinks!r} " +# f"when recursive={recursive!r}" +# ) +# +# info = cfdm.is_log_level_info(logger) +# +# # Initialise the output list of fields/domains +# if domain: +# out = DomainList() +# else: +# out = FieldList() +# +# if isinstance(aggregate, dict): +# aggregate_options = aggregate.copy() +# aggregate = True +# else: +# aggregate_options = {} +# +# aggregate_options["copy"] = False +# +# # Parse the extra parameter +# if extra is None: +# extra = () +# elif isinstance(extra, str): +# extra = (extra,) +# +# ftypes = set() +# +# # Count the number of fields (in all files) and the number of +# # files +# field_counter = -1 +# file_counter = 0 +# +# if cdl_string: +# files2 = [] +# +# # 'files' input may be a single string or a sequence of them and to +# # handle both cases it is easiest to convert former to a one-item seq. 
+# if isinstance(files, str): +# files = [files] +# +# for cdl_file in files: +# c = tempfile.NamedTemporaryFile( +# mode="w", +# dir=tempfile.gettempdir(), +# prefix="cf_", +# suffix=".cdl", +# ) +# +# c_name = c.name +# with open(c_name, "w") as f: +# f.write(cdl_file) +# +# # ---------------------------------------------------------------- +# # Need to cache the TemporaryFile object so that it doesn't get +# # deleted too soon +# # ---------------------------------------------------------------- +# _cached_temporary_files[c_name] = c +# +# files2.append(c.name) +# +# files = files2 +# +# for file_glob in flat(files): +# # Expand variables +# file_glob = os.path.expanduser(os.path.expandvars(file_glob)) +# +# scheme = urlparse(file_glob).scheme +# if scheme in ("https", "http", "s3"): +# # Do not glob a remote URL +# files2 = (file_glob,) +# else: +# # Glob files on disk +# files2 = glob(file_glob) +# +# if not files2 and not ignore_read_error: +# open(file_glob, "rb") +# +# files3 = [] +# for x in files2: +# if isdir(x): +# # Walk through directories, possibly recursively +# for path, subdirs, filenames in os.walk( +# x, followlinks=followlinks +# ): +# files3.extend(os.path.join(path, f) for f in filenames) +# if not recursive: +# break +# else: +# files3.append(x) +# +# files2 = files3 +# +# for filename in files2: +# if info: +# logger.info(f"File: {filename}") # pragma: no cover +# +# if um: +# ftype = "UM" +# else: +# try: +# ftype = file_type(filename) +# except Exception as error: +# if not ignore_read_error: +# raise ValueError(error) +# +# logger.warning(f"WARNING: {error}") # pragma: no cover +# continue +# +# if domain and ftype == "UM": +# raise ValueError( +# f"Can't read PP/UM file {filename} into domain constructs" +# ) +# +# ftypes.add(ftype) +# +# # -------------------------------------------------------- +# # Read the file +# # -------------------------------------------------------- +# file_contents = _read_a_file( +# filename, +# ftype=ftype, 
+# external=external, +# ignore_read_error=ignore_read_error, +# verbose=verbose, +# warnings=warnings, +# aggregate=aggregate, +# aggregate_options=aggregate_options, +# selected_fmt=fmt, +# um=um, +# extra=extra, +# height_at_top_of_model=height_at_top_of_model, +# dask_chunks=dask_chunks, +# store_hdf5_chunks=store_hdf5_chunks, +# mask=mask, +# unpack=unpack, +# warn_valid=warn_valid, +# select=select, +# domain=domain, +# cfa=cfa, +# cfa_write=cfa_write, +# netcdf_backend=netcdf_backend, +# storage_options=storage_options, +# cache=cache, +# ) +# +# # -------------------------------------------------------- +# # Select matching fields (not from UM files, yet) +# # -------------------------------------------------------- +# if select and ftype != "UM": +# file_contents = file_contents.select_by_identity(*select) +# +# # -------------------------------------------------------- +# # Add this file's contents to that already read from other +# # files +# # -------------------------------------------------------- +# out.extend(file_contents) +# +# field_counter = len(out) +# file_counter += 1 +# +# if info: +# logger.info( +# f"Read {field_counter} field{_plural(field_counter)} from " +# f"{file_counter} file{_plural(file_counter)}" +# ) # pragma: no cover +# +# # ---------------------------------------------------------------- +# # Aggregate the output fields/domains +# # ---------------------------------------------------------------- +# if aggregate and len(out) > 1: +# org_len = len(out) # pragma: no cover +# +# out = cf_aggregate(out, **aggregate_options) +# +# n = len(out) # pragma: no cover +# if info: +# logger.info( +# f"{org_len} input field{_plural(org_len)} aggregated into " +# f"{n} field{_plural(n)}" +# ) # pragma: no cover +# +# # ---------------------------------------------------------------- +# # Sort by netCDF variable name +# # ---------------------------------------------------------------- +# if len(out) > 1: +# out.sort(key=lambda f: 
f.nc_get_variable("")) +# +# # ---------------------------------------------------------------- +# # Add standard names to UM/PP fields (post aggregation) +# # ---------------------------------------------------------------- +# for f in out: +# standard_name = f._custom.get("standard_name", None) +# if standard_name is not None: +# f.set_property("standard_name", standard_name, copy=False) +# del f._custom["standard_name"] +# +# # ---------------------------------------------------------------- +# # Select matching fields from UM/PP fields (post setting of +# # standard names) +# # ---------------------------------------------------------------- +# if select and "UM" in ftypes: +# out = out.select_by_identity(*select) +# +# # ---------------------------------------------------------------- +# # Squeeze size one dimensions from the data arrays. Do one of: +# # +# # 1) Squeeze the fields, i.e. remove all size one dimensions from +# # all field data arrays +# # +# # 2) Unsqueeze the fields, i.e. Include all size 1 domain +# # dimensions in the data array. 
+# # +# # 3) Nothing +# # ---------------------------------------------------------------- +# if not domain: +# if squeeze: +# for f in out: +# f.squeeze(inplace=True) +# elif unsqueeze: +# for f in out: +# f.unsqueeze(inplace=True) +# +# if nfields is not None and len(out) != nfields: +# raise ValueError( +# f"{nfields} field{_plural(nfields)} requested but " +# f"{len(out)} field/domain constucts found in " +# f"file{_plural(file_counter)}" +# ) +# +# return out +# +# +# def _plural(n): # pragma: no cover +# """Return a suffix which reflects a word's plural.""" +# return "s" if n != 1 else "" # pragma: no cover +# +# +# @_manage_log_level_via_verbosity +# def _read_a_file( +# filename, +# ftype=None, +# aggregate=True, +# aggregate_options=None, +# ignore_read_error=False, +# verbose=None, +# warnings=False, +# external=None, +# selected_fmt=None, +# um=None, +# extra=None, +# height_at_top_of_model=None, +# mask=True, +# unpack=True, +# warn_valid=False, +# dask_chunks="storage-aligned", +# store_hdf5_chunks=True, +# select=None, +# domain=False, +# cfa=None, +# cfa_write=None, +# netcdf_backend=None, +# storage_options=None, +# cache=True, +# ): +# """Read the contents of a single file into a field list. +# +# :Parameters: +# +# filename: `str` +# See `cf.read` for details. +# +# ftype: `str` +# The file format to interpret the file. Recognised formats are +# ``'netCDF'``, ``'CDL'``, ``'UM'`` and ``'PP'``. +# +# aggregate_options: `dict`, optional +# See `cf.read` for details. +# +# ignore_read_error: `bool`, optional +# See `cf.read` for details. +# +# mask: `bool`, optional +# See `cf.read` for details. +# +# unpack: `bool`, optional +# See `cf.read` for details. +# +# verbose: `int` or `str` or `None`, optional +# See `cf.read` for details. +# +# select: optional +# For `read. Ignored for a netCDF file. +# +# domain: `bool`, optional +# See `cf.read` for details. +# +# cfa: `dict`, optional +# See `cf.read` for details. +# +# .. 
versionadded:: 3.15.0 +# +# storage_options: `dict` or `None`, optional +# See `cf.read` for details. +# +# .. versionadded:: NEXTVERSION +# +# netcdf_backend: `str` or `None`, optional +# See `cf.read` for details. +# +# .. versionadded:: NEXTVERSION +# +# cache: `bool`, optional +# See `cf.read` for details. +# +# .. versionadded:: NEXTVERSION +# +# :Returns: +# +# `FieldList` or `DomainList` +# The field or domain constructs in the dataset. +# +# """ +# if aggregate_options is None: +# aggregate_options = {} +# +# # Find this file's type +# fmt = None +# word_size = None +# endian = None +# height_at_top_of_model = None +# umversion = 405 +# +# if um: +# fmt = um.get("fmt") +# word_size = um.get("word_size") +# endian = um.get("endian") +# umversion = um.get("version", umversion) +# height_at_top_of_model = um.get("height_at_top_of_model") +# +# if fmt is not None: +# fmt = fmt.upper() +# +# if umversion is not None: +# umversion = float(str(umversion).replace(".", "0", 1)) +# +# extra_read_vars = { +# "fmt": selected_fmt, +# "ignore_read_error": ignore_read_error, +# } +# +# # ---------------------------------------------------------------- +# # Still here? Read the file into fields or domains. 
+# # ---------------------------------------------------------------- +# originally_cdl = ftype == "CDL" +# if originally_cdl: +# # Create a temporary netCDF file from input CDL +# ftype = "netCDF" +# cdl_filename = filename +# filename = netcdf.cdl_to_netcdf(filename) +# extra_read_vars["fmt"] = "NETCDF" +# +# if not netcdf.is_netcdf_file(filename): +# error_msg = ( +# f"Can't determine format of file {filename} generated " +# f"from CDL file {cdl_filename}" +# ) +# if ignore_read_error: +# logger.warning(error_msg) # pragma: no cover +# return FieldList() +# else: +# raise IOError(error_msg) +# +# if ftype == "netCDF" and extra_read_vars["fmt"] in (None, "NETCDF", "CFA"): +# # See https://github.com/NCAS-CMS/cfdm/issues/128 for context on the +# # try/except here, which acts as a temporary fix pending decisions on +# # the best way to handle CDL with only header or coordinate info. +# try: +# # out = netcdf.read( +# out = netcdf.read( +# filename, +# external=external, +# extra=extra, +# verbose=verbose, +# warnings=warnings, +# extra_read_vars=extra_read_vars, +# mask=mask, +# unpack=unpack, +# warn_valid=warn_valid, +# domain=domain, +# storage_options=storage_options, +# netcdf_backend=netcdf_backend, +# dask_chunks=dask_chunks, +# store_hdf5_chunks=store_hdf5_chunks, +# cache=cache, +# cfa=cfa, +# cfa_write=cfa_write, +# ) +# except MaskError: +# # Some data required for field interpretation is missing, +# # manifesting downstream as a NumPy MaskError. +# if originally_cdl: +# raise ValueError( +# "Unable to convert CDL without data to field construct(s) " +# "because there is insufficient information provided by " +# "the header and/or coordinates alone in this case." +# ) +# else: +# raise ValueError( +# "Unable to convert netCDF to field or domain construct " +# "because there is missing data." 
+# ) +# +# elif ftype == "UM" and extra_read_vars["fmt"] in (None, "UM"): +# if domain: +# raise ValueError( +# "Can't set domain=True when reading UM or PP datasets" +# ) +# +# out = UM.read( +# filename, +# um_version=umversion, +# verbose=verbose, +# set_standard_name=False, +# height_at_top_of_model=height_at_top_of_model, +# fmt=fmt, +# word_size=word_size, +# endian=endian, +# select=select, +# ) +# +# # PP fields are aggregated intrafile prior to interfile +# # aggregation +# if aggregate: +# # For PP fields, the default is strict_units=False +# if "strict_units" not in aggregate_options: +# aggregate_options["relaxed_units"] = True +# +# # ---------------------------------------------------------------- +# # Return the fields +# # ---------------------------------------------------------------- +# if domain: +# return DomainList(out) +# +# return FieldList(out) +# +# +# def file_type(filename): +# """Return the file format. +# +# :Parameters: +# +# filename: `str` +# The file name. +# +# :Returns: +# +# `str` +# The format type of the file. One of ``'netCDF'``, ``'UM'`` +# or ``'CDL'``. +# +# **Examples** +# +# >>> file_type(filename) +# 'netCDF' +# +# """ +# # ---------------------------------------------------------------- +# # NetCDF +# # ---------------------------------------------------------------- +# if netcdf.is_netcdf_file(filename): +# return "netCDF" +# +# # ---------------------------------------------------------------- +# # PP or FF +# # ---------------------------------------------------------------- +# if UM.is_um_file(filename): +# return "UM" +# +# # ---------------------------------------------------------------- +# # CDL +# # ---------------------------------------------------------------- +# if netcdf.is_cdl_file(filename): +# return "CDL" +# +# # Still here? 
+# raise IOError(f"Can't determine format of file {filename}") From e7cac6d357cbec928f3ef4cc82a4de48422ca268 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Mon, 18 Nov 2024 18:04:03 +0000 Subject: [PATCH 14/51] dev --- cf/read_write/read.py | 1527 +----------------------------------- cf/read_write/write.py | 780 +----------------- cf/test/test_read_write.py | 26 +- 3 files changed, 23 insertions(+), 2310 deletions(-) diff --git a/cf/read_write/read.py b/cf/read_write/read.py index 03760c40c6..2e7d10a7da 100644 --- a/cf/read_write/read.py +++ b/cf/read_write/read.py @@ -8,7 +8,6 @@ import cfdm from cfdm.read_write.netcdf import NetCDFRead -from numpy.ma.core import MaskError from ..aggregate import aggregate as cf_aggregate from ..cfimplementation import implementation @@ -25,9 +24,9 @@ # Create an implementation container and initialise a read object for # each format # -------------------------------------------------------------------- -_implementation = implementation() -netcdf = NetCDFRead(_implementation) -UM = UMRead(_implementation) +#_implementation = implementation() +#netcdf = NetCDFRead(_implementation) +#UM = UMRead(_implementation) logger = logging.getLogger(__name__) @@ -567,6 +566,9 @@ def __new__( removed_at="5.0.0", ) # pragma: no cover + cls.netcdf = NetCDFRead(cls.implementation) + cls.um = UMRead(cls.implementation) + # Parse select if isinstance(select, (str, Query, Pattern)): select = (select,) @@ -594,7 +596,7 @@ def __new__( f"when recursive={recursive!r}" ) - netcdf = NetCDFRead(cls.implementation) + # netcdf = NetCDFRead(cls.implementation) # Initialise the output list of fields/domains if domain: @@ -951,10 +953,10 @@ def _read_a_file( # Create a temporary netCDF file from input CDL ftype = "netCDF" cdl_filename = filename - filename = netcdf.cdl_to_netcdf(filename) + filename = cls.netcdf.cdl_to_netcdf(filename) extra_read_vars["fmt"] = "NETCDF" - if not netcdf.is_netcdf_file(filename): + if not cls.netcdf.is_netcdf_file(filename): 
error_msg = ( f"Can't determine format of file {filename} generated " f"from CDL file {cdl_filename}" @@ -990,51 +992,13 @@ def _read_a_file( cfa=cfa, cfa_write=cfa_write, ) - # # See https://github.com/NCAS-CMS/cfdm/issues/128 for context on the - # # try/except here, which acts as a temporary fix pending decisions on - # # the best way to handle CDL with only header or coordinate info. - # try: - # out = netcdf.read( - # filename, - # external=external, - # extra=extra, - # verbose=verbose, - # warnings=warnings, - # extra_read_vars=extra_read_vars, - # mask=mask, - # unpack=unpack, - # warn_valid=warn_valid, - # domain=domain, - # storage_options=storage_options, - # netcdf_backend=netcdf_backend, - # dask_chunks=dask_chunks, - # store_hdf5_chunks=store_hdf5_chunks, - # cache=cache, - # cfa=cfa, - # cfa_write=cfa_write, - # ) - # except MaskError: - # # Some data required for field interpretation is missing, - # # manifesting downstream as a NumPy MaskError. - # if originally_cdl: - # raise ValueError( - # "Unable to convert CDL without data to field construct(s) " - # "because there is insufficient information provided by " - # "the header and/or coordinates alone in this case." - # ) - # else: - # raise ValueError( - # "Unable to convert netCDF to field or domain construct " - # "because there is missing data." 
- # ) - elif ftype == "UM" and extra_read_vars["fmt"] in (None, "UM"): if domain: raise ValueError( "Can't set domain=True when reading UM or PP datasets" ) - out = UM.read( + out = cls.um.read( filename, um_version=umversion, verbose=verbose, @@ -1083,1485 +1047,20 @@ def file_type(cls, filename): # ---------------------------------------------------------------- # NetCDF # ---------------------------------------------------------------- - if netcdf.is_netcdf_file(filename): + if cls.netcdf.is_netcdf_file(filename): return "netCDF" # ---------------------------------------------------------------- # PP or FF # ---------------------------------------------------------------- - if UM.is_um_file(filename): + if cls.um.is_um_file(filename): return "UM" # ---------------------------------------------------------------- # CDL # ---------------------------------------------------------------- - if netcdf.is_cdl_file(filename): + if cls.netcdf.is_cdl_file(filename): return "CDL" # Still here? raise IOError(f"Can't determine format of file {filename}") - - -# @_manage_log_level_via_verbosity -# def read_old( -# files, -# external=None, -# verbose=None, -# warnings=False, -# ignore_read_error=False, -# aggregate=True, -# nfields=None, -# squeeze=False, -# unsqueeze=False, -# fmt=None, -# cdl_string=False, -# select=None, -# extra=None, -# recursive=False, -# followlinks=False, -# um=None, -# chunk=True, -# field=None, -# height_at_top_of_model=None, -# select_options=None, -# follow_symlinks=False, -# mask=True, -# unpack=True, -# warn_valid=False, -# dask_chunks="storage-aligned", -# store_hdf5_chunks=True, -# domain=False, -# cfa=None, -# cfa_write=None, -# netcdf_backend=None, -# storage_options=None, -# cache=True, -# chunks="auto", -# ): -# """Read field or domain constructs from files. -# -# The following file formats are supported: netCDF, CFA-netCDF, CDL, -# UM fields file, and PP. 
-# -# Input datasets are mapped to constructs in memory which are -# returned as elements of a `FieldList` or if the *domain* parameter -# is True, a `DomainList`. -# -# NetCDF files may be on disk, on an OPeNDAP server, or in an S3 -# object store. -# -# Any amount of files of any combination of file types may be read. -# -# **NetCDF unlimited dimensions** -# -# Domain axis constructs that correspond to NetCDF unlimited -# dimensions may be accessed with the -# `~cf.DomainAxis.nc_is_unlimited` and -# `~cf.DomainAxis.nc_set_unlimited` methods of a domain axis -# construct. -# -# **NetCDF hierarchical groups** -# -# Hierarchical groups in CF provide a mechanism to structure -# variables within netCDF4 datasets. Field constructs are -# constructed from grouped datasets by applying the well defined -# rules in the CF conventions for resolving references to -# out-of-group netCDF variables and dimensions. The group structure -# is preserved in the field construct's netCDF interface. Groups -# were incorporated into CF-1.8. For files with groups that state -# compliance to earlier versions of the CF conventions, the groups -# will be interpreted as per the latest release of CF. -# -# **CF-compliance** -# -# If the dataset is partially CF-compliant to the extent that it is -# not possible to unambiguously map an element of the netCDF dataset -# to an element of the CF data model, then a field construct is -# still returned, but may be incomplete. This is so that datasets -# which are partially conformant may nonetheless be modified in -# memory and written to new datasets. -# -# Such "structural" non-compliance would occur, for example, if the -# "coordinates" attribute of a CF-netCDF data variable refers to -# another variable that does not exist, or refers to a variable that -# spans a netCDF dimension that does not apply to the data -# variable. Other types of non-compliance are not checked, such -# whether or not controlled vocabularies have been adhered to. 
The -# structural compliance of the dataset may be checked with the -# `~cf.Field.dataset_compliance` method of the field construct, as -# well as optionally displayed when the dataset is read by setting -# the warnings parameter. -# -# **CDL files** -# -# A file is considered to be a CDL representation of a netCDF -# dataset if it is a text file whose first non-comment line starts -# with the seven characters "netcdf " (six letters followed by a -# space). A comment line is identified as one which starts with any -# amount white space (including none) followed by "//" (two -# slashes). It is converted to a temporary netCDF4 file using the -# external ``ncgen`` command, and the temporary file persists until -# the end of the Python session, at which time it is automatically -# deleted. The CDL file may omit data array values (as would be the -# case, for example, if the file was created with the ``-h`` or -# ``-c`` option to ``ncdump``), in which case the the relevant -# constructs in memory will be created with data with all missing -# values. -# -# **PP and UM fields files** -# -# 32-bit and 64-bit PP and UM fields files of any endian-ness can be -# read. In nearly all cases the file format is auto-detected from -# the first 64 bits in the file, but for the few occasions when this -# is not possible, the *um* keyword allows the format to be -# specified, as well as the UM version (if the latter is not -# inferrable from the PP or lookup header information). -# -# 2-d "slices" within a single file are always combined, where -# possible, into field constructs with 3-d, 4-d or 5-d data. This is -# done prior to any field construct aggregation (see the *aggregate* -# parameter). -# -# When reading PP and UM fields files, the *relaxed_units* aggregate -# option is set to `True` by default, because units are not always -# available to field constructs derived from UM fields files or PP -# files. 
-# -# **Performance** -# -# Descriptive properties are always read into memory, but lazy -# loading is employed for all data arrays which means that, in -# general, data is not read into memory until the data is required -# for inspection or to modify the array contents. This maximises the -# number of field constructs that may be read within a session, and -# makes the read operation fast. The exceptions to the lazy reading -# of data arrays are: -# -# * Data that define purely structural elements of other data arrays -# that are compressed by convention (such as a count variable for -# a ragged contiguous array). These are always read from disk. -# -# * If field or domain aggregation is in use (as it is by default, -# see the *aggregate* parameter), then the data of metadata -# constructs may have to be read to determine how the contents of -# the input files may be aggregated. This won't happen for a -# particular field or domain's metadata, though, if it can be -# ascertained from descriptive properties alone that it can't be -# aggregated with anything else (as would be the case, for -# instance, when a field has a unique standard name). -# -# However, when two or more field or domain constructs are -# aggregated to form a single construct then the data arrays of some -# metadata constructs (coordinates, cell measures, etc.) must be -# compared non-lazily to ascertain if aggregation is possible. -# -# .. seealso:: `cf.aggregate`, `cf.write`, `cf.Field`, `cf.Domain`, -# `cf.load_stash2standard_name`, `cf.unique_constructs` -# -# :Parameters: -# -# files: (arbitrarily nested sequence of) `str` -# A string or arbitrarily nested sequence of strings giving -# the file names, directory names, or OPenDAP URLs from -# which to read field constructs. 
Various type of expansion -# are applied to the names: -# -# ==================== ====================================== -# Expansion Description -# ==================== ====================================== -# Tilde An initial component of ``~`` or -# ``~user`` is replaced by that *user*'s -# home directory. -# -# Environment variable Substrings of the form ``$name`` or -# ``${name}`` are replaced by the value -# of environment variable *name*. -# -# Pathname A string containing UNIX file name -# metacharacters as understood by the -# Python `glob` module is replaced by -# the list of matching file names. This -# type of expansion is ignored for -# OPenDAP URLs. -# ==================== ====================================== -# -# Where more than one type of expansion is used in the same -# string, they are applied in the order given in the above -# table. -# -# *Parameter example:* -# The file ``file.nc`` in the user's home directory could -# be described by any of the following: -# ``'$HOME/file.nc'``, ``'${HOME}/file.nc'``, -# ``'~/file.nc'``, ``'~/tmp/../file.nc'``. -# -# When a directory is specified, all files in that directory -# are read. Sub-directories are not read unless the -# *recursive* parameter is True. If any directories contain -# files that are not valid datasets then an exception will -# be raised, unless the *ignore_read_error* parameter is -# True. -# -# As a special case, if the `cdl_string` parameter is set to -# True, the interpretation of `files` changes so that each -# value is assumed to be a string of CDL input rather -# than the above. -# -# external: (sequence of) `str`, optional -# Read external variables (i.e. variables which are named by -# attributes, but are not present, in the parent file given -# by the *filename* parameter) from the given external -# files. Ignored if the parent file does not contain a -# global "external_variables" attribute. 
Multiple external -# files may be provided, which are searched in random order -# for the required external variables. -# -# If an external variable is not found in any external -# files, or is found in multiple external files, then the -# relevant metadata construct is still created, but without -# any metadata or data. In this case the construct's -# `!is_external` method will return `True`. -# -# *Parameter example:* -# ``external='cell_measure.nc'`` -# -# *Parameter example:* -# ``external=['cell_measure.nc']`` -# -# *Parameter example:* -# ``external=('cell_measure_A.nc', 'cell_measure_O.nc')`` -# -# extra: (sequence of) `str`, optional -# Create extra, independent field constructs from netCDF -# variables that correspond to particular types metadata -# constructs. The *extra* parameter may be one, or a -# sequence, of: -# -# ========================== =============================== -# *extra* Metadata constructs -# ========================== =============================== -# ``'field_ancillary'`` Field ancillary constructs -# ``'domain_ancillary'`` Domain ancillary constructs -# ``'dimension_coordinate'`` Dimension coordinate constructs -# ``'auxiliary_coordinate'`` Auxiliary coordinate constructs -# ``'cell_measure'`` Cell measure constructs -# ========================== =============================== -# -# This parameter replaces the deprecated *field* parameter. -# -# *Parameter example:* -# To create field constructs from auxiliary coordinate -# constructs: ``extra='auxiliary_coordinate'`` or -# ``extra=['auxiliary_coordinate']``. -# -# *Parameter example:* -# To create field constructs from domain ancillary and -# cell measure constructs: ``extra=['domain_ancillary', -# 'cell_measure']``. -# -# An extra field construct created via the *extra* parameter -# will have a domain limited to that which can be inferred -# from the corresponding netCDF variable, but without the -# connections that are defined by the parent netCDF data -# variable. 
It is possible to create independent fields from -# metadata constructs that do incorporate as much of the -# parent field construct's domain as possible by using the -# `~cf.Field.convert` method of a returned field construct, -# instead of setting the *extra* parameter. -# -# verbose: `int` or `str` or `None`, optional -# If an integer from ``-1`` to ``3``, or an equivalent string -# equal ignoring case to one of: -# -# * ``'DISABLE'`` (``0``) -# * ``'WARNING'`` (``1``) -# * ``'INFO'`` (``2``) -# * ``'DETAIL'`` (``3``) -# * ``'DEBUG'`` (``-1``) -# -# set for the duration of the method call only as the minimum -# cut-off for the verboseness level of displayed output (log) -# messages, regardless of the globally-configured `cf.log_level`. -# Note that increasing numerical value corresponds to increasing -# verbosity, with the exception of ``-1`` as a special case of -# maximal and extreme verbosity. -# -# Otherwise, if `None` (the default value), output messages will -# be shown according to the value of the `cf.log_level` setting. -# -# Overall, the higher a non-negative integer or equivalent string -# that is set (up to a maximum of ``3``/``'DETAIL'``) for -# increasing verbosity, the more description that is printed to -# convey how the contents of the netCDF file were parsed and -# mapped to CF data model constructs. -# -# warnings: `bool`, optional -# If True then print warnings when an output field construct -# is incomplete due to structural non-compliance of the -# dataset. By default such warnings are not displayed. -# -# ignore_read_error: `bool`, optional -# If True then ignore any file which raises an IOError -# whilst being read, as would be the case for an empty file, -# unknown file format, etc. By default the IOError is -# raised. -# -# fmt: `str`, optional -# Only read files of the given format, ignoring all other -# files. 
Valid formats are ``'NETCDF'`` for CF-netCDF files, -# ``'CFA'`` for CFA-netCDF files, ``'UM'`` for PP or UM -# fields files, and ``'CDL'`` for CDL text files. By default -# files of any of these formats are read. -# -# cdl_string: `bool`, optional -# If True and the format to read is CDL, read a string -# input, or sequence of string inputs, each being interpreted -# as a string of CDL rather than names of locations from -# which field constructs can be read from, as standard. -# -# By default, each string input or string element in the input -# sequence is taken to be a file or directory name or an -# OPenDAP URL from which to read field constructs, rather -# than a string of CDL input, including when the `fmt` -# parameter is set as CDL. -# -# Note that when `cdl_string` is True, the `fmt` parameter is -# ignored as the format is assumed to be CDL, so in that case -# it is not necessary to also specify ``fmt='CDL'``. -# -# aggregate: `bool` or `dict`, optional -# If True (the default) or a dictionary (possibly empty) -# then aggregate the field constructs read in from all input -# files into as few field constructs as possible by passing -# all of the field constructs found the input files to the -# `cf.aggregate`, and returning the output of this function -# call. -# -# If *aggregate* is a dictionary then it is used to -# configure the aggregation process passing its contents as -# keyword arguments to the `cf.aggregate` function. -# -# If *aggregate* is False then the field constructs are not -# aggregated. -# -# squeeze: `bool`, optional -# If True then remove size 1 axes from each field construct's -# data array. -# -# unsqueeze: `bool`, optional -# If True then insert size 1 axes from each field -# construct's domain into its data array. -# -# select: (sequence of) `str` or `Query` or `re.Pattern`, optional -# Only return field constructs whose identities match the -# given values(s), i.e. 
those fields ``f`` for which -# ``f.match_by_identity(*select)`` is `True`. See -# `cf.Field.match_by_identity` for details. -# -# This is equivalent to, but faster than, not using the -# *select* parameter but applying its value to the returned -# field list with its `cf.FieldList.select_by_identity` -# method. For example, ``fl = cf.read(file, -# select='air_temperature')`` is equivalent to ``fl = -# cf.read(file).select_by_identity('air_temperature')``. -# -# recursive: `bool`, optional -# If True then recursively read sub-directories of any -# directories specified with the *files* parameter. -# -# followlinks: `bool`, optional -# If True, and *recursive* is True, then also search for -# files in sub-directories which resolve to symbolic -# links. By default directories which resolve to symbolic -# links are ignored. Ignored of *recursive* is False. Files -# which are symbolic links are always followed. -# -# Note that setting ``recursive=True, followlinks=True`` can -# lead to infinite recursion if a symbolic link points to a -# parent directory of itself. -# -# This parameter replaces the deprecated *follow_symlinks* -# parameter. -# -# mask: `bool`, optional -# If True (the default) then mask by convention the data of -# field and metadata constructs. -# -# A netCDF array is masked depending on the values of any of -# the netCDF attributes ``_FillValue``, ``missing_value``, -# ``_Unsigned``, ``valid_min``, ``valid_max``, and -# ``valid_range``. -# -# The masking by convention of a PP or UM array depends on -# the value of BMDI in the lookup header. A value other than -# ``-1.0e30`` indicates the data value to be masked. -# -# See -# https://ncas-cms.github.io/cf-python/tutorial.html#data-mask -# for details. -# -# .. versionadded:: 3.4.0 -# -# unpack: `bool`, optional -# If True, the default, then unpack arrays by convention -# when the data is read from disk. 
-# -# Unpacking is determined by netCDF conventions for the -# following variable attributes: ``add_offset``, -# ``scale_factor``, and ``_Unsigned``. -# -# .. versionadded:: NEXTVERSION -# -# warn_valid: `bool`, optional -# If True then print a warning for the presence of -# ``valid_min``, ``valid_max`` or ``valid_range`` properties -# on field constructs and metadata constructs that have -# data. By default no such warning is issued. -# -# "Out-of-range" data values in the file, as defined by any -# of these properties, are automatically masked by default, -# which may not be as intended. See the *mask* parameter for -# turning off all automatic masking. -# -# See -# https://ncas-cms.github.io/cf-python/tutorial.html#data-mask -# for details. -# -# .. versionadded:: 3.4.0 -# -# um: `dict`, optional -# For Met Office (UK) PP files and Met Office (UK) fields -# files only, provide extra decoding instructions. This -# option is ignored for input files which are not PP or -# fields files. In most cases, how to decode a file is -# inferrable from the file's contents, but if not then each -# key/value pair in the dictionary sets a decoding option as -# follows: -# -# ============================ ===================================== -# Key Value -# ============================ ===================================== -# ``'fmt'`` The file format (``'PP'`` or -# ``'FF'``) -# -# ``'word_size'`` The word size in bytes -# (``4`` or ``8``). -# -# ``'endian'`` The byte order (``'big'`` or -# ``'little'``). -# -# ``'version'`` The UM version to be used -# when decoding the -# header. Valid versions are, -# for example, ``4.2``, -# ``'6.6.3'`` and -# ``'8.2'``. In general, a -# given version is ignored if -# it can be inferred from the -# header (which is usually the -# case for files created by -# the UM at versions 5.3 and -# later). 
The exception to -# this is when the given -# version has a third element -# (such as the 3 in 6.6.3), in -# which case any version in -# the header is ignored. -# -# The default version is -# ``4.5``. -# -# ``'height_at_top_of_model'`` The height (in metres) of -# the upper bound of the top -# model level. By default the -# height at top model is taken -# from the top level's upper -# bound defined by BRSVD1 in -# the lookup header. If the -# height can't be determined -# from the header, or the -# given height is less than or -# equal to 0, then a -# coordinate reference system -# will still be created that -# contains the 'a' and 'b' -# formula term values, but -# without an atmosphere hybrid -# height dimension coordinate -# construct. -# -# .. note:: A current -# limitation is that if -# pseudolevels and -# atmosphere hybrid height -# coordinates are defined -# by same the lookup -# headers then the height -# **can't be determined -# automatically**. In this -# case the height may be -# found after reading as -# the maximum value of the -# bounds of the domain -# ancillary construct -# containing the 'a' -# formula term. The file -# can then be re-read with -# this height as a *um* -# parameter. -# ============================ ===================================== -# -# If format is specified as ``'PP'`` then the word size and -# byte order default to ``4`` and ``'big'`` respectively. -# -# This parameter replaces the deprecated *umversion* and -# *height_at_top_of_model* parameters. -# -# *Parameter example:* -# To specify that the input files are 32-bit, big-endian -# PP files: ``um={'fmt': 'PP'}`` -# -# *Parameter example:* -# To specify that the input files are 32-bit, -# little-endian PP files from version 5.1 of the UM: -# ``um={'fmt': 'PP', 'endian': 'little', 'version': 5.1}`` -# -# .. versionadded:: 1.5 -# -# dask_chunks: `str`, `int`, `None`, or `dict`, optional -# Specify the Dask chunking for data. 
May be one of the -# following: -# -# * ``'storage-aligned'`` -# -# This is the default. The Dask chunk size in bytes will -# be as close as possible the size given by -# `cf.chunksize`, favouring square-like chunk shapes, -# with the added restriction that the entirety of each -# storage chunk must also lie within exactly one Dask -# chunk. -# -# When reading the data from disk, an entire storage chunk -# will be read once per Dask storage chunk that contains -# any part of it, so ensuring that a storage chunk lies -# within only one Dask chunk can increase performance by -# reducing the amount of disk access (particularly when -# the data are stored remotely to the client). -# -# For instance, consider a file variable that has an array -# of 64-bit floats with shape (400, 300, 60) and a storage -# chunk shape of (100, 5, 60), giving 240 storage chunks -# each of size 100*5*60*8 bytes = 0.23 MiB. Then: -# -# * If `cf.chunksize` returned 134217728 (i.e. 128 MiB), -# then the storage-aligned Dask chunks will have shape -# (400, 300, 60), giving 1 Dask chunk with size of 54.93 -# MiB (compare with a Dask chunk shape of (400, 300, 60) -# and size 54.93 MiB, if *dask_chunks* were ``'auto'``.) -# -# * If `cf.chunksize` returned 33554432 (i.e. 32 MiB), -# then the storage-aligned Dask chunks will have shape -# (200, 260, 60), giving 4 Dask chunks with a maximum -# size of 23.80 MiB (compare with a Dask chunk shape of -# (264, 264, 60) and maximum size 31.90 MiB, if -# *dask_chunks* were ``'auto'``.) -# -# * If `cf.chunksize` returned 4194304 (i.e. 4 MiB), -# then the storage-aligned Dask chunks will have shape -# (100, 85, 60), giving 16 Dask chunks with a maximum -# size of 3.89 MiB (compare with a Dask chunk shape of -# (93, 93, 60) and maximum size 3.96 MiB, if -# *dask_chunks* were ``'auto'``.) 
-# -# There are, however, some occasions when, for particular -# data arrays in the file, the ``'auto'`` option will -# automatically be used instead of storage-aligned Dask -# chunks. This occurs when: -# -# * The data array in the file is stored contiguously. -# -# * The data array in the file is compressed by convention -# (e.g. ragged array representations, compression by -# gathering, subsampled coordinates, etc.). In this case -# the Dask chunks are for the uncompressed data, and so -# cannot be aligned with the storage chunks of the -# compressed array in the file. -# -# * ``'storage-exact'`` -# -# Each Dask chunk will contain exactly one storage chunk -# and each storage chunk will lie within exactly one Dask -# chunk. -# -# For instance, consider a file variable that has an array -# of 64-bit floats with shape (400, 300, 60) and a storage -# chunk shape of (100, 5, 60) (i.e. there are 240 storage -# chunks, each of size 0.23 MiB). Then the storage-exact -# Dask chunks will also have shape (100, 5, 60) giving 240 -# Dask chunks with a maximum size of 0.23 MiB. -# -# There are, however, some occasions when, for particular -# data arrays in the file, the ``'auto'`` option will -# automatically be used instead of storage-exact Dask -# chunks. This occurs when: -# -# * The data array in the file is stored contiguously. -# -# * The data array in the file is compressed by convention -# (e.g. ragged array representations, compression by -# gathering, subsampled coordinates, etc.). In this case -# the Dask chunks are for the uncompressed data, and so -# cannot be aligned with the storage chunks of the -# compressed array in the file. -# -# * ``auto`` -# -# The Dask chunk size in bytes will be as close as -# possible to the size given by `cf.chunksize`, -# favouring square-like chunk shapes. This may give -# similar Dask chunk shapes as the ``'storage-aligned'`` -# option, but without the guarantee that each storage -# chunk will lie within exactly one Dask chunk. 
-# -# * A byte-size given by a `str` -# -# The Dask chunk size in bytes will be as close as -# possible to the given byte-size, favouring square-like -# chunk shapes. Any string value, accepted by the *chunks* -# parameter of the `dask.array.from_array` function is -# permitted. -# -# *Example:* -# A Dask chunksize of 2 MiB may be specified as -# ``'2097152'`` or ``'2 MiB'``. -# -# * `-1` or `None` -# -# There is no Dask chunking, i.e. every data array has one -# Dask chunk regardless of its size. -# -# * Positive `int` -# -# Every dimension of all Dask chunks has this number of -# elements. -# -# *Example:* -# For 3-dimensional data, *dask_chunks* of `10` will -# give Dask chunks with shape (10, 10, 10). -# -# * `dict` -# -# Each of dictionary key identifies a file dimension, with -# a value that defines the Dask chunking for that -# dimension whenever it is spanned by a data array. A file -# dimension is identified in one of three ways: -# -# 1. the netCDF dimension name, preceded by ``ncdim%`` -# (e.g. ``'ncdim%lat'``); -# -# 2. the value of the "standard name" attribute of a -# CF-netCDF coordinate variable that spans the -# dimension (e.g. ``'latitude'``); -# -# 3. the value of the "axis" attribute of a CF-netCDF -# coordinate variable that spans the dimension -# (e.g. ``'Y'``). -# -# The dictionary values may be a byte-size string, -# ``'auto'``, `int` or `None`, with the same meanings as -# those types for the *dask_chunks* parameter itself, but -# applying only to the specified dimension. In addition, a -# dictionary value may be a `tuple` or `list` of integers -# that sum to the dimension size. -# -# Not specifying a file dimension in the dictionary is -# equivalent to it being defined with a value of -# ``'auto'``. 
-# -# *Example:* -# ``{'T': '0.5 MiB', 'Z': 'auto', 'Y': [36, 37], 'X': -# None}`` -# -# *Example:* -# If a netCDF file contains dimensions ``time``, ``z``, -# ``lat`` and ``lon``, then ``{'ncdim%time': 12, -# 'ncdim%lat', None, 'ncdim%lon': None}`` will ensure -# that, for all applicable data arrays, all ``time`` -# axes have a `dask` chunksize of 12; all ``lat`` and -# ``lon`` axes are not `dask` chunked; and all ``z`` -# axes are `dask` chunked to comply as closely as -# possible with the default `dask` chunk size. -# -# If the netCDF file also contains a ``time`` coordinate -# variable with a "standard_name" attribute of -# ``'time'`` and an "axis" attribute of ``'T'``, then -# the same `dask` chunking could be specified with -# either ``{'time': 12, 'ncdim%lat', None, 'ncdim%lon': -# None}`` or ``{'T': 12, 'ncdim%lat', None, 'ncdim%lon': -# None}``. -# -# .. versionadded:: NEXTVERSION -# -# store_hdf5_chunks: `bool`, optional -# If True (the default) then store the HDF5 chunking -# strategy for each returned data array. The HDF5 chunking -# strategy is then accessible via an object's -# `nc_hdf5_chunksizes` method. When the HDF5 chunking -# strategy is stored, it will be used when the data is -# written to a new netCDF4 file with `cf.write` (unless -# the strategy was modified prior to writing). -# -# If False, or if the file being read is not in netCDF4 -# format, then no HDF5 chunking strategy is stored. -# (i.e. an `nc_hdf5_chunksizes` method will return `None` -# for all `Data` objects). In this case, when the data is -# written to a new netCDF4 file, the HDF5 chunking strategy -# will be determined by `cf.write`. -# -# See the `cf.write` *hdf5_chunks* parameter for details -# on how the HDF5 chunking strategy is determined at the -# time of writing. -# -# .. 
versionadded:: NEXTVERSION -# -# domain: `bool`, optional -# If True then return only the domain constructs that are -# explicitly defined by CF-netCDF domain variables, ignoring -# all CF-netCDF data variables. By default only the field -# constructs defined by CF-netCDF data variables are -# returned. -# -# CF-netCDF domain variables are only defined from CF-1.9, -# so older datasets automatically contain no CF-netCDF -# domain variables. -# -# The unique domain constructs of the dataset are easily -# found with the `cf.unique_constructs` function. For -# example:: -# -# >>> d = cf.read('file.nc', domain=True) -# >>> ud = cf.unique_constructs(d) -# >>> f = cf.read('file.nc') -# >>> ufd = cf.unique_constructs(x.domain for x in f) -# -# Domain constructs can not be read from UM or PP datasets. -# -# .. versionadded:: 3.11.0 -# -# cfa: `dict`, optional -# Configure the reading of CFA-netCDF files. The dictionary -# may have any subset of the following key/value pairs to -# override the information read from the file: -# -# * ``'substitutions'``: `dict` -# -# A dictionary whose key/value pairs define text -# substitutions to be applied to the fragment file -# names. Each key may be specified with or without the -# ``${*}`` syntax (where `*` represents any amount of any -# characters). For instance, ``{'substitution': -# 'replacement'}`` and ``{'${substitution}': 'replacement'}``' -# are equivalent. The substitutions are used in -# conjunction with, and take precedence over, any that are -# stored in the CFA-netCDF file by the ``substitutions`` -# attribute of the ``file`` fragement array variable. -# -# *Example:* -# ``{'replacement': 'file:///data/'}`` -# -# .. versionadded:: 3.15.0 -# -# netcdf_backend: `None` or `str`, optional -# Specify which library to use for reading netCDF files. By -# default, or if `None`, then the first one of `netCDF4` and -# `h5netcdf` to successfully open the file netCDF file is -# used. 
Setting *netcdf_backend* to one of ``'netCDF4'`` and -# ``'h5netcdf'`` will force the use of that library. -# -# .. note:: The *netcdf_backend* parameter does not affect -# the opening of netCDF fragment files that define -# the data of aggregation variables. For these, it -# is always the case that the first one of -# `netCDF4` and `h5netcdf` to successfully open -# the file is used. -# -# .. versionadded:: NEXTVERSION -# -# storage_options: `dict` or `None`, optional -# Pass parameters to the backend file system driver, such as -# username, password, server, port, etc. How the storage -# options are interpreted depends on the location of the -# file: -# -# **Local File System** -# -# Storage options are ignored for local files. -# -# **HTTP(S)** -# -# Storage options are ignored for files available across the -# network via OPeNDAP. -# -# **S3-compatible services** -# -# The backend used is `s3fs`, and the storage options are -# used to initialise an `s3fs.S3FileSystem` file system -# object. By default, or if `None`, then *storage_options* -# is taken as ``{}``. -# -# If the ``'endpoint_url'`` key is not in *storage_options*, -# nor in a dictionary defined by the ``'client_kwargs'`` key -# (both of which are the case when *storage_options* is -# `None`), then one will be automatically inserted for -# accessing an S3 file. For example, for a file name of -# ``'s3://store/data/file.nc'``, an ``'endpoint_url'`` key -# with value ``'https://store'`` would be created. To -# disable this, set ``'endpoint_url'`` to `None`. -# -# *Parameter example:* -# For a file name of ``'s3://store/data/file.nc'``, the -# following are equivalent: ``None``, ``{}``, -# ``{'endpoint_url': 'https://store'}``, and -# ``{'client_kwargs': {'endpoint_url': 'https://store'}}`` -# -# *Parameter example:* -# ``{'key': 'scaleway-api-key...', 'secret': -# 'scaleway-secretkey...', 'endpoint_url': -# 'https://s3.fr-par.scw.cloud', 'client_kwargs': -# {'region_name': 'fr-par'}}`` -# -# .. 
versionadded:: NEXTVERSION -# -# cache: `bool`, optional -# If True, the default, then cache the first and last array -# elements of metadata constructs (not field constructs) for -# fast future access. In addition, the second and -# penultimate array elements will be cached from coordinate -# bounds when there are two bounds per cell. For remote -# data, setting *cache* to False may speed up the parsing of -# the file. -# -# .. versionadded:: NEXTVERSION -# -# umversion: deprecated at version 3.0.0 -# Use the *um* parameter instead. -# -# height_at_top_of_model: deprecated at version 3.0.0 -# Use the *um* parameter instead. -# -# field: deprecated at version 3.0.0 -# Use the *extra* parameter instead. -# -# follow_symlinks: deprecated at version 3.0.0 -# Use the *followlinks* parameter instead. -# -# select_options: deprecated at version 3.0.0 -# Use methods on the returned `FieldList` instead. -# -# chunk: deprecated at version 3.14.0 -# Use the *dask_chunks* parameter instead. -# -# chunks: deprecated at version NEXTVERSION -# Use the *dask_chunks* parameter instead. -# -# :Returns: -# -# `FieldList` or `DomainList` -# The field or domain constructs found in the input -# dataset(s). The list may be empty. -# -# **Examples** -# -# >>> x = cf.read('file.nc') -# -# Read a file and create field constructs from CF-netCDF data -# variables as well as from the netCDF variables that correspond to -# particular types metadata constructs: -# -# >>> f = cf.read('file.nc', extra='domain_ancillary') -# >>> g = cf.read('file.nc', extra=['dimension_coordinate', -# ... 
'auxiliary_coordinate']) -# -# Read a file that contains external variables: -# -# >>> h = cf.read('parent.nc') -# >>> i = cf.read('parent.nc', external='external.nc') -# >>> j = cf.read('parent.nc', external=['external1.nc', 'external2.nc']) -# -# >>> f = cf.read('file*.nc') -# >>> f -# [, -# , -# , -# ] -# -# >>> cf.read('file*.nc')[0:2] -# [, -# ] -# -# >>> cf.read('file*.nc')[-1] -# -# -# >>> cf.read('file*.nc', select='units=K') -# [, -# ] -# -# >>> cf.read('file*.nc', select='ncvar%ta') -# -# -# """ -# if field: -# _DEPRECATION_ERROR_FUNCTION_KWARGS( -# "cf.read", -# {"field": field}, -# "Use keyword 'extra' instead", -# removed_at="4.0.0", -# ) # pragma: no cover -# -# if select_options: -# _DEPRECATION_ERROR_FUNCTION_KWARGS( -# "cf.read", {"select_options": select_options}, removed_at="4.0.0" -# ) # pragma: no cover -# -# if follow_symlinks: -# _DEPRECATION_ERROR_FUNCTION_KWARGS( -# "cf.read", -# {"follow_symlinks": follow_symlinks}, -# "Use keyword 'followlink' instead.", -# removed_at="4.0.0", -# ) # pragma: no cover -# -# if height_at_top_of_model is not None: -# _DEPRECATION_ERROR_FUNCTION_KWARGS( -# "cf.read", -# {"height_at_top_of_model": height_at_top_of_model}, -# "Use keyword 'um' instead.", -# removed_at="4.0.0", -# ) # pragma: no cover -# -# if chunk is not True: -# _DEPRECATION_ERROR_FUNCTION_KWARGS( -# "cf.read", -# {"chunk": chunk}, -# "Use keyword 'dask_chunks' instead.", -# version="3.14.0", -# removed_at="5.0.0", -# ) # pragma: no cover -# -# if chunks != "auto": -# _DEPRECATION_ERROR_FUNCTION_KWARGS( -# "cf.read", -# {"chunk": chunk}, -# "Use keyword 'dask_chunks' instead.", -# version="3.14.0", -# removed_at="5.0.0", -# ) # pragma: no cover -# -# # Parse select -# if isinstance(select, (str, Query, Pattern)): -# select = (select,) -# -# # Manage input parameters where contradictions are possible: -# if cdl_string and fmt: -# if fmt == "CDL": -# if is_log_level_info(logger): -# logger.info( -# "It is not necessary to set the cf.read fmt as 
'CDL' when " -# "cdl_string is True, since that implies CDL is the format." -# ) # pragma: no cover -# else: -# raise ValueError( -# "cdl_string can only be True when the format is CDL, though " -# "fmt is ignored in that case so there is no need to set it." -# ) -# if squeeze and unsqueeze: -# raise ValueError("squeeze and unsqueeze can not both be True") -# if follow_symlinks and not recursive: -# raise ValueError( -# f"Can't set follow_symlinks={follow_symlinks!r} " -# f"when recursive={recursive!r}" -# ) -# -# info = cfdm.is_log_level_info(logger) -# -# # Initialise the output list of fields/domains -# if domain: -# out = DomainList() -# else: -# out = FieldList() -# -# if isinstance(aggregate, dict): -# aggregate_options = aggregate.copy() -# aggregate = True -# else: -# aggregate_options = {} -# -# aggregate_options["copy"] = False -# -# # Parse the extra parameter -# if extra is None: -# extra = () -# elif isinstance(extra, str): -# extra = (extra,) -# -# ftypes = set() -# -# # Count the number of fields (in all files) and the number of -# # files -# field_counter = -1 -# file_counter = 0 -# -# if cdl_string: -# files2 = [] -# -# # 'files' input may be a single string or a sequence of them and to -# # handle both cases it is easiest to convert former to a one-item seq. 
-# if isinstance(files, str): -# files = [files] -# -# for cdl_file in files: -# c = tempfile.NamedTemporaryFile( -# mode="w", -# dir=tempfile.gettempdir(), -# prefix="cf_", -# suffix=".cdl", -# ) -# -# c_name = c.name -# with open(c_name, "w") as f: -# f.write(cdl_file) -# -# # ---------------------------------------------------------------- -# # Need to cache the TemporaryFile object so that it doesn't get -# # deleted too soon -# # ---------------------------------------------------------------- -# _cached_temporary_files[c_name] = c -# -# files2.append(c.name) -# -# files = files2 -# -# for file_glob in flat(files): -# # Expand variables -# file_glob = os.path.expanduser(os.path.expandvars(file_glob)) -# -# scheme = urlparse(file_glob).scheme -# if scheme in ("https", "http", "s3"): -# # Do not glob a remote URL -# files2 = (file_glob,) -# else: -# # Glob files on disk -# files2 = glob(file_glob) -# -# if not files2 and not ignore_read_error: -# open(file_glob, "rb") -# -# files3 = [] -# for x in files2: -# if isdir(x): -# # Walk through directories, possibly recursively -# for path, subdirs, filenames in os.walk( -# x, followlinks=followlinks -# ): -# files3.extend(os.path.join(path, f) for f in filenames) -# if not recursive: -# break -# else: -# files3.append(x) -# -# files2 = files3 -# -# for filename in files2: -# if info: -# logger.info(f"File: {filename}") # pragma: no cover -# -# if um: -# ftype = "UM" -# else: -# try: -# ftype = file_type(filename) -# except Exception as error: -# if not ignore_read_error: -# raise ValueError(error) -# -# logger.warning(f"WARNING: {error}") # pragma: no cover -# continue -# -# if domain and ftype == "UM": -# raise ValueError( -# f"Can't read PP/UM file {filename} into domain constructs" -# ) -# -# ftypes.add(ftype) -# -# # -------------------------------------------------------- -# # Read the file -# # -------------------------------------------------------- -# file_contents = _read_a_file( -# filename, -# ftype=ftype, 
-# external=external, -# ignore_read_error=ignore_read_error, -# verbose=verbose, -# warnings=warnings, -# aggregate=aggregate, -# aggregate_options=aggregate_options, -# selected_fmt=fmt, -# um=um, -# extra=extra, -# height_at_top_of_model=height_at_top_of_model, -# dask_chunks=dask_chunks, -# store_hdf5_chunks=store_hdf5_chunks, -# mask=mask, -# unpack=unpack, -# warn_valid=warn_valid, -# select=select, -# domain=domain, -# cfa=cfa, -# cfa_write=cfa_write, -# netcdf_backend=netcdf_backend, -# storage_options=storage_options, -# cache=cache, -# ) -# -# # -------------------------------------------------------- -# # Select matching fields (not from UM files, yet) -# # -------------------------------------------------------- -# if select and ftype != "UM": -# file_contents = file_contents.select_by_identity(*select) -# -# # -------------------------------------------------------- -# # Add this file's contents to that already read from other -# # files -# # -------------------------------------------------------- -# out.extend(file_contents) -# -# field_counter = len(out) -# file_counter += 1 -# -# if info: -# logger.info( -# f"Read {field_counter} field{_plural(field_counter)} from " -# f"{file_counter} file{_plural(file_counter)}" -# ) # pragma: no cover -# -# # ---------------------------------------------------------------- -# # Aggregate the output fields/domains -# # ---------------------------------------------------------------- -# if aggregate and len(out) > 1: -# org_len = len(out) # pragma: no cover -# -# out = cf_aggregate(out, **aggregate_options) -# -# n = len(out) # pragma: no cover -# if info: -# logger.info( -# f"{org_len} input field{_plural(org_len)} aggregated into " -# f"{n} field{_plural(n)}" -# ) # pragma: no cover -# -# # ---------------------------------------------------------------- -# # Sort by netCDF variable name -# # ---------------------------------------------------------------- -# if len(out) > 1: -# out.sort(key=lambda f: 
f.nc_get_variable("")) -# -# # ---------------------------------------------------------------- -# # Add standard names to UM/PP fields (post aggregation) -# # ---------------------------------------------------------------- -# for f in out: -# standard_name = f._custom.get("standard_name", None) -# if standard_name is not None: -# f.set_property("standard_name", standard_name, copy=False) -# del f._custom["standard_name"] -# -# # ---------------------------------------------------------------- -# # Select matching fields from UM/PP fields (post setting of -# # standard names) -# # ---------------------------------------------------------------- -# if select and "UM" in ftypes: -# out = out.select_by_identity(*select) -# -# # ---------------------------------------------------------------- -# # Squeeze size one dimensions from the data arrays. Do one of: -# # -# # 1) Squeeze the fields, i.e. remove all size one dimensions from -# # all field data arrays -# # -# # 2) Unsqueeze the fields, i.e. Include all size 1 domain -# # dimensions in the data array. 
-# # -# # 3) Nothing -# # ---------------------------------------------------------------- -# if not domain: -# if squeeze: -# for f in out: -# f.squeeze(inplace=True) -# elif unsqueeze: -# for f in out: -# f.unsqueeze(inplace=True) -# -# if nfields is not None and len(out) != nfields: -# raise ValueError( -# f"{nfields} field{_plural(nfields)} requested but " -# f"{len(out)} field/domain constucts found in " -# f"file{_plural(file_counter)}" -# ) -# -# return out -# -# -# def _plural(n): # pragma: no cover -# """Return a suffix which reflects a word's plural.""" -# return "s" if n != 1 else "" # pragma: no cover -# -# -# @_manage_log_level_via_verbosity -# def _read_a_file( -# filename, -# ftype=None, -# aggregate=True, -# aggregate_options=None, -# ignore_read_error=False, -# verbose=None, -# warnings=False, -# external=None, -# selected_fmt=None, -# um=None, -# extra=None, -# height_at_top_of_model=None, -# mask=True, -# unpack=True, -# warn_valid=False, -# dask_chunks="storage-aligned", -# store_hdf5_chunks=True, -# select=None, -# domain=False, -# cfa=None, -# cfa_write=None, -# netcdf_backend=None, -# storage_options=None, -# cache=True, -# ): -# """Read the contents of a single file into a field list. -# -# :Parameters: -# -# filename: `str` -# See `cf.read` for details. -# -# ftype: `str` -# The file format to interpret the file. Recognised formats are -# ``'netCDF'``, ``'CDL'``, ``'UM'`` and ``'PP'``. -# -# aggregate_options: `dict`, optional -# See `cf.read` for details. -# -# ignore_read_error: `bool`, optional -# See `cf.read` for details. -# -# mask: `bool`, optional -# See `cf.read` for details. -# -# unpack: `bool`, optional -# See `cf.read` for details. -# -# verbose: `int` or `str` or `None`, optional -# See `cf.read` for details. -# -# select: optional -# For `read. Ignored for a netCDF file. -# -# domain: `bool`, optional -# See `cf.read` for details. -# -# cfa: `dict`, optional -# See `cf.read` for details. -# -# .. 
versionadded:: 3.15.0 -# -# storage_options: `dict` or `None`, optional -# See `cf.read` for details. -# -# .. versionadded:: NEXTVERSION -# -# netcdf_backend: `str` or `None`, optional -# See `cf.read` for details. -# -# .. versionadded:: NEXTVERSION -# -# cache: `bool`, optional -# See `cf.read` for details. -# -# .. versionadded:: NEXTVERSION -# -# :Returns: -# -# `FieldList` or `DomainList` -# The field or domain constructs in the dataset. -# -# """ -# if aggregate_options is None: -# aggregate_options = {} -# -# # Find this file's type -# fmt = None -# word_size = None -# endian = None -# height_at_top_of_model = None -# umversion = 405 -# -# if um: -# fmt = um.get("fmt") -# word_size = um.get("word_size") -# endian = um.get("endian") -# umversion = um.get("version", umversion) -# height_at_top_of_model = um.get("height_at_top_of_model") -# -# if fmt is not None: -# fmt = fmt.upper() -# -# if umversion is not None: -# umversion = float(str(umversion).replace(".", "0", 1)) -# -# extra_read_vars = { -# "fmt": selected_fmt, -# "ignore_read_error": ignore_read_error, -# } -# -# # ---------------------------------------------------------------- -# # Still here? Read the file into fields or domains. 
-# # ---------------------------------------------------------------- -# originally_cdl = ftype == "CDL" -# if originally_cdl: -# # Create a temporary netCDF file from input CDL -# ftype = "netCDF" -# cdl_filename = filename -# filename = netcdf.cdl_to_netcdf(filename) -# extra_read_vars["fmt"] = "NETCDF" -# -# if not netcdf.is_netcdf_file(filename): -# error_msg = ( -# f"Can't determine format of file {filename} generated " -# f"from CDL file {cdl_filename}" -# ) -# if ignore_read_error: -# logger.warning(error_msg) # pragma: no cover -# return FieldList() -# else: -# raise IOError(error_msg) -# -# if ftype == "netCDF" and extra_read_vars["fmt"] in (None, "NETCDF", "CFA"): -# # See https://github.com/NCAS-CMS/cfdm/issues/128 for context on the -# # try/except here, which acts as a temporary fix pending decisions on -# # the best way to handle CDL with only header or coordinate info. -# try: -# # out = netcdf.read( -# out = netcdf.read( -# filename, -# external=external, -# extra=extra, -# verbose=verbose, -# warnings=warnings, -# extra_read_vars=extra_read_vars, -# mask=mask, -# unpack=unpack, -# warn_valid=warn_valid, -# domain=domain, -# storage_options=storage_options, -# netcdf_backend=netcdf_backend, -# dask_chunks=dask_chunks, -# store_hdf5_chunks=store_hdf5_chunks, -# cache=cache, -# cfa=cfa, -# cfa_write=cfa_write, -# ) -# except MaskError: -# # Some data required for field interpretation is missing, -# # manifesting downstream as a NumPy MaskError. -# if originally_cdl: -# raise ValueError( -# "Unable to convert CDL without data to field construct(s) " -# "because there is insufficient information provided by " -# "the header and/or coordinates alone in this case." -# ) -# else: -# raise ValueError( -# "Unable to convert netCDF to field or domain construct " -# "because there is missing data." 
-# ) -# -# elif ftype == "UM" and extra_read_vars["fmt"] in (None, "UM"): -# if domain: -# raise ValueError( -# "Can't set domain=True when reading UM or PP datasets" -# ) -# -# out = UM.read( -# filename, -# um_version=umversion, -# verbose=verbose, -# set_standard_name=False, -# height_at_top_of_model=height_at_top_of_model, -# fmt=fmt, -# word_size=word_size, -# endian=endian, -# select=select, -# ) -# -# # PP fields are aggregated intrafile prior to interfile -# # aggregation -# if aggregate: -# # For PP fields, the default is strict_units=False -# if "strict_units" not in aggregate_options: -# aggregate_options["relaxed_units"] = True -# -# # ---------------------------------------------------------------- -# # Return the fields -# # ---------------------------------------------------------------- -# if domain: -# return DomainList(out) -# -# return FieldList(out) -# -# -# def file_type(filename): -# """Return the file format. -# -# :Parameters: -# -# filename: `str` -# The file name. -# -# :Returns: -# -# `str` -# The format type of the file. One of ``'netCDF'``, ``'UM'`` -# or ``'CDL'``. -# -# **Examples** -# -# >>> file_type(filename) -# 'netCDF' -# -# """ -# # ---------------------------------------------------------------- -# # NetCDF -# # ---------------------------------------------------------------- -# if netcdf.is_netcdf_file(filename): -# return "netCDF" -# -# # ---------------------------------------------------------------- -# # PP or FF -# # ---------------------------------------------------------------- -# if UM.is_um_file(filename): -# return "UM" -# -# # ---------------------------------------------------------------- -# # CDL -# # ---------------------------------------------------------------- -# if netcdf.is_cdl_file(filename): -# return "CDL" -# -# # Still here? 
-# raise IOError(f"Can't determine format of file {filename}") diff --git a/cf/read_write/write.py b/cf/read_write/write.py index aadaff6273..b6b3f55ae9 100644 --- a/cf/read_write/write.py +++ b/cf/read_write/write.py @@ -1,781 +1,7 @@ -import numpy +import cfdm from ..cfimplementation import implementation -from ..decorators import _manage_log_level_via_verbosity -from ..functions import ( - _DEPRECATION_ERROR_FUNCTION_KWARG, - _DEPRECATION_ERROR_FUNCTION_KWARG_VALUE, - flat, -) -from .netcdf import NetCDFWrite -netcdf = NetCDFWrite(implementation()) - -@_manage_log_level_via_verbosity -def write( - fields, - filename, - fmt="NETCDF4", - mode="w", - overwrite=True, - global_attributes=None, - file_descriptors=None, - external=None, - Conventions=None, - datatype=None, - least_significant_digit=None, - endian="native", - compress=0, - fletcher32=False, - shuffle=True, - reference_datetime=None, - verbose=None, - cfa="auto", - single=None, - double=None, - variable_attributes=None, - string=True, - warn_valid=True, - group=True, - coordinates=False, - omit_data=None, - hdf5_chunks="4 MiB", - cfa_options=None, -): - """Write field constructs to a netCDF file. - - **File format** - - See the *fmt* parameter for details on which output netCDF file - formats are supported. - - - **NetCDF variable and dimension names** - - These names are stored within constructs read a from dataset, or - may be set manually. They are used when writing a field construct - to the file. If a name has not been set then one will be - constructed (usually based on the standard name if it exists). The - names may be modified internally to prevent duplication in the - file. 
- - Each construct, or construct component, that corresponds to a - netCDF variable has the following methods to get, set and remove a - netCDF variable name: `!nc_get_variable`, `!nc_set_variable` and - `!nc_del_variable` method - - The domain axis construct has the following methods to get, set - and remove a netCDF dimension name: - `~cf.DomainAxis.nc_get_dimension`, - `~cf.DomainAxis.nc_set_dimension` and - `~cf.DomainAxis.nc_del_dimension`. - - - **NetCDF attributes** - - Field construct properties may be written as netCDF global - attributes and/or netCDF data variable attributes. See the - *file_descriptors*, *global_attributes* and *variable_attributes* - parameters for details. - - - **External variables** - - Metadata constructs marked as external are omitted from the file - and referred to via the netCDF "external_variables" global - attribute. However, omitted constructs may be written to an - external file (see the *external* parameter for details). - - - **NetCDF unlimited dimensions** - - Domain axis constructs that correspond to NetCDF unlimited - dimensions may be accessed with the - `~cf.DomainAxis.nc_is_unlimited` and - `~cf.DomainAxis.nc_set_unlimited` methods of a domain axis - construct. - - - **NetCDF-4 hierarchical groups** - - Hierarchical groups in CF provide a mechanism to structure - variables within netCDF-4 datasets with well defined rules for - resolving references to out-of-group netCDF variables and - dimensions. The group structure defined by a field construct's - netCDF interface will, by default, be recreated in the output - dataset. See the *group* parameter for details. - - - **NetCDF-4 HDF chunk sizes** - - HDF5 chunksizes may be set on contruct's data. See the - `~cf.Data.nc_hdf5_chunksizes`, `~cf.Data.nc_clear_hdf5_chunksizes` - and `~cf.Data.nc_set_hdf5_chunksizes` methods of a `Data` - instance. - - .. 
seealso:: `cf.read` - - :Parameters: - - fields: (arbitrarily nested sequence of) `Field` or `FieldList` - The field constructs to write to the file. - - filename: `str` - The output netCDF file name. Various type of expansion are - applied to the file names. - - Relative paths are allowed, and standard tilde and shell - parameter expansions are applied to the string. - - *Parameter example:* - The file file.nc in the user’s home directory could be - described by any of the following: '``$HOME/file.nc'``, - ``'${HOME}/file.nc'``, ``'~/file.nc'``, - ``'~/tmp/../file.nc'``. - - fmt: `str`, optional - The format of the output file. One of: - - ========================== ============================== - *fmt* Output file type - ========================== ============================== - ``'NETCDF4'`` NetCDF4 format file. This is - the default. - - ``'NETCDF4_CLASSIC'`` NetCDF4 classic format file - (see below) - - ``'NETCDF3_CLASSIC'`` NetCDF3 classic format file - (limited to file sizes less - than 2GB). - - ``'NETCDF3_64BIT_OFFSET'`` NetCDF3 64-bit offset format - file - - ``'NETCDF3_64BIT'`` An alias for - ``'NETCDF3_64BIT_OFFSET'`` - - ``'NETCDF3_64BIT_DATA'`` NetCDF3 64-bit offset format - file with extensions (see - below) - - ``'CFA'`` or ``'CFA4'`` Deprecated at version - 3.15.0. See the *cfa* - parameter. - - ``'CFA3'`` Deprecated at version - 3.15.0. See the *cfa* - parameter. - ========================== ============================== - - By default the format is ``'NETCDF4'``. - - ``'NETCDF3_64BIT_DATA'`` is a format that requires version - 4.4.0 or newer of the C library (use `cf.environment` to - see which version if the netCDF-C library is in use). It - extends the ``'NETCDF3_64BIT_OFFSET'`` binary format to - allow for unsigned/64 bit integer data types and 64-bit - dimension sizes. - - ``'NETCDF4_CLASSIC'`` files use the version 4 disk format - (HDF5), but omits features not found in the version 3 - API. They can be read by HDF5 clients. 
They can also be - read by netCDF3 clients only if they have been re-linked - against the netCDF4 library. - - ``'NETCDF4'`` files use the version 4 disk format (HDF5) - and use the new features of the version 4 API. - - mode: `str`, optional - Specify the mode of write access for the output file. One of: - - ======== ================================================= - *mode* Description - ======== ================================================= - ``'w'`` Open a new file for writing to. If it exists and - *overwrite* is True then the file is deleted - prior to being recreated. - - ``'a'`` Open an existing file for appending new - information to. The new information will be - incorporated whilst the original contents of the - file will be preserved. - - In practice this means that new fields will be - created, whilst the original fields will not be - edited at all. Coordinates on the fields, where - equal, will be shared as standard. - - For append mode, note the following: - - * Global attributes on the file - will remain the same as they were originally, - so will become inaccurate where appended fields - have incompatible attributes. To rectify this, - manually inspect and edit them as appropriate - after the append operation using methods such as - `nc_clear_global_attributes` and - `nc_set_global_attribute`. - - * Fields with incompatible ``featureType`` to - the original file cannot be appended. - - * At present fields with groups cannot be - appended, but this will be possible in a future - version. Groups can however be cleared, the - fields appended, and groups re-applied, via - methods such as `nc_clear_variable_groups` and - `nc_set_variable_groups`, to achieve the same - for now. - - * At present domain ancillary constructs of - appended fields may not be handled correctly - and can appear as extra fields. Set them on the - resultant fields using `set_domain_ancillary` - and similar methods if required. - - ``'r+'`` Alias for ``'a'``. 
- - ======== ================================================= - - By default the file is opened with write access mode - ``'w'``. - - overwrite: `bool`, optional - If False then raise an error if the output file - pre-exists. By default a pre-existing output file is - overwritten. - - Conventions: (sequence of) `str`, optional - Specify conventions to be recorded by the netCDF global - "Conventions" attribute. By default the current - conventions are always included, but if an older CF - conventions is defined then this is used instead. - - *Parameter example:* - ``Conventions='UGRID-1.0'`` - - *Parameter example:* - ``Conventions=['UGRID-1.0']`` - - *Parameter example:* - ``Conventions=['CMIP-6.2', 'UGRID-1.0']`` - - *Parameter example:* - ``Conventions='CF-1.7'`` - - *Parameter example:* - ``Conventions=['CF-1.7', 'UGRID-1.0']`` - - Note that if the "Conventions" property is set on a field - construct then it is ignored. - - file_descriptors: `dict`, optional - Create description of file contents netCDF global - attributes from the specified attributes and their - values. - - If any field construct has a property with the same name - then it will be written as a netCDF data variable - attribute, even if it has been specified by the - *global_attributes* parameter, or has been flagged as - global on any of the field constructs (see - `cf.Field.nc_global_attributes` for details). - - Identification of the conventions being adhered to by the - file are not specified as a file descriptor, but by the - *Conventions* parameter instead. - - *Parameter example:* - ``file_attributes={'title': 'my data'}`` - - *Parameter example:* - ``file_attributes={'history': 'created 2019-01-01', 'foo': 'bar'}`` - - global_attributes: (sequence of) `str`, optional - Create netCDF global attributes from the specified field - construct properties, rather than netCDF data variable - attributes. 
- - These attributes are in addition to the following field - construct properties, which are created as netCDF global - attributes by default: - - * the description of file contents properties (as defined - by the CF conventions), and - - * properties flagged as global on any of the field - constructs being written (see - `cf.Field.nc_global_attributes` for details). - - Note that it is not possible to create a netCDF global - attribute from a property that has different values for - different field constructs being written. In this case - the property will not be written as a netCDF global - attribute, even if it has been specified by the - *global_attributes* parameter or is one of the default - properties, but will appear as an attribute on the netCDF - data variable corresponding to each field construct that - contains the property. - - Any global attributes that are also specified as file - descriptors will not be written as netCDF global - variables, but as netCDF data variable attributes - instead. - - *Parameter example:* - ``global_attributes='project'`` - - *Parameter example:* - ``global_attributes=['project']`` - - *Parameter example:* - ``global_attributes=['project', 'experiment']`` - - variable_attributes: (sequence of) `str`, optional - Create netCDF data variable attributes from the specified - field construct properties. - - By default, all field construct properties that are not - created as netCDF global properties are created as - attributes netCDF data variables. See the - *global_attributes* parameter for details. 
- - Any field construct property named by the - *variable_attributes* parameter will always be created as - a netCDF data variable attribute - - *Parameter example:* - ``variable_attributes='project'`` - - *Parameter example:* - ``variable_attributes=['project']`` - - *Parameter example:* - ``variable_attributes=['project', 'doi']`` - - external: `str`, optional - Write metadata constructs that have data and are marked as - external to the named external file. Ignored if there are - no such constructs. - - endian: `str`, optional - The endian-ness of the output file. Valid values are - ``'little'``, ``'big'`` or ``'native'``. By default the - output is native endian. See the `netCDF4 package - `_ for more - details. - - *Parameter example:* - ``endian='big'`` - - compress: `int`, optional - Regulate the speed and efficiency of compression. Must be - an integer between ``0`` and ``9``. ``0`` means no - compression; ``1`` is the fastest, but has the lowest - compression ratio; ``9`` is the slowest but best - compression ratio. The default value is ``0``. An error is - raised if compression is requested for a netCDF3 output - file format. See the `netCDF4 package - `_ for more - details. - - *Parameter example:* - ``compress=4`` - - least_significant_digit: `int`, optional - Truncate the input field construct data arrays, but not - the data arrays of metadata constructs. For a given - positive integer, N the precision that is retained in the - compressed data is 10 to the power -N. For example, a - value of 2 will retain a precision of 0.01. In conjunction - with the *compress* parameter this produces 'lossy', but - significantly more efficient, compression. See the - `netCDF4 package - `_ for more - details. - - *Parameter example:* - ``least_significant_digit=3`` - - fletcher32: `bool`, optional - If True then the Fletcher-32 HDF5 checksum algorithm is - activated to detect compression errors. Ignored if - *compress* is ``0``. 
See the `netCDF4 package - `_ for details. - - shuffle: `bool`, optional - If True (the default) then the HDF5 shuffle filter (which - de-interlaces a block of data before compression by - reordering the bytes by storing the first byte of all of a - variable's values in the chunk contiguously, followed by - all the second bytes, and so on) is turned off. By default - the filter is applied because if the data array values are - not all wildly different, using the filter can make the - data more easily compressible. Ignored if the *compress* - parameter is ``0`` (which is its default value). See the - `netCDF4 package - `_ for more - details. - - datatype: `dict`, optional - Specify data type conversions to be applied prior to - writing data to disk. This may be useful as a means of - packing, or because the output format does not support a - particular data type (for example, netCDF3 classic files - do not support 64-bit integers). By default, input data - types are preserved. Any data type conversion is only - applied to the arrays on disk, and not to the input field - constructs themselves. - - Data types conversions are defined by `numpy.dtype` - objects in a dictionary whose keys are input data types - with values of output data types. - - *Parameter example:* - To convert 64-bit integers to 32-bit integers: - ``datatype={numpy.dtype('int64'): - numpy.dtype('int32')}``. - - single: `bool`, optional - If True then write 64-bit floats as 32-bit floats and - 64-bit integers as 32-bit integers. - - If False then write 32-bit floats as 64-bit floats and - 32-bit integers as 64-bit integers. - - By default, input data types are preserved. - - .. note:: ``single=True`` is exactly equivalent to - ``double=False``, as well as - ``datatype={numpy.dtype(float): - numpy.dtype('float32'), numpy.dtype(int): - numpy.dtype('int32')}``. - - ``single=False`` is exactly equivalent to - ``double=True``. 
- - double: `bool`, optional - If True then write 32-bit floats as 64-bit floats and - 32-bit integers as 64-bit integers. - - If False then write 64-bit floats as 32-bit floats and - 64-bit integers as 32-bit integers. - - By default, input data types are preserved. - - .. note:: ``double=True`` is exactly equivalent to - ``single=False``, as well as - ``datatype={numpy.dtype('float32'): - numpy.dtype(float), numpy.dtype('int32'): - numpy.dtype(int)}``. - - ``double=False`` is exactly equivalent to - ``single=True``. - - string: `bool`, optional - By default string-valued construct data are written as - netCDF arrays of type string if the output file format is - ``'NETCDF4'``, or of type char with an extra dimension - denoting the maximum string length for any other output - file format (see the *fmt* parameter). If *string* is False - then string-valued construct data are written as netCDF - arrays of type char with an extra dimension denoting the - maximum string length, regardless of the selected output - file format. - - verbose: `int` or `str` or `None`, optional - If an integer from ``-1`` to ``3``, or an equivalent string - equal ignoring case to one of: - - * ``'DISABLE'`` (``0``) - * ``'WARNING'`` (``1``) - * ``'INFO'`` (``2``) - * ``'DETAIL'`` (``3``) - * ``'DEBUG'`` (``-1``) - - set for the duration of the method call only as the minimum - cut-off for the verboseness level of displayed output (log) - messages, regardless of the globally-configured `cf.log_level`. - Note that increasing numerical value corresponds to increasing - verbosity, with the exception of ``-1`` as a special case of - maximal and extreme verbosity. - - Otherwise, if `None` (the default value), output messages will - be shown according to the value of the `cf.log_level` setting. 
- - Overall, the higher a non-negative integer or equivalent string - that is set (up to a maximum of ``3``/``'DETAIL'``) for - increasing verbosity, the more description that is printed to - convey how constructs map to output netCDF dimensions, variables - and attributes. - - warn_valid: `bool`, optional - If False then do not print a warning when writing - "out-of-range" data, as indicated by the values, if - present, of any of the ``valid_min``, ``valid_max`` or - ``valid_range`` properties on field and metadata - constructs that have data. By default a warning is printed - if any such construct has any of these properties in - combination with out-of-range data. - - The consequence of writing out-of-range data values is - that, by default, these values will be masked when the - file is subsequently read. - - *Parameter example:* - If a construct has ``valid_max`` property with value - ``100`` and data with maximum value ``999``, then the - resulting warning may be suppressed by setting - ``warn_valid=False``. - - .. versionadded:: 3.4.0 - - group: `bool`, optional - If False then create a "flat" netCDF file, i.e. one with - only the root group, regardless of any group structure - specified by the field constructs. By default any groups - defined by the netCDF interface of the field constructs - and its components will be created and populated. - - .. versionadded:: 3.6.0 - - coordinates: `bool`, optional - If True then include CF-netCDF coordinate variable names - in the 'coordinates' attribute of output data - variables. By default only auxiliary and scalar coordinate - variables are included. - - .. versionadded:: 3.7.0 - - omit_data: (sequence of) `str`, optional - Do not write the data of the named construct types. - - This does not affect the amount of netCDF variables and - dimensions that are written to the file, nor the netCDF - variables' attributes, but does not create data on disk - for the requested variables. 
The resulting file will be - smaller than it otherwise would have been, and when the - new file is read then the data of these variables will be - represented by an array of all missing data. - - The *omit_data* parameter may be one, or a sequence, of: - - ========================== =============================== - *omit_data* Construct types - ========================== =============================== - ``'field'`` Field constructs - ``'field_ancillary'`` Field ancillary constructs - ``'domain_ancillary'`` Domain ancillary constructs - ``'dimension_coordinate'`` Dimension coordinate constructs - ``'auxiliary_coordinate'`` Auxiliary coordinate constructs - ``'cell_measure'`` Cell measure constructs - ``'all'`` All of the above constructs - ========================== =============================== - - *Parameter example:* - To omit the data from only field constructs: - ``omit_data='field'`` or ``omit_data=['field']``. - - *Parameter example:* - To omit the data from domain ancillary and cell measure - constructs: ``omit_data=['domain_ancillary', - 'cell_measure']``. - - .. versionadded:: 3.14.0 - - cfa: `bool` or `dict`, optional - If True or a (possibly empty) dictionary then write the - constructs as CFA-netCDF aggregation variables, where - possible and where requested. - - The netCDF format of the CFA-netCDF file is determined by - the *fmt* parameter, as usual. - - If *cfa* is a dictionary then it is used to configure the - CFA write process. The default options when CFA writing is - enabled are ``{'constructs': 'field', 'absolute_paths': - True, 'strict': True, 'substitutions': {}}``, and the - dictionary may have any subset of the following key/value - pairs to override these defaults: - - * ``'constructs'``: `dict` or (sequence of) `str` - - The types of construct to be written as CFA-netCDF - aggregation variables. By default only field constructs - are written as CFA-netCDF aggregation variables. 
- - The types may be given as a (sequence of) `str`, which - may take any of the values allowed by the *omit_data* - parameter. Alternatively, the same types may be given as - keys to a `dict` whose values specify the number of - dimensions that a construct must also have if it is to - be written as a CFA-netCDF aggregation variable. A value - of `None` means no restriction on the number of - dimensions, which is equivalent to a value of - ``cf.ge(0)``. - - *Example:* - Equivalent ways to only write cell measure constructs - as CFA-netCDF aggregation variables: - ``'cell_measure``, ``['cell_measure']``, - ``{'cell_measure': None}``, ``{'cell_measure': - cf.ge(0)}`` - - *Example:* - Equivalent ways to only write field and auxiliary - coordinate constructs as CFA-netCDF aggregation - variables: ``('field', 'auxiliary_coordinate')`` and - ``{'field': None, 'auxiliary_coordinate': None}``. - - *Example:* - Equivalent ways to only write two-dimensional - auxiliary coordinate constructs as CFA-netCDF - aggregation variables: ``{'auxiliary_coordinate': - 2}`` and ``{'auxiliary_coordinate': cf.eq(2)}``. - - *Example:* - Only write auxiliary coordinate constructs with two or - more dimensions as CFA-netCDF variables, and also all - field constructs: ``{'field': None, - 'auxiliary_coordinate': cf.ge(2)}``. - - * ``'absolute_paths'``: `bool` - - How to write fragment file names. Set to True (the - default) for them to be written as fully qualified URIs, - or else set to False for them to be written as local - paths relative to the location of the CFA-netCDF file - being created. - - * ``'strict'``: `bool` - - If True (the default) then an exception is raised if it - is not possible to create a CFA aggregation variable - from data identified by the ``'constructs'`` option. If - False then a normal CF-netCDF variable will be written - in this case. 
- - * ``'substitutions'``: `dict` - - A dictionary whose key/value pairs define text - substitutions to be applied to the fragment file - names. Each key may be specified with or without the - ``${...}`` syntax. For instance, the following are - equivalent: ``{'base': 'sub'}``, ``{'${base}': 'sub'}``. - The substitutions are used in conjunction with, and take - precedence over, any that are also defined on individual - constructs (see `cf.Data.cfa_update_file_substitutions` - for details). - - Substitutions are stored in the output file by the - ``substitutions`` attribute of the ``file`` CFA - aggregation instruction variable. - - *Example:* - ``{'base': 'file:///data/'}`` - - .. versionadded:: 3.15.0 - - cfa_options: Deprecated at version 3.15.0 - Use the *cfa* parameter instead. - - :Returns: - - `None` - - **Examples** - - There are further worked examples - :ref:`in the tutorial `. - - >>> cf.write(f, 'file.nc') - - >>> cf.write(f, 'file.nc', fmt='NETCDF3_CLASSIC') - - >>> cf.write(f, 'file.nc', external='cell_measures.nc') - - >>> cf.write(f, 'file.nc', Conventions='CMIP-6.2') - - """ - if fmt in ("CFA", "CFA4", "CFA3"): - return _DEPRECATION_ERROR_FUNCTION_KWARG_VALUE( - "cf.write", - "fmt", - fmt, - "Use the 'cfa' keyword instead.", - version="3.15.0", - removed_at="5.0.0", - ) # pragma: no cover - - if cfa_options is not None: - return _DEPRECATION_ERROR_FUNCTION_KWARG( - "cf.write", - "cfa_options", - "Use keyword 'cfa' instead.", - version="3.15.0", - removed_at="5.0.0", - ) # pragma: no cover - - # Flatten the sequence of intput fields - fields = tuple(flat(fields)) - if fields: - # double and single - if datatype: - if single is not None: - raise ValueError("Can't set datatype and single") - if double is not None: - raise ValueError("Can't set datatype and double") - - if single is not None and double is not None: - raise ValueError("Can't set both the single and double parameters") - - if single is not None and not single: - double = True - - if 
double is not None and not double: - single = True - - if single: - datatype = { - numpy.dtype(float): numpy.dtype("float32"), - numpy.dtype(int): numpy.dtype("int32"), - } - - if double: - datatype = { - numpy.dtype("float32"): numpy.dtype(float), - numpy.dtype("int32"): numpy.dtype(int), - } - - # Extra write variables - extra_write_vars = {"reference_datetime": reference_datetime} - - netcdf.write( - fields, - filename, - fmt=fmt, - mode=mode, - overwrite=overwrite, - global_attributes=global_attributes, - variable_attributes=variable_attributes, - file_descriptors=file_descriptors, - external=external, - Conventions=Conventions, - datatype=datatype, - least_significant_digit=least_significant_digit, - endian=endian, - compress=compress, - shuffle=shuffle, - fletcher32=fletcher32, - verbose=verbose, - string=string, - warn_valid=warn_valid, - group=group, - coordinates=coordinates, - extra_write_vars=extra_write_vars, - omit_data=omit_data, - hdf5_chunks=hdf5_chunks, - cfa=cfa, - ) +class write(cfdm.write): + implementation = implementation() diff --git a/cf/test/test_read_write.py b/cf/test/test_read_write.py index 613280eaf3..4c61f3e542 100644 --- a/cf/test/test_read_write.py +++ b/cf/test/test_read_write.py @@ -570,36 +570,24 @@ def test_write_datatype(self): datatype={np.dtype(float): np.dtype("float32")}, ) g = cf.read(tmpfile)[0] - self.assertEqual( - g.dtype, - np.dtype("float32"), - "datatype read in is " + str(g.dtype), - ) + self.assertEqual(g.dtype, np.dtype("float32")) # Keyword single f = cf.read(self.filename)[0] self.assertEqual(f.dtype, np.dtype(float)) cf.write(f, tmpfile, fmt="NETCDF4", single=True) g = cf.read(tmpfile)[0] - self.assertEqual( - g.dtype, - np.dtype("float32"), - "datatype read in is " + str(g.dtype), - ) + self.assertEqual(g.dtype, np.dtype("float32")) # Keyword double f = g self.assertEqual(f.dtype, np.dtype("float32")) - cf.write(f, tmpfile2, fmt="NETCDF4", double=True) - g = cf.read(tmpfile2)[0] - self.assertEqual( - g.dtype, 
np.dtype(float), "datatype read in is " + str(g.dtype) - ) + cf.write(f, tmpfile1, fmt="NETCDF4", double=True) + g = cf.read(tmpfile1)[0] + self.assertEqual(g.dtype, np.dtype(float)) - for single in (True, False): - for double in (True, False): - with self.assertRaises(Exception): - cf.write(g, double=double, single=single) + with self.assertRaises(Exception): + cf.write(g, double=True, single=True) datatype = {np.dtype(float): np.dtype("float32")} with self.assertRaises(Exception): From f2675afd759d699e2655d0f205f7ec1fd83b1105 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Tue, 19 Nov 2024 14:03:29 +0000 Subject: [PATCH 15/51] dev --- cf/read_write/netcdf/__init__.py | 1 - cf/read_write/netcdf/netcdfwrite.py | 154 ---------------------------- cf/read_write/read.py | 6 +- 3 files changed, 3 insertions(+), 158 deletions(-) delete mode 100644 cf/read_write/netcdf/__init__.py delete mode 100644 cf/read_write/netcdf/netcdfwrite.py diff --git a/cf/read_write/netcdf/__init__.py b/cf/read_write/netcdf/__init__.py deleted file mode 100644 index b443940c43..0000000000 --- a/cf/read_write/netcdf/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .netcdfwrite import NetCDFWrite diff --git a/cf/read_write/netcdf/netcdfwrite.py b/cf/read_write/netcdf/netcdfwrite.py deleted file mode 100644 index dbba9488dd..0000000000 --- a/cf/read_write/netcdf/netcdfwrite.py +++ /dev/null @@ -1,154 +0,0 @@ -import cfdm - - -class NetCDFWrite(cfdm.read_write.netcdf.NetCDFWrite): - """A container for writing Fields to a netCDF dataset.""" - - def _write_dimension_coordinate( - self, f, key, coord, ncdim=None, coordinates=None - ): - """Write a coordinate variable and its bound variable to the - file. - - This also writes a new netCDF dimension to the file and, if - required, a new netCDF dimension for the bounds. - - .. 
versionadded:: 3.0.0 - - :Parameters: - - f: Field construct - - key: `str` - - coord: Dimension coordinate construct - - ncdim: `str` or `None` - The name of the netCDF dimension for this dimension - coordinate construct, including any groups - structure. Note that the group structure may be - different to the coordinate variable, and the - basename. - - .. versionadded:: 3.6.0 - - coordinates: `list` - This list may get updated in-place. - - .. versionadded:: 3.7.0 - - :Returns: - - `str` - The netCDF name of the dimension coordinate. - - """ - coord = self._change_reference_datetime(coord) - - return super()._write_dimension_coordinate( - f, key, coord, ncdim=ncdim, coordinates=coordinates - ) - - def _write_scalar_coordinate( - self, f, key, coord_1d, axis, coordinates, extra=None - ): - """Write a scalar coordinate and its bounds to the netCDF file. - - It is assumed that the input coordinate has size 1, but this is - not checked. - - If an equal scalar coordinate has already been written to the file - then the input coordinate is not written. - - .. versionadded:: 3.0.0 - - :Parameters: - - f: Field construct - - key: `str` - The coordinate construct key - - coord_1d: Coordinate construct - - axis: `str` - The field's axis identifier for the scalar coordinate. - - coordinates: `list` - - :Returns: - - coordinates: `list` - The updated list of netCDF auxiliary coordinate names. - - """ - if extra is None: - extra = {} - - coord_1d = self._change_reference_datetime(coord_1d) - - return super()._write_scalar_coordinate( - f, key, coord_1d, axis, coordinates, extra=extra - ) - - def _write_auxiliary_coordinate(self, f, key, coord, coordinates): - """Write auxiliary coordinates and bounds to the netCDF file. - - If an equal auxiliary coordinate has already been written to the - file then the input coordinate is not written. - - .. 
versionadded:: 3.0.0 - - :Parameters: - - f: Field construct - - key: `str` - - coord: Coordinate construct - - coordinates: `list` - - :Returns: - - coordinates: `list` - The list of netCDF auxiliary coordinate names updated in - place. - - **Examples:** - - >>> coordinates = _write_auxiliary_coordinate(f, 'aux2', coordinates) - - """ - coord = self._change_reference_datetime(coord) - - return super()._write_auxiliary_coordinate(f, key, coord, coordinates) - - def _change_reference_datetime(self, coord): - """Change the units of a reference date-time value. - - .. versionadded:: 3.0.0 - - :Parameters: - - coord: Coordinate instance - - :Returns: - - The coordinate construct with changed units. - - """ - reference_datetime = self.write_vars.get("reference_datetime") - if not reference_datetime or not coord.Units.isreftime: - return coord - - coord2 = coord.copy() - try: - coord2.reference_datetime = reference_datetime - except ValueError: - raise ValueError( - "Can't override coordinate reference date-time " - f"{coord.reference_datetime!r} with {reference_datetime!r}" - ) - else: - return coord2 diff --git a/cf/read_write/read.py b/cf/read_write/read.py index 2e7d10a7da..772ddcf986 100644 --- a/cf/read_write/read.py +++ b/cf/read_write/read.py @@ -24,9 +24,9 @@ # Create an implementation container and initialise a read object for # each format # -------------------------------------------------------------------- -#_implementation = implementation() -#netcdf = NetCDFRead(_implementation) -#UM = UMRead(_implementation) +# _implementation = implementation() +# netcdf = NetCDFRead(_implementation) +# UM = UMRead(_implementation) logger = logging.getLogger(__name__) From 88ab94ad8161560f7159700f39ce79d9251fe363 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Tue, 19 Nov 2024 23:23:21 +0000 Subject: [PATCH 16/51] dev --- cf/docstring/docstring.py | 47 --------------------------------------- cf/read_write/read.py | 9 +++++++- 2 files changed, 8 insertions(+), 48 
deletions(-) diff --git a/cf/docstring/docstring.py b/cf/docstring/docstring.py index a11e0129b9..2f9b78f4a8 100644 --- a/cf/docstring/docstring.py +++ b/cf/docstring/docstring.py @@ -546,38 +546,6 @@ # bounds "{{bounds: `bool`, optional}}": """bounds: `bool`, optional If True (the default) then alter any bounds.""", - # cull - "{{cull_graph: `bool`, optional}}": """cull_graph: `bool`, optional - If True then unnecessary tasks are removed (culled) - from each array's dask graph before - concatenation. This process can have a considerable - overhead but can sometimes improve the overall - performance of a workflow. If False (the default) then - dask graphs are not culled. See - `dask.optimization.cull` for details.""", - # relaxed_units - "{{relaxed_units: `bool`, optional}}": """relaxed_units: `bool`, optional - If True then allow the concatenation of data with - invalid but otherwise equal units. By default, if any - data array has invalid units then the concatenation - will fail. A `Units` object is considered to be - invalid if its `!isvalid` attribute is `False`.""", - # cfa substitutions - "{{cfa substitutions: `dict`}}": """substitutions: `dict` - The substitution definitions in a dictionary whose - key/value pairs are the file name parts to be - substituted and their corresponding substitution text. - - Each substitution definition may be specified with or - without the ``${...}`` syntax. For instance, the - following are equivalent: ``{'base': 'sub'}``, - ``{'${base}': 'sub'}``.""", - # cfa base - "{{cfa base: `str`}}": """base: `str` - The substitution definition to be removed. May be - specified with or without the ``${...}`` syntax. For - instance, the following are equivalent: ``'base'`` and - ``'${base}'``.""", # regular args "{{regular args}}": """A sequence of three numeric values. 
The first two values in the sequence represent the coordinate range (see the bounds @@ -707,21 +675,6 @@ coordinates check will be carried out, however, if the *check_coordinates* parameter is True.""", - # Returns cfa_file_substitutions - "{{Returns cfa_file_substitutions}}": """The CFA-netCDF file name substitutions in a dictionary - whose key/value pairs are the file name parts to be - substituted and their corresponding substitution - text.""", - # Returns cfa_clear_file_substitutions - "{{Returns cfa_clear_file_substitutions}}": """The removed CFA-netCDF file name substitutions in a - dictionary whose key/value pairs are the file name - parts to be substituted and their corresponding - substitution text.""", - # Returns cfa_clear_file_substitutions - "{{Returns cfa_del_file_substitution}}": """ - The removed CFA-netCDF file name substitution. If the - substitution was not defined then an empty dictionary - is returned.""", # subspace valid modes Field "{{subspace valid modes Field}}": """Valid modes are: diff --git a/cf/read_write/read.py b/cf/read_write/read.py index 772ddcf986..edd3d16c7a 100644 --- a/cf/read_write/read.py +++ b/cf/read_write/read.py @@ -410,6 +410,10 @@ class read(cfdm.read): .. versionadded:: NEXTVERSION + {{read to_memory: (sequence of) `str`, optional}} + + .. versionadded:: NEXTVERSION + umversion: deprecated at version 3.0.0 Use the *um* parameter instead. 
@@ -511,6 +515,7 @@ def __new__( domain=False, cfa=None, cfa_write=None, + to_memory=None, netcdf_backend=None, storage_options=None, cache=True, @@ -734,6 +739,7 @@ def __new__( domain=domain, cfa=cfa, cfa_write=cfa_write, + to_memory=to_memory, netcdf_backend=netcdf_backend, storage_options=storage_options, cache=cache, @@ -855,6 +861,7 @@ def _read_a_file( domain=False, cfa=None, cfa_write=None, + to_memory=None, netcdf_backend=None, storage_options=None, cache=True, @@ -990,7 +997,7 @@ def _read_a_file( store_hdf5_chunks=store_hdf5_chunks, cache=cache, cfa=cfa, - cfa_write=cfa_write, + cfa_write=cfa_write,to_memory=to_memory ) elif ftype == "UM" and extra_read_vars["fmt"] in (None, "UM"): if domain: From 6f5d08f2c2f1006bf30a5649dd73624fd60aefc9 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Wed, 20 Nov 2024 09:46:03 +0000 Subject: [PATCH 17/51] dev --- cf/mixin/propertiesdata.py | 82 ++++++++++++++-------------- cf/mixin/propertiesdatabounds.py | 92 ++++++++++++++++---------------- 2 files changed, 87 insertions(+), 87 deletions(-) diff --git a/cf/mixin/propertiesdata.py b/cf/mixin/propertiesdata.py index 9144c6d70a..612d3354e7 100644 --- a/cf/mixin/propertiesdata.py +++ b/cf/mixin/propertiesdata.py @@ -1971,47 +1971,47 @@ def period(self, *value, **config): return old - @_inplace_enabled(default=False) - def persist(self, inplace=False): - """Persist the underlying dask array into memory. - - This turns an underlying lazy dask array into a equivalent - chunked dask array, but now with the results fully computed. - - `persist` is particularly useful when using distributed - systems, because the results will be kept in distributed - memory, rather than returned to the local process. - - **Performance** - - `persist` causes all delayed operations to be computed. - - .. versionadded:: 3.14.0 - - .. 
seealso:: `array`, `datetime_array`, - `dask.array.Array.persist` - - :Parameters: - - {{inplace: `bool`, optional}} - - :Returns: - - `{{class}}` or `None` - The construct with persisted data. If the operation - was in-place then `None` is returned. - - **Examples** - - >>> g = f.persist() - - """ - return self._apply_data_oper( - _inplace_enabled_define_and_cleanup(self), - "persist", - inplace=inplace, - delete_props=False, - ) +# @_inplace_enabled(default=False) +# def persist(self, inplace=False): +# """Persist the underlying dask array into memory. +# +# This turns an underlying lazy dask array into a equivalent +# chunked dask array, but now with the results fully computed. +# +# `persist` is particularly useful when using distributed +# systems, because the results will be kept in distributed +# memory, rather than returned to the local process. +# +# **Performance** +# +# `persist` causes all delayed operations to be computed. +# +# .. versionadded:: 3.14.0 +# +# .. seealso:: `array`, `datetime_array`, +# `dask.array.Array.persist` +# +# :Parameters: +# +# {{inplace: `bool`, optional}} +# +# :Returns: +# +# `{{class}}` or `None` +# The construct with persisted data. If the operation +# was in-place then `None` is returned. +# +# **Examples** +# +# >>> g = f.persist() +# +# """ +# return self._apply_data_oper( +# _inplace_enabled_define_and_cleanup(self), +# "persist", +# inplace=inplace, +# delete_props=False, +# ) def range(self): """The absolute difference between the maximum and minimum of diff --git a/cf/mixin/propertiesdatabounds.py b/cf/mixin/propertiesdatabounds.py index 124c1b5239..901f362059 100644 --- a/cf/mixin/propertiesdatabounds.py +++ b/cf/mixin/propertiesdatabounds.py @@ -3774,52 +3774,52 @@ def period(self, *value, **config): return bounds.period(*value, **config) - @_inplace_enabled(default=False) - def persist(self, bounds=True, inplace=False): - """Persist the underlying dask array into memory. 
- - This turns an underlying lazy dask array into a equivalent - chunked dask array, but now with the results fully computed. - - `persist` is particularly useful when using distributed - systems, because the results will be kept in distributed - memory, rather than returned to the local process. - - **Performance** - - `persist` causes all delayed operations to be computed. - - .. versionadded:: 3.14.0 - - .. seealso:: `array`, `datetime_array`, - `dask.array.Array.persist` - - :Parameters: - - bounds: `bool`, optional - If False then do not persist any bounds data. By - default any bound data are also persisted. - - {{inplace: `bool`, optional}} - - :Returns: - - `{{class}}` or `None` - The construct with persisted data. If the operation - was in-place then `None` is returned. - - **Examples** - - >>> g = f.persist() - - """ - return self._apply_superclass_data_oper( - _inplace_enabled_define_and_cleanup(self), - "persist", - bounds=bounds, - interior_ring=True, - inplace=inplace, - ) +# @_inplace_enabled(default=False) +# def persist(self, bounds=True, inplace=False): +# """Persist the underlying dask array into memory. +# +# This turns an underlying lazy dask array into a equivalent +# chunked dask array, but now with the results fully computed. +# +# `persist` is particularly useful when using distributed +# systems, because the results will be kept in distributed +# memory, rather than returned to the local process. +# +# **Performance** +# +# `persist` causes all delayed operations to be computed. +# +# .. versionadded:: 3.14.0 +# +# .. seealso:: `array`, `datetime_array`, +# `dask.array.Array.persist` +# +# :Parameters: +# +# bounds: `bool`, optional +# If False then do not persist any bounds data. By +# default any bound data are also persisted. +# +# {{inplace: `bool`, optional}} +# +# :Returns: +# +# `{{class}}` or `None` +# The construct with persisted data. If the operation +# was in-place then `None` is returned. 
+# +# **Examples** +# +# >>> g = f.persist() +# +# """ +# return self._apply_superclass_data_oper( +# _inplace_enabled_define_and_cleanup(self), +# "persist", +# bounds=bounds, +# interior_ring=True, +# inplace=inplace, +# ) @_inplace_enabled(default=False) def rechunk( From ed664b95845339880231e9e4b1e732ca7d197cbf Mon Sep 17 00:00:00 2001 From: David Hassell Date: Wed, 20 Nov 2024 16:16:31 +0000 Subject: [PATCH 18/51] dev --- cf/functions.py | 1 - cf/mixin/propertiesdata.py | 44 ------------------------------ cf/mixin/propertiesdatabounds.py | 47 -------------------------------- cf/read_write/read.py | 7 +++-- 4 files changed, 4 insertions(+), 95 deletions(-) diff --git a/cf/functions.py b/cf/functions.py index 722f75d477..15681d169f 100644 --- a/cf/functions.py +++ b/cf/functions.py @@ -13,7 +13,6 @@ from math import isnan from os import mkdir from os.path import abspath as _os_path_abspath -from os.path import dirname as _os_path_dirname from os.path import expanduser as _os_path_expanduser from os.path import expandvars as _os_path_expandvars from os.path import join as _os_path_join diff --git a/cf/mixin/propertiesdata.py b/cf/mixin/propertiesdata.py index 612d3354e7..1512d6be49 100644 --- a/cf/mixin/propertiesdata.py +++ b/cf/mixin/propertiesdata.py @@ -1,6 +1,5 @@ import logging from itertools import chain -from os import sep import numpy as np from cfdm import is_log_level_info @@ -17,7 +16,6 @@ _DEPRECATION_ERROR_ATTRIBUTE, _DEPRECATION_ERROR_KWARGS, _DEPRECATION_ERROR_METHOD, - abspath, default_netCDF_fillvals, ) from ..functions import equivalent as cf_equivalent @@ -1971,48 +1969,6 @@ def period(self, *value, **config): return old -# @_inplace_enabled(default=False) -# def persist(self, inplace=False): -# """Persist the underlying dask array into memory. -# -# This turns an underlying lazy dask array into a equivalent -# chunked dask array, but now with the results fully computed. 
-# -# `persist` is particularly useful when using distributed -# systems, because the results will be kept in distributed -# memory, rather than returned to the local process. -# -# **Performance** -# -# `persist` causes all delayed operations to be computed. -# -# .. versionadded:: 3.14.0 -# -# .. seealso:: `array`, `datetime_array`, -# `dask.array.Array.persist` -# -# :Parameters: -# -# {{inplace: `bool`, optional}} -# -# :Returns: -# -# `{{class}}` or `None` -# The construct with persisted data. If the operation -# was in-place then `None` is returned. -# -# **Examples** -# -# >>> g = f.persist() -# -# """ -# return self._apply_data_oper( -# _inplace_enabled_define_and_cleanup(self), -# "persist", -# inplace=inplace, -# delete_props=False, -# ) - def range(self): """The absolute difference between the maximum and minimum of the data array. diff --git a/cf/mixin/propertiesdatabounds.py b/cf/mixin/propertiesdatabounds.py index 901f362059..fe3d5f830b 100644 --- a/cf/mixin/propertiesdatabounds.py +++ b/cf/mixin/propertiesdatabounds.py @@ -3774,53 +3774,6 @@ def period(self, *value, **config): return bounds.period(*value, **config) -# @_inplace_enabled(default=False) -# def persist(self, bounds=True, inplace=False): -# """Persist the underlying dask array into memory. -# -# This turns an underlying lazy dask array into a equivalent -# chunked dask array, but now with the results fully computed. -# -# `persist` is particularly useful when using distributed -# systems, because the results will be kept in distributed -# memory, rather than returned to the local process. -# -# **Performance** -# -# `persist` causes all delayed operations to be computed. -# -# .. versionadded:: 3.14.0 -# -# .. seealso:: `array`, `datetime_array`, -# `dask.array.Array.persist` -# -# :Parameters: -# -# bounds: `bool`, optional -# If False then do not persist any bounds data. By -# default any bound data are also persisted. 
-# -# {{inplace: `bool`, optional}} -# -# :Returns: -# -# `{{class}}` or `None` -# The construct with persisted data. If the operation -# was in-place then `None` is returned. -# -# **Examples** -# -# >>> g = f.persist() -# -# """ -# return self._apply_superclass_data_oper( -# _inplace_enabled_define_and_cleanup(self), -# "persist", -# bounds=bounds, -# interior_ring=True, -# inplace=inplace, -# ) - @_inplace_enabled(default=False) def rechunk( self, diff --git a/cf/read_write/read.py b/cf/read_write/read.py index edd3d16c7a..20f607b677 100644 --- a/cf/read_write/read.py +++ b/cf/read_write/read.py @@ -515,7 +515,7 @@ def __new__( domain=False, cfa=None, cfa_write=None, - to_memory=None, + to_memory=None, netcdf_backend=None, storage_options=None, cache=True, @@ -861,7 +861,7 @@ def _read_a_file( domain=False, cfa=None, cfa_write=None, - to_memory=None, + to_memory=None, netcdf_backend=None, storage_options=None, cache=True, @@ -997,7 +997,8 @@ def _read_a_file( store_hdf5_chunks=store_hdf5_chunks, cache=cache, cfa=cfa, - cfa_write=cfa_write,to_memory=to_memory + cfa_write=cfa_write, + to_memory=to_memory, ) elif ftype == "UM" and extra_read_vars["fmt"] in (None, "UM"): if domain: From 73f03b8903861c6c5d884ea946eb46d160a4867e Mon Sep 17 00:00:00 2001 From: David Hassell Date: Thu, 21 Nov 2024 17:06:55 +0000 Subject: [PATCH 19/51] dev --- cf/test/test_docstring.py | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/cf/test/test_docstring.py b/cf/test/test_docstring.py index 1f467c36bc..5e8c11e27d 100644 --- a/cf/test/test_docstring.py +++ b/cf/test/test_docstring.py @@ -87,21 +87,14 @@ def test_docstring(self): if name.startswith("__") and not inspect.isfunction(f): continue - self.assertIsNotNone( - f.__doc__, - f"\nCLASS: {klass}" - f"\nMETHOD NAME: {name}" - f"\nMETHOD: {f}" - f"\n__doc__: {f.__doc__}", - ) - - self.assertNotIn( - "{{", - f.__doc__, - f"\nCLASS: {klass}" - f"\nMETHOD NAME: {name}" - f"\nMETHOD: {f}", - ) + 
if f.__doc__ is not None: + self.assertNotIn( + "{{", + f.__doc__, + f"\nCLASS: {klass}" + f"\nMETHOD NAME: {name}" + f"\nMETHOD: {f}", + ) def test_docstring_package(self): string = f">>> f = {self.package}." From 5c0c98eef2c4546b19a3467baa98e715c3e280c9 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Fri, 22 Nov 2024 11:08:03 +0000 Subject: [PATCH 20/51] dev --- cf/test/test_docstring.py | 2 +- requirements.txt | 2 +- setup.py | 1 - 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/cf/test/test_docstring.py b/cf/test/test_docstring.py index 5e8c11e27d..53402e6f83 100644 --- a/cf/test/test_docstring.py +++ b/cf/test/test_docstring.py @@ -87,7 +87,7 @@ def test_docstring(self): if name.startswith("__") and not inspect.isfunction(f): continue - if f.__doc__ is not None: + if f.__doc__ is not None: self.assertNotIn( "{{", f.__doc__, diff --git a/requirements.txt b/requirements.txt index 8b01daddca..81552a3566 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ netCDF4>=1.6.5 cftime>=1.6.2 numpy>=1.22 -cfdm>=1.11.2.0, <1.11.3.0 +cfdm @ git+https://github.com/davidhassell/cfdm@cfa-for-cfs psutil>=0.6.0 cfunits>=3.3.7 dask>=2024.4.0 diff --git a/setup.py b/setup.py index 326252b4af..fe6b94d253 100755 --- a/setup.py +++ b/setup.py @@ -310,7 +310,6 @@ def compile(): "cf.data.array.mixin", "cf.data.collapse", "cf.data.fragment", - "cf.data.fragment.mixin", "cf.data.mixin", "cf.docstring", "cf.read_write", From 03444d0b4313b2264d1b8d59ea7f8c56dfec0127 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Fri, 22 Nov 2024 11:08:44 +0000 Subject: [PATCH 21/51] dev --- setup.py | 1 - 1 file changed, 1 deletion(-) diff --git a/setup.py b/setup.py index fe6b94d253..370974b624 100755 --- a/setup.py +++ b/setup.py @@ -314,7 +314,6 @@ def compile(): "cf.docstring", "cf.read_write", "cf.read_write.um", - "cf.read_write.netcdf", "cf.regrid", "cf.umread_lib", "cf.test", From 60e6ea0ec6ca2da41548cddf0a0bee9cd3ca3eb1 Mon Sep 17 00:00:00 2001 From: David 
Hassell Date: Fri, 22 Nov 2024 11:17:19 +0000 Subject: [PATCH 22/51] dev --- setup.py | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/setup.py b/setup.py index 370974b624..2c1432587b 100755 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ import subprocess from distutils.command.build import build -from setuptools import setup +from setuptools import setup, find_packages def find_package_data_files(directory): @@ -300,24 +300,24 @@ def compile(): "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", ], - packages=[ - "cf", - "cf.mixin", - "cf.mixin2", - "cf.data", - "cf.data.array", - "cf.data.array.abstract", - "cf.data.array.mixin", - "cf.data.collapse", - "cf.data.fragment", - "cf.data.mixin", - "cf.docstring", - "cf.read_write", - "cf.read_write.um", - "cf.regrid", - "cf.umread_lib", - "cf.test", - ], + packages=find_packages(), +# "cf", +# "cf.mixin", +# "cf.mixin2", +# "cf.data", +# "cf.data.array", +# "cf.data.array.abstract", +# "cf.data.array.mixin", +# "cf.data.collapse", +# "cf.data.fragment", +# "cf.data.mixin", +# "cf.docstring", +# "cf.read_write", +# "cf.read_write.um", +# "cf.regrid", +# "cf.umread_lib", +# "cf.test", +# ], package_data={"cf": package_data}, scripts=["scripts/cfa"], python_requires=">=3.8", From b7e0937031aa5b6ed37cfe96f2ac53cfe2525ae3 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Fri, 22 Nov 2024 11:36:34 +0000 Subject: [PATCH 23/51] dev --- setup.py | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/setup.py b/setup.py index 2c1432587b..af397e6c93 100755 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ import subprocess from distutils.command.build import build -from setuptools import setup, find_packages +from setuptools import find_packages, setup def find_package_data_files(directory): @@ -301,23 +301,6 @@ def compile(): "Programming Language :: Python :: 3.12", ], packages=find_packages(), -# "cf", -# 
"cf.mixin", -# "cf.mixin2", -# "cf.data", -# "cf.data.array", -# "cf.data.array.abstract", -# "cf.data.array.mixin", -# "cf.data.collapse", -# "cf.data.fragment", -# "cf.data.mixin", -# "cf.docstring", -# "cf.read_write", -# "cf.read_write.um", -# "cf.regrid", -# "cf.umread_lib", -# "cf.test", -# ], package_data={"cf": package_data}, scripts=["scripts/cfa"], python_requires=">=3.8", From 4a26930f08797472f1dcf1e7590dce1b6c30d79f Mon Sep 17 00:00:00 2001 From: David Hassell Date: Sat, 23 Nov 2024 12:02:40 +0000 Subject: [PATCH 24/51] dev --- cf/mixin/fielddomain.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cf/mixin/fielddomain.py b/cf/mixin/fielddomain.py index 1e9e1f67e1..9d253aa09c 100644 --- a/cf/mixin/fielddomain.py +++ b/cf/mixin/fielddomain.py @@ -387,6 +387,7 @@ def _indices(self, config, data_axes, ancillary_mask, kwargs): if debug: logger.debug( + f" constructs = {constructs!r}\n" f" item_axes = {item_axes!r}\n" f" keys = {keys!r}" ) # pragma: no cover @@ -405,6 +406,7 @@ def _indices(self, config, data_axes, ancillary_mask, kwargs): if debug: logger.debug( f" {n_items} 1-d constructs: {constructs!r}\n" + f" item = {item!r}\n" f" axis = {axis!r}\n" f" value = {value!r}\n" f" identity = {identity!r}" @@ -539,8 +541,8 @@ def _indices(self, config, data_axes, ancillary_mask, kwargs): index = normalize_index(index, (size,))[0] else: raise ValueError( - "Must specify a domain axis construct or a " - "construct with data for which to create indices" + "Could not find a unique construct with identity " + f"{identity!r} from which to infer the indices." 
) if debug: From 847d94695d55a80ad90600d27f1cbbbfdb3dd772 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Mon, 25 Nov 2024 11:49:43 +0000 Subject: [PATCH 25/51] dev --- cf/data/data.py | 5 +- cf/test/create_test_files.py | 33 +- cf/test/test_CFA.py | 1445 +++++++++------------------------- cf/test/test_Field.py | 22 - cf/test/test_NetCDF4Array.py | 171 ---- 5 files changed, 376 insertions(+), 1300 deletions(-) delete mode 100644 cf/test/test_NetCDF4Array.py diff --git a/cf/data/data.py b/cf/data/data.py index 734e65cbf8..15d11cf9b1 100644 --- a/cf/data/data.py +++ b/cf/data/data.py @@ -6682,7 +6682,10 @@ def reshape(self, *shape, merge_chunks=True, limit=None, inplace=False): super(Data, d).reshape( *shape, merge_chunks=merge_chunks, limit=limit, inplace=True ) - # TODODASK: reshape: Need to clear cyclic axes, as we can't help but lose them in this operation + + # Clear cyclic axes, as we can't help but lose them in this + # operation + d._cyclic = _empty_set return d @_deprecated_kwarg_check("i", version="3.0.0", removed_at="4.0.0") diff --git a/cf/test/create_test_files.py b/cf/test/create_test_files.py index 2321ce0a95..9e8e333236 100644 --- a/cf/test/create_test_files.py +++ b/cf/test/create_test_files.py @@ -7,9 +7,10 @@ faulthandler.enable() # to debug seg faults and timeouts -import cfdm import netCDF4 +import cfdm + VN = cfdm.CF() # Load large arrays @@ -743,7 +744,7 @@ def _jj(shape, list_values): array[index] = i return array - n = netCDF4.Dataset(filename, "w", format="NETCDF3_CLASSIC") + n = netCDF4.Dataset(filename, "w") n.Conventions = f"CF-{VN}" @@ -855,7 +856,13 @@ def _jj(shape, list_values): temp2.coordinates = "aux7 aux8 aux9" temp2[...] 
= np.arange(2 * 3 * 9 * 6).reshape(2, 3, 9, 6) - temp3 = n.createVariable("temp3", "f8", ("time", "list3", "p")) + temp3 = n.createVariable( + "temp3", + "f8", + ("time", "list3", "p"), + complevel=1, + chunksizes=(2, 6, 4), + ) temp3.long_name = "temp3" temp3.units = "K" temp3.coordinates = "aux0 aux1 aux2 aux3 aux4 aux5 aux6 aux7 aux8 aux9" @@ -2247,12 +2254,12 @@ def _make_aggregation_value(filename): temperature.cell_methods = "time: mean" temperature.ancillary_variables = "uid" temperature.aggregated_dimensions = "time level latitude longitude" - temperature.aggregated_data = "location: fragment_location address: fragment_address shape: fragment_shape" + temperature.aggregated_data = "location: fragment_location identifier: fragment_identifier map: fragment_map" uid = n.createVariable("uid", str, ()) uid.long_name = "Fragment dataset unique identifiers" uid.aggregated_dimensions = "time" - uid.aggregated_data = "value: fragment_value_uid shape: fragment_shape_uid" + uid.aggregated_data = "value: fragment_value_uid map: fragment_map_uid" time = n.createVariable("time", "f4", ("time",)) time.standard_name = "time" @@ -2280,12 +2287,12 @@ def _make_aggregation_value(filename): fragment_location[0, 0, 0, 0] = "January-March.nc" fragment_location[1, 0, 0, 0] = "April-December.nc" - fragment_address = n.createVariable("fragment_address", str, ()) - fragment_address[...] = "temperature" + fragment_identifier = n.createVariable("fragment_identifier", str, ()) + fragment_identifier[...] = "temperature" - fragment_shape = n.createVariable("fragment_shape", "i4", ("j", "i")) - fragment_shape[...] = [[3, 9], [1, -1], [73, -1], [144, -1]] - fragment_shape[1:, 1] = np.ma.masked + fragment_map = n.createVariable("fragment_map", "i4", ("j", "i")) + fragment_map[...] 
= [[3, 9], [1, -1], [73, -1], [144, -1]] + fragment_map[1:, 1] = np.ma.masked fragment_value_uid = n.createVariable( "fragment_value_uid", str, ("f_time",) @@ -2293,10 +2300,10 @@ def _make_aggregation_value(filename): fragment_value_uid[0] = "04b9-7eb5-4046-97b-0bf8" fragment_value_uid[1] = "05ee0-a183-43b3-a67-1eca" - fragment_shape_uid = n.createVariable( - "fragment_shape_uid", "i4", ("j_uid", "i") + fragment_map_uid = n.createVariable( + "fragment_map_uid", "i4", ("j_uid", "i") ) - fragment_shape_uid[...] = [3, 9] + fragment_map_uid[...] = [3, 9] n.close() return filename diff --git a/cf/test/test_CFA.py b/cf/test/test_CFA.py index bf3beccb61..ebbfdabd28 100644 --- a/cf/test/test_CFA.py +++ b/cf/test/test_CFA.py @@ -1,1093 +1,352 @@ -# import atexit -# import datetime -# import faulthandler -# import os -# import tempfile -# import unittest -# from pathlib import PurePath -# -# import netCDF4 -# -# faulthandler.enable() # to debug seg faults and timeouts -# -# import cf -# -# n_tmpfiles = 5 -# tmpfiles = [ -# tempfile.mkstemp("_test_CFA.nc", dir=os.getcwd())[1] -# for i in range(n_tmpfiles) -# ] -# ( -# tmpfile1, -# tmpfile2, -# nc_file, -# cfa_file, -# cfa_file2, -# ) = tmpfiles -# -# -# def _remove_tmpfiles(): -# """Try to remove defined temporary files by deleting their paths.""" -# for f in tmpfiles: -# try: -# os.remove(f) -# except OSError: -# pass -# -# -# atexit.register(_remove_tmpfiles) -# -# -# class CFATest(unittest.TestCase): -# """Unit test for aggregation variables.""" -# -# netcdf3_fmts = [ -# "NETCDF3_CLASSIC", -# "NETCDF3_64BIT", -# "NETCDF3_64BIT_OFFSET", -# "NETCDF3_64BIT_DATA", -# ] -# netcdf4_fmts = ["NETCDF4", "NETCDF4_CLASSIC"] -# netcdf_fmts = netcdf3_fmts + netcdf4_fmts -# -# aggregation_value = os.path.join( -# os.path.dirname(os.path.abspath(__file__)), "aggregation_value.nc" -# ) -# -# def test_CFA_fmt(self): -# """Test the cf.read 'fmt' keyword with cfa.""" -# f = cf.example_field(0) -# cf.write(f, tmpfile1) -# f = 
cf.read(tmpfile1)[0] -# -# for fmt in self.netcdf_fmts: -# cf.write(f, cfa_file, fmt=fmt, cfa="field") -# g = cf.read(cfa_file) -# self.assertEqual(len(g), 1) -# self.assertTrue(f.equals(g[0])) -# -# def test_CFA_multiple_fragments(self): -# """Test aggregation variables with more than one fragment.""" -# f = cf.example_field(0) -# -# cf.write(f[:2], tmpfile1) -# cf.write(f[2:], tmpfile2) -# -# a = cf.read(tmpfile1)[0] -# b = cf.read(tmpfile2)[0] -# a = cf.Field.concatenate([a, b], axis=0) -# -# cf.write(a, nc_file) -# cf.write(a, cfa_file, cfa="field") -# -# n = cf.read(nc_file) -# c = cf.read(cfa_file) -# self.assertEqual(len(n), 1) -# self.assertEqual(len(c), 1) -# self.assertTrue(c[0].equals(f)) -# self.assertTrue(n[0].equals(c[0])) -# -# def test_CFA_strict(self): -# """Test 'strict' option to the cf.write 'cfa' keyword.""" -# f = cf.example_field(0) -# -# # By default, can't write in-memory arrays as aggregation -# # variables -# with self.assertRaises(ValueError): -# cf.write(f, cfa_file, cfa="field") -# -# # The previous line should have deleted the output file -# self.assertFalse(os.path.exists(cfa_file)) -# -# cf.write(f, nc_file, cfa={"constructs": "field", "strict": False}) -# g = cf.read(nc_file) -# self.assertEqual(len(g), 1) -# self.assertTrue(g[0].equals(f)) -# -# cf.write(g, cfa_file, cfa={"constructs": "field", "strict": True}) -# g = cf.read(cfa_file) -# self.assertEqual(len(g), 1) -# self.assertTrue(g[0].equals(f)) -# -# def test_CFA_substitutions_0(self): -# """Test aggregation substitution URI substitutions (0).""" -# f = cf.example_field(0) -# cf.write(f, tmpfile1) -# f = cf.read(tmpfile1)[0] -# -# cwd = os.getcwd() -# f.data.nc_update_aggregation_substitutions({"base": cwd}) -# -# cf.write( -# f, -# cfa_file, -# cfa={"constructs": "field", "uri": "absolute"}, -# ) -# -# nc = netCDF4.Dataset(cfa_file, "r") -# cfa_location = nc.variables["cfa_location"] -# self.assertEqual( -# cfa_location.getncattr("substitutions"), -# f"${{base}}: {cwd}", -# 
) -# self.assertEqual( -# cfa_location[...], f"${{base}}/{os.path.basename(tmpfile1)}" -# ) -# nc.close() -# -# g = cf.read(cfa_file) -# self.assertEqual(len(g), 1) -# self.assertTrue(f.equals(g[0])) -# -# def test_CFA_substitutions_1(self): -# """Test aggregation substitution URI substitutions (1).""" -# f = cf.example_field(0) -# cf.write(f, tmpfile1) -# f = cf.read(tmpfile1)[0] -# -# cwd = os.getcwd() -# for base in ("base", "${base}"): -# cf.write( -# f, -# cfa_file, -# cfa={ -# "constructs": "field", -# "uri": "absolute", -# "substitutions": {base: cwd}, -# }, -# ) -# -# nc = netCDF4.Dataset(cfa_file, "r") -# cfa_location = nc.variables["cfa_location"] -# self.assertEqual( -# cfa_location.getncattr("substitutions"), -# f"${{base}}: {cwd}", -# ) -# self.assertEqual( -# cfa_location[...], -# f"${{base}}/{os.path.basename(tmpfile1)}", -# ) -# nc.close() -# -# g = cf.read(cfa_file) -# self.assertEqual(len(g), 1) -# self.assertTrue(f.equals(g[0])) -# -# def test_CFA_substitutions_2(self): -# """Test aggregation substitution URI substitutions (2).""" -# # TODOCFA: delete -# tmpfile1 = "tmpfile1.nc" -# -# f = cf.example_field(0) -# -# cf.write(f, tmpfile1) -# f = cf.read(tmpfile1)[0] -# -# cwd = os.getcwd() -# basename = os.path.basename(tmpfile1) -# -# # TODOCFA: delete -# cfa_file = "cfa_file.nc" -# -# f.data.nc_clear_aggregation_substitutions() -# f.data.nc_update_aggregation_substitutions({"base": f"{cwd}"}) -# cf.write( -# f, -# cfa_file, -# cfa={ -# "constructs": "field", -# "uri": "absolute", -# "substitutions": {"base2": "/bad/location"}, -# }, -# ) -# -# nc = netCDF4.Dataset(cfa_file, "r") -# cfa_location = nc.variables["cfa_location"] -# self.assertEqual( -# cfa_location.getncattr("substitutions"), -# f"${{base2}}: /bad/location ${{base}}: {cwd}", -# ) -# self.assertEqual(cfa_location[...], f"file://${{base}}/{basename}") -# nc.close() -# -# g = cf.read(cfa_file) -# self.assertEqual(len(g), 1) -# self.assertTrue(f.equals(g[0])) -# -# 
f.data.nc_clear_aggregation_substitutions() -# f.data.nc_update_aggregation_substitutions({"base": "/bad/location"}) -# -# cf.write( -# f, -# cfa_file, -# cfa={ -# "constructs": "field", -# "uri": "absolute", -# "substitutions": {"base": cwd}, -# }, -# ) -# -# nc = netCDF4.Dataset(cfa_file, "r") -# cfa_location = nc.variables["cfa_location"] -# self.assertEqual( -# cfa_location.getncattr("substitutions"), -# f"${{base}}: {cwd}", -# ) -# self.assertEqual(cfa_location[...], f"file://${{base}}/{basename}") -# nc.close() -# -# g = cf.read(cfa_file) -# self.assertEqual(len(g), 1) -# self.assertTrue(f.equals(g[0])) -# -# f.data.nc_clear_aggregation_substitutions() -# f.data.nc_update_aggregation_substitutions({"base2": "/bad/location"}) -# -# cf.write( -# f, -# cfa_file, -# cfa={ -# "constructs": "field", -# "uri": "absolute", -# "substitutions": {"base": cwd}, -# }, -# ) -# -# nc = netCDF4.Dataset(cfa_file, "r") -# cfa_location = nc.variables["cfa_location"] -# self.assertEqual( -# cfa_location.getncattr("substitutions"), -# f"${{base2}}: /bad/location ${{base}}: {cwd}", -# ) -# self.assertEqual( -# cfa_location[...], f"file://${{base}}/{os.path.basename(tmpfile1)}" -# ) -# nc.close() -# -# g = cf.read(cfa_file) -# self.assertEqual(len(g), 1) -# g = g[0] -# self.assertTrue(f.equals(g)) -# -# self.assertEqual( -# g.data.get_filenames(normalise=False), -# set((f"file://${{base}}/{os.path.basename(tmpfile1)}",)), -# ) -# g.data.nc_update_aggregation_substitutions({"base": "/new/location"}) -# self.assertEqual( -# g.data.nc_aggregation_substitutions(), -# {"${base2}": "/bad/location", "${base}": "/new/location"}, -# ) -# self.assertEqual( -# g.data.get_filenames(normalise=False), -# set((f"file://${{base}}/{os.path.basename(tmpfile1)}",)), -# ) -# -# # TODOCFA: delete -# cfa_file2 = "cfa_file2.nc" -# cf.write( -# g, -# cfa_file2, -# cfa={ -# "constructs": "field", -# "uri": "absolute", -# }, -# ) -# nc = netCDF4.Dataset(cfa_file2, "r") -# cfa_location = 
nc.variables["cfa_location"] -# self.assertEqual( -# cfa_location.getncattr("substitutions"), -# "${base2}: /bad/location ${base}: /new/location", -# ) -# self.assertEqual( -# cfa_location[...], f"file://${{base}}/{os.path.basename(tmpfile1)}" -# ) -# nc.close() -# -# def test_CFA_substitutions_3(self): -# """Test aggregation substitution URI substitutions (2).""" -# f = cf.example_field(0) -# cf.write(f, tmpfile1) -# f = cf.read(tmpfile1)[0] -# -# cwd = os.getcwd() -# basename = os.path.basename(tmpfile1) -# -# f.data.nc_clear_aggregation_substitutions() -# f.data.nc_update_aggregation_substitutions({"base": f"{cwd}/"}) -# cf.write( -# f, -# cfa_file, -# cfa={ -# "constructs": "field", -# "uri": "absolute", -# }, -# ) -# -# nc = netCDF4.Dataset(cfa_file, "r") -# cfa_location = nc.variables["cfa_location"] -# self.assertEqual( -# cfa_location.getncattr("substitutions"), f"${{base}}: {cwd}/" -# ) -# self.assertEqual(cfa_location[...], f"${{base}}{basename}") -# nc.close() -# -# # TODOCFA: delete -# cfa_file2 = "cfa_file2.nc" -# -# g = cf.read(cfa_file)[0] -# self.assertTrue(f.equals(g)) -# cf.write( -# g, -# cfa_file2, -# cfa={ -# "constructs": "field", -# "uri": "absolute", -# }, -# ) -# -# def test_CFA_substitutions_4(self): -# """Test aggregation substitution URI substitutions (2).""" -# f = cf.example_field(0) -# -# # TODOCFA: delete -# tmpfile1 = "tmpfile1.nc" -# -# cf.write(f, tmpfile1) -# f = cf.read(tmpfile1)[0] -# -# cwd = os.getcwd() -# basename = os.path.basename(tmpfile1) -# -# # TODOCFA: delete -# cfa_file = "cfa_file.nc" -# -# replacement = f"{cwd}/" -# f.data.nc_clear_aggregation_substitutions() -# f.data.nc_update_aggregation_substitutions({"base": replacement}) -# cf.write( -# f, -# cfa_file, -# cfa={ -# "constructs": "field", -# "uri": "relative", -# }, -# ) -# -# nc = netCDF4.Dataset(cfa_file, "r") -# cfa_location = nc.variables["cfa_location"] -# self.assertEqual( -# cfa_location.getncattr("substitutions"), -# f"${{base}}: {replacement}", -# ) -# 
self.assertEqual(cfa_location[...], basename) -# nc.close() -# -# cf.write( -# f, -# cfa_file, -# cfa={"constructs": "field", "uri": "absolute"}, -# ) -# nc = netCDF4.Dataset(cfa_file, "r") -# cfa_location = nc.variables["cfa_location"] -# self.assertEqual( -# cfa_location.getncattr("substitutions"), -# f"${{base}}: {replacement}", -# ) -# self.assertEqual(cfa_location[...], f"file://${{base}}{basename}") -# nc.close() -# -# def test_CFA_uri(self): -# """Test aggregation 'uri' option to cf.write.""" -# f = cf.example_field(0) -# cf.write(f, tmpfile1) -# f = cf.read(tmpfile1)[0] -# -# for uri, filename in zip( -# ("absolute", "relative"), -# ( -# PurePath(os.path.abspath(tmpfile1)).as_uri(), -# os.path.basename(tmpfile1), -# ), -# ): -# cf.write( -# f, -# cfa_file, -# cfa={"constructs": "field", "uri": uri}, -# ) -# -# nc = netCDF4.Dataset(cfa_file, "r") -# cfa_location = nc.variables["cfa_location"] -# self.assertEqual(cfa_location[...], filename) -# nc.close() -# -# g = cf.read(cfa_file) -# self.assertEqual(len(g), 1) -# self.assertTrue(f.equals(g[0])) -# -# def test_CFA_constructs(self): -# """Test aggregation 'constructs' option to cf.write.""" -# f = cf.example_field(1) -# f.del_construct("time") -# f.del_construct("long_name=Grid latitude name") -# cf.write(f, tmpfile1) -# f = cf.read(tmpfile1)[0] -# -# # No constructs -# cf.write(f, tmpfile2, cfa={"constructs": []}) -# nc = netCDF4.Dataset(tmpfile2, "r") -# for var in nc.variables.values(): -# attrs = var.ncattrs() -# self.assertNotIn("aggregated_dimensions", attrs) -# self.assertNotIn("aggregated_data", attrs) -# -# nc.close() -# -# # Field construct -# cf.write(f, tmpfile2, cfa={"constructs": "field"}) -# nc = netCDF4.Dataset(tmpfile2, "r") -# for ncvar, var in nc.variables.items(): -# attrs = var.ncattrs() -# if ncvar in ("ta",): -# self.assertFalse(var.ndim) -# self.assertIn("aggregated_dimensions", attrs) -# self.assertIn("aggregated_data", attrs) -# else: -# self.assertNotIn("aggregated_dimensions", 
attrs) -# self.assertNotIn("aggregated_data", attrs) -# -# nc.close() -# -# # Dimension construct -# for constructs in ( -# "dimension_coordinate", -# ["dimension_coordinate"], -# {"dimension_coordinate": None}, -# {"dimension_coordinate": 1}, -# ): -# cf.write(f, tmpfile2, cfa={"constructs": constructs}) -# nc = netCDF4.Dataset(tmpfile2, "r") -# for ncvar, var in nc.variables.items(): -# attrs = var.ncattrs() -# if ncvar in ( -# "x", -# "x_bnds", -# "y", -# "y_bnds", -# "atmosphere_hybrid_height_coordinate", -# "atmosphere_hybrid_height_coordinate_bounds", -# ): -# self.assertFalse(var.ndim) -# self.assertIn("aggregated_dimensions", attrs) -# self.assertIn("aggregated_data", attrs) -# else: -# self.assertNotIn("aggregated_dimensions", attrs) -# self.assertNotIn("aggregated_data", attrs) -# -# nc.close() -# -# # Dimension and auxiliary constructs -# for constructs in ( -# ["dimension_coordinate", "auxiliary_coordinate"], -# {"dimension_coordinate": None, "auxiliary_coordinate": 2}, -# ): -# cf.write(f, tmpfile2, cfa={"constructs": constructs}) -# nc = netCDF4.Dataset(tmpfile2, "r") -# for ncvar, var in nc.variables.items(): -# attrs = var.ncattrs() -# if ncvar in ( -# "x", -# "x_bnds", -# "y", -# "y_bnds", -# "atmosphere_hybrid_height_coordinate", -# "atmosphere_hybrid_height_coordinate_bounds", -# "latitude_1", -# "longitude_1", -# ): -# self.assertFalse(var.ndim) -# self.assertIn("aggregated_dimensions", attrs) -# self.assertIn("aggregated_data", attrs) -# else: -# self.assertNotIn("aggregated_dimensions", attrs) -# self.assertNotIn("aggregated_data", attrs) -# -# nc.close() -# -# def test_CFA_multiple_files(self): -# """Test storing multiple locations for the same fragment.""" -# f = cf.example_field(0) -# cf.write(f, tmpfile1) -# f = cf.read(tmpfile1)[0] -# f.add_file_directory("/new/path") -# -# cf.write(f, cfa_file, cfa="field") -# g = cf.read(cfa_file) -# self.assertEqual(len(g), 1) -# g = g[0] -# self.assertTrue(f.equals(g)) -# -# 
self.assertEqual(len(g.data.get_filenames()), 2) -# self.assertEqual(len(g.get_filenames()), 3) -# -# def test_CFA_unlimited_dimension(self): -# """Test aggregation files with unlimited dimensions.""" -# # Aggregated dimensions cannot be unlimited -# f = cf.example_field(0) -# axis = f.domain_axis("longitude") -# axis.nc_set_unlimited(True) -# cf.write(f, tmpfile1) -# g = cf.read(tmpfile1) -# with self.assertRaises(ValueError): -# cf.write(g, cfa_file, cfa="field") -# -# def test_CFA_scalar(self): -# """Test scalar aggregation variable.""" -# f = cf.example_field(0) -# f = f[0, 0].squeeze() -# cf.write(f, tmpfile1) -# g = cf.read(tmpfile1)[0] -# cf.write(g, cfa_file, cfa="field") -# h = cf.read(cfa_file)[0] -# self.assertTrue(h.equals(f)) -# -# def test_CFA_value(self): -# """Test the value fragment array variable.""" -# write = True -# for aggregation_value_file in (self.aggregation_value, cfa_file): -# f = cf.read(aggregation_value_file) -# self.assertEqual(len(f), 1) -# f = f[0] -# fa = f.field_ancillary() -# self.assertEqual(fa.shape, (12,)) -# self.assertEqual(fa.data.chunks, ((3, 9),)) -# self.assertEqual( -# fa.data.nc_get_aggregation_fragment_type(), "value" -# ) -# self.assertEqual( -# fa.data.nc_get_aggregated_data(), -# {"shape": "fragment_shape_uid", "value": "fragment_value_uid"}, -# ) -# -# nc = netCDF4.Dataset(aggregation_value_file, "r") -# fragment_value_uid = nc.variables["fragment_value_uid"][...] 
-# nc.close() -# -# self.assertTrue((fa[:3].array == fragment_value_uid[0]).all()) -# self.assertTrue((fa[3:].array == fragment_value_uid[1]).all()) -# -# if write: -# cf.write(f, cfa_file) -# write = False -# -# def test_CFA_cfa(self): -# """Test the cf.write 'cfa' keyword.""" -# f = cf.example_field(0) -# cf.write(f, tmpfile1) -# f = cf.read(tmpfile1)[0] -# cf.write(f, tmpfile2, cfa="field") -# g = cf.read(tmpfile2)[0] -# -# # Default of cfa="auto" - check that aggregation variable -# # gets written -# cf.write(g, cfa_file) -# nc = netCDF4.Dataset(cfa_file, "r") -# self.assertIsNotNone( -# getattr(nc.variables["q"], "aggregated_data", None) -# ) -# nc.close() -# -# cf.write(g, cfa_file, cfa={"constructs": {"auto": 2}}) -# nc = netCDF4.Dataset(cfa_file, "r") -# self.assertIsNotNone( -# getattr(nc.variables["q"], "aggregated_data", None) -# ) -# nc.close() -# -# cf.write( -# g, -# cfa_file, -# cfa={ -# "constructs": ["auto", "dimension_coordinate"], -# "strict": False, -# }, -# ) -# nc = netCDF4.Dataset(cfa_file, "r") -# for ncvar in ("q", "lat", "lon"): -# self.assertIsNotNone( -# getattr(nc.variables[ncvar], "aggregated_data", None) -# ) -# -# nc.close() -# -# # Check bad values of cfa -# for cfa in (False, True, (), []): -# with self.assertRaises(ValueError): -# cf.write(g, cfa_file, cfa=cfa) -# -# -# if __name__ == "__main__": -# print("Run date:", datetime.datetime.now()) -# cf.environment() -# print() -# unittest.main(verbosity=2) -# -# n_tmpfiles = 5 -# tmpfiles = [ -# tempfile.mkstemp("_test_CFA.nc", dir=os.getcwd())[1] -# for i in range(n_tmpfiles) -# ] -# ( -# tmpfile1, -# tmpfile2, -# tmpfile3, -# tmpfile4, -# tmpfile5, -# ) = tmpfiles -# -# -# def _remove_tmpfiles(): -# """Try to remove defined temporary files by deleting their paths.""" -# for f in tmpfiles: -# try: -# os.remove(f) -# except OSError: -# pass -# -# -# atexit.register(_remove_tmpfiles) -# -# -# class CFATest(unittest.TestCase): -# netcdf3_fmts = [ -# "NETCDF3_CLASSIC", -# 
"NETCDF3_64BIT", -# "NETCDF3_64BIT_OFFSET", -# "NETCDF3_64BIT_DATA", -# ] -# netcdf4_fmts = ["NETCDF4", "NETCDF4_CLASSIC"] -# netcdf_fmts = netcdf3_fmts + netcdf4_fmts -# -# def test_CFA_fmt(self): -# """Test the cf.read 'fmt' and 'cfa' keywords.""" -# f = cf.example_field(0) -# cf.write(f, tmpfile1) -# f = cf.read(tmpfile1)[0] -# -# for fmt in self.netcdf_fmts: -# cf.write(f, tmpfile2, fmt=fmt, cfa=True) -# g = cf.read(tmpfile2) -# self.assertEqual(len(g), 1) -# self.assertTrue(f.equals(g[0])) -# -# def test_CFA_multiple_fragments(self): -# """Test CFA with more than one fragment.""" -# f = cf.example_field(0) -# -# cf.write(f[:2], tmpfile1) -# cf.write(f[2:], tmpfile2) -# -# a = cf.read([tmpfile1, tmpfile2]) -# self.assertEqual(len(a), 1) -# a = a[0] -# -# nc_file = tmpfile3 -# cfa_file = tmpfile4 -# cf.write(a, nc_file) -# cf.write(a, cfa_file, cfa=True) -# -# n = cf.read(nc_file) -# c = cf.read(cfa_file) -# self.assertEqual(len(n), 1) -# self.assertEqual(len(c), 1) -# self.assertTrue(c[0].equals(f)) -# self.assertTrue(n[0].equals(c[0])) -# -# def test_CFA_strict(self): -# """Test CFA 'strict' option to the cfa.write 'cfa' keyword.""" -# f = cf.example_field(0) -# -# # By default, can't write as CF-netCDF those variables -# # selected for CFA treatment, but which aren't suitable. 
-# with self.assertRaises(ValueError): -# cf.write(f, tmpfile1, cfa=True) -# -# # The previous line should have deleted the output file -# self.assertFalse(os.path.exists(tmpfile1)) -# -# cf.write(f, tmpfile1, cfa={"strict": False}) -# g = cf.read(tmpfile1) -# self.assertEqual(len(g), 1) -# self.assertTrue(g[0].equals(f)) -# -# cf.write(g, tmpfile2, cfa={"strict": True}) -# g = cf.read(tmpfile2) -# self.assertEqual(len(g), 1) -# self.assertTrue(g[0].equals(f)) -# -# def test_CFA_field_ancillaries(self): -# """Test creation of field ancillaries from non-standard CFA terms.""" -# f = cf.example_field(0) -# self.assertFalse(f.field_ancillaries()) -# -# a = f[:2] -# b = f[2:] -# a.set_property("foo", "bar_a") -# b.set_property("foo", "bar_b") -# cf.write(a, tmpfile1) -# cf.write(b, tmpfile2) -# -# c = cf.read( -# [tmpfile1, tmpfile2], aggregate={"field_ancillaries": "foo"} -# ) -# self.assertEqual(len(c), 1) -# c = c[0] -# self.assertEqual(len(c.field_ancillaries()), 1) -# anc = c.field_ancillary() -# self.assertTrue(anc.data.cfa_get_term()) -# self.assertFalse(anc.data.cfa_get_write()) -# -# cf.write(c, tmpfile3, cfa=False) -# c2 = cf.read(tmpfile3) -# self.assertEqual(len(c2), 1) -# self.assertFalse(c2[0].field_ancillaries()) -# -# cf.write(c, tmpfile4, cfa=True) -# d = cf.read(tmpfile4) -# self.assertEqual(len(d), 1) -# d = d[0] -# -# self.assertEqual(len(d.field_ancillaries()), 1) -# anc = d.field_ancillary() -# self.assertTrue(anc.data.cfa_get_term()) -# self.assertFalse(anc.data.cfa_get_write()) -# self.assertTrue(d.equals(c)) -# -# cf.write(d, tmpfile5, cfa=False) -# e = cf.read(tmpfile5) -# self.assertEqual(len(e), 1) -# self.assertFalse(e[0].field_ancillaries()) -# -# cf.write(d, tmpfile5, cfa=True) -# e = cf.read(tmpfile5) -# self.assertEqual(len(e), 1) -# self.assertTrue(e[0].equals(d)) -# -# def test_CFA_substitutions_0(self): -# """Test CFA substitution URI substitutions (0).""" -# f = cf.example_field(0) -# cf.write(f, tmpfile1) -# f = 
cf.read(tmpfile1)[0] -# -# cwd = os.getcwd() -# -# f.data.cfa_update_file_substitutions({"base": cwd}) -# -# cf.write( -# f, -# tmpfile2, -# cfa={"absolute_paths": True}, -# ) -# -# nc = netCDF4.Dataset(tmpfile2, "r") -# cfa_file = nc.variables["cfa_file"] -# self.assertEqual( -# cfa_file.getncattr("substitutions"), -# f"${{base}}: {cwd}", -# ) -# self.assertEqual( -# cfa_file[...], f"file://${{base}}/{os.path.basename(tmpfile1)}" -# ) -# nc.close() -# -# g = cf.read(tmpfile2) -# self.assertEqual(len(g), 1) -# self.assertTrue(f.equals(g[0])) -# -# def test_CFA_substitutions_1(self): -# """Test CFA substitution URI substitutions (1).""" -# f = cf.example_field(0) -# cf.write(f, tmpfile1) -# f = cf.read(tmpfile1)[0] -# -# cwd = os.getcwd() -# for base in ("base", "${base}"): -# cf.write( -# f, -# tmpfile2, -# cfa={"absolute_paths": True, "substitutions": {base: cwd}}, -# ) -# -# nc = netCDF4.Dataset(tmpfile2, "r") -# cfa_file = nc.variables["cfa_file"] -# self.assertEqual( -# cfa_file.getncattr("substitutions"), -# f"${{base}}: {cwd}", -# ) -# self.assertEqual( -# cfa_file[...], f"file://${{base}}/{os.path.basename(tmpfile1)}" -# ) -# nc.close() -# -# g = cf.read(tmpfile2) -# self.assertEqual(len(g), 1) -# self.assertTrue(f.equals(g[0])) -# -# def test_CFA_substitutions_2(self): -# """Test CFA substitution URI substitutions (2).""" -# f = cf.example_field(0) -# cf.write(f, tmpfile1) -# f = cf.read(tmpfile1)[0] -# -# cwd = os.getcwd() -# -# f.data.cfa_clear_file_substitutions() -# f.data.cfa_update_file_substitutions({"base": cwd}) -# -# cf.write( -# f, -# tmpfile2, -# cfa={ -# "absolute_paths": True, -# "substitutions": {"base2": "/bad/location"}, -# }, -# ) -# -# nc = netCDF4.Dataset(tmpfile2, "r") -# cfa_file = nc.variables["cfa_file"] -# self.assertEqual( -# cfa_file.getncattr("substitutions"), -# f"${{base2}}: /bad/location ${{base}}: {cwd}", -# ) -# self.assertEqual( -# cfa_file[...], f"file://${{base}}/{os.path.basename(tmpfile1)}" -# ) -# nc.close() -# -# g = 
cf.read(tmpfile2) -# self.assertEqual(len(g), 1) -# self.assertTrue(f.equals(g[0])) -# -# f.data.cfa_clear_file_substitutions() -# f.data.cfa_update_file_substitutions({"base": "/bad/location"}) -# -# cf.write( -# f, -# tmpfile2, -# cfa={"absolute_paths": True, "substitutions": {"base": cwd}}, -# ) -# -# nc = netCDF4.Dataset(tmpfile2, "r") -# cfa_file = nc.variables["cfa_file"] -# self.assertEqual( -# cfa_file.getncattr("substitutions"), -# f"${{base}}: {cwd}", -# ) -# self.assertEqual( -# cfa_file[...], f"file://${{base}}/{os.path.basename(tmpfile1)}" -# ) -# nc.close() -# -# g = cf.read(tmpfile2) -# self.assertEqual(len(g), 1) -# self.assertTrue(f.equals(g[0])) -# -# f.data.cfa_clear_file_substitutions() -# f.data.cfa_update_file_substitutions({"base2": "/bad/location"}) -# -# cf.write( -# f, -# tmpfile2, -# cfa={"absolute_paths": True, "substitutions": {"base": cwd}}, -# ) -# -# nc = netCDF4.Dataset(tmpfile2, "r") -# cfa_file = nc.variables["cfa_file"] -# self.assertEqual( -# cfa_file.getncattr("substitutions"), -# f"${{base2}}: /bad/location ${{base}}: {cwd}", -# ) -# self.assertEqual( -# cfa_file[...], f"file://${{base}}/{os.path.basename(tmpfile1)}" -# ) -# nc.close() -# -# g = cf.read(tmpfile2) -# self.assertEqual(len(g), 1) -# self.assertTrue(f.equals(g[0])) -# -# def test_CFA_absolute_paths(self): -# """Test CFA 'absolute_paths' option to the cfa.write 'cfa' keyword.""" -# f = cf.example_field(0) -# cf.write(f, tmpfile1) -# f = cf.read(tmpfile1)[0] -# -# for absolute_paths, filename in zip( -# (True, False), -# ( -# PurePath(os.path.abspath(tmpfile1)).as_uri(), -# os.path.basename(tmpfile1), -# ), -# ): -# cf.write(f, tmpfile2, cfa={"absolute_paths": absolute_paths}) -# -# nc = netCDF4.Dataset(tmpfile2, "r") -# cfa_file = nc.variables["cfa_file"] -# self.assertEqual(cfa_file[...], filename) -# nc.close() -# -# g = cf.read(tmpfile2) -# self.assertEqual(len(g), 1) -# self.assertTrue(f.equals(g[0])) -# -# def test_CFA_constructs(self): -# """Test choice of 
constructs to write as CFA-netCDF variables.""" -# f = cf.example_field(1) -# f.del_construct("T") -# f.del_construct("long_name=Grid latitude name") -# cf.write(f, tmpfile1) -# f = cf.read(tmpfile1)[0] -# -# # No constructs -# cf.write(f, tmpfile2, cfa={"constructs": []}) -# nc = netCDF4.Dataset(tmpfile2, "r") -# for var in nc.variables.values(): -# attrs = var.ncattrs() -# self.assertNotIn("aggregated_dimensions", attrs) -# self.assertNotIn("aggregated_data", attrs) -# -# nc.close() -# -# # Field construct -# cf.write(f, tmpfile2, cfa={"constructs": "field"}) -# nc = netCDF4.Dataset(tmpfile2, "r") -# for ncvar, var in nc.variables.items(): -# attrs = var.ncattrs() -# if ncvar in ("ta",): -# self.assertFalse(var.ndim) -# self.assertIn("aggregated_dimensions", attrs) -# self.assertIn("aggregated_data", attrs) -# else: -# self.assertNotIn("aggregated_dimensions", attrs) -# self.assertNotIn("aggregated_data", attrs) -# -# nc.close() -# -# # Dimension construct -# for constructs in ( -# "dimension_coordinate", -# ["dimension_coordinate"], -# {"dimension_coordinate": None}, -# {"dimension_coordinate": 1}, -# {"dimension_coordinate": cf.eq(1)}, -# ): -# cf.write(f, tmpfile2, cfa={"constructs": constructs}) -# nc = netCDF4.Dataset(tmpfile2, "r") -# for ncvar, var in nc.variables.items(): -# attrs = var.ncattrs() -# if ncvar in ( -# "x", -# "x_bnds", -# "y", -# "y_bnds", -# "atmosphere_hybrid_height_coordinate", -# "atmosphere_hybrid_height_coordinate_bounds", -# ): -# self.assertFalse(var.ndim) -# self.assertIn("aggregated_dimensions", attrs) -# self.assertIn("aggregated_data", attrs) -# else: -# self.assertNotIn("aggregated_dimensions", attrs) -# self.assertNotIn("aggregated_data", attrs) -# -# nc.close() -# -# # Dimension and auxiliary constructs -# for constructs in ( -# ["dimension_coordinate", "auxiliary_coordinate"], -# {"dimension_coordinate": None, "auxiliary_coordinate": cf.ge(2)}, -# ): -# cf.write(f, tmpfile2, cfa={"constructs": constructs}) -# nc = 
netCDF4.Dataset(tmpfile2, "r") -# for ncvar, var in nc.variables.items(): -# attrs = var.ncattrs() -# if ncvar in ( -# "x", -# "x_bnds", -# "y", -# "y_bnds", -# "atmosphere_hybrid_height_coordinate", -# "atmosphere_hybrid_height_coordinate_bounds", -# "latitude_1", -# "longitude_1", -# ): -# self.assertFalse(var.ndim) -# self.assertIn("aggregated_dimensions", attrs) -# self.assertIn("aggregated_data", attrs) -# else: -# self.assertNotIn("aggregated_dimensions", attrs) -# self.assertNotIn("aggregated_data", attrs) -# -# nc.close() -# -# def test_CFA_PP(self): -# """Test writing CFA-netCDF with PP format fragments.""" -# f = cf.read("file1.pp")[0] -# cf.write(f, tmpfile1, cfa=True) -# -# # Check that only the fields have been aggregated -# nc = netCDF4.Dataset(tmpfile1, "r") -# for ncvar, var in nc.variables.items(): -# attrs = var.ncattrs() -# if ncvar in ("UM_m01s15i201_vn405",): -# self.assertFalse(var.ndim) -# self.assertIn("aggregated_dimensions", attrs) -# self.assertIn("aggregated_data", attrs) -# else: -# self.assertNotIn("aggregated_dimensions", attrs) -# self.assertNotIn("aggregated_data", attrs) -# -# nc.close() -# -# g = cf.read(tmpfile1) -# self.assertEqual(len(g), 1) -# self.assertTrue(f.equals(g[0])) -# -# def test_CFA_multiple_files(self): -# """Test storing multiple CFA frgament locations.""" -# f = cf.example_field(0) -# cf.write(f, tmpfile1) -# f = cf.read(tmpfile1)[0] -# f.add_file_location("/new/location") -# -# cf.write(f, tmpfile2, cfa=True) -# g = cf.read(tmpfile2) -# self.assertEqual(len(g), 1) -# g = g[0] -# self.assertTrue(f.equals(g)) -# -# self.assertEqual(len(g.data.get_filenames()), 2) -# self.assertEqual(len(g.get_filenames()), 3) -# -# def test_CFA_unlimited_dimension(self): -# """Test CFA with unlimited dimensions""" -# # Create a CFA file from a field that has an unlimited -# # dimension and no metadata constructs spanning that dimension -# f = cf.example_field(0) -# d = f.domain_axis("X") -# d.nc_set_unlimited(True) -# 
f.del_construct("X") -# cf.write(f, tmpfile1) -# g = cf.read(tmpfile1) -# cf.write(g, tmpfile2, cfa=True) -# -# # Check that the CFA file can be read -# h = cf.read(tmpfile2) -# self.assertEqual(len(h), 1) -# -# -# if __name__ == "__main__": -# print("Run date:", datetime.datetime.now()) -# cf.environment() -# print() -# unittest.main(verbosity=2) +import atexit +import datetime +import faulthandler +import os +import tempfile +import unittest +from pathlib import PurePath + +import netCDF4 + +faulthandler.enable() # to debug seg faults and timeouts + +import cf + +n_tmpfiles = 5 +tmpfiles = [ + tempfile.mkstemp("_test_CFA.nc", dir=os.getcwd())[1] + for i in range(n_tmpfiles) +] +( + tmpfile1, + tmpfile2, + nc_file, + cfa_file, + cfa_file2, +) = tmpfiles + + +def _remove_tmpfiles(): + """Try to remove defined temporary files by deleting their paths.""" + for f in tmpfiles: + try: + os.remove(f) + except OSError: + pass + + +atexit.register(_remove_tmpfiles) + + +class CFATest(unittest.TestCase): + """Unit test for aggregation variables.""" + + netcdf3_fmts = [ + "NETCDF3_CLASSIC", + "NETCDF3_64BIT", + "NETCDF3_64BIT_OFFSET", + "NETCDF3_64BIT_DATA", + ] + netcdf4_fmts = ["NETCDF4", "NETCDF4_CLASSIC"] + netcdf_fmts = netcdf3_fmts + netcdf4_fmts + + aggregation_value = os.path.join( + os.path.dirname(os.path.abspath(__file__)), "aggregation_value.nc" + ) + + def test_CFA_fmt(self): + """Test the cf.read 'fmt' keyword with cfa.""" + f = cf.example_field(0) + cf.write(f, tmpfile1) + f = cf.read(tmpfile1, cfa_write="field")[0] + + for fmt in self.netcdf_fmts: + cf.write(f, cfa_file, fmt=fmt, cfa="field") + g = cf.read(cfa_file) + self.assertEqual(len(g), 1) + self.assertTrue(f.equals(g[0])) + + def test_CFA_multiple_fragments(self): + """Test aggregation variables with more than one fragment.""" + f = cf.example_field(0) + + cf.write(f[:2], tmpfile1) + cf.write(f[2:], tmpfile2) + + a = cf.read(tmpfile1, cfa_write="field")[0] + b = cf.read(tmpfile2, cfa_write="field")[0] 
+ a = cf.Field.concatenate([a, b], axis=0) + + cf.write(a, nc_file) + cf.write(a, cfa_file, cfa="field") + + n = cf.read(nc_file) + c = cf.read(cfa_file) + self.assertEqual(len(n), 1) + self.assertEqual(len(c), 1) + self.assertTrue(c[0].equals(f)) + self.assertTrue(n[0].equals(c[0])) + + def test_CFA_strict(self): + """Test 'strict' option to the cf.write 'cfa' keyword.""" + f = cf.example_field(0) + + # By default, can't write in-memory arrays as aggregation + # variables + with self.assertRaises(ValueError): + cf.write(f, cfa_file, cfa="field") + + # The previous line should have deleted the output file + self.assertFalse(os.path.exists(cfa_file)) + + cf.write(f, nc_file, cfa={"constructs": "field", "strict": False}) + g = cf.read(nc_file, cfa_write="field") + self.assertEqual(len(g), 1) + self.assertTrue(g[0].equals(f)) + + cf.write(g, cfa_file, cfa={"constructs": "field", "strict": True}) + g = cf.read(cfa_file) + self.assertEqual(len(g), 1) + self.assertTrue(g[0].equals(f)) + + def test_CFA_uri_0(self): + """Test aggregation 'uri' option to cf.write.""" + f = cf.example_field(0) + cf.write(f, tmpfile1) + f = cf.read(tmpfile1, cfa_write="field")[0] + + absuri_filename = PurePath(os.path.abspath(tmpfile1)).as_uri() + reluri_filename = os.path.basename(tmpfile1) + + for uri, filename in zip( + ("absolute", "relative"), (absuri_filename, reluri_filename) + ): + print(uri) + cf.write( + f, + cfa_file, + cfa={"constructs": "field", "uri": uri}, + ) + + nc = netCDF4.Dataset(cfa_file, "r") + cfa_location = nc.variables["cfa_location"] + self.assertEqual(cfa_location[...], filename) + nc.close() + + g = cf.read(cfa_file) + self.assertEqual(len(g), 1) + g = g[0] + self.assertTrue(f.equals(g)) + self.assertEqual( + g.data.get_filenames(normalise=False), set([filename]) + ) + + def test_CFA_uri_1(self): + """Test aggregation 'uri=default' option to cf.write.""" + f = cf.example_field(0) + cf.write(f, tmpfile1) + f = cf.read(tmpfile1, cfa_write="field")[0] + + 
absuri_filename = PurePath(os.path.abspath(tmpfile1)).as_uri() + reluri_filename = os.path.basename(tmpfile1) + + for uri, filename in zip( + ("absolute", "relative"), (absuri_filename, reluri_filename) + ): + cf.write( + f, + cfa_file, + cfa={"constructs": "field", "uri": uri}, + ) + + g = cf.read(cfa_file)[0] + cf.write( + g, + cfa_file2, + cfa="field", + ) + + nc = netCDF4.Dataset(cfa_file2, "r") + cfa_location = nc.variables["cfa_location"] + self.assertEqual(cfa_location[...], filename) + nc.close() + + def test_CFA_constructs(self): + """Test aggregation 'constructs' option to cf.write.""" + f = cf.example_field(1) + f.del_construct("time") + f.del_construct("long_name=Grid latitude name") + cf.write(f, tmpfile1) + f = cf.read(tmpfile1, cfa_write="all")[0] + + # No constructs + cf.write(f, tmpfile2, cfa={"constructs": []}) + nc = netCDF4.Dataset(tmpfile2, "r") + for var in nc.variables.values(): + attrs = var.ncattrs() + self.assertNotIn("aggregated_dimensions", attrs) + self.assertNotIn("aggregated_data", attrs) + + nc.close() + + # Field construct + cf.write(f, tmpfile2, cfa={"constructs": "field"}) + nc = netCDF4.Dataset(tmpfile2, "r") + for ncvar, var in nc.variables.items(): + attrs = var.ncattrs() + if ncvar in ("ta",): + self.assertFalse(var.ndim) + self.assertIn("aggregated_dimensions", attrs) + self.assertIn("aggregated_data", attrs) + else: + self.assertNotIn("aggregated_dimensions", attrs) + self.assertNotIn("aggregated_data", attrs) + + nc.close() + + # Dimension construct + for constructs in ( + "dimension_coordinate", + ["dimension_coordinate"], + {"dimension_coordinate": None}, + {"dimension_coordinate": 1}, + ): + cf.write(f, tmpfile2, cfa={"constructs": constructs}) + nc = netCDF4.Dataset(tmpfile2, "r") + for ncvar, var in nc.variables.items(): + attrs = var.ncattrs() + if ncvar in ( + "x", + "x_bnds", + "y", + "y_bnds", + "atmosphere_hybrid_height_coordinate", + "atmosphere_hybrid_height_coordinate_bounds", + ): + self.assertFalse(var.ndim) 
+ self.assertIn("aggregated_dimensions", attrs) + self.assertIn("aggregated_data", attrs) + else: + self.assertNotIn("aggregated_dimensions", attrs) + self.assertNotIn("aggregated_data", attrs) + + nc.close() + + # Dimension and auxiliary constructs + for constructs in ( + ["dimension_coordinate", "auxiliary_coordinate"], + {"dimension_coordinate": None, "auxiliary_coordinate": 2}, + ): + cf.write(f, tmpfile2, cfa={"constructs": constructs}) + nc = netCDF4.Dataset(tmpfile2, "r") + for ncvar, var in nc.variables.items(): + attrs = var.ncattrs() + if ncvar in ( + "x", + "x_bnds", + "y", + "y_bnds", + "atmosphere_hybrid_height_coordinate", + "atmosphere_hybrid_height_coordinate_bounds", + "latitude_1", + "longitude_1", + ): + self.assertFalse(var.ndim) + self.assertIn("aggregated_dimensions", attrs) + self.assertIn("aggregated_data", attrs) + else: + self.assertNotIn("aggregated_dimensions", attrs) + self.assertNotIn("aggregated_data", attrs) + + nc.close() + + def test_CFA_scalar(self): + """Test scalar aggregation variable.""" + f = cf.example_field(0) + f = f[0, 0].squeeze() + cf.write(f, tmpfile1) + g = cf.read(tmpfile1, cfa_write="field")[0] + cf.write(g, cfa_file, cfa="field") + h = cf.read(cfa_file)[0] + self.assertTrue(h.equals(f)) + + def test_CFA_value(self): + """Test the value fragment array variable.""" + write = True + for aggregation_value_file in (self.aggregation_value, cfa_file): + f = cf.read(aggregation_value_file, cfa_write="all") + self.assertEqual(len(f), 1) + f = f[0] + fa = f.field_ancillary() + self.assertEqual(fa.shape, (12,)) + self.assertEqual(fa.data.chunks, ((3, 9),)) + self.assertEqual( + fa.data.nc_get_aggregation_fragment_type(), "value" + ) + self.assertEqual( + fa.data.nc_get_aggregated_data(), + {"map": "fragment_map_uid", "value": "fragment_value_uid"}, + ) + + nc = netCDF4.Dataset(aggregation_value_file, "r") + fragment_value_uid = nc.variables["fragment_value_uid"][...] 
+ nc.close() + + self.assertTrue((fa[:3].array == fragment_value_uid[0]).all()) + self.assertTrue((fa[3:].array == fragment_value_uid[1]).all()) + + if write: + cf.write(f, cfa_file) # , cfa={'uri': 'relative'}) + write = False + + def test_CFA_cfa(self): + """Test the cf.write 'cfa' keyword.""" + f = cf.example_field(0) + cf.write(f, tmpfile1) + f = cf.read(tmpfile1, cfa_write="field")[0] + cf.write(f, tmpfile2, cfa="field") + g = cf.read(tmpfile2, cfa_write="field")[0] + + # Default of cfa="auto" - check that aggregation variable + # gets written + cf.write(g, cfa_file) + nc = netCDF4.Dataset(cfa_file, "r") + self.assertIsNotNone( + getattr(nc.variables["q"], "aggregated_data", None) + ) + nc.close() + + cf.write(g, cfa_file, cfa={"constructs": {"auto": 2}}) + nc = netCDF4.Dataset(cfa_file, "r") + self.assertIsNotNone( + getattr(nc.variables["q"], "aggregated_data", None) + ) + nc.close() + + cf.write( + g, + cfa_file, + cfa={ + "constructs": ["auto", "dimension_coordinate"], + "strict": False, + }, + ) + nc = netCDF4.Dataset(cfa_file, "r") + for ncvar in ("q", "lat", "lon"): + self.assertIsNotNone( + getattr(nc.variables[ncvar], "aggregated_data", None) + ) + + nc.close() + + # Check bad values of cfa + for cfa in (False, True, (), []): + with self.assertRaises(ValueError): + cf.write(g, cfa_file, cfa=cfa) + + +if __name__ == "__main__": + print("Run date:", datetime.datetime.now()) + cf.environment() + print() + unittest.main(verbosity=2) diff --git a/cf/test/test_Field.py b/cf/test/test_Field.py index fdc2baa01b..7f382c918c 100644 --- a/cf/test/test_Field.py +++ b/cf/test/test_Field.py @@ -2855,28 +2855,6 @@ def test_Field_subspace_ugrid(self): self.assertTrue(g.aux("X").data.range() < 30) self.assertTrue(g.aux("Y").data.range() < 50) - # def test_Field_file_location(self): - # f = cf.example_field(0) - # - # self.assertEqual(f.add_file_location("/data/model/"), "/data/model") - # - # cf.write(f, tmpfile) - # f = cf.read(tmpfile)[0] - # g = f.copy() - # 
location = os.path.dirname(os.path.abspath(tmpfile)) - # - # self.assertEqual(f.file_locations(), set((location,))) - # self.assertEqual(f.add_file_location("/data/model/"), "/data/model") - # self.assertEqual(f.file_locations(), set((location, "/data/model"))) - # - # # Check that we haven't changed 'g' - # self.assertEqual(g.file_locations(), set((location,))) - # - # self.assertEqual(f.del_file_location("/data/model/"), "/data/model") - # self.assertEqual(f.file_locations(), set((location,))) - # f.del_file_location("/invalid") - # self.assertEqual(f.file_locations(), set((location,))) - def test_Field_pad_missing(self): """Test Field.pad_missing.""" f = cf.example_field(0) diff --git a/cf/test/test_NetCDF4Array.py b/cf/test/test_NetCDF4Array.py deleted file mode 100644 index a956daed54..0000000000 --- a/cf/test/test_NetCDF4Array.py +++ /dev/null @@ -1,171 +0,0 @@ -# import atexit -# import datetime -# import faulthandler -# import os -# import tempfile -# import unittest -# -# import numpy as np -# from dask.base import tokenize -# -# faulthandler.enable() # to debug seg faults and timeouts -# -# import cf -# -# n_tmpfiles = 1 -# tmpfiles = [ -# tempfile.mkstemp("_test_NetCDF4Array.nc", dir=os.getcwd())[1] -# for i in range(n_tmpfiles) -# ] -# (tmpfile1,) = tmpfiles -# -# -# def _remove_tmpfiles(): -# """Try to remove defined temporary files by deleting their paths.""" -# for f in tmpfiles: -# try: -# os.remove(f) -# except OSError: -# pass -# -# -# atexit.register(_remove_tmpfiles) -# -# -# class NetCDF4ArrayTest(unittest.TestCase): -# n = cf.NetCDF4Array( -# filename="filename.nc", -# address="x", -# shape=(5, 8), -# dtype=np.dtype(float), -# ) -# -# def test_NetCDF4Array_del_file_location(self): -# a = cf.NetCDF4Array(("/data1/file1", "/data2/file2"), ("tas1", "tas2")) -# b = a.del_file_location("/data1") -# self.assertIsNot(b, a) -# self.assertEqual(b.get_filenames(), ("/data2/file2",)) -# self.assertEqual(b.get_addresses(), ("tas2",)) -# -# a = 
cf.NetCDF4Array( -# ("/data1/file1", "/data2/file1", "/data2/file2"), -# ("tas1", "tas1", "tas2"), -# ) -# b = a.del_file_location("/data2") -# self.assertEqual(b.get_filenames(), ("/data1/file1",)) -# self.assertEqual(b.get_addresses(), ("tas1",)) -# -# # Can't be left with no files -# self.assertEqual(b.file_locations(), ("/data1",)) -# with self.assertRaises(ValueError): -# b.del_file_location("/data1/") -# -# def test_NetCDF4Array_file_locations(self): -# a = cf.NetCDF4Array("/data1/file1") -# self.assertEqual(a.file_locations(), ("/data1",)) -# -# a = cf.NetCDF4Array(("/data1/file1", "/data2/file2")) -# self.assertEqual(a.file_locations(), ("/data1", "/data2")) -# -# a = cf.NetCDF4Array(("/data1/file1", "/data2/file2", "/data1/file2")) -# self.assertEqual(a.file_locations(), ("/data1", "/data2", "/data1")) -# -# def test_NetCDF4Array_add_file_location(self): -# a = cf.NetCDF4Array("/data1/file1", "tas") -# b = a.add_file_location("/home/user") -# self.assertIsNot(b, a) -# self.assertEqual( -# b.get_filenames(), ("/data1/file1", "/home/user/file1") -# ) -# self.assertEqual(b.get_addresses(), ("tas", "tas")) -# -# a = cf.NetCDF4Array(("/data1/file1", "/data2/file2"), ("tas1", "tas2")) -# b = a.add_file_location("/home/user") -# self.assertEqual( -# b.get_filenames(), -# ( -# "/data1/file1", -# "/data2/file2", -# "/home/user/file1", -# "/home/user/file2", -# ), -# ) -# self.assertEqual(b.get_addresses(), ("tas1", "tas2", "tas1", "tas2")) -# -# a = cf.NetCDF4Array(("/data1/file1", "/data2/file1"), ("tas1", "tas2")) -# b = a.add_file_location("/home/user") -# self.assertEqual( -# b.get_filenames(), -# ("/data1/file1", "/data2/file1", "/home/user/file1"), -# ) -# self.assertEqual(b.get_addresses(), ("tas1", "tas2", "tas1")) -# -# a = cf.NetCDF4Array(("/data1/file1", "/data2/file1"), ("tas1", "tas2")) -# b = a.add_file_location("/data1/") -# self.assertEqual(b.get_filenames(), a.get_filenames()) -# self.assertEqual(b.get_addresses(), a.get_addresses()) -# -# def 
test_NetCDF4Array__dask_tokenize__(self): -# a = cf.NetCDF4Array("/data1/file1", "tas", shape=(12, 2), mask=False) -# self.assertEqual(tokenize(a), tokenize(a.copy())) -# -# b = cf.NetCDF4Array("/home/file2", "tas", shape=(12, 2)) -# self.assertNotEqual(tokenize(a), tokenize(b)) -# -# def test_NetCDF4Array_multiple_files(self): -# f = cf.example_field(0) -# cf.write(f, tmpfile1) -# -# # Create instance with non-existent file -# n = cf.NetCDF4Array( -# filename=os.path.join("/bad/location", os.path.basename(tmpfile1)), -# address=f.nc_get_variable(), -# shape=f.shape, -# dtype=f.dtype, -# ) -# # Add file that exists -# n = n.add_file_location(os.path.dirname(tmpfile1)) -# -# self.assertEqual(len(n.get_filenames()), 2) -# self.assertTrue((n[...] == f.array).all()) -# -# def test_NetCDF4Array_shape(self): -# shape = (12, 73, 96) -# a = cf.NetCDF4Array("/home/file2", "tas", shape=shape) -# self.assertEqual(a.shape, shape) -# self.assertEqual(a.original_shape, shape) -# a = a[::2] -# self.assertEqual(a.shape, (shape[0] // 2,) + shape[1:]) -# self.assertEqual(a.original_shape, shape) -# -# def test_NetCDF4Array_index(self): -# shape = (12, 73, 96) -# a = cf.NetCDF4Array("/home/file2", "tas", shape=shape) -# self.assertEqual( -# a.index(), -# ( -# slice( -# None, -# ), -# ) -# * len(shape), -# ) -# a = a[8:7:-1, 10:19:3, [15, 1, 4, 12]] -# a = a[[0], [True, False, True], ::-2] -# self.assertEqual(a.shape, (1, 2, 2)) -# self.assertEqual( -# a.index(), -# (slice(8, 9, None), slice(10, 17, 6), slice(12, -1, -11)), -# ) -# -# index = a.index(conform=False) -# self.assertTrue((index[0] == [8]).all()) -# self.assertTrue((index[1] == [10, 16]).all()) -# self.assertTrue((index[2] == [12, 1]).all()) -# -# -# if __name__ == "__main__": -# print("Run date:", datetime.datetime.now()) -# cf.environment() -# print() -# unittest.main(verbosity=2) From 26e257440bf7b45bc6ba31d7f6810c40effb1f00 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Mon, 25 Nov 2024 23:52:56 +0000 Subject: 
[PATCH 26/51] dev --- cf/aggregate.py | 90 +++++++++++++++++++++++++++++++++++---- cf/data/dask_utils.py | 4 -- cf/test/test_CFA.py | 9 ++-- cf/test/test_aggregate.py | 4 +- 4 files changed, 89 insertions(+), 18 deletions(-) diff --git a/cf/aggregate.py b/cf/aggregate.py index 632e99c0a2..9273066aab 100644 --- a/cf/aggregate.py +++ b/cf/aggregate.py @@ -426,6 +426,7 @@ def __init__( # Promote selected properties to field ancillaries that span # the same domain axes as the field # ------------------------------------------------------------ + self.promoted_field_ancillaries = [] if field_ancillaries: f = self.promote_to_field_ancillary(field_ancillaries) @@ -2129,9 +2130,15 @@ def promote_to_field_ancillary(self, properties): f = f.copy() copy = False - f.set_construct(field_anc, axes=f.get_data_axes(), copy=False) + key = f.set_construct( + field_anc, axes=f.get_data_axes(), copy=False + ) f.del_property(prop) + # Record that this field ancillary is derived from a + # promotion + self.promoted_field_ancillaries.append(key) + self.field = f return f @@ -2426,9 +2433,9 @@ def aggregate( Create new field ancillary constructs for each input field which has one or more of the given properties. For each input field, each property is converted to a field - ancillary construct that spans the entire domain, with the - constant value of the property, and the property itself is - deleted. + ancillary construct that spans the aggregation axes with + the constant value of the property, and the property + itself is deleted. .. versionadded:: 3.15.0 @@ -3031,6 +3038,9 @@ def aggregate( unaggregatable = False + # Record the names of the axes that are actually aggregated + axes_aggregated = [] + for axis in aggregating_axes: number_of_fields = len(meta) if number_of_fields == 1: @@ -3243,6 +3253,7 @@ def aggregate( # the aggregated fields as a single list ready for # aggregation along the next axis.
# -------------------------------------------------------- + axes_aggregated.append(axis) meta = [m for gm in grouped_meta for m in gm] # Add fields to the output list @@ -3259,6 +3270,10 @@ def aggregate( if cells: _set_cell_conditions(output_meta) + # Remove non-aggregated axes from promoted field ancillaries + if field_ancillaries: + _fix_promoted_field_ancillaries(output_meta, axes_aggregated) + output_constructs = [m.field for m in output_meta] aggregate.status = status @@ -4716,6 +4731,14 @@ def _aggregate_2_fields( hash_value1 = anc1["hash_value"] anc0["hash_value"] = hash_value0 + hash_value1 + # The result of aggregating a promoted and non-promoted + # field ancillary is a non-promoted field ancillary + if ( + key0 in m0.promoted_field_ancillaries + and key1 not in m1.promoted_field_ancillaries + ): + m0.promoted_field_ancillaries.remove(key0) + # Domain ancillaries for identity in m0.domain_anc: anc0 = m0.domain_anc[identity] @@ -4737,9 +4760,9 @@ def _aggregate_2_fields( anc0["hash_value"] = hash_value0 + hash_value1 # ---------------------------------------------------------------- - # For each matching pair of coordinates, cell measures, field and - # domain ancillaries which span the aggregating axis, insert the - # one from parent1 into the one from parent0 + # For each matching pair of coordinates, cell measures, and field + # and domain ancillaries which span the aggregating axis, insert + # the one from parent1 into the one from parent0 # ---------------------------------------------------------------- for key0, key1, construct0, construct1 in spanning_variables: construct_axes0 = parent0.get_data_axes(key0) @@ -4901,7 +4924,7 @@ def _aggregate_2_fields( actual_range = parent0.del_property("actual_range", None) if actual_range is not None and is_log_level_info(logger): logger.info( - "Deleted 'actual_range' attribute due to being " + "Deleted 'actual_range' attribute due to it being " "outside of 'valid_range' attribute limits."
) @@ -4911,7 +4934,6 @@ def _aggregate_2_fields( # Make a note that the parent construct in this _Meta object has # already been aggregated - m0.aggregated_field = True # ---------------------------------------------------------------- @@ -4978,3 +5000,53 @@ def dsg_feature_type_axis(meta, axis): # cf_role property cf_role = coords["cf_role"] return cf_role.count(None) != len(cf_role) + + +def _fix_promoted_field_ancillaries(output_meta, axes_aggregated): + """Remove non-aggregated axes from promoted field ancillaries. + + .. versionadded:: NEXTVERSION + + :Parameters: + + output_meta: `list` + The list of `_Meta` objects. If any include promoted field + ancillaries then these will be updated in-place. + + :Returns: + + `None` + + """ + for m in output_meta: + for value in m.field_anc.values(): + index = [] + squeeze = [] + + key = value["key"] + if key not in m.promoted_field_ancillaries: + continue + + # Remove the non-aggregated axes from the promoted field + # ancillary + for i, axis in enumerate(value["axes"]): + if axis in axes_aggregated: + index.append(slice(None)) + else: + index.append(0) + squeeze.append(i) + + if not squeeze: + continue + + fa_axes = m.field.get_data_axes(key) + fa = m.field.del_construct(key) + fa = fa[tuple(index)] + fa.squeeze(squeeze, inplace=True) + fa_axes = [a for i, a in enumerate(fa_axes) if i not in squeeze] + + # Record the field ancillary as being able to be written + # as a CF-netCDF aggregation 'value' variable + fa.data._nc_set_aggregation_fragment_type("value") + + m.field.set_construct(fa, axes=fa_axes, copy=False) diff --git a/cf/data/dask_utils.py b/cf/data/dask_utils.py index 2f5de183c5..04a0c09921 100644 --- a/cf/data/dask_utils.py +++ b/cf/data/dask_utils.py @@ -7,15 +7,11 @@ from functools import partial -import dask.array as da import numpy as np from cfdm.data.dask_utils import cfdm_asanyarray -from dask.core import flatten from scipy.ndimage import convolve1d from ..cfdatetime import dt, dt2rt, rt2dt -from
..functions import atol as cf_atol -from ..functions import rtol as cf_rtol from ..units import Units diff --git a/cf/test/test_CFA.py b/cf/test/test_CFA.py index ebbfdabd28..8bf3268777 100644 --- a/cf/test/test_CFA.py +++ b/cf/test/test_CFA.py @@ -10,6 +10,8 @@ faulthandler.enable() # to debug seg faults and timeouts +from cfdm.read_write.netcdf.netcdfwrite import AggregationError + import cf n_tmpfiles = 5 @@ -91,9 +93,10 @@ def test_CFA_strict(self): """Test 'strict' option to the cf.write 'cfa' keyword.""" f = cf.example_field(0) + cfa_file = "cfa_file.nc" # By default, can't write in-memory arrays as aggregation # variables - with self.assertRaises(ValueError): + with self.assertRaises(AggregationError): cf.write(f, cfa_file, cfa="field") # The previous line should have deleted the output file @@ -121,7 +124,6 @@ def test_CFA_uri_0(self): for uri, filename in zip( ("absolute", "relative"), (absuri_filename, reluri_filename) ): - print(uri) cf.write( f, cfa_file, @@ -296,7 +298,7 @@ def test_CFA_value(self): self.assertTrue((fa[3:].array == fragment_value_uid[1]).all()) if write: - cf.write(f, cfa_file) # , cfa={'uri': 'relative'}) + cf.write(f, cfa_file) write = False def test_CFA_cfa(self): @@ -318,6 +320,7 @@ def test_CFA_cfa(self): cf.write(g, cfa_file, cfa={"constructs": {"auto": 2}}) nc = netCDF4.Dataset(cfa_file, "r") + self.assertIsNotNone( getattr(nc.variables["q"], "aggregated_data", None) ) diff --git a/cf/test/test_aggregate.py b/cf/test/test_aggregate.py index 53c0b9b938..e4684d6f87 100644 --- a/cf/test/test_aggregate.py +++ b/cf/test/test_aggregate.py @@ -326,7 +326,7 @@ def test_aggregate_relaxed_units(self): self.assertEqual(i.Units.__dict__, bad_units.__dict__) self.assertTrue((i.array == f.array).all()) - def test_aggregate_field_ancillaries(self): + def test_aggregate_promote_field_ancillaries(self): f = cf.example_field(0) self.assertFalse(f.field_ancillaries()) @@ -341,7 +341,7 @@ def test_aggregate_field_ancillaries(self): 
self.assertEqual(len(c.field_ancillaries()), 1) anc = c.field_ancillary() - self.assertEqual(anc.shape, c.shape) + self.assertEqual(anc.shape, f.shape[:1]) self.assertTrue((anc[:2] == "bar_a").all()) self.assertTrue((anc[2:] == "bar_b").all()) From bb27a9876248bf7e7a5eefd35af4e2d7382eb38b Mon Sep 17 00:00:00 2001 From: David Hassell Date: Thu, 28 Nov 2024 09:07:54 +0000 Subject: [PATCH 27/51] dev --- cf/mixin/properties.py | 5 ++--- cf/test/create_test_files.py | 11 +++++++---- cf/test/test_CFA.py | 7 +++++-- 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/cf/mixin/properties.py b/cf/mixin/properties.py index a6c4e5a13e..dd7d80fa86 100644 --- a/cf/mixin/properties.py +++ b/cf/mixin/properties.py @@ -26,9 +26,8 @@ class Properties(Container): def __new__(cls, *args, **kwargs): """Store component classes. - .. note:: If a child class requires a different component - classes than the ones defined here, then they must be redefined - in the child class. + Child classes should consider redefining these component + classes. 
""" instance = super().__new__(cls) diff --git a/cf/test/create_test_files.py b/cf/test/create_test_files.py index 74d37b2265..e7ee44d2f9 100644 --- a/cf/test/create_test_files.py +++ b/cf/test/create_test_files.py @@ -7,10 +7,11 @@ faulthandler.enable() # to debug seg faults and timeouts -import cfdm import netCDF4 -VN = cfdm.CF() +import cf + +VN = cf.CF() # Load large arrays filename = os.path.join( @@ -2258,7 +2259,9 @@ def _make_aggregation_value(filename): uid = n.createVariable("uid", str, ()) uid.long_name = "Fragment dataset unique identifiers" uid.aggregated_dimensions = "time" - uid.aggregated_data = "value: fragment_value_uid map: fragment_map_uid" + uid.aggregated_data = ( + "unique_value: fragment_value_uid map: fragment_map_uid" + ) time = n.createVariable("time", "f4", ("time",)) time.standard_name = "time" @@ -2342,6 +2345,6 @@ def _make_aggregation_value(filename): if __name__ == "__main__": print("Run date:", datetime.datetime.now()) - cfdm.environment() + cf.environment() print() unittest.main(verbosity=2) diff --git a/cf/test/test_CFA.py b/cf/test/test_CFA.py index 8bf3268777..9ff1a61590 100644 --- a/cf/test/test_CFA.py +++ b/cf/test/test_CFA.py @@ -283,11 +283,14 @@ def test_CFA_value(self): self.assertEqual(fa.shape, (12,)) self.assertEqual(fa.data.chunks, ((3, 9),)) self.assertEqual( - fa.data.nc_get_aggregation_fragment_type(), "value" + fa.data.nc_get_aggregation_fragment_type(), "unique_value" ) self.assertEqual( fa.data.nc_get_aggregated_data(), - {"map": "fragment_map_uid", "value": "fragment_value_uid"}, + { + "map": "fragment_map_uid", + "unique_value": "fragment_value_uid", + }, ) nc = netCDF4.Dataset(aggregation_value_file, "r") From ad2e0d08a6d28a6e2dc389e16ebbe0f866868008 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Fri, 29 Nov 2024 16:39:20 +0000 Subject: [PATCH 28/51] dev --- cf/field.py | 764 +++++++++++++++---------------- cf/mixin/propertiesdata.py | 120 ++--- cf/mixin/propertiesdatabounds.py | 160 +++---- 
cf/test/create_test_files.py | 36 +- cf/test/test_CFA.py | 30 +- cf/test/test_functions.py | 4 +- 6 files changed, 570 insertions(+), 544 deletions(-) diff --git a/cf/field.py b/cf/field.py index f118e29275..ea63395c55 100644 --- a/cf/field.py +++ b/cf/field.py @@ -3048,133 +3048,133 @@ def iscyclic(self, *identity, **filter_kwargs): return axis in self.cyclic() - @classmethod - def concatenate( - cls, fields, axis=0, cull_graph=False, relaxed_units=False, copy=True - ): - """Join a sequence of fields together. - - This is different to `cf.aggregate` because it does not account - for all metadata. For example, it assumes that the axis order is - the same in each field. - - .. versionadded:: 1.0 - - .. seealso:: `cf.aggregate`, `Data.concatenate`, - `Data.cull_graph` - - :Parameters: - - fields: (sequence of) `Field` - The fields to concatenate. - - axis: `int`, optional - The axis along which the arrays will be joined. The - default is 0. Note that scalar arrays are treated as - if they were one dimensional. - - {{cull_graph: `bool`, optional}} - - .. versionadded:: 3.14.0 - - {{relaxed_units: `bool`, optional}} - - .. versionadded:: 3.15.1 - - copy: `bool`, optional - If True (the default) then make copies of the - {{class}} constructs, prior to the concatenation, - thereby ensuring that the input constructs are not - changed by the concatenation process. If False then - some or all input constructs might be changed - in-place, but the concatenation process will be - faster. - - .. versionadded:: 3.15.1 - - :Returns: - - `Field` - The field generated from the concatenation of input - fields. 
- - """ - if isinstance(fields, cls): - return fields.copy() - - field0 = fields[0] - if copy: - out = field0.copy() - - if len(fields) == 1: - return out - - new_data = Data.concatenate( - [f.get_data(_fill_value=False) for f in fields], - axis=axis, - cull_graph=cull_graph, - relaxed_units=relaxed_units, - copy=copy, - ) - - # Change the domain axis size - dim = out.get_data_axes()[axis] - out.set_construct(DomainAxis(size=new_data.shape[axis]), key=dim) - - # Insert the concatenated data - out.set_data(new_data, set_axes=False, copy=False) - - # ------------------------------------------------------------ - # Concatenate constructs with data - # ------------------------------------------------------------ - for key, construct in field0.constructs.filter_by_data( - todict=True - ).items(): - construct_axes = field0.get_data_axes(key) - - if dim not in construct_axes: - # This construct does not span the concatenating axis - # in the first field - continue - - constructs = [construct] - for f in fields[1:]: - c = f.constructs.get(key) - if c is None: - # This field does not have this construct - constructs = None - break - - constructs.append(c) - - if not constructs: - # Not every field has this construct, so remove it - # from the output field. - out.del_construct(key) - continue - - # Still here? Then try concatenating the constructs from - # each field. - try: - construct = construct.concatenate( - constructs, - axis=construct_axes.index(dim), - cull_graph=cull_graph, - relaxed_units=relaxed_units, - copy=copy, - ) - except ValueError: - # Couldn't concatenate this construct, so remove it from - # the output field. - out.del_construct(key) - else: - # Successfully concatenated this construct, so insert - # it into the output field. 
- out.set_construct( - construct, key=key, axes=construct_axes, copy=False - ) - - return out +# @classmethod +# def concatenate( +# cls, fields, axis=0, cull_graph=False, relaxed_units=False, copy=True +# ): +# """Join a sequence of fields together. +# +# This is different to `cf.aggregate` because it does not account +# for all metadata. For example, it assumes that the axis order is +# the same in each field. +# +# .. versionadded:: 1.0 +# +# .. seealso:: `cf.aggregate`, `Data.concatenate`, +# `Data.cull_graph` +# +# :Parameters: +# +# fields: (sequence of) `Field` +# The fields to concatenate. +# +# axis: `int`, optional +# The axis along which the arrays will be joined. The +# default is 0. Note that scalar arrays are treated as +# if they were one dimensional. +# +# {{cull_graph: `bool`, optional}} +# +# .. versionadded:: 3.14.0 +# +# {{relaxed_units: `bool`, optional}} +# +# .. versionadded:: 3.15.1 +# +# copy: `bool`, optional +# If True (the default) then make copies of the +# {{class}} constructs, prior to the concatenation, +# thereby ensuring that the input constructs are not +# changed by the concatenation process. If False then +# some or all input constructs might be changed +# in-place, but the concatenation process will be +# faster. +# +# .. versionadded:: 3.15.1 +# +# :Returns: +# +# `Field` +# The field generated from the concatenation of input +# fields. 
+# +# """ +# if isinstance(fields, cls): +# return fields.copy() +# +# field0 = fields[0] +# if copy: +# out = field0.copy() +# +# if len(fields) == 1: +# return out +# +# new_data = Data.concatenate( +# [f.get_data(_fill_value=False) for f in fields], +# axis=axis, +# cull_graph=cull_graph, +# relaxed_units=relaxed_units, +# copy=copy, +# ) +# +# # Change the domain axis size +# dim = out.get_data_axes()[axis] +# out.set_construct(DomainAxis(size=new_data.shape[axis]), key=dim) +# +# # Insert the concatenated data +# out.set_data(new_data, set_axes=False, copy=False) +# +# # ------------------------------------------------------------ +# # Concatenate constructs with data +# # ------------------------------------------------------------ +# for key, construct in field0.constructs.filter_by_data( +# todict=True +# ).items(): +# construct_axes = field0.get_data_axes(key) +# +# if dim not in construct_axes: +# # This construct does not span the concatenating axis +# # in the first field +# continue +# +# constructs = [construct] +# for f in fields[1:]: +# c = f.constructs.get(key) +# if c is None: +# # This field does not have this construct +# constructs = None +# break +# +# constructs.append(c) +# +# if not constructs: +# # Not every field has this construct, so remove it +# # from the output field. +# out.del_construct(key) +# continue +# +# # Still here? Then try concatenating the constructs from +# # each field. +# try: +# construct = construct.concatenate( +# constructs, +# axis=construct_axes.index(dim), +# cull_graph=cull_graph, +# relaxed_units=relaxed_units, +# copy=copy, +# ) +# except ValueError: +# # Couldn't concatenate this construct, so remove it from +# # the output field. +# out.del_construct(key) +# else: +# # Successfully concatenated this construct, so insert +# # it into the output field. 
+# out.set_construct( +# construct, key=key, axes=construct_axes, copy=False +# ) +# +# return out def weights( self, @@ -8717,90 +8717,90 @@ def _update_cell_methods( f" Modified cell methods = {self.cell_methods()}" ) # pragma: no cover - @_inplace_enabled(default=False) - def insert_dimension( - self, axis, position=0, constructs=False, inplace=False - ): - """Insert a size 1 axis into the data array. - - .. versionadded:: 3.0.0 - - .. seealso:: `domain_axis`, `flatten`, `flip`, `squeeze`, - `transpose`, `unsqueeze` - - :Parameters: - - axis: - Select the domain axis to insert, generally defined by that - which would be selected by passing the given axis description - to a call of the field construct's `domain_axis` method. For - example, for a value of ``'X'``, the domain axis construct - returned by ``f.domain_axis('X')`` is selected. - - If *axis* is `None` then a new domain axis construct will - created for the inserted dimension. - - position: `int`, optional - Specify the position that the new axis will have in the - data array. By default the new axis has position 0, the - slowest varying position. - - constructs: `bool`, optional - If True then also insert the new axis into all - metadata constructs that don't already include it. By - default, metadata constructs are not changed. - - .. versionadded:: 3.16.1 - - {{inplace: `bool`, optional}} - - :Returns: - - `Field` or `None` - The field construct with expanded data, or `None` if the - operation was in-place. 
- - **Examples** - - >>> f = cf.example_field(0) - >>> print(f) - Field: specific_humidity (ncvar%q) - ---------------------------------- - Data : specific_humidity(latitude(5), longitude(8)) 1 - Cell methods : area: mean - Dimension coords: latitude(5) = [-75.0, ..., 75.0] degrees_north - : longitude(8) = [22.5, ..., 337.5] degrees_east - : time(1) = [2019-01-01 00:00:00] - >>> g = f.insert_dimension('T', 0) - >>> print(g) - Field: specific_humidity (ncvar%q) - ---------------------------------- - Data : specific_humidity(time(1), latitude(5), longitude(8)) 1 - Cell methods : area: mean - Dimension coords: latitude(5) = [-75.0, ..., 75.0] degrees_north - : longitude(8) = [22.5, ..., 337.5] degrees_east - : time(1) = [2019-01-01 00:00:00] - - A previously non-existent size 1 axis must be created prior to - insertion: - - >>> f.insert_dimension(None, 1, inplace=True) - >>> print(f) - Field: specific_humidity (ncvar%q) - ---------------------------------- - Data : specific_humidity(time(1), key%domainaxis3(1), latitude(5), longitude(8)) 1 - Cell methods : area: mean - Dimension coords: latitude(5) = [-75.0, ..., 75.0] degrees_north - : longitude(8) = [22.5, ..., 337.5] degrees_east - : time(1) = [2019-01-01 00:00:00] - - """ - return super().insert_dimension( - axis=axis, - position=position, - constructs=constructs, - inplace=inplace, - ) +# @_inplace_enabled(default=False) +# def insert_dimension( +# self, axis, position=0, constructs=False, inplace=False +# ): +# """Insert a size 1 axis into the data array. +# +# .. versionadded:: 3.0.0 +# +# .. seealso:: `domain_axis`, `flatten`, `flip`, `squeeze`, +# `transpose`, `unsqueeze` +# +# :Parameters: +# +# axis: +# Select the domain axis to insert, generally defined by that +# which would be selected by passing the given axis description +# to a call of the field construct's `domain_axis` method. For +# example, for a value of ``'X'``, the domain axis construct +# returned by ``f.domain_axis('X')`` is selected. 
+# +# If *axis* is `None` then a new domain axis construct will +# created for the inserted dimension. +# +# position: `int`, optional +# Specify the position that the new axis will have in the +# data array. By default the new axis has position 0, the +# slowest varying position. +# +# constructs: `bool`, optional +# If True then also insert the new axis into all +# metadata constructs that don't already include it. By +# default, metadata constructs are not changed. +# +# .. versionadded:: 3.16.1 +# +# {{inplace: `bool`, optional}} +# +# :Returns: +# +# `Field` or `None` +# The field construct with expanded data, or `None` if the +# operation was in-place. +# +# **Examples** +# +# >>> f = cf.example_field(0) +# >>> print(f) +# Field: specific_humidity (ncvar%q) +# ---------------------------------- +# Data : specific_humidity(latitude(5), longitude(8)) 1 +# Cell methods : area: mean +# Dimension coords: latitude(5) = [-75.0, ..., 75.0] degrees_north +# : longitude(8) = [22.5, ..., 337.5] degrees_east +# : time(1) = [2019-01-01 00:00:00] +# >>> g = f.insert_dimension('T', 0) +# >>> print(g) +# Field: specific_humidity (ncvar%q) +# ---------------------------------- +# Data : specific_humidity(time(1), latitude(5), longitude(8)) 1 +# Cell methods : area: mean +# Dimension coords: latitude(5) = [-75.0, ..., 75.0] degrees_north +# : longitude(8) = [22.5, ..., 337.5] degrees_east +# : time(1) = [2019-01-01 00:00:00] +# +# A previously non-existent size 1 axis must be created prior to +# insertion: +# +# >>> f.insert_dimension(None, 1, inplace=True) +# >>> print(f) +# Field: specific_humidity (ncvar%q) +# ---------------------------------- +# Data : specific_humidity(time(1), key%domainaxis3(1), latitude(5), longitude(8)) 1 +# Cell methods : area: mean +# Dimension coords: latitude(5) = [-75.0, ..., 75.0] degrees_north +# : longitude(8) = [22.5, ..., 337.5] degrees_east +# : time(1) = [2019-01-01 00:00:00] +# +# """ +# return super().insert_dimension( +# axis=axis, +# 
position=position, +# constructs=constructs, +# inplace=inplace, +# ) def indices(self, *config, **kwargs): """Create indices that define a subspace of the field construct. @@ -10955,77 +10955,77 @@ def argmin(self, axis=None, unravel=False): return self.data.argmin(axis=axis, unravel=unravel) - @_deprecated_kwarg_check("i", version="3.0.0", removed_at="4.0.0") - def squeeze(self, axes=None, inplace=False, i=False, **kwargs): - """Remove size 1 axes from the data. - - By default all size 1 axes are removed, but particular size 1 axes - may be selected for removal. - - Squeezed domain axis constructs are not removed from the metadata - constructs, nor from the domain of the field construct. - - .. seealso:: `domain_axis`, `flatten`, `insert_dimension`, `flip`, - `remove_axes`, `transpose`, `unsqueeze` - - :Parameters: - - axes: (sequence of) `str` or `int`, optional - Select the domain axes to squeeze, defined by the domain - axes that would be selected by passing each given axis - description to a call of the field construct's - `domain_axis` method. For example, for a value of ``'X'``, - the domain axis construct returned by - ``f.domain_axis('X')`` is selected. - - If no axes are provided then all size 1 axes are squeezed. - - {{inplace: `bool`, optional}} - - {{i: deprecated at version 3.0.0}} - - kwargs: deprecated at version 3.0.0 - - :Returns: - - `Field` or `None` - The field construct with squeezed data, or `None` if the - operation was in-place. 
- - **Examples** - - >>> g = f.squeeze() - >>> g = f.squeeze('time') - >>> g = f.squeeze(1) - >>> g = f.squeeze(['time', 1, 'dim2']) - >>> f.squeeze(['dim2'], inplace=True) - - """ - if kwargs: - _DEPRECATION_ERROR_KWARGS( - self, "squeeze", kwargs, version="3.0.0", removed_at="4.0.0" - ) # pragma: no cover - - data_axes = self.get_data_axes() - - if axes is None: - domain_axes = self.domain_axes(todict=True) - axes = [ - axis - for axis in data_axes - if domain_axes[axis].get_size(None) == 1 - ] - else: - if isinstance(axes, (str, int)): - axes = (axes,) - - axes = [self.domain_axis(x, key=True) for x in axes] - axes = set(axes).intersection(data_axes) - - iaxes = [data_axes.index(axis) for axis in axes] - - # Squeeze the field's data array - return super().squeeze(iaxes, inplace=inplace) +# @_deprecated_kwarg_check("i", version="3.0.0", removed_at="4.0.0") +# def squeeze(self, axes=None, inplace=False, i=False, **kwargs): +# """Remove size 1 axes from the data. +# +# By default all size 1 axes are removed, but particular size 1 axes +# may be selected for removal. +# +# Squeezed domain axis constructs are not removed from the metadata +# constructs, nor from the domain of the field construct. +# +# .. seealso:: `domain_axis`, `flatten`, `insert_dimension`, `flip`, +# `remove_axes`, `transpose`, `unsqueeze` +# +# :Parameters: +# +# axes: (sequence of) `str` or `int`, optional +# Select the domain axes to squeeze, defined by the domain +# axes that would be selected by passing each given axis +# description to a call of the field construct's +# `domain_axis` method. For example, for a value of ``'X'``, +# the domain axis construct returned by +# ``f.domain_axis('X')`` is selected. +# +# If no axes are provided then all size 1 axes are squeezed. 
+# +# {{inplace: `bool`, optional}} +# +# {{i: deprecated at version 3.0.0}} +# +# kwargs: deprecated at version 3.0.0 +# +# :Returns: +# +# `Field` or `None` +# The field construct with squeezed data, or `None` if the +# operation was in-place. +# +# **Examples** +# +# >>> g = f.squeeze() +# >>> g = f.squeeze('time') +# >>> g = f.squeeze(1) +# >>> g = f.squeeze(['time', 1, 'dim2']) +# >>> f.squeeze(['dim2'], inplace=True) +# +# """ +# if kwargs: +# _DEPRECATION_ERROR_KWARGS( +# self, "squeeze", kwargs, version="3.0.0", removed_at="4.0.0" +# ) # pragma: no cover +# +# data_axes = self.get_data_axes() +# +# if axes is None: +# domain_axes = self.domain_axes(todict=True) +# axes = [ +# axis +# for axis in data_axes +# if domain_axes[axis].get_size(None) == 1 +# ] +# else: +# if isinstance(axes, (str, int)): +# axes = (axes,) +# +# axes = [self.domain_axis(x, key=True) for x in axes] +# axes = set(axes).intersection(data_axes) +# +# iaxes = [data_axes.index(axis) for axis in axes] +# +# # Squeeze the field's data array +# return super().squeeze(iaxes, inplace=inplace) @_deprecated_kwarg_check("i", version="3.0.0", removed_at="4.0.0") @_inplace_enabled(default=False) @@ -11095,106 +11095,106 @@ def swapaxes(self, axis0, axis1, inplace=False, i=False): return f - @_deprecated_kwarg_check("i", version="3.0.0", removed_at="4.0.0") - def transpose( - self, - axes=None, - constructs=False, - inplace=False, - items=True, - i=False, - **kwargs, - ): - """Permute the axes of the data array. - - By default the order of the axes is reversed, but any ordering may - be specified by selecting the axes of the output in the required - order. - - By default metadata constructs are not transposed, but they may be - if the *constructs* parameter is set. - - .. 
seealso:: `domain_axis`, `flatten`, `insert_dimension`, `flip`, - `squeeze`, `unsqueeze` - - :Parameters: - - axes: (sequence of) `str` or `int`, optional - Select the domain axis order, defined by the domain axes - that would be selected by passing each given axis - description to a call of the field construct's - `domain_axis` method. For example, for a value of ``'X'``, - the domain axis construct returned by - ``f.domain_axis('X')`` is selected. - - Each dimension of the field construct's data must be - provided, or if no axes are specified then the axis order - is reversed. - - constructs: `bool`, optional - If True then metadata constructs are also transposed so - that their axes are in the same relative order as in the - transposed data array of the field. By default metadata - constructs are not altered. - - {{inplace: `bool`, optional}} - - items: deprecated at version 3.0.0 - Use the *constructs* parameter instead. - - {{i: deprecated at version 3.0.0}} - - kwargs: deprecated at version 3.0.0 - - :Returns: - - `Field` or `None` - The field construct with transposed data, or `None` if the - operation was in-place. 
- - **Examples** - - >>> f.ndim - 3 - >>> g = f.transpose() - >>> g = f.transpose(['time', 1, 'dim2']) - >>> f.transpose(['time', -2, 'dim2'], inplace=True) - - """ - if not items: - _DEPRECATION_ERROR_KWARGS( - self, - "transpose", - {"items": items}, - "Use keyword 'constructs' instead.", - version="3.0.0", - removed_at="4.0.0", - ) # pragma: no cover - - if kwargs: - _DEPRECATION_ERROR_KWARGS( - self, "transpose", kwargs, version="3.0.0", removed_at="4.0.0" - ) # pragma: no cover - - if axes is None: - iaxes = list(range(self.ndim - 1, -1, -1)) - else: - data_axes = self.get_data_axes(default=()) - if isinstance(axes, (str, int)): - axes = (axes,) - - axes2 = [self.domain_axis(x, key=True) for x in axes] - - if sorted(axes2) != sorted(data_axes): - raise ValueError( - f"Can't transpose {self.__class__.__name__}: " - f"Bad axis specification: {axes!r}" - ) - - iaxes = [data_axes.index(axis) for axis in axes2] - - # Transpose the field's data array - return super().transpose(iaxes, constructs=constructs, inplace=inplace) +# @_deprecated_kwarg_check("i", version="3.0.0", removed_at="4.0.0") +# def transpose( +# self, +# axes=None, +# constructs=False, +# inplace=False, +# items=True, +# i=False, +# **kwargs, +# ): +# """Permute the axes of the data array. +# +# By default the order of the axes is reversed, but any ordering may +# be specified by selecting the axes of the output in the required +# order. +# +# By default metadata constructs are not transposed, but they may be +# if the *constructs* parameter is set. +# +# .. seealso:: `domain_axis`, `flatten`, `insert_dimension`, `flip`, +# `squeeze`, `unsqueeze` +# +# :Parameters: +# +# axes: (sequence of) `str` or `int`, optional +# Select the domain axis order, defined by the domain axes +# that would be selected by passing each given axis +# description to a call of the field construct's +# `domain_axis` method. 
For example, for a value of ``'X'``, +# the domain axis construct returned by +# ``f.domain_axis('X')`` is selected. +# +# Each dimension of the field construct's data must be +# provided, or if no axes are specified then the axis order +# is reversed. +# +# constructs: `bool`, optional +# If True then metadata constructs are also transposed so +# that their axes are in the same relative order as in the +# transposed data array of the field. By default metadata +# constructs are not altered. +# +# {{inplace: `bool`, optional}} +# +# items: deprecated at version 3.0.0 +# Use the *constructs* parameter instead. +# +# {{i: deprecated at version 3.0.0}} +# +# kwargs: deprecated at version 3.0.0 +# +# :Returns: +# +# `Field` or `None` +# The field construct with transposed data, or `None` if the +# operation was in-place. +# +# **Examples** +# +# >>> f.ndim +# 3 +# >>> g = f.transpose() +# >>> g = f.transpose(['time', 1, 'dim2']) +# >>> f.transpose(['time', -2, 'dim2'], inplace=True) +# +# """ +# if not items: +# _DEPRECATION_ERROR_KWARGS( +# self, +# "transpose", +# {"items": items}, +# "Use keyword 'constructs' instead.", +# version="3.0.0", +# removed_at="4.0.0", +# ) # pragma: no cover +# +# if kwargs: +# _DEPRECATION_ERROR_KWARGS( +# self, "transpose", kwargs, version="3.0.0", removed_at="4.0.0" +# ) # pragma: no cover +# +# if axes is None: +# iaxes = list(range(self.ndim - 1, -1, -1)) +# else: +# data_axes = self.get_data_axes(default=()) +# if isinstance(axes, (str, int)): +# axes = (axes,) +# +# axes2 = [self.domain_axis(x, key=True) for x in axes] +# +# if sorted(axes2) != sorted(data_axes): +# raise ValueError( +# f"Can't transpose {self.__class__.__name__}: " +# f"Bad axis specification: {axes!r}" +# ) +# +# iaxes = [data_axes.index(axis) for axis in axes2] +# +# # Transpose the field's data array +# return super().transpose(iaxes, constructs=constructs, inplace=inplace) @_deprecated_kwarg_check("i", version="3.0.0", removed_at="4.0.0") 
@_inplace_enabled(default=False) diff --git a/cf/mixin/propertiesdata.py b/cf/mixin/propertiesdata.py index 1512d6be49..1205844e85 100644 --- a/cf/mixin/propertiesdata.py +++ b/cf/mixin/propertiesdata.py @@ -2594,66 +2594,66 @@ def close(self): removed_at="5.0.0", ) # pragma: no cover - @classmethod - def concatenate( - cls, - variables, - axis=0, - cull_graph=False, - relaxed_units=False, - copy=True, - ): - """Join a sequence of variables together. - - .. seealso:: `Data.cull_graph` - - :Parameters: - - variables: sequence of constructs. - - axis: `int`, optional - - {{cull_graph: `bool`, optional}} - - .. versionadded:: 3.14.0 - - {{relaxed_units: `bool`, optional}} - - .. versionadded:: 3.15.1 - - copy: `bool`, optional - If True (the default) then make copies of the - {{class}} constructs, prior to the concatenation, - thereby ensuring that the input constructs are not - changed by the concatenation process. If False then - some or all input constructs might be changed - in-place, but the concatenation process will be - faster. - - .. versionadded:: 3.15.1 - - :Returns: - - TODO - - """ - out = variables[0] - if copy: - out = out.copy() - - if len(variables) == 1: - return out - - data = Data.concatenate( - [v.get_data(_fill_value=False) for v in variables], - axis=axis, - cull_graph=cull_graph, - relaxed_units=relaxed_units, - copy=copy, - ) - out.set_data(data, copy=False) - - return out +# @classmethod +# def concatenate( +# cls, +# variables, +# axis=0, +# cull_graph=False, +# relaxed_units=False, +# copy=True, +# ): +# """Join a sequence of variables together. +# +# .. seealso:: `Data.cull_graph` +# +# :Parameters: +# +# variables: sequence of constructs. +# +# axis: `int`, optional +# +# {{cull_graph: `bool`, optional}} +# +# .. versionadded:: 3.14.0 +# +# {{relaxed_units: `bool`, optional}} +# +# .. 
versionadded:: 3.15.1 +# +# copy: `bool`, optional +# If True (the default) then make copies of the +# {{class}} constructs, prior to the concatenation, +# thereby ensuring that the input constructs are not +# changed by the concatenation process. If False then +# some or all input constructs might be changed +# in-place, but the concatenation process will be +# faster. +# +# .. versionadded:: 3.15.1 +# +# :Returns: +# +# TODO +# +# """ +# out = variables[0] +# if copy: +# out = out.copy() +# +# if len(variables) == 1: +# return out +# +# data = Data.concatenate( +# [v.get_data(_fill_value=False) for v in variables], +# axis=axis, +# cull_graph=cull_graph, +# relaxed_units=relaxed_units, +# copy=copy, +# ) +# out.set_data(data, copy=False) +# +# return out @_deprecated_kwarg_check("i", version="3.0.0", removed_at="4.0.0") @_inplace_enabled(default=False) diff --git a/cf/mixin/propertiesdatabounds.py b/cf/mixin/propertiesdatabounds.py index fe3d5f830b..df4e2f4bf4 100644 --- a/cf/mixin/propertiesdatabounds.py +++ b/cf/mixin/propertiesdatabounds.py @@ -1313,86 +1313,86 @@ def close(self): removed_at="5.0.0", ) # pragma: no cover - @classmethod - def concatenate( - cls, - variables, - axis=0, - cull_graph=False, - relaxed_units=False, - copy=True, - ): - """Join a sequence of variables together. - - .. seealso:: `Data.cull_graph` - - :Parameters: - - variables: sequence of constructs - - axis: `int`, optional - - {{cull_graph: `bool`, optional}} - - .. versionadded:: 3.14.0 - - {{relaxed_units: `bool`, optional}} - - .. versionadded:: 3.15.1 - - copy: `bool`, optional - If True (the default) then make copies of the - {{class}} objects, prior to the concatenation, thereby - ensuring that the input constructs are not changed by - the concatenation process. If False then some or all - input constructs might be changed in-place, but the - concatenation process will be faster. - - .. 
versionadded:: 3.15.1 - - :Returns: - - TODO - - """ - variable0 = variables[0] - if copy: - variable0 = variable0.copy() - - if len(variables) == 1: - return variable0 - - out = super().concatenate( - variables, - axis=axis, - cull_graph=cull_graph, - relaxed_units=relaxed_units, - copy=copy, - ) - - bounds = variable0.get_bounds(None) - if bounds is not None: - bounds = bounds.concatenate( - [v.get_bounds() for v in variables], - axis=axis, - cull_graph=cull_graph, - relaxed_units=relaxed_units, - copy=copy, - ) - out.set_bounds(bounds, copy=False) - - interior_ring = variable0.get_interior_ring(None) - if interior_ring is not None: - interior_ring = interior_ring.concatenate( - [v.get_interior_ring() for v in variables], - axis=axis, - cull_graph=cull_graph, - relaxed_units=relaxed_units, - copy=copy, - ) - out.set_interior_ring(interior_ring, copy=False) - - return out +# @classmethod +# def concatenate( +# cls, +# variables, +# axis=0, +# cull_graph=False, +# relaxed_units=False, +# copy=True, +# ): +# """Join a sequence of variables together. +# +# .. seealso:: `Data.cull_graph` +# +# :Parameters: +# +# variables: sequence of constructs +# +# axis: `int`, optional +# +# {{cull_graph: `bool`, optional}} +# +# .. versionadded:: 3.14.0 +# +# {{relaxed_units: `bool`, optional}} +# +# .. versionadded:: 3.15.1 +# +# copy: `bool`, optional +# If True (the default) then make copies of the +# {{class}} objects, prior to the concatenation, thereby +# ensuring that the input constructs are not changed by +# the concatenation process. If False then some or all +# input constructs might be changed in-place, but the +# concatenation process will be faster. +# +# .. 
versionadded:: 3.15.1 +# +# :Returns: +# +# TODO +# +# """ +# variable0 = variables[0] +# if copy: +# variable0 = variable0.copy() +# +# if len(variables) == 1: +# return variable0 +# +# out = super().concatenate( +# variables, +# axis=axis, +# cull_graph=cull_graph, +# relaxed_units=relaxed_units, +# copy=copy, +# ) +# +# bounds = variable0.get_bounds(None) +# if bounds is not None: +# bounds = bounds.concatenate( +# [v.get_bounds() for v in variables], +# axis=axis, +# cull_graph=cull_graph, +# relaxed_units=relaxed_units, +# copy=copy, +# ) +# out.set_bounds(bounds, copy=False) +# +# interior_ring = variable0.get_interior_ring(None) +# if interior_ring is not None: +# interior_ring = interior_ring.concatenate( +# [v.get_interior_ring() for v in variables], +# axis=axis, +# cull_graph=cull_graph, +# relaxed_units=relaxed_units, +# copy=copy, +# ) +# out.set_interior_ring(interior_ring, copy=False) +# +# return out @_deprecated_kwarg_check("i", version="3.0.0", removed_at="4.0.0") @_inplace_enabled(default=False) diff --git a/cf/test/create_test_files.py b/cf/test/create_test_files.py index e7ee44d2f9..d56f15d76b 100644 --- a/cf/test/create_test_files.py +++ b/cf/test/create_test_files.py @@ -9,9 +9,9 @@ import netCDF4 -import cf +import cfdm -VN = cf.CF() +VN = cfdm.CF() # Load large arrays filename = os.path.join( @@ -2240,13 +2240,13 @@ def _make_aggregation_value(filename): n.createDimension("level", 1) n.createDimension("latitude", 73) n.createDimension("longitude", 144) - n.createDimension("f_time", 2) - n.createDimension("f_level", 1) - n.createDimension("f_latitude", 1) - n.createDimension("f_longitude", 1) - n.createDimension("i", 2) - n.createDimension("j", 4) - n.createDimension("j_uid", 1) + n.createDimension("a_time", 2) + n.createDimension("a_level", 1) + n.createDimension("a_latitude", 1) + n.createDimension("a_longitude", 1) + n.createDimension("a_map_i2", 2) + n.createDimension("a_map_j4", 4) + n.createDimension("a_map_j1", 1) temperature = 
n.createVariable("temperature", "f8", ()) temperature.standard_name = "air_temperature" @@ -2254,7 +2254,7 @@ def _make_aggregation_value(filename): temperature.cell_methods = "time: mean" temperature.ancillary_variables = "uid" temperature.aggregated_dimensions = "time level latitude longitude" - temperature.aggregated_data = "location: fragment_location identifier: fragment_identifier map: fragment_map" + temperature.aggregated_data = "location: fragment_location variable: fragment_variable map: fragment_map" uid = n.createVariable("uid", str, ()) uid.long_name = "Fragment dataset unique identifiers" @@ -2284,26 +2284,28 @@ def _make_aggregation_value(filename): fragment_location = n.createVariable( "fragment_location", str, - ("f_time", "f_level", "f_latitude", "f_longitude"), + ("a_time", "a_level", "a_latitude", "a_longitude"), ) fragment_location[0, 0, 0, 0] = "January-March.nc" fragment_location[1, 0, 0, 0] = "April-December.nc" - fragment_identifier = n.createVariable("fragment_identifier", str, ()) - fragment_identifier[...] = "temperature" + fragment_variable = n.createVariable("fragment_variable", str, ()) + fragment_variable[...] = "temperature" - fragment_map = n.createVariable("fragment_map", "i4", ("j", "i")) + fragment_map = n.createVariable( + "fragment_map", "i4", ("a_map_j4", "a_map_i2") + ) fragment_map[...] = [[3, 9], [1, -1], [73, -1], [144, -1]] fragment_map[1:, 1] = np.ma.masked fragment_value_uid = n.createVariable( - "fragment_value_uid", str, ("f_time",) + "fragment_value_uid", str, ("a_time",) ) fragment_value_uid[0] = "04b9-7eb5-4046-97b-0bf8" fragment_value_uid[1] = "05ee0-a183-43b3-a67-1eca" fragment_map_uid = n.createVariable( - "fragment_map_uid", "i4", ("j_uid", "i") + "fragment_map_uid", "i4", ("a_map_j1", "a_map_i2") ) fragment_map_uid[...] 
= [3, 9] @@ -2345,6 +2347,6 @@ def _make_aggregation_value(filename): if __name__ == "__main__": print("Run date:", datetime.datetime.now()) - cf.environment() + cfdm.environment() print() unittest.main(verbosity=2) diff --git a/cf/test/test_CFA.py b/cf/test/test_CFA.py index 9ff1a61590..c06fe304c5 100644 --- a/cf/test/test_CFA.py +++ b/cf/test/test_CFA.py @@ -10,9 +10,8 @@ faulthandler.enable() # to debug seg faults and timeouts -from cfdm.read_write.netcdf.netcdfwrite import AggregationError - import cf +from cfdm.read_write.netcdf.netcdfwrite import AggregationError n_tmpfiles = 5 tmpfiles = [ @@ -93,7 +92,6 @@ def test_CFA_strict(self): """Test 'strict' option to the cf.write 'cfa' keyword.""" f = cf.example_field(0) - cfa_file = "cfa_file.nc" # By default, can't write in-memory arrays as aggregation # variables with self.assertRaises(AggregationError): @@ -350,6 +348,32 @@ def test_CFA_cfa(self): with self.assertRaises(ValueError): cf.write(g, cfa_file, cfa=cfa) + def test_CFA_subspace(self): + """Test the writing subspaces of aggregations.""" + f = cf.example_field(0) + + cf.write(f[:2], tmpfile1) + cf.write(f[2:], tmpfile2) + + a = cf.read(tmpfile1, cfa_write="field")[0] + b = cf.read(tmpfile2, cfa_write="field")[0] + c = cf.Field.concatenate([a, b], axis=0) + + cf.write(c, cfa_file, cfa="field") + + f = cf.read(cfa_file, cfa_write="field")[0] + cf.write(f[:2], cfa_file2, cfa="field") + g = cf.read(cfa_file2)[0] + self.assertTrue(g.equals(a)) + + cf.write(f[2:], cfa_file2, cfa="field") + g = cf.read(cfa_file2)[0] + self.assertTrue(g.equals(b)) + + # Can't straddle Dask chunks + with self.assertRaises(AggregationError): + cf.write(f[1:3], cfa_file2, cfa="field") + if __name__ == "__main__": print("Run date:", datetime.datetime.now()) diff --git a/cf/test/test_functions.py b/cf/test/test_functions.py index 431ab4ad14..fa7dc24458 100644 --- a/cf/test/test_functions.py +++ b/cf/test/test_functions.py @@ -32,9 +32,9 @@ def test_keyword_deprecation(self): # Use as 
test case 'i' kwarg, the deprecated old name for # 'inplace': f = cf.example_field(0) - f.squeeze(inplace=True) # new way to specify operation tested below + f.flip(inplace=True) # new way to specify operation tested below with self.assertRaises(cf.functions.DeprecationError): - f.squeeze(i=True) + f.flip(i=True) def test_aliases(self): self.assertEqual(cf.log_level(), cf.LOG_LEVEL()) From 2b6e171f72085c68a5af6cdaa38bf1183dc43a57 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Sat, 30 Nov 2024 15:05:38 +0000 Subject: [PATCH 29/51] dev --- cf/field.py | 446 ------------------------------- cf/mixin/propertiesdata.py | 63 +---- cf/mixin/propertiesdatabounds.py | 205 +------------- cf/read_write/read.py | 61 ++--- cf/read_write/um/umread.py | 44 ++- cf/test/create_test_files.py | 3 +- cf/test/test_CFA.py | 3 +- 7 files changed, 65 insertions(+), 760 deletions(-) diff --git a/cf/field.py b/cf/field.py index ea63395c55..5ab0572cbc 100644 --- a/cf/field.py +++ b/cf/field.py @@ -3048,134 +3048,6 @@ def iscyclic(self, *identity, **filter_kwargs): return axis in self.cyclic() -# @classmethod -# def concatenate( -# cls, fields, axis=0, cull_graph=False, relaxed_units=False, copy=True -# ): -# """Join a sequence of fields together. -# -# This is different to `cf.aggregate` because it does not account -# for all metadata. For example, it assumes that the axis order is -# the same in each field. -# -# .. versionadded:: 1.0 -# -# .. seealso:: `cf.aggregate`, `Data.concatenate`, -# `Data.cull_graph` -# -# :Parameters: -# -# fields: (sequence of) `Field` -# The fields to concatenate. -# -# axis: `int`, optional -# The axis along which the arrays will be joined. The -# default is 0. Note that scalar arrays are treated as -# if they were one dimensional. -# -# {{cull_graph: `bool`, optional}} -# -# .. versionadded:: 3.14.0 -# -# {{relaxed_units: `bool`, optional}} -# -# .. 
versionadded:: 3.15.1 -# -# copy: `bool`, optional -# If True (the default) then make copies of the -# {{class}} constructs, prior to the concatenation, -# thereby ensuring that the input constructs are not -# changed by the concatenation process. If False then -# some or all input constructs might be changed -# in-place, but the concatenation process will be -# faster. -# -# .. versionadded:: 3.15.1 -# -# :Returns: -# -# `Field` -# The field generated from the concatenation of input -# fields. -# -# """ -# if isinstance(fields, cls): -# return fields.copy() -# -# field0 = fields[0] -# if copy: -# out = field0.copy() -# -# if len(fields) == 1: -# return out -# -# new_data = Data.concatenate( -# [f.get_data(_fill_value=False) for f in fields], -# axis=axis, -# cull_graph=cull_graph, -# relaxed_units=relaxed_units, -# copy=copy, -# ) -# -# # Change the domain axis size -# dim = out.get_data_axes()[axis] -# out.set_construct(DomainAxis(size=new_data.shape[axis]), key=dim) -# -# # Insert the concatenated data -# out.set_data(new_data, set_axes=False, copy=False) -# -# # ------------------------------------------------------------ -# # Concatenate constructs with data -# # ------------------------------------------------------------ -# for key, construct in field0.constructs.filter_by_data( -# todict=True -# ).items(): -# construct_axes = field0.get_data_axes(key) -# -# if dim not in construct_axes: -# # This construct does not span the concatenating axis -# # in the first field -# continue -# -# constructs = [construct] -# for f in fields[1:]: -# c = f.constructs.get(key) -# if c is None: -# # This field does not have this construct -# constructs = None -# break -# -# constructs.append(c) -# -# if not constructs: -# # Not every field has this construct, so remove it -# # from the output field. -# out.del_construct(key) -# continue -# -# # Still here? Then try concatenating the constructs from -# # each field. 
-# try: -# construct = construct.concatenate( -# constructs, -# axis=construct_axes.index(dim), -# cull_graph=cull_graph, -# relaxed_units=relaxed_units, -# copy=copy, -# ) -# except ValueError: -# # Couldn't concatenate this construct, so remove it from -# # the output field. -# out.del_construct(key) -# else: -# # Successfully concatenated this construct, so insert -# # it into the output field. -# out.set_construct( -# construct, key=key, axes=construct_axes, copy=False -# ) -# -# return out - def weights( self, weights=True, @@ -8717,91 +8589,6 @@ def _update_cell_methods( f" Modified cell methods = {self.cell_methods()}" ) # pragma: no cover -# @_inplace_enabled(default=False) -# def insert_dimension( -# self, axis, position=0, constructs=False, inplace=False -# ): -# """Insert a size 1 axis into the data array. -# -# .. versionadded:: 3.0.0 -# -# .. seealso:: `domain_axis`, `flatten`, `flip`, `squeeze`, -# `transpose`, `unsqueeze` -# -# :Parameters: -# -# axis: -# Select the domain axis to insert, generally defined by that -# which would be selected by passing the given axis description -# to a call of the field construct's `domain_axis` method. For -# example, for a value of ``'X'``, the domain axis construct -# returned by ``f.domain_axis('X')`` is selected. -# -# If *axis* is `None` then a new domain axis construct will -# created for the inserted dimension. -# -# position: `int`, optional -# Specify the position that the new axis will have in the -# data array. By default the new axis has position 0, the -# slowest varying position. -# -# constructs: `bool`, optional -# If True then also insert the new axis into all -# metadata constructs that don't already include it. By -# default, metadata constructs are not changed. -# -# .. versionadded:: 3.16.1 -# -# {{inplace: `bool`, optional}} -# -# :Returns: -# -# `Field` or `None` -# The field construct with expanded data, or `None` if the -# operation was in-place. 
-# -# **Examples** -# -# >>> f = cf.example_field(0) -# >>> print(f) -# Field: specific_humidity (ncvar%q) -# ---------------------------------- -# Data : specific_humidity(latitude(5), longitude(8)) 1 -# Cell methods : area: mean -# Dimension coords: latitude(5) = [-75.0, ..., 75.0] degrees_north -# : longitude(8) = [22.5, ..., 337.5] degrees_east -# : time(1) = [2019-01-01 00:00:00] -# >>> g = f.insert_dimension('T', 0) -# >>> print(g) -# Field: specific_humidity (ncvar%q) -# ---------------------------------- -# Data : specific_humidity(time(1), latitude(5), longitude(8)) 1 -# Cell methods : area: mean -# Dimension coords: latitude(5) = [-75.0, ..., 75.0] degrees_north -# : longitude(8) = [22.5, ..., 337.5] degrees_east -# : time(1) = [2019-01-01 00:00:00] -# -# A previously non-existent size 1 axis must be created prior to -# insertion: -# -# >>> f.insert_dimension(None, 1, inplace=True) -# >>> print(f) -# Field: specific_humidity (ncvar%q) -# ---------------------------------- -# Data : specific_humidity(time(1), key%domainaxis3(1), latitude(5), longitude(8)) 1 -# Cell methods : area: mean -# Dimension coords: latitude(5) = [-75.0, ..., 75.0] degrees_north -# : longitude(8) = [22.5, ..., 337.5] degrees_east -# : time(1) = [2019-01-01 00:00:00] -# -# """ -# return super().insert_dimension( -# axis=axis, -# position=position, -# constructs=constructs, -# inplace=inplace, -# ) - def indices(self, *config, **kwargs): """Create indices that define a subspace of the field construct. @@ -10955,78 +10742,6 @@ def argmin(self, axis=None, unravel=False): return self.data.argmin(axis=axis, unravel=unravel) -# @_deprecated_kwarg_check("i", version="3.0.0", removed_at="4.0.0") -# def squeeze(self, axes=None, inplace=False, i=False, **kwargs): -# """Remove size 1 axes from the data. -# -# By default all size 1 axes are removed, but particular size 1 axes -# may be selected for removal. 
-# -# Squeezed domain axis constructs are not removed from the metadata -# constructs, nor from the domain of the field construct. -# -# .. seealso:: `domain_axis`, `flatten`, `insert_dimension`, `flip`, -# `remove_axes`, `transpose`, `unsqueeze` -# -# :Parameters: -# -# axes: (sequence of) `str` or `int`, optional -# Select the domain axes to squeeze, defined by the domain -# axes that would be selected by passing each given axis -# description to a call of the field construct's -# `domain_axis` method. For example, for a value of ``'X'``, -# the domain axis construct returned by -# ``f.domain_axis('X')`` is selected. -# -# If no axes are provided then all size 1 axes are squeezed. -# -# {{inplace: `bool`, optional}} -# -# {{i: deprecated at version 3.0.0}} -# -# kwargs: deprecated at version 3.0.0 -# -# :Returns: -# -# `Field` or `None` -# The field construct with squeezed data, or `None` if the -# operation was in-place. -# -# **Examples** -# -# >>> g = f.squeeze() -# >>> g = f.squeeze('time') -# >>> g = f.squeeze(1) -# >>> g = f.squeeze(['time', 1, 'dim2']) -# >>> f.squeeze(['dim2'], inplace=True) -# -# """ -# if kwargs: -# _DEPRECATION_ERROR_KWARGS( -# self, "squeeze", kwargs, version="3.0.0", removed_at="4.0.0" -# ) # pragma: no cover -# -# data_axes = self.get_data_axes() -# -# if axes is None: -# domain_axes = self.domain_axes(todict=True) -# axes = [ -# axis -# for axis in data_axes -# if domain_axes[axis].get_size(None) == 1 -# ] -# else: -# if isinstance(axes, (str, int)): -# axes = (axes,) -# -# axes = [self.domain_axis(x, key=True) for x in axes] -# axes = set(axes).intersection(data_axes) -# -# iaxes = [data_axes.index(axis) for axis in axes] -# -# # Squeeze the field's data array -# return super().squeeze(iaxes, inplace=inplace) - @_deprecated_kwarg_check("i", version="3.0.0", removed_at="4.0.0") @_inplace_enabled(default=False) def swapaxes(self, axis0, axis1, inplace=False, i=False): @@ -11095,167 +10810,6 @@ def swapaxes(self, axis0, axis1, 
inplace=False, i=False): return f -# @_deprecated_kwarg_check("i", version="3.0.0", removed_at="4.0.0") -# def transpose( -# self, -# axes=None, -# constructs=False, -# inplace=False, -# items=True, -# i=False, -# **kwargs, -# ): -# """Permute the axes of the data array. -# -# By default the order of the axes is reversed, but any ordering may -# be specified by selecting the axes of the output in the required -# order. -# -# By default metadata constructs are not transposed, but they may be -# if the *constructs* parameter is set. -# -# .. seealso:: `domain_axis`, `flatten`, `insert_dimension`, `flip`, -# `squeeze`, `unsqueeze` -# -# :Parameters: -# -# axes: (sequence of) `str` or `int`, optional -# Select the domain axis order, defined by the domain axes -# that would be selected by passing each given axis -# description to a call of the field construct's -# `domain_axis` method. For example, for a value of ``'X'``, -# the domain axis construct returned by -# ``f.domain_axis('X')`` is selected. -# -# Each dimension of the field construct's data must be -# provided, or if no axes are specified then the axis order -# is reversed. -# -# constructs: `bool`, optional -# If True then metadata constructs are also transposed so -# that their axes are in the same relative order as in the -# transposed data array of the field. By default metadata -# constructs are not altered. -# -# {{inplace: `bool`, optional}} -# -# items: deprecated at version 3.0.0 -# Use the *constructs* parameter instead. -# -# {{i: deprecated at version 3.0.0}} -# -# kwargs: deprecated at version 3.0.0 -# -# :Returns: -# -# `Field` or `None` -# The field construct with transposed data, or `None` if the -# operation was in-place. 
-# -# **Examples** -# -# >>> f.ndim -# 3 -# >>> g = f.transpose() -# >>> g = f.transpose(['time', 1, 'dim2']) -# >>> f.transpose(['time', -2, 'dim2'], inplace=True) -# -# """ -# if not items: -# _DEPRECATION_ERROR_KWARGS( -# self, -# "transpose", -# {"items": items}, -# "Use keyword 'constructs' instead.", -# version="3.0.0", -# removed_at="4.0.0", -# ) # pragma: no cover -# -# if kwargs: -# _DEPRECATION_ERROR_KWARGS( -# self, "transpose", kwargs, version="3.0.0", removed_at="4.0.0" -# ) # pragma: no cover -# -# if axes is None: -# iaxes = list(range(self.ndim - 1, -1, -1)) -# else: -# data_axes = self.get_data_axes(default=()) -# if isinstance(axes, (str, int)): -# axes = (axes,) -# -# axes2 = [self.domain_axis(x, key=True) for x in axes] -# -# if sorted(axes2) != sorted(data_axes): -# raise ValueError( -# f"Can't transpose {self.__class__.__name__}: " -# f"Bad axis specification: {axes!r}" -# ) -# -# iaxes = [data_axes.index(axis) for axis in axes2] -# -# # Transpose the field's data array -# return super().transpose(iaxes, constructs=constructs, inplace=inplace) - - @_deprecated_kwarg_check("i", version="3.0.0", removed_at="4.0.0") - @_inplace_enabled(default=False) - def unsqueeze(self, inplace=False, i=False, axes=None, **kwargs): - """Insert size 1 axes into the data array. - - All size 1 domain axes which are not spanned by the field - construct's data are inserted. - - The axes are inserted into the slowest varying data array positions. - - .. seealso:: `flatten`, `flip`, `insert_dimension`, `squeeze`, - `transpose` - - :Parameters: - - {{inplace: `bool`, optional}} - - {{i: deprecated at version 3.0.0}} - - axes: deprecated at version 3.0.0 - - kwargs: deprecated at version 3.0.0 - - :Returns: - - `Field` or `None` - The field construct with size-1 axes inserted in its data, - or `None` if the operation was in-place. 
- - **Examples** - - >>> g = f.unsqueeze() - >>> f.unsqueeze(['dim2'], inplace=True) - - """ - if kwargs: - _DEPRECATION_ERROR_KWARGS( - self, "unsqueeze", kwargs, version="3.0.0", removed_at="4.0.0" - ) # pragma: no cover - - if axes is not None: - _DEPRECATION_ERROR_KWARGS( - self, - "unsqueeze", - {"axes": axes}, - "All size one domain axes missing from the data are " - "inserted. Use method 'insert_dimension' to insert an " - "individual size one domain axis.", - version="3.0.0", - removed_at="4.0.0", - ) # pragma: no cover - - f = _inplace_enabled_define_and_cleanup(self) - - size_1_axes = self.domain_axes(filter_by_size=(1,), todict=True) - for axis in set(size_1_axes).difference(self.get_data_axes()): - f.insert_dimension(axis, position=0, inplace=True) - - return f - def domain_axis_position(self, *identity, **filter_kwargs): """Return the position in the data of a domain axis construct. diff --git a/cf/mixin/propertiesdata.py b/cf/mixin/propertiesdata.py index 1205844e85..5147ba53fe 100644 --- a/cf/mixin/propertiesdata.py +++ b/cf/mixin/propertiesdata.py @@ -2594,67 +2594,6 @@ def close(self): removed_at="5.0.0", ) # pragma: no cover -# @classmethod -# def concatenate( -# cls, -# variables, -# axis=0, -# cull_graph=False, -# relaxed_units=False, -# copy=True, -# ): -# """Join a sequence of variables together. -# -# .. seealso:: `Data.cull_graph` -# -# :Parameters: -# -# variables: sequence of constructs. -# -# axis: `int`, optional -# -# {{cull_graph: `bool`, optional}} -# -# .. versionadded:: 3.14.0 -# -# {{relaxed_units: `bool`, optional}} -# -# .. versionadded:: 3.15.1 -# -# copy: `bool`, optional -# If True (the default) then make copies of the -# {{class}} constructs, prior to the concatenation, -# thereby ensuring that the input constructs are not -# changed by the concatenation process. If False then -# some or all input constructs might be changed -# in-place, but the concatenation process will be -# faster. -# -# .. 
versionadded:: 3.15.1 -# -# :Returns: -# -# TODO -# -# """ -# out = variables[0] -# if copy: -# out = out.copy() -# -# if len(variables) == 1: -# return out -# -# data = Data.concatenate( -# [v.get_data(_fill_value=False) for v in variables], -# axis=axis, -# cull_graph=cull_graph, -# relaxed_units=relaxed_units, -# copy=copy, -# ) -# out.set_data(data, copy=False) -# -# return out - @_deprecated_kwarg_check("i", version="3.0.0", removed_at="4.0.0") @_inplace_enabled(default=False) def cos(self, inplace=False, i=False): @@ -3698,7 +3637,7 @@ def flip(self, axes=None, inplace=False, i=False): """Flip (reverse the direction of) data dimensions. .. seealso:: `flatten`, `insert_dimension`, `squeeze`, - `transpose`, `unsqueeze` + `transpose` :Parameters: diff --git a/cf/mixin/propertiesdatabounds.py b/cf/mixin/propertiesdatabounds.py index df4e2f4bf4..eb02a9decc 100644 --- a/cf/mixin/propertiesdatabounds.py +++ b/cf/mixin/propertiesdatabounds.py @@ -1313,87 +1313,6 @@ def close(self): removed_at="5.0.0", ) # pragma: no cover -# @classmethod -# def concatenate( -# cls, -# variables, -# axis=0, -# cull_graph=False, -# relaxed_units=False, -# copy=True, -# ): -# """Join a sequence of variables together. -# -# .. seealso:: `Data.cull_graph` -# -# :Parameters: -# -# variables: sequence of constructs -# -# axis: `int`, optional -# -# {{cull_graph: `bool`, optional}} -# -# .. versionadded:: 3.14.0 -# -# {{relaxed_units: `bool`, optional}} -# -# .. versionadded:: 3.15.1 -# -# copy: `bool`, optional -# If True (the default) then make copies of the -# {{class}} objects, prior to the concatenation, thereby -# ensuring that the input constructs are not changed by -# the concatenation process. If False then some or all -# input constructs might be changed in-place, but the -# concatenation process will be faster. -# -# .. 
versionadded:: 3.15.1 -# -# :Returns: -# -# TODO -# -# """ -# variable0 = variables[0] -# if copy: -# variable0 = variable0.copy() -# -# if len(variables) == 1: -# return variable0 -# -# out = super().concatenate( -# variables, -# axis=axis, -# cull_graph=cull_graph, -# relaxed_units=relaxed_units, -# copy=copy, -# ) -# -# bounds = variable0.get_bounds(None) -# if bounds is not None: -# bounds = bounds.concatenate( -# [v.get_bounds() for v in variables], -# axis=axis, -# cull_graph=cull_graph, -# relaxed_units=relaxed_units, -# copy=copy, -# ) -# out.set_bounds(bounds, copy=False) -# -# interior_ring = variable0.get_interior_ring(None) -# if interior_ring is not None: -# interior_ring = interior_ring.concatenate( -# [v.get_interior_ring() for v in variables], -# axis=axis, -# cull_graph=cull_graph, -# relaxed_units=relaxed_units, -# copy=copy, -# ) -# out.set_interior_ring(interior_ring, copy=False) -# -# return out - @_deprecated_kwarg_check("i", version="3.0.0", removed_at="4.0.0") @_inplace_enabled(default=False) def cos(self, bounds=True, inplace=False, i=False): @@ -2508,8 +2427,7 @@ def halo( def flip(self, axes=None, inplace=False, i=False): """Flip (reverse the direction of) data dimensions. - .. seealso:: `insert_dimension`, `squeeze`, `transpose`, - `unsqueeze` + .. seealso:: `insert_dimension`, `squeeze`, `transpose` :Parameters: @@ -3388,62 +3306,6 @@ def log(self, base=None, bounds=True, inplace=False, i=False): i=i, ) - @_deprecated_kwarg_check("i", version="3.0.0", removed_at="4.0.0") - def squeeze(self, axes=None, inplace=False, i=False): - """Remove size one axes from the data array. - - By default all size one axes are removed, but particular size one - axes may be selected for removal. Corresponding axes are also - removed from the bounds data array, if present. - - .. seealso:: `flip`, `insert_dimension`, `transpose` - - :Parameters: - - axes: (sequence of) `int` - The positions of the size one axes to be removed. 
By - default all size one axes are removed. Each axis is - identified by its original integer position. Negative - integers counting from the last position are allowed. - - *Parameter example:* - ``axes=0`` - - *Parameter example:* - ``axes=-2`` - - *Parameter example:* - ``axes=[2, 0]`` - - {{inplace: `bool`, optional}} - - {{i: deprecated at version 3.0.0}} - - :Returns: - - `{{class}}` or `None` - The new construct with removed data axes. If the operation - was in-place then `None` is returned. - - **Examples** - - >>> f.shape - (1, 73, 1, 96) - >>> f.squeeze().shape - (73, 96) - >>> f.squeeze(0).shape - (73, 1, 96) - >>> g = f.squeeze([-3, 2]) - >>> g.shape - (73, 96) - >>> f.bounds.shape - (1, 73, 1, 96, 4) - >>> g.shape - (73, 96, 4) - - """ - return super().squeeze(axes=axes, inplace=inplace) - @_deprecated_kwarg_check("i", version="3.0.0", removed_at="4.0.0") @_inplace_enabled(default=False) def trunc(self, bounds=True, inplace=False, i=False): @@ -3492,68 +3354,6 @@ def trunc(self, bounds=True, inplace=False, i=False): i=i, ) - # def identities(self, generator=False): - # """Return all possible identities. - # - # The identities comprise: - # - # * The "standard_name" property. - # * The "id" attribute, preceded by ``'id%'``. - # * The "cf_role" property, preceded by ``'cf_role='``. - # * The "axis" property, preceded by ``'axis='``. - # * The "long_name" property, preceded by ``'long_name='``. - # * All other properties (including "standard_name"), preceded by - # the property name and an ``'='``. - # * The coordinate type (``'X'``, ``'Y'``, ``'Z'`` or ``'T'``). - # * The netCDF variable name, preceded by ``'ncvar%'``. - # - # The identities of the bounds, if present, are included (with the - # exception of the bounds netCDF variable name). - # - # .. versionadded:: 3.0.0 - # - # .. seealso:: `id`, `identity` - # ODO - # :Returns: - # - # `list` - # The identities. 
- # - # **Examples** - # - # >>> f.properties() - # {'foo': 'bar', - # 'long_name': 'Air Temperature', - # 'standard_name': 'air_temperature'} - # >>> f.nc_get_variable() - # 'tas' - # >>> f.identities() - # ['air_temperature', - # 'long_name=Air Temperature', - # 'foo=bar', - # 'standard_name=air_temperature', - # 'ncvar%tas'] - # - # >>> f.properties() - # {} - # >>> f.bounds.properties() - # {'axis': 'Z', - # 'units': 'm'} - # >>> f.identities() - # ['axis=Z', 'units=m', 'ncvar%z'] - # - # """ - # identities = super().identities() - # - # bounds = self.get_bounds(None) - # if bounds is not None: - # identities.extend( - # [i for i in bounds.identities() if i not in identities] - # ) - # # TODO ncvar AND? - # - # return identities - @_deprecated_kwarg_check( "relaxed_identity", version="3.0.0", removed_at="4.0.0" ) @@ -3947,7 +3747,8 @@ def round(self, decimals=0, bounds=True, inplace=False, i=False): def roll(self, iaxis, shift, inplace=False, i=False): """Roll the data along an axis. - .. seealso:: `insert_dimension`, `flip`, `squeeze`, `transpose` + .. seealso:: `insert_dimension`, `flip`, `squeeze`, + `transpose` :Parameters: diff --git a/cf/read_write/read.py b/cf/read_write/read.py index 9609e70e9b..3ec8892b58 100644 --- a/cf/read_write/read.py +++ b/cf/read_write/read.py @@ -20,14 +20,6 @@ _cached_temporary_files = {} -# -------------------------------------------------------------------- -# Create an implementation container and initialise a read object for -# each format -# -------------------------------------------------------------------- -# _implementation = implementation() -# netcdf = NetCDFRead(_implementation) -# UM = UMRead(_implementation) - logger = logging.getLogger(__name__) @@ -327,13 +319,9 @@ class read(cfdm.read): If *aggregate* is False then the field constructs are not aggregated. - squeeze: `bool`, optional - If True then remove size 1 axes from each field construct's - data array. 
+ {{read squeeze: `bool`, optional}} - unsqueeze: `bool`, optional - If True then insert size 1 axes from each field - construct's domain into its data array. + {{read unsqueeze: `bool`, optional}} select: (sequence of) `str` or `Query` or `re.Pattern`, optional Only return field constructs whose identities match the @@ -593,8 +581,9 @@ def __new__( "cdl_string can only be True when the format is CDL, though " "fmt is ignored in that case so there is no need to set it." ) - if squeeze and unsqueeze: - raise ValueError("squeeze and unsqueeze can not both be True") + + # if squeeze and unsqueeze: + # raise ValueError("squeeze and unsqueeze can not both be True") if follow_symlinks and not recursive: raise ValueError( f"Can't set follow_symlinks={follow_symlinks!r} " @@ -743,6 +732,8 @@ def __new__( netcdf_backend=netcdf_backend, storage_options=storage_options, cache=cache, + squeeze=squeeze, + unsqueeze=unsqueeze, ) # -------------------------------------------------------- @@ -803,25 +794,6 @@ def __new__( if select and "UM" in ftypes: out = out.select_by_identity(*select) - # ---------------------------------------------------------------- - # Squeeze size one dimensions from the data arrays. Do one of: - # - # 1) Squeeze the fields, i.e. remove all size one dimensions from - # all field data arrays - # - # 2) Unsqueeze the fields, i.e. Include all size 1 domain - # dimensions in the data array. - # - # 3) Nothing - # ---------------------------------------------------------------- - if not domain: - if squeeze: - for f in out: - f.squeeze(inplace=True) - elif unsqueeze: - for f in out: - f.unsqueeze(inplace=True) - if nfields is not None and len(out) != nfields: raise ValueError( f"{nfields} field{cls._plural(nfields)} requested but " @@ -865,6 +837,8 @@ def _read_a_file( netcdf_backend=None, storage_options=None, cache=True, + squeeze=False, + unsqueeze=False, ): """Read the contents of a single file into a field list. @@ -918,6 +892,19 @@ def _read_a_file( .. 
versionadded:: NEXTVERSION + squeeze: `bool`, optional + Whether or not to remove all size 1 axes from field + construct data arrays. See `cf.read` for details. + + .. versionadded:: NEXTVERSION + + unsqueeze: `bool`, optional + Whether or not to ensure that all size 1 axes are + spanned by field construct data arrays. See + `cf.read` for details. + + .. versionadded:: NEXTVERSION + :Returns: `FieldList` or `DomainList` @@ -999,6 +986,8 @@ def _read_a_file( cfa=cfa, cfa_write=cfa_write, to_memory=to_memory, + squeeze=squeeze, + unsqueeze=unsqueeze, ) elif ftype == "UM" and extra_read_vars["fmt"] in (None, "UM"): if domain: @@ -1016,6 +1005,8 @@ def _read_a_file( word_size=word_size, endian=endian, select=select, + squeeze=squeeze, + unsqueeze=unsqueeze, ) # PP fields are aggregated intrafile prior to interfile diff --git a/cf/read_write/um/umread.py b/cf/read_write/um/umread.py index 98c8fbd630..f3e38f9edd 100644 --- a/cf/read_write/um/umread.py +++ b/cf/read_write/um/umread.py @@ -491,6 +491,8 @@ def __init__( implementation=None, select=None, info=False, + squeeze=False, + unsqueeze=False, **kwargs, ): """**Initialisation** @@ -545,6 +547,9 @@ def __init__( return field construct. """ + if squeeze and unsqueeze: + raise ValueError("'squeeze' and 'unsqueeze' can not both be True") + self._bool = False self.info = info @@ -1114,6 +1119,16 @@ def __init__( self.fields.append(field) + # ------------------------------------------------------------ + # Squeeze/unsqueeze size 1 axes in field constructs + # ------------------------------------------------------------ + if unsqueeze: + for f in self.fields: + f.unsqueeze(inplace=True) + elif squeeze: + for f in self.fields: + f.squeeze(inplace=True) + self._bool = True def __bool__(self): @@ -3355,6 +3370,8 @@ def read( chunk=True, verbose=None, select=None, + squeeze=False, + unsqueeze=False, ): """Read fields from a PP file or UM fields file. 
@@ -3404,18 +3421,21 @@ def read( set_standard_name: `bool`, optional - select: (sequence of) `str` or `Query` or `re.Pattern`, optional - Only return field constructs whose identities match the - given values(s), i.e. those fields ``f`` for which - ``f.match_by_identity(*select)`` is `True`. See - `cf.Field.match_by_identity` for details. - - This is equivalent to, but faster than, not using the - *select* parameter but applying its value to the returned - field list with its `cf.FieldList.select_by_identity` - method. For example, ``fl = cf.read(file, - select='stash_code=3236')`` is equivalent to ``fl = - cf.read(file).select_by_identity('stash_code=3236')``. + select: (sequence of) `str` or `Query` or `re.Pattern`, optional + Only return field constructs whose identities match + the given values(s), i.e. those fields ``f`` for which + ``f.match_by_identity(*select)`` is `True`. See + `cf.Field.match_by_identity` for details. + + This is equivalent to, but faster than, not using the + *select* parameter but applying its value to the + returned field list with its + `cf.FieldList.select_by_identity` method. For example, + ``fl = cf.read(file, select='stash_code=3236')`` is + equivalent to ``fl = + cf.read(file).select_by_identity('stash_code=3236')``. 
+ + TODOCFA (squeeuze) :Returns: diff --git a/cf/test/create_test_files.py b/cf/test/create_test_files.py index d56f15d76b..2a0745294a 100644 --- a/cf/test/create_test_files.py +++ b/cf/test/create_test_files.py @@ -7,9 +7,8 @@ faulthandler.enable() # to debug seg faults and timeouts -import netCDF4 - import cfdm +import netCDF4 VN = cfdm.CF() diff --git a/cf/test/test_CFA.py b/cf/test/test_CFA.py index c06fe304c5..ee3cd11efa 100644 --- a/cf/test/test_CFA.py +++ b/cf/test/test_CFA.py @@ -10,9 +10,10 @@ faulthandler.enable() # to debug seg faults and timeouts -import cf from cfdm.read_write.netcdf.netcdfwrite import AggregationError +import cf + n_tmpfiles = 5 tmpfiles = [ tempfile.mkstemp("_test_CFA.nc", dir=os.getcwd())[1] From 6545ae029dc690a556cfc60b2699cac34f5a14be Mon Sep 17 00:00:00 2001 From: David Hassell Date: Sun, 1 Dec 2024 13:26:22 +0000 Subject: [PATCH 30/51] dev --- cf/functions.py | 49 +++++++------------------------------------------ 1 file changed, 7 insertions(+), 42 deletions(-) diff --git a/cf/functions.py b/cf/functions.py index 15681d169f..fa66fb2677 100644 --- a/cf/functions.py +++ b/cf/functions.py @@ -2695,48 +2695,11 @@ def flat(x): yield a -def abspath(filename): - """Return a normalized absolute version of a file name. +def abspath(path, uri=None): + return cfdm.abspath(path, uri=uri) - If `None` or a string containing URL is provided then it is - returned unchanged. - .. seealso:: `cf.dirname`, `cf.pathjoin`, `cf.relpath` - - :Parameters: - - filename: `str` or `None` - The name of the file, or `None` - - :Returns: - - `str` - - The normalized absolutised version of *filename*, or - `None`. 
- - **Examples** - - >>> import os - >>> os.getcwd() - '/data/archive' - >>> cf.abspath('file.nc') - '/data/archive/file.nc' - >>> cf.abspath('..//archive///file.nc') - '/data/archive/file.nc' - >>> cf.abspath('http://data/archive/file.nc') - 'http://data/archive/file.nc' - - """ - u = urlparse(filename) - scheme = u.scheme - if not scheme: - return _os_path_abspath(filename) - - if scheme == "file": - return u.path - - return filename +abspath.__doc__ = cfdm.abspath.__doc__.replace("cfdm.", "cf.") def relpath(filename, start=None): @@ -2783,8 +2746,10 @@ def relpath(filename, start=None): return _os_path_relpath(filename) -def dirname(path, isdir=False): - return cfdm.dirname(path, isdir=isdir) +def dirname(path, normalise=False, uri=None, isdir=False, sep=False): + return cfdm.dirname( + path, normalise=normalise, uri=uri, isdir=isdir, sep=sep + ) dirname.__doc__ = cfdm.dirname.__doc__.replace("cfdm.", "cf.") From dc671611272ffb7be9ba4090a7340c3c481a2abc Mon Sep 17 00:00:00 2001 From: David Hassell Date: Tue, 3 Dec 2024 15:36:57 +0000 Subject: [PATCH 31/51] dev --- cf/data/data.py | 687 +++++++++++++++++----------------- cf/read_write/read.py | 23 +- cf/read_write/um/umread.py | 38 +- cf/test/setup_create_field.py | 24 +- cf/test/test_Data.py | 3 +- cf/test/test_read_write.py | 12 +- 6 files changed, 413 insertions(+), 374 deletions(-) diff --git a/cf/data/data.py b/cf/data/data.py index c6d41def18..e7d4313d1f 100644 --- a/cf/data/data.py +++ b/cf/data/data.py @@ -136,217 +136,217 @@ def __new__(cls, *args, **kwargs): instance._Units_class = Units return instance - def __init__( - self, - array=None, - units=None, - calendar=None, - fill_value=None, - hardmask=True, - chunks="auto", - dt=False, - source=None, - copy=True, - dtype=None, - mask=None, - mask_value=None, - to_memory=False, - init_options=None, - _use_array=True, - ): - """**Initialisation** - - :Parameters: - - array: optional - The array of values. 
May be a scalar or array-like - object, including another `{{class}}` instance, anything - with a `!to_dask_array` method, `numpy` array, `dask` - array, `xarray` array, `cf.Array` subclass, `list`, - `tuple`, scalar. - - *Parameter example:* - ``array=34.6`` - - *Parameter example:* - ``array=[[1, 2], [3, 4]]`` - - *Parameter example:* - ``array=numpy.ma.arange(10).reshape(2, 1, 5)`` - - units: `str` or `Units`, optional - The physical units of the data. if a `Units` object is - provided then this an also set the calendar. - - The units (without the calendar) may also be set after - initialisation with the `set_units` method. - - *Parameter example:* - ``units='km hr-1'`` - - *Parameter example:* - ``units='days since 2018-12-01'`` - - calendar: `str`, optional - The calendar for reference time units. - - The calendar may also be set after initialisation with the - `set_calendar` method. - - *Parameter example:* - ``calendar='360_day'`` - - fill_value: optional - The fill value of the data. By default, or if set to - `None`, the `numpy` fill value appropriate to the array's - data-type will be used (see - `numpy.ma.default_fill_value`). - - The fill value may also be set after initialisation with - the `set_fill_value` method. - - *Parameter example:* - ``fill_value=-999.`` - - dtype: data-type, optional - The desired data-type for the data. By default the - data-type will be inferred form the *array* - parameter. - - The data-type may also be set after initialisation with - the `dtype` attribute. - - *Parameter example:* - ``dtype=float`` - - *Parameter example:* - ``dtype='float32'`` - - *Parameter example:* - ``dtype=numpy.dtype('i2')`` - - .. versionadded:: 3.0.4 - - mask: optional - Apply this mask to the data given by the *array* - parameter. By default, or if *mask* is `None`, no mask - is applied. May be any scalar or array-like object - (such as a `list`, `numpy` array or `{{class}}` instance) - that is broadcastable to the shape of *array*. 
Masking - will be carried out where the mask elements evaluate - to `True`. - - This mask will applied in addition to any mask already - defined by the *array* parameter. - - mask_value: scalar array_like - Mask *array* where it is equal to *mask_value*, using - numerically tolerant floating point equality. - - .. versionadded:: (cfdm) 1.11.0.0 - - hardmask: `bool`, optional - If True (the default) then the mask is hard. If False - then the mask is soft. - - dt: `bool`, optional - If True then strings (such as ``'1990-12-01 12:00'``) - given by the *array* parameter are re-interpreted as - date-time objects. By default they are not. - - {{init source: optional}} - - {{init copy: `bool`, optional}} - - {{chunks: `int`, `tuple`, `dict` or `str`, optional}} - - .. versionadded:: (cfdm) NEXTVERSION - - to_memory: `bool`, optional - If True then ensure that the original data are in - memory, rather than on disk. - - If the original data are on disk, then reading data - into memory during initialisation will slow down the - initialisation process, but can considerably improve - downstream performance by avoiding the need for - independent reads for every dask chunk, each time the - data are computed. - - In general, setting *to_memory* to True is not the same - as calling the `persist` of the newly created `{{class}}` - object, which also decompresses data compressed by - convention and computes any data type, mask and - date-time modifications. - - If the input *array* is a `dask.array.Array` object - then *to_memory* is ignored. - - .. versionadded:: (cfdm) NEXTVERSION - - init_options: `dict`, optional - Provide optional keyword arguments to methods and - functions called during the initialisation process. A - dictionary key identifies a method or function. The - corresponding value is another dictionary whose - key/value pairs are the keyword parameter names and - values to be applied. 
- - Supported keys are: - - * ``'from_array'``: Provide keyword arguments to - the `dask.array.from_array` function. This is used - when initialising data that is not already a dask - array and is not compressed by convention. - - * ``'first_non_missing_value'``: Provide keyword - arguments to the - `cfdm.data.utils.first_non_missing_value` - function. This is used when the input array contains - date-time strings or objects, and may affect - performance. - - *Parameter example:* - ``{'from_array': {'inline_array': True}}`` - - **Examples** - - >>> d = {{package}}.{{class}}(5) - >>> d = {{package}}.{{class}}([1,2,3], units='K') - >>> import numpy - >>> d = {{package}}.{{class}}(numpy.arange(10).reshape(2,5), - ... units='m/s', fill_value=-999) - >>> d = {{package}}.{{class}}('fly') - >>> d = {{package}}.{{class}}(tuple('fly')) - - """ - super().__init__( - array=array, - units=units, - calendar=calendar, - fill_value=fill_value, - hardmask=hardmask, - chunks=chunks, - dt=dt, - source=source, - copy=copy, - dtype=dtype, - mask=mask, - mask_value=mask_value, - to_memory=to_memory, - init_options=init_options, - _use_array=_use_array, - ) - - if source is not None: - try: - deterministic = source.has_deterministic_name() - except AttributeError: - deterministic = False - else: - deterministic = not is_dask_collection(array) - - self._custom["has_deterministic_name"] = deterministic + # def __init__( + # self, + # array=None, + # units=None, + # calendar=None, + # fill_value=None, + # hardmask=True, + # chunks="auto", + # dt=False, + # source=None, + # copy=True, + # dtype=None, + # mask=None, + # mask_value=None, + # to_memory=False, + # init_options=None, + # _use_array=True, + # ): + # """**Initialisation** + # + # :Parameters: + # + # array: optional + # The array of values. 
May be a scalar or array-like + # object, including another `{{class}}` instance, anything + # with a `!to_dask_array` method, `numpy` array, `dask` + # array, `xarray` array, `cf.Array` subclass, `list`, + # `tuple`, scalar. + # + # *Parameter example:* + # ``array=34.6`` + # + # *Parameter example:* + # ``array=[[1, 2], [3, 4]]`` + # + # *Parameter example:* + # ``array=numpy.ma.arange(10).reshape(2, 1, 5)`` + # + # units: `str` or `Units`, optional + # The physical units of the data. if a `Units` object is + # provided then this an also set the calendar. + # + # The units (without the calendar) may also be set after + # initialisation with the `set_units` method. + # + # *Parameter example:* + # ``units='km hr-1'`` + # + # *Parameter example:* + # ``units='days since 2018-12-01'`` + # + # calendar: `str`, optional + # The calendar for reference time units. + # + # The calendar may also be set after initialisation with the + # `set_calendar` method. + # + # *Parameter example:* + # ``calendar='360_day'`` + # + # fill_value: optional + # The fill value of the data. By default, or if set to + # `None`, the `numpy` fill value appropriate to the array's + # data-type will be used (see + # `numpy.ma.default_fill_value`). + # + # The fill value may also be set after initialisation with + # the `set_fill_value` method. + # + # *Parameter example:* + # ``fill_value=-999.`` + # + # dtype: data-type, optional + # The desired data-type for the data. By default the + # data-type will be inferred form the *array* + # parameter. + # + # The data-type may also be set after initialisation with + # the `dtype` attribute. + # + # *Parameter example:* + # ``dtype=float`` + # + # *Parameter example:* + # ``dtype='float32'`` + # + # *Parameter example:* + # ``dtype=numpy.dtype('i2')`` + # + # .. versionadded:: 3.0.4 + # + # mask: optional + # Apply this mask to the data given by the *array* + # parameter. By default, or if *mask* is `None`, no mask + # is applied. 
May be any scalar or array-like object + # (such as a `list`, `numpy` array or `{{class}}` instance) + # that is broadcastable to the shape of *array*. Masking + # will be carried out where the mask elements evaluate + # to `True`. + # + # This mask will applied in addition to any mask already + # defined by the *array* parameter. + # + # mask_value: scalar array_like + # Mask *array* where it is equal to *mask_value*, using + # numerically tolerant floating point equality. + # + # .. versionadded:: (cfdm) 1.11.0.0 + # + # hardmask: `bool`, optional + # If True (the default) then the mask is hard. If False + # then the mask is soft. + # + # dt: `bool`, optional + # If True then strings (such as ``'1990-12-01 12:00'``) + # given by the *array* parameter are re-interpreted as + # date-time objects. By default they are not. + # + # {{init source: optional}} + # + # {{init copy: `bool`, optional}} + # + # {{chunks: `int`, `tuple`, `dict` or `str`, optional}} + # + # .. versionadded:: (cfdm) NEXTVERSION + # + # to_memory: `bool`, optional + # If True then ensure that the original data are in + # memory, rather than on disk. + # + # If the original data are on disk, then reading data + # into memory during initialisation will slow down the + # initialisation process, but can considerably improve + # downstream performance by avoiding the need for + # independent reads for every dask chunk, each time the + # data are computed. + # + # In general, setting *to_memory* to True is not the same + # as calling the `persist` of the newly created `{{class}}` + # object, which also decompresses data compressed by + # convention and computes any data type, mask and + # date-time modifications. + # + # If the input *array* is a `dask.array.Array` object + # then *to_memory* is ignored. + # + # .. versionadded:: (cfdm) NEXTVERSION + # + # init_options: `dict`, optional + # Provide optional keyword arguments to methods and + # functions called during the initialisation process. 
A + # dictionary key identifies a method or function. The + # corresponding value is another dictionary whose + # key/value pairs are the keyword parameter names and + # values to be applied. + # + # Supported keys are: + # + # * ``'from_array'``: Provide keyword arguments to + # the `dask.array.from_array` function. This is used + # when initialising data that is not already a dask + # array and is not compressed by convention. + # + # * ``'first_non_missing_value'``: Provide keyword + # arguments to the + # `cfdm.data.utils.first_non_missing_value` + # function. This is used when the input array contains + # date-time strings or objects, and may affect + # performance. + # + # *Parameter example:* + # ``{'from_array': {'inline_array': True}}`` + # + # **Examples** + # + # >>> d = {{package}}.{{class}}(5) + # >>> d = {{package}}.{{class}}([1,2,3], units='K') + # >>> import numpy + # >>> d = {{package}}.{{class}}(numpy.arange(10).reshape(2,5), + # ... units='m/s', fill_value=-999) + # >>> d = {{package}}.{{class}}('fly') + # >>> d = {{package}}.{{class}}(tuple('fly')) + # + # """ + # super().__init__( + # array=array, + # units=units, + # calendar=calendar, + # fill_value=fill_value, + # hardmask=hardmask, + # chunks=chunks, + # dt=dt, + # source=source, + # copy=copy, + # dtype=dtype, + # mask=mask, + # mask_value=mask_value, + # to_memory=to_memory, + # init_options=init_options, + # _use_array=_use_array, + # ) + # + # if source is not None: + # try: + # deterministic = source.has_deterministic_name() + # except AttributeError: + # deterministic = False + # else: + # deterministic = not is_dask_collection(array) + # + # self._custom["has_deterministic_name"] = deterministic def __contains__(self, value): """Membership test operator ``in`` @@ -700,41 +700,41 @@ def _is_abstract_Array_subclass(self, array): """ return isinstance(array, cfdm.Array) - def _update_deterministic(self, other): - """Update the deterministic name status. - - .. 
versionadded:: 3.15.1 - - .. seealso:: `get_deterministic_name`, - `has_deterministic_name` - - :Parameters: - - other: `bool` or `Data` - If `False` then set the deterministic name status to - `False`. If `True` then do not change the - deterministic name status. If `Data` then set the - deterministic name status to `False` if and only if - *other* has a False deterministic name status. - - :Returns: - - `None` - - """ - if other is False: - self._custom["has_deterministic_name"] = False - return - - if other is True: - return - - custom = self._custom - deterministic = custom["has_deterministic_name"] - if deterministic: - custom["has_deterministic_name"] = ( - deterministic and other._custom["has_deterministic_name"] - ) + # def _update_deterministic(self, other): + # """Update the deterministic name status. + # + # .. versionadded:: 3.15.1 + # + # .. seealso:: `get_deterministic_name`, + # `has_deterministic_name` + # + # :Parameters: + # + # other: `bool` or `Data` + # If `False` then set the deterministic name status to + # `False`. If `True` then do not change the + # deterministic name status. If `Data` then set the + # deterministic name status to `False` if and only if + # *other* has a False deterministic name status. 
+ # + # :Returns: + # + # `None` + # + # """ + # if other is False: + # self._custom["has_deterministic_name"] = False + # return + # + # if other is True: + # return + # + # custom = self._custom + # deterministic = custom["has_deterministic_name"] + # if deterministic: + # custom["has_deterministic_name"] = ( + # deterministic and other._custom["has_deterministic_name"] + # ) @_inplace_enabled(default=False) def diff(self, axis=-1, n=1, inplace=False): @@ -1565,7 +1565,7 @@ def percentile( axes = d._axes d._axes = (new_axis_identifier(axes),) + axes - d._update_deterministic(not is_dask_collection(q)) + d._update_deterministic(q) return d @@ -2482,7 +2482,6 @@ def _binary_operation(cls, data, other, method): d = super()._binary_operation(data0, other, method) d.override_units(new_Units, inplace=True) - d._update_deterministic(other) if inplace: data.__dict__ = d.__dict__ @@ -2498,7 +2497,7 @@ def _parse_indices(self, *args, **kwargs): """ raise NotImplementedError( - "'cf.Data._parse_indices' is not available. " + "'cf.Data._parse_indices' is no longer available. " "Use function 'cf.parse_indices' instead." ) @@ -3158,6 +3157,10 @@ def _concatenate_post_process( in-place. """ + concatenated_data = super()._concatenate_post_process( + concatenated_data, axis, conformed_data + ) + # Manage cyclicity of axes: if join axis was cyclic, it is no # longer. 
axis = concatenated_data._parse_axes(axis)[0] @@ -3168,14 +3171,14 @@ def _concatenate_post_process( ) concatenated_data.cyclic(axes=axis, iscyclic=False) - # Set whether or not the concatenated name is deterministic - deterministic = True - for d in conformed_data: - if not d.has_deterministic_name(): - deterministic = False - break - - concatenated_data._update_deterministic(deterministic) + # # Set whether or not the concatenated name is deterministic + # deterministic = True + # for d in conformed_data: + # if not d.has_deterministic_name(): + # deterministic = False + # break# + # + # concatenated_data._update_deterministic(deterministic) return concatenated_data @@ -3883,68 +3886,68 @@ def convert_reference_time( return d - def get_deterministic_name(self): - """Get the deterministic name for the data. - - If there is a deterministic name then the data array may be - assumed to be 'equal' to that of another `Data` object with - the same deterministic name. This measure of equality is - different to that applied by the `equals` method in that NaN - and inf values are, in effect, always considered equal. - - Note that the opposite is not always true. Two `Data` objects - that are considered equal by their `equals` methods might not - have the same deterministic name. - - An exception is raised if there is no deterministic name. - - .. versionadded:: 3.15.1 - - .. seealso:: `has_deterministic_name` - - :Returns: - - `str` - The deterministic name. 
- - **Examples** - - >>> d = cf.Data([1, 2, 3], 'm') - >>> d.has_deterministic_name() - True - >>> d.get_deterministic_name() - '6380dd3674fbf10d30561484b084e9b3' - >>> d1 = cf.Data([1, 2, 3], 'metre') - >>> d1.get_deterministic_name() - '6380dd3674fbf10d30561484b084e9b3' - >>> d1.get_deterministic_name() == d.get_deterministic_name() - True - >>> d1.equals(d) - True - - >>> e = d + 1 - 1 - >>> e.get_deterministic_name() - '0b83ada62d4b014bae83c3de1c1d3a80' - >>> e.get_deterministic_name() == d.get_deterministic_name() - False - >>> e.equals(d) - True - - """ - if not self.has_deterministic_name(): - raise ValueError() - - units = self._Units - - # The dask graph is never going to be computed, so we can set - # '_force_to_memory=False'. - return tokenize( - self.to_dask_array( - _force_mask_hardness=False, _force_to_memory=False - ).name, - units.formatted(definition=True, names=True), - units._canonical_calendar, - ) + # def get_deterministic_name(self): + # """Get the deterministic name for the data. + # + # If there is a deterministic name then the data array may be + # assumed to be 'equal' to that of another `Data` object with + # the same deterministic name. This measure of equality is + # different to that applied by the `equals` method in that NaN + # and inf values are, in effect, always considered equal. + # + # Note that the opposite is not always true. Two `Data` objects + # that are considered equal by their `equals` methods might not + # have the same deterministic name. + # + # An exception is raised if there is no deterministic name. + # + # .. versionadded:: 3.15.1 + # + # .. seealso:: `has_deterministic_name` + # + # :Returns: + # + # `str` + # The deterministic name. 
+ # + # **Examples** + # + # >>> d = cf.Data([1, 2, 3], 'm') + # >>> d.has_deterministic_name() + # True + # >>> d.get_deterministic_name() + # '6380dd3674fbf10d30561484b084e9b3' + # >>> d1 = cf.Data([1, 2, 3], 'metre') + # >>> d1.get_deterministic_name() + # '6380dd3674fbf10d30561484b084e9b3' + # >>> d1.get_deterministic_name() == d.get_deterministic_name() + # True + # >>> d1.equals(d) + # True + # + # >>> e = d + 1 - 1 + # >>> e.get_deterministic_name() + # '0b83ada62d4b014bae83c3de1c1d3a80' + # >>> e.get_deterministic_name() == d.get_deterministic_name() + # False + # >>> e.equals(d) + # True + # + # """ + # if not self.has_deterministic_name(): + # raise ValueError() + # + # units = self._Units + # + # # The dask graph is never going to be computed, so we can set + # # '_force_to_memory=False'. + # return tokenize( + # self.to_dask_array( + # _force_mask_hardness=False, _force_to_memory=False + # ).name, + # units.formatted(definition=True, names=True), + # units._canonical_calendar, + # ) def set_units(self, value): """Set the units. @@ -5794,28 +5797,28 @@ def halo( return d - def has_deterministic_name(self): - """Whether there is a deterministic name for the data. - - See `get_deterministic_name` for details. - - .. versionadded:: 3.15.1 - - .. seealso:: `get_deterministic_name` - - :Returns: - - `bool` - Whether or not there is a deterministic name. - - **Examples** - - >>> d = cf.Data([1, 2, 3], 'm') - >>> d.has_deterministic_name() - True - - """ - return self._custom.get("has_deterministic_name", False) + # def has_deterministic_name(self): + # """Whether there is a deterministic name for the data. + # + # See `get_deterministic_name` for details. + # + # .. versionadded:: 3.15.1 + # + # .. seealso:: `get_deterministic_name` + # + # :Returns: + # + # `bool` + # Whether or not there is a deterministic name. 
+ # + # **Examples** + # + # >>> d = cf.Data([1, 2, 3], 'm') + # >>> d.has_deterministic_name() + # True + # + # """ + # return self._custom.get("has_deterministic_name", False) def flat(self, ignore_masked=True): """Return a flat iterator over elements of the data array. @@ -6612,7 +6615,7 @@ def isclose(self, y, rtol=None, atol=None): d._set_dask(dx) d.hardmask = self._DEFAULT_HARDMASK d.override_units(_units_None, inplace=True) - d._update_deterministic(not is_dask_collection(y)) + d._update_deterministic(y) return d diff --git a/cf/read_write/read.py b/cf/read_write/read.py index a2ce426d3d..24a47e2e19 100644 --- a/cf/read_write/read.py +++ b/cf/read_write/read.py @@ -370,7 +370,7 @@ class read(cfdm.read): .. versionadded:: 3.11.0 - {{read netcdf_engine: `None` or `str`, optional}} + {{read netcdf_backend: `None` or (sequence of) `str`, optional}} .. versionadded:: NEXTVERSION @@ -573,13 +573,15 @@ def __new__( if fmt == "CDL": if info: logger.info( - "It is not necessary to set the cf.read fmt as 'CDL' when " - "cdl_string is True, since that implies CDL is the format." + "It is not necessary to set the cf.read fmt as " + "'CDL' when cdl_string is True, since that implies " + "CDL is the format." ) # pragma: no cover else: raise ValueError( - "cdl_string can only be True when the format is CDL, though " - "fmt is ignored in that case so there is no need to set it." + "cdl_string can only be True when the format is CDL, " + "though fmt is ignored in that case so there is no " + "need to set it." 
) if follow_symlinks and not recursive: @@ -694,7 +696,8 @@ def __new__( if domain and ftype == "UM": raise ValueError( - f"Can't read PP/UM file {filename} into domain constructs" + f"Can't read PP/UM file {filename} into domain " + "constructs" ) ftypes.add(ftype) @@ -749,8 +752,8 @@ def __new__( if info: logger.info( - f"Read {field_counter} field{cls._plural(field_counter)} from " - f"{file_counter} file{cls._plural(file_counter)}" + f"Read {field_counter} field{cls._plural(field_counter)} " + f"from {file_counter} file{cls._plural(file_counter)}" ) # pragma: no cover # ---------------------------------------------------------------- @@ -764,8 +767,8 @@ def __new__( n = len(out) # pragma: no cover if info: logger.info( - f"{org_len} input field{cls._plural(org_len)} aggregated into " - f"{n} field{cls._plural(n)}" + f"{org_len} input field{cls._plural(org_len)} " + f"aggregated into {n} field{cls._plural(n)}" ) # pragma: no cover # ---------------------------------------------------------------- diff --git a/cf/read_write/um/umread.py b/cf/read_write/um/umread.py index f3e38f9edd..352fbdac1a 100644 --- a/cf/read_write/um/umread.py +++ b/cf/read_write/um/umread.py @@ -542,6 +542,25 @@ def __init__( increasing verbosity, the more description that is printed about the read process. + squeeze: `bool`, optional + If True then remove all size 1 dimensions from field + construct data arrays, regardless of how the data are + stored in the dataset. If False (the default) then the + presence or not of size 1 dimensions is determined by + how the data are stored in its dataset. + + .. versionadded:: NEXTVERSION + + unsqueeze: `bool`, optional + If True then ensure that field construct data arrays + span all of the size 1 dimensions, regardless of how + the data are stored in the dataset. If False (the + default) then the presence or not of size 1 dimensions + is determined by how the data are stored in its + dataset. + + .. 
versionadded:: NEXTVERSION + kwargs: *optional* Keyword arguments providing extra CF properties for each return field construct. @@ -3435,7 +3454,24 @@ def read( equivalent to ``fl = cf.read(file).select_by_identity('stash_code=3236')``. - TODOCFA (squeeuze) + squeeze: `bool`, optional + If True then remove all size 1 dimensions from field + construct data arrays, regardless of how the data are + stored in the dataset. If False (the default) then the + presence or not of size 1 dimensions is determined by + how the data are stored in its dataset. + + .. versionadded:: NEXTVERSION + + unsqueeze: `bool`, optional + If True then ensure that field construct data arrays + span all of the size 1 dimensions, regardless of how + the data are stored in the dataset. If False (the + default) then the presence or not of size 1 dimensions + is determined by how the data are stored in its + dataset. + + .. versionadded:: NEXTVERSION :Returns: diff --git a/cf/test/setup_create_field.py b/cf/test/setup_create_field.py index dfff3bd16b..3e751fe47d 100644 --- a/cf/test/setup_create_field.py +++ b/cf/test/setup_create_field.py @@ -3,7 +3,7 @@ import os import unittest -import numpy +import numpy as np faulthandler.enable() # to debug seg faults and timeouts @@ -17,20 +17,18 @@ class create_fieldTest(unittest.TestCase): def test_create_field(self): # Dimension coordinates - dim1 = cf.DimensionCoordinate( - data=cf.Data(numpy.arange(10.0), "degrees") - ) + dim1 = cf.DimensionCoordinate(data=cf.Data(np.arange(10.0), "degrees")) dim1.standard_name = "grid_latitude" dim0 = cf.DimensionCoordinate( - data=cf.Data(numpy.arange(9.0) + 20, "degrees") + data=cf.Data(np.arange(9.0) + 20, "degrees") ) dim0.standard_name = "grid_longitude" dim0.data[-1] += 5 bounds = cf.Data( - numpy.array( - [dim0.data.array - 0.5, dim0.data.array + 0.5] - ).transpose((1, 0)) + np.array([dim0.data.array - 0.5, dim0.data.array + 0.5]).transpose( + (1, 0) + ) ) bounds[-2, 1] = 30 bounds[-1, :] = [30, 36] @@ -54,7 
+52,7 @@ def test_create_field(self): aux2 = cf.AuxiliaryCoordinate( data=cf.Data( - numpy.arange(-45, 45, dtype="int32").reshape(10, 9), + np.arange(-45, 45, dtype="int32").reshape(10, 9), units="degree_N", ) ) @@ -62,7 +60,7 @@ def test_create_field(self): aux3 = cf.AuxiliaryCoordinate( data=cf.Data( - numpy.arange(60, 150, dtype="int32").reshape(9, 10), + np.arange(60, 150, dtype="int32").reshape(9, 10), units="degreesE", ) ) @@ -70,7 +68,7 @@ def test_create_field(self): aux4 = cf.AuxiliaryCoordinate( data=cf.Data( - numpy.array( + np.array( [ "alpha", "beta", @@ -97,12 +95,12 @@ def test_create_field(self): # Cell measures msr0 = cf.CellMeasure( - data=cf.Data(1 + numpy.arange(90.0).reshape(9, 10) * 1234, "km 2") + data=cf.Data(1 + np.arange(90.0).reshape(9, 10) * 1234, "km 2") ) msr0.measure = "area" # Data - data = cf.Data(numpy.arange(90.0).reshape(10, 9), "m s-1") + data = cf.Data(np.arange(90.0).reshape(10, 9), "m s-1") properties = {"standard_name": "eastward_wind"} diff --git a/cf/test/test_Data.py b/cf/test/test_Data.py index 23f1341257..e007e89b39 100644 --- a/cf/test/test_Data.py +++ b/cf/test/test_Data.py @@ -1490,8 +1490,9 @@ def test_Data__getitem__(self): self.assertTrue(e.equals(f)) # Chained subspaces reading from disk - f = cf.read(self.filename)[0] + f = cf.read(self.filename, netcdf_backend="h5netcdf")[0] d = f.data + a = d[:1, [1, 3, 4], :][:, [True, False, True], ::-2].array b = d.array[:1, [1, 3, 4], :][:, [True, False, True], ::-2] self.assertTrue((a == b).all()) diff --git a/cf/test/test_read_write.py b/cf/test/test_read_write.py index 4c61f3e542..efd64056d3 100644 --- a/cf/test/test_read_write.py +++ b/cf/test/test_read_write.py @@ -242,8 +242,7 @@ def test_read_write_format(self): cf.write(self.f1, tmpfile) f = cf.read(tmpfile)[0] - # TODO: reinstate "CFA" at version > 3.14 - for fmt in self.netcdf_fmts: # + ["CFA"]: + for fmt in self.netcdf_fmts: cf.write(f, tmpfile2, fmt=fmt) g = cf.read(tmpfile2, verbose=0) self.assertEqual(len(g), 
1) @@ -551,8 +550,7 @@ def test_write_netcdf_mode(self): def test_read_write_netCDF4_compress_shuffle(self): f = cf.read(self.filename)[0] - # TODODASK: reinstate "CFA4" at version > 3.14 - for fmt in ("NETCDF4", "NETCDF4_CLASSIC"): # , "CFA4"): + for fmt in ("NETCDF4", "NETCDF4_CLASSIC"): cf.write(f, tmpfile, fmt=fmt, compress=1, shuffle=True) g = cf.read(tmpfile)[0] self.assertTrue( @@ -738,9 +736,9 @@ def test_read_cdl_string(self): f0 = cf.read(cdl_string_1, cdl_string=True, fmt="NETCDF") # If the user forgets the cdl_string=True argument they will - # accidentally attempt to create a file with a very long name of - # the CDL string, which will in most, if not all, cases result in - # an "OSError: [Errno 36] File name too long" error: + # accidentally attempt to create a file with a very long name + # of the CDL string, which will in most, if not all, cases + # result in an "OSError: [Errno 36] File name too long" error: with self.assertRaises(OSError): cf.read(cdl_string_1) From 65bb9f90c823df1fbfaea08c440a258e7032f82b Mon Sep 17 00:00:00 2001 From: David Hassell Date: Wed, 4 Dec 2024 08:55:35 +0000 Subject: [PATCH 32/51] dev --- cf/data/data.py | 232 ------------------------------------------------ 1 file changed, 232 deletions(-) diff --git a/cf/data/data.py b/cf/data/data.py index ecd4899553..ec622b899b 100644 --- a/cf/data/data.py +++ b/cf/data/data.py @@ -251,10 +251,6 @@ def __contains__(self, value): return bool(dx.any()) - def __data__(self): - """Returns a new reference to self.""" - return self - def __getitem__(self, indices): """Return a subspace of the data defined by indices. @@ -4376,73 +4372,6 @@ def clip(self, a_min, a_max, units=None, inplace=False, i=False): d._set_dask(dx) return d - @classmethod - def asdata(cls, d, dtype=None, copy=False): - """Convert the input to a `Data` object. - - If the input *d* has the Data interface (i.e. it has a - `__data__` method), then the output of this method is used as - the returned `Data` object. 
Otherwise, `Data(d)` is returned. - - :Parameters: - - d: data-like - Input data in any form that can be converted to a - `Data` object. This includes `Data` and `Field` - objects, and objects with the Data interface, numpy - arrays and any object which may be converted to a - numpy array. - - dtype: data-type, optional - By default, the data-type is inferred from the input data. - - copy: `bool`, optional - If True and *d* has the Data interface, then a copy of - `d.__data__()` is returned. - - :Returns: - - `Data` - `Data` interpretation of *d*. No copy is performed on the - input if it is already a `Data` object with matching dtype - and *copy* is False. - - **Examples** - - >>> d = cf.Data([1, 2]) - >>> cf.Data.asdata(d) is d - True - >>> d.asdata(d) is d - True - - >>> cf.Data.asdata([1, 2]) - - - >>> cf.Data.asdata(numpy.array([1, 2])) - - - """ - data = getattr(d, "__data__", None) - if data is None: - # d does not have a Data interface - data = cls(d) - if dtype is not None: - data.dtype = dtype - - return data - - # d does have a Data interface - data = data() - if copy: - data = data.copy() - if dtype is not None and np.dtype(dtype) != data.dtype: - data.dtype = dtype - elif dtype is not None and np.dtype(dtype) != data.dtype: - data = data.copy() - data.dtype = dtype - - return data - @classmethod def arctan2(cls, x1, x2): """Element-wise arc tangent of ``x1/x2`` with correct quadrant. @@ -7483,167 +7412,6 @@ def trunc(self, inplace=False, i=False): d._set_dask(dx) return d - @classmethod - def full( - cls, - shape, - fill_value, - dtype=None, - units=None, - calendar=None, - chunks="auto", - ): - """Return a new array of given shape and type, filled with a - fill value. - - .. seealso:: `empty`, `ones`, `zeros` - - :Parameters: - - shape: `int` or `tuple` of `int` - The shape of the new array. e.g. ``(2, 3)`` or ``2``. - - fill_value: scalar - The fill value. - - dtype: data-type - The desired data-type for the array. 
The default, `None`, - means ``np.array(fill_value).dtype``. - - units: `str` or `Units` - The units for the new data array. - - calendar: `str`, optional - The calendar for reference time units. - - {{chunks: `int`, `tuple`, `dict` or `str`, optional}} - - .. versionadded:: 3.14.0 - - :Returns: - - `Data` - Array of *fill_value* with the given shape and data - type. - - **Examples** - - >>> d = cf.Data.full((2, 3), -99) - >>> print(d.array) - [[-99 -99 -99] - [-99 -99 -99]] - - >>> d = cf.Data.full(2, 0.0) - >>> print(d.array) - [0. 0.] - - >>> d = cf.Data.full((2,), 0, dtype=bool) - >>> print(d.array) - [False False] - - """ - if dtype is None: - # Need to explicitly set the default because dtype is not - # a named keyword of da.full - dtype = getattr(fill_value, "dtype", None) - if dtype is None: - dtype = np.array(fill_value).dtype - - dx = da.full(shape, fill_value, dtype=dtype, chunks=chunks) - return cls(dx, units=units, calendar=calendar) - - @classmethod - def ones(cls, shape, dtype=None, units=None, calendar=None, chunks="auto"): - """Returns a new array filled with ones of set shape and type. - - .. seealso:: `empty`, `full`, `zeros` - - :Parameters: - - shape: `int` or `tuple` of `int` - The shape of the new array. e.g. ``(2, 3)`` or ``2``. - - dtype: data-type - The desired data-type for the array, e.g. - `numpy.int8`. The default is `numpy.float64`. - - units: `str` or `Units` - The units for the new data array. - - calendar: `str`, optional - The calendar for reference time units. - - {{chunks: `int`, `tuple`, `dict` or `str`, optional}} - - .. versionadded:: 3.14.0 - - :Returns: - - `Data` - Array of ones with the given shape and data type. - - **Examples** - - >>> d = cf.Data.ones((2, 3)) - >>> print(d.array) - [[1. 1. 1.] - [1. 1. 
1.]] - - >>> d = cf.Data.ones((2,), dtype=bool) - >>> print(d.array) - [ True True] - - """ - dx = da.ones(shape, dtype=dtype, chunks=chunks) - return cls(dx, units=units, calendar=calendar) - - @classmethod - def zeros( - cls, shape, dtype=None, units=None, calendar=None, chunks="auto" - ): - """Returns a new array filled with zeros of set shape and type. - - .. seealso:: `empty`, `full`, `ones` - - :Parameters: - - shape: `int` or `tuple` of `int` - The shape of the new array. - - dtype: data-type - The data-type of the new array. By default the - data-type is ``float``. - - units: `str` or `Units` - The units for the new data array. - - calendar: `str`, optional - The calendar for reference time units. - - {{chunks: `int`, `tuple`, `dict` or `str`, optional}} - - .. versionadded:: 3.14.0 - - :Returns: - - `Data` - Array of zeros with the given shape and data type. - - **Examples** - - >>> d = cf.Data.zeros((2, 3)) - >>> print(d.array) - [[0. 0. 0.] - [0. 0. 0.]] - - >>> d = cf.Data.zeros((2,), dtype=bool) - >>> print(d.array) - [False False] - - """ - dx = da.zeros(shape, dtype=dtype, chunks=chunks) - return cls(dx, units=units, calendar=calendar) - @_deprecated_kwarg_check("out", version="3.14.0", removed_at="5.0.0") @_deprecated_kwarg_check("i", version="3.0.0", removed_at="4.0.0") @_inplace_enabled(default=False) From fea47c099023d38465963faccb67d6d718438da7 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Wed, 4 Dec 2024 11:55:38 +0000 Subject: [PATCH 33/51] dev --- cf/data/data.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/cf/data/data.py b/cf/data/data.py index ec622b899b..1ec4bd7f94 100644 --- a/cf/data/data.py +++ b/cf/data/data.py @@ -2813,7 +2813,7 @@ def _concatenate_conform_units(cls, data1, units0, relaxed_units, copy): :Parameters: - data1: `{{class}}` + data1: `Data` Data with units. 
units0: `Units` @@ -2827,7 +2827,7 @@ def _concatenate_conform_units(cls, data1, units0, relaxed_units, copy): :Returns: - `{{class}}` + `Data` Returns *data1*, possibly modified so that it conforms to *units0*. If *copy* is False and *data1* is modified, then it is done so in-place. @@ -2874,19 +2874,19 @@ def _concatenate_post_process( :Parameters: - concatenated_data: `{{class}}` + concatenated_data: `Data` The concatenated data array. axis: `int` The axis of concatenation. - conformed_data: sequence of `{{class}}` + conformed_data: sequence of `Data` The ordered sequence of data arrays that were concatenated. :Returns: - `{{class}}` + `Data` Returns *concatenated_data*, possibly modified in-place. From 5a7da0f0c9ca56ab5388991ec846478218efde35 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Fri, 6 Dec 2024 15:59:31 +0000 Subject: [PATCH 34/51] dev --- cf/read_write/read.py | 82 +++++++++++++++++++++-------------------- cf/umread_lib/umfile.py | 4 +- 2 files changed, 45 insertions(+), 41 deletions(-) diff --git a/cf/read_write/read.py b/cf/read_write/read.py index 0a669ccdad..645cc287aa 100644 --- a/cf/read_write/read.py +++ b/cf/read_write/read.py @@ -661,7 +661,7 @@ def __new__( ftype = "UM" else: try: - ftype = cls.file_type(filename) + ftype = cls.file_format(filename) except Exception as error: if not ignore_read_error: raise ValueError(error) @@ -908,38 +908,39 @@ def _read_a_file( if umversion is not None: umversion = float(str(umversion).replace(".", "0", 1)) - extra_read_vars = { - "fmt": selected_fmt, - "ignore_read_error": ignore_read_error, - } +# extra_read_vars = { +# "fmt": selected_fmt, +# "ignore_read_error": ignore_read_error, +# } # ---------------------------------------------------------------- # Still here? Read the file into fields or domains. 
# ---------------------------------------------------------------- - originally_cdl = ftype == "CDL" - if originally_cdl: - # Create a temporary netCDF file from input CDL - ftype = "netCDF" - cdl_filename = filename - filename = cls.netcdf.cdl_to_netcdf(filename) - extra_read_vars["fmt"] = "NETCDF" - - if not cls.netcdf.is_netcdf_file(filename): - error_msg = ( - f"Can't determine format of file {filename} generated " - f"from CDL file {cdl_filename}" - ) - if ignore_read_error: - logger.warning(error_msg) # pragma: no cover - return FieldList() - else: - raise IOError(error_msg) - - if ftype == "netCDF" and extra_read_vars["fmt"] in ( - None, - "NETCDF", - "CFA", - ): + # originally_cdl = ftype == "CDL" + # if originally_cdl: + # # Create a temporary netCDF file from input CDL + # ftype = "netCDF" + # cdl_filename = filename + # filename = cls.netcdf.cdl_to_netcdf(filename) + # extra_read_vars["fmt"] = "NETCDF" + # + # if not cls.netcdf.is_netcdf_file(filename): + # error_msg = ( + # f"Can't determine format of file {filename} generated " + # f"from CDL file {cdl_filename}" + # ) + # if ignore_read_error: + # logger.warning(error_msg) # pragma: no cover + # return FieldList() + # else: + # raise IOError(error_msg) + + if ftype in ("netCDF", "CDL"): # and extra_read_vars["fmt"] in ( +# None, +# "NETCDF", +# "CDL", +# "CFA", +# ): out = super().__new__( cls, filename, @@ -947,7 +948,7 @@ def _read_a_file( extra=extra, verbose=verbose, warnings=warnings, - extra_read_vars=extra_read_vars, +# extra_read_vars=extra_read_vars, mask=mask, unpack=unpack, warn_valid=warn_valid, @@ -963,7 +964,7 @@ def _read_a_file( squeeze=squeeze, unsqueeze=unsqueeze, ) - elif ftype == "UM" and extra_read_vars["fmt"] in (None, "UM"): + elif ftype == "UM": # and extra_read_vars["fmt"] in (None, "UM"): if domain: raise ValueError( "Can't set domain=True when reading UM or PP datasets" @@ -997,7 +998,7 @@ def _read_a_file( return FieldList(out) @classmethod - def file_type(cls, filename): + 
def file_format(cls, filename): """Return the file format. :Parameters: @@ -1013,15 +1014,16 @@ def file_type(cls, filename): **Examples** - >>> file_type(filename) + >>> r.file_format(filename) 'netCDF' """ # ---------------------------------------------------------------- # NetCDF # ---------------------------------------------------------------- - if cls.netcdf.is_netcdf_file(filename): - return "netCDF" + fmt = cls.netcdf.file_format(filename) + if fmt: + return fmt # ---------------------------------------------------------------- # PP or FF @@ -1029,11 +1031,11 @@ def file_type(cls, filename): if cls.um.is_um_file(filename): return "UM" - # ---------------------------------------------------------------- - # CDL - # ---------------------------------------------------------------- - if cls.netcdf.is_cdl_file(filename): - return "CDL" +# # ---------------------------------------------------------------- +# # CDL +# # ---------------------------------------------------------------- +# if cls.netcdf.is_cdl_file(filename): +# return "CDL" # Still here? 
raise IOError(f"Can't determine format of file {filename}") diff --git a/cf/umread_lib/umfile.py b/cf/umread_lib/umfile.py index 94fbb33043..be7581f75f 100644 --- a/cf/umread_lib/umfile.py +++ b/cf/umread_lib/umfile.py @@ -132,7 +132,9 @@ def _detect_file_type(self): file_type_obj = c.detect_file_type(self.fd) except Exception: self.close_fd() - raise IOError(f"File {self.path} has unsupported format") + raise IOError( + f"Could not interpret {self.path} as a PP or UM dataset" + ) d = c.file_type_obj_to_dict(file_type_obj) self.fmt = d["fmt"] From 3ecd63ca7568484422edff70ada45dc1d186e50e Mon Sep 17 00:00:00 2001 From: David Hassell Date: Sat, 7 Dec 2024 00:11:40 +0000 Subject: [PATCH 35/51] dev --- cf/read_write/read.py | 222 ++++++++------------------ cf/read_write/um/umread.py | 17 +- cf/umread_lib/umfile.py | 2 +- docs/source/recipes/plot_17_recipe.py | 4 +- docs/source/recipes/plot_18_recipe.py | 20 ++- docs/source/recipes/plot_20_recipe.py | 6 +- 6 files changed, 99 insertions(+), 172 deletions(-) diff --git a/cf/read_write/read.py b/cf/read_write/read.py index 645cc287aa..5cbd9e9cd9 100644 --- a/cf/read_write/read.py +++ b/cf/read_write/read.py @@ -600,14 +600,6 @@ def __new__( aggregate_options["copy"] = False - # Parse the extra parameter - if extra is None: - extra = () - elif isinstance(extra, str): - extra = (extra,) - - ftypes = set() - # Count the number of fields (in all files) and the number of # files field_counter = -1 @@ -657,39 +649,20 @@ def __new__( if info: logger.info(f"File: {filename}") # pragma: no cover - if um: - ftype = "UM" - else: - try: - ftype = cls.file_format(filename) - except Exception as error: - if not ignore_read_error: - raise ValueError(error) - - logger.warning(f"WARNING: {error}") # pragma: no cover - continue - - if domain and ftype == "UM": - raise ValueError( - f"Can't read PP/UM file {filename} into domain " - "constructs" - ) - - ftypes.add(ftype) - # -------------------------------------------------------- # Read 
the file # -------------------------------------------------------- + ftypes = [None] file_contents = cls._read_a_file( filename, - ftype=ftype, + ftypes=ftypes, external=external, ignore_read_error=ignore_read_error, verbose=verbose, warnings=warnings, aggregate=aggregate, aggregate_options=aggregate_options, - selected_fmt=fmt, + fmt=fmt, um=um, extra=extra, height_at_top_of_model=height_at_top_of_model, @@ -711,9 +684,10 @@ def __new__( ) # -------------------------------------------------------- - # Select matching fields (not from UM files, yet) + # Select matching fields (only from netCDF files at + # this stage - we'll do UM fields later) # -------------------------------------------------------- - if select and ftype != "UM": + if select and ftypes[-1] == "netCDF": file_contents = file_contents.select_by_identity(*select) # -------------------------------------------------------- @@ -787,14 +761,14 @@ def _plural(n): # pragma: no cover def _read_a_file( cls, filename, - ftype=None, + ftypes=None, aggregate=True, aggregate_options=None, ignore_read_error=False, verbose=None, warnings=False, external=None, - selected_fmt=None, + fmt=None, um=None, extra=None, height_at_top_of_model=None, @@ -821,7 +795,7 @@ def _read_a_file( filename: `str` See `cf.read` for details. - ftype: `str` + ftypes: `str` TODOCFA The file format to interpret the file. Recognised formats are ``'netCDF'``, ``'CDL'``, ``'UM'`` and ``'PP'``. @@ -885,62 +859,15 @@ def _read_a_file( The field or domain constructs in the dataset. 
""" - if aggregate_options is None: - aggregate_options = {} - - # Find this file's type - fmt = None - word_size = None - endian = None - height_at_top_of_model = None - umversion = 405 - - if um: - fmt = um.get("fmt") - word_size = um.get("word_size") - endian = um.get("endian") - umversion = um.get("version", umversion) - height_at_top_of_model = um.get("height_at_top_of_model") - - if fmt is not None: - fmt = fmt.upper() - - if umversion is not None: - umversion = float(str(umversion).replace(".", "0", 1)) + if fmt: + if isinstance(fmt, str): + fmt = (fmt,) -# extra_read_vars = { -# "fmt": selected_fmt, -# "ignore_read_error": ignore_read_error, -# } + fmt = set(fmt) - # ---------------------------------------------------------------- - # Still here? Read the file into fields or domains. - # ---------------------------------------------------------------- - # originally_cdl = ftype == "CDL" - # if originally_cdl: - # # Create a temporary netCDF file from input CDL - # ftype = "netCDF" - # cdl_filename = filename - # filename = cls.netcdf.cdl_to_netcdf(filename) - # extra_read_vars["fmt"] = "NETCDF" - # - # if not cls.netcdf.is_netcdf_file(filename): - # error_msg = ( - # f"Can't determine format of file {filename} generated " - # f"from CDL file {cdl_filename}" - # ) - # if ignore_read_error: - # logger.warning(error_msg) # pragma: no cover - # return FieldList() - # else: - # raise IOError(error_msg) - - if ftype in ("netCDF", "CDL"): # and extra_read_vars["fmt"] in ( -# None, -# "NETCDF", -# "CDL", -# "CFA", -# ): + errors = [] + + try: out = super().__new__( cls, filename, @@ -948,7 +875,6 @@ def _read_a_file( extra=extra, verbose=verbose, warnings=warnings, -# extra_read_vars=extra_read_vars, mask=mask, unpack=unpack, warn_valid=warn_valid, @@ -963,79 +889,61 @@ def _read_a_file( to_memory=to_memory, squeeze=squeeze, unsqueeze=unsqueeze, - ) - elif ftype == "UM": # and extra_read_vars["fmt"] in (None, "UM"): - if domain: - raise ValueError( - "Can't set 
domain=True when reading UM or PP datasets" - ) - - out = cls.um.read( - filename, - um_version=umversion, - verbose=verbose, - set_standard_name=False, - height_at_top_of_model=height_at_top_of_model, fmt=fmt, - word_size=word_size, - endian=endian, - select=select, - squeeze=squeeze, - unsqueeze=unsqueeze, + ignore_unknown_format=ignore_read_error, ) + except RuntimeError as error: + if fmt is None or fmt.intersection(("UM",)): + # Set to None to indicate that we should try other + # file formats + errors.append(error) + out = None + else: + raise + else: + if out or not ignore_read_error: + ftypes.append("netCDF") + else: + # Set to None to indicate that we should try other + # file formats + out = None - # PP fields are aggregated intrafile prior to interfile - # aggregation - if aggregate: - # For PP fields, the default is strict_units=False - if "strict_units" not in aggregate_options: - aggregate_options["relaxed_units"] = True + if out is None: + if not um: + um = {} + try: + out = cls.um.read( + filename, + um_version=um.get("version"), + verbose=verbose, + set_standard_name=False, + height_at_top_of_model=height_at_top_of_model, + fmt=um.get("fmt"), + word_size=um.get("word_size"), + endian=um.get("endian"), + select=select, + squeeze=squeeze, + unsqueeze=unsqueeze, + domain=domain, + ) + except Exception as error: + errors.append(error) + errors = '\n'.join(map(str, errors)) + raise RuntimeError(f"\n{errors}") + else: + if out or not ignore_read_error: + ftypes.append("UM") + + # UM fields are aggregated intrafile prior to + # interfile aggregation + if aggregate: + # Set defaults specific to UM fields + if "strict_units" not in aggregate_options: + aggregate_options["relaxed_units"] = True + # Return the fields/domains if domain: return DomainList(out) return FieldList(out) - - @classmethod - def file_format(cls, filename): - """Return the file format. - - :Parameters: - - filename: `str` - The file name. 
- - :Returns: - - `str` - The format type of the file. One of ``'netCDF'``, ``'UM'`` - or ``'CDL'``. - - **Examples** - - >>> r.file_format(filename) - 'netCDF' - - """ - # ---------------------------------------------------------------- - # NetCDF - # ---------------------------------------------------------------- - fmt = cls.netcdf.file_format(filename) - if fmt: - return fmt - - # ---------------------------------------------------------------- - # PP or FF - # ---------------------------------------------------------------- - if cls.um.is_um_file(filename): - return "UM" - -# # ---------------------------------------------------------------- -# # CDL -# # ---------------------------------------------------------------- -# if cls.netcdf.is_cdl_file(filename): -# return "CDL" - - # Still here? - raise IOError(f"Can't determine format of file {filename}") diff --git a/cf/read_write/um/umread.py b/cf/read_write/um/umread.py index 352fbdac1a..be0d126f71 100644 --- a/cf/read_write/um/umread.py +++ b/cf/read_write/um/umread.py @@ -3379,7 +3379,7 @@ class UMRead(cfdm.read_write.IORead): def read( self, filename, - um_version=405, + um_version=None, aggregate=True, endian=None, word_size=None, @@ -3391,6 +3391,7 @@ def read( select=None, squeeze=False, unsqueeze=False, + domain=False, ): """Read fields from a PP file or UM fields file. 
@@ -3484,6 +3485,12 @@ def read( >>> f = read('*/file[0-9].pp', um_version=708) """ + if domain: + raise ValueError( + "Can't read Domain constructs from UM or PP datasets " + "(only Field constructs)" + ) + if not _stash2standard_name: # -------------------------------------------------------- # Create the STASH code to standard_name conversion @@ -3496,6 +3503,14 @@ def read( else: byte_ordering = None + if fmt is not None: + fmt = fmt.upper() + + if um_version is None: + um_version = 405 + else: + um_version = float(str(um_version).replace(".", "0", 1)) + self.read_vars = { "filename": filename, "byte_ordering": byte_ordering, diff --git a/cf/umread_lib/umfile.py b/cf/umread_lib/umfile.py index be7581f75f..9b41ad30f4 100644 --- a/cf/umread_lib/umfile.py +++ b/cf/umread_lib/umfile.py @@ -133,7 +133,7 @@ def _detect_file_type(self): except Exception: self.close_fd() raise IOError( - f"Could not interpret {self.path} as a PP or UM dataset" + f"Can't open file {self.path} as a PP or UM dataset" ) d = c.file_type_obj_to_dict(file_type_obj) diff --git a/docs/source/recipes/plot_17_recipe.py b/docs/source/recipes/plot_17_recipe.py index 0738c62a3a..656f7c8717 100644 --- a/docs/source/recipes/plot_17_recipe.py +++ b/docs/source/recipes/plot_17_recipe.py @@ -91,9 +91,7 @@ if i == 0: set_title = "Perceptually uniform\ncolour maps" elif i == 1: - set_title = ( - "NCL colour maps enhanced to \nhelp with colour blindness" - ) + set_title = "NCL colour maps enhanced to \nhelp with colour blindness" elif i == 2: set_title = "Orography/bathymetry\ncolour maps" else: diff --git a/docs/source/recipes/plot_18_recipe.py b/docs/source/recipes/plot_18_recipe.py index d219bdfe19..3e306906ed 100644 --- a/docs/source/recipes/plot_18_recipe.py +++ b/docs/source/recipes/plot_18_recipe.py @@ -13,11 +13,11 @@ # %% # 1. 
Import cf-python, cf-plot and other required packages: import cfplot as cfp -import cf - import matplotlib.pyplot as plt import scipy.stats.mstats as mstats +import cf + # %% # 2. Read the data in and unpack the Fields from FieldLists using indexing. # In our example We are investigating the influence of the land height on @@ -62,7 +62,7 @@ # unitless fraction, but the values are in the tens, so we need to # normalise these to all lie between 0 and 1 and change the units # appropriately: -sub_snow = ((sub_snow - sub_snow.minimum()) / (sub_snow.range())) +sub_snow = (sub_snow - sub_snow.minimum()) / (sub_snow.range()) sub_snow.override_units("1", inplace=True) # %% @@ -93,7 +93,9 @@ # and its strength visually. We use 'gpos' to position the plots in two # columns and apply some specific axes ticks and labels for clarity. cfp.gopen( - rows=1, columns=2, top=0.85, + rows=1, + columns=2, + top=0.85, file="snow_and_orog_on_same_grid.png", user_position=True, ) @@ -131,10 +133,12 @@ # Don't add extentions on the colourbar since it can only be 0 to 1 inclusive cfp.levs(min=0, max=1, step=0.1, extend="neither") cfp.cscale("precip_11lev", ncols=11, reverse=1) -cfp.con(sub_snow, lines=False, - title="Snow cover extent (from satellite imagery)", - colorbar_drawedges=False, - **label_info +cfp.con( + sub_snow, + lines=False, + title="Snow cover extent (from satellite imagery)", + colorbar_drawedges=False, + **label_info, ) cfp.gclose() diff --git a/docs/source/recipes/plot_20_recipe.py b/docs/source/recipes/plot_20_recipe.py index 11c3250842..1745652afc 100644 --- a/docs/source/recipes/plot_20_recipe.py +++ b/docs/source/recipes/plot_20_recipe.py @@ -10,6 +10,7 @@ # %% # 1. 
Import cf-python and cf-plot: import cfplot as cfp + import cf # %% @@ -81,7 +82,8 @@ cfp.mapset(resolution="10m") cfp.cscale("ncl_default") cfp.gopen( - file=f"irish-sea-currents-divergence-{chosen_time.replace(' ', '-')}.png") + file=f"irish-sea-currents-divergence-{chosen_time.replace(' ', '-')}.png" +) cfp.vect(u=u_2, v=v_2, stride=6, scale=3, key_length=1) cfp.con( div, @@ -89,6 +91,6 @@ title=( f"Depth-averaged Irish Sea currents at {chosen_time} with " "their divergence" - ) + ), ) cfp.gclose() From d6870d9596196c249c339996d7362a4e53ebb827 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Sat, 7 Dec 2024 12:10:13 +0000 Subject: [PATCH 36/51] dev --- cf/read_write/read.py | 102 ++++++++++++++++++++----------------- cf/read_write/um/umread.py | 7 ++- cf/umread_lib/umfile.py | 5 +- 3 files changed, 62 insertions(+), 52 deletions(-) diff --git a/cf/read_write/read.py b/cf/read_write/read.py index 5cbd9e9cd9..b240d78b74 100644 --- a/cf/read_write/read.py +++ b/cf/read_write/read.py @@ -6,6 +6,7 @@ from urllib.parse import urlparse import cfdm +from cfdm.read_write.exceptions import UnknownFileFormatError from cfdm.read_write.netcdf import NetCDFRead from ..aggregate import aggregate as cf_aggregate @@ -864,51 +865,52 @@ def _read_a_file( fmt = (fmt,) fmt = set(fmt) - - errors = [] - - try: - out = super().__new__( - cls, - filename, - external=external, - extra=extra, - verbose=verbose, - warnings=warnings, - mask=mask, - unpack=unpack, - warn_valid=warn_valid, - domain=domain, - storage_options=storage_options, - netcdf_backend=netcdf_backend, - dask_chunks=dask_chunks, - store_hdf5_chunks=store_hdf5_chunks, - cache=cache, - cfa=cfa, - cfa_write=cfa_write, - to_memory=to_memory, - squeeze=squeeze, - unsqueeze=unsqueeze, - fmt=fmt, - ignore_unknown_format=ignore_read_error, - ) - except RuntimeError as error: - if fmt is None or fmt.intersection(("UM",)): - # Set to None to indicate that we should try other - # file formats - errors.append(error) - out = None - 
else: - raise else: - if out or not ignore_read_error: - ftypes.append("netCDF") + fmt = set(("netCDF", "CDL", "UM")) + + file_format_errors = [] + + out = None + if fmt.intersection(("netCDF", "CDL")): + try: + out = super().__new__( + cls, + filename, + external=external, + extra=extra, + verbose=verbose, + warnings=warnings, + mask=mask, + unpack=unpack, + warn_valid=warn_valid, + domain=domain, + storage_options=storage_options, + netcdf_backend=netcdf_backend, + dask_chunks=dask_chunks, + store_hdf5_chunks=store_hdf5_chunks, + cache=cache, + cfa=cfa, + cfa_write=cfa_write, + to_memory=to_memory, + squeeze=squeeze, + unsqueeze=unsqueeze, + fmt=fmt, + ignore_unknown_format=ignore_read_error, + ) + except UnknownFileFormatError as error: + fmt.difference_update(("netCDF", "CDL")) + if fmt: + file_format_errors.append(error) + else: + raise else: - # Set to None to indicate that we should try other - # file formats - out = None + if out or not ignore_read_error: + # Zero or more fields/domains were successfully read + fmt = set() + file_format_errors = () + ftypes.append("netCDF") - if out is None: + if fmt.intersection(("UM",)): if not um: um = {} @@ -927,21 +929,25 @@ def _read_a_file( unsqueeze=unsqueeze, domain=domain, ) - except Exception as error: - errors.append(error) - errors = '\n'.join(map(str, errors)) - raise RuntimeError(f"\n{errors}") + except UnknownFileFormatError as error: + fmt.difference_update(("UM",)) + file_format_errors.append(error) else: if out or not ignore_read_error: + file_format_errors = () ftypes.append("UM") - + # UM fields are aggregated intrafile prior to # interfile aggregation if aggregate: # Set defaults specific to UM fields if "strict_units" not in aggregate_options: aggregate_options["relaxed_units"] = True - + + if file_format_errors: + file_format_errors = "\n".join(map(str, file_format_errors)) + raise UnknownFileFormatError(f"\n{file_format_errors}") + # Return the fields/domains if domain: return DomainList(out) diff 
--git a/cf/read_write/um/umread.py b/cf/read_write/um/umread.py index be0d126f71..058bb52d02 100644 --- a/cf/read_write/um/umread.py +++ b/cf/read_write/um/umread.py @@ -9,6 +9,7 @@ import dask.array as da import numpy as np from cfdm import Constructs, is_log_level_info +from cfdm.read_write.exceptions import UnknownFileFormatError from dask.array.core import getter, normalize_chunks from dask.base import tokenize from netCDF4 import date2num as netCDF4_date2num @@ -3590,13 +3591,15 @@ def _open_um_file( fmt=fmt, parse=parse, ) - except Exception as error: + except Exception: try: f.close_fd() except Exception: pass - raise Exception(error) + raise UnknownFileFormatError( + f"Can't open {filename} as a PP or UM dataset" + ) self._um_file = f return f diff --git a/cf/umread_lib/umfile.py b/cf/umread_lib/umfile.py index 9b41ad30f4..166c059ede 100644 --- a/cf/umread_lib/umfile.py +++ b/cf/umread_lib/umfile.py @@ -2,6 +2,7 @@ from functools import cmp_to_key import numpy +from cfdm.read_write.exceptions import UnknownFileFormatError from . 
import cInterface from .extraData import ExtraDataUnpacker @@ -132,8 +133,8 @@ def _detect_file_type(self): file_type_obj = c.detect_file_type(self.fd) except Exception: self.close_fd() - raise IOError( - f"Can't open file {self.path} as a PP or UM dataset" + raise UnknownFileFormatError( + f"Can't open {self.path} as a PP or UM dataset" ) d = c.file_type_obj_to_dict(file_type_obj) From 03ed6abb7327035f3c9d6975f1e06e6e82f0b6c0 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Sat, 7 Dec 2024 18:16:51 +0000 Subject: [PATCH 37/51] dev --- cf/read_write/read.py | 333 +++++++++++++----------------------------- 1 file changed, 103 insertions(+), 230 deletions(-) diff --git a/cf/read_write/read.py b/cf/read_write/read.py index b240d78b74..c17cd49890 100644 --- a/cf/read_write/read.py +++ b/cf/read_write/read.py @@ -601,6 +601,23 @@ def __new__( aggregate_options["copy"] = False + # ------------------------------------------------------------ + # Parse the 'fmt' keyword parameter + # ------------------------------------------------------------ + if fmt: + if isinstance(fmt, str): + fmt = (fmt,) + + fmt = set(fmt) + else: + fmt = set(("netCDF", "CDL", "UM")) + + # ------------------------------------------------------------ + # Parse the 'um' keyword parameter + # ------------------------------------------------------------ + if not um: + um = {} + # Count the number of fields (in all files) and the number of # files field_counter = -1 @@ -646,49 +663,97 @@ def __new__( files2 = files3 + # How each file was read, as netCDF, or UM, etc. 
+ ftypes = set() + for filename in files2: if info: logger.info(f"File: {filename}") # pragma: no cover - # -------------------------------------------------------- + # ---------------------------------------------------- # Read the file - # -------------------------------------------------------- - ftypes = [None] - file_contents = cls._read_a_file( - filename, - ftypes=ftypes, - external=external, - ignore_read_error=ignore_read_error, - verbose=verbose, - warnings=warnings, - aggregate=aggregate, - aggregate_options=aggregate_options, - fmt=fmt, - um=um, - extra=extra, - height_at_top_of_model=height_at_top_of_model, - dask_chunks=dask_chunks, - store_hdf5_chunks=store_hdf5_chunks, - mask=mask, - unpack=unpack, - warn_valid=warn_valid, - select=select, - domain=domain, - cfa=cfa, - cfa_write=cfa_write, - to_memory=to_memory, - netcdf_backend=netcdf_backend, - storage_options=storage_options, - cache=cache, - squeeze=squeeze, - unsqueeze=unsqueeze, - ) + # ---------------------------------------------------- + fmts = fmt.copy() + file_format_errors = [] + + if fmts.intersection(("netCDF", "CDL")): + try: + file_contents = super().__new__( + cls, + filename, + external=external, + extra=extra, + verbose=verbose, + warnings=warnings, + mask=mask, + unpack=unpack, + warn_valid=warn_valid, + domain=domain, + storage_options=storage_options, + netcdf_backend=netcdf_backend, + dask_chunks=dask_chunks, + store_hdf5_chunks=store_hdf5_chunks, + cache=cache, + cfa=cfa, + cfa_write=cfa_write, + to_memory=to_memory, + squeeze=squeeze, + unsqueeze=unsqueeze, + fmt=fmt, + ignore_unknown_format=ignore_read_error, + ) + except UnknownFileFormatError as error: + fmts.difference_update(("netCDF", "CDL")) + file_format_errors.append(error) + else: + file_format_errors = () + if file_contents or not ignore_read_error: + # Zero or more fields/domains were + # successfully read + fmts = set() + ftype = "netCDF" + + if fmts.intersection(("UM",)): + try: + file_contents = cls.um.read( + 
filename, + um_version=um.get("version"), + verbose=verbose, + set_standard_name=False, + height_at_top_of_model=height_at_top_of_model, + fmt=um.get("fmt"), + word_size=um.get("word_size"), + endian=um.get("endian"), + select=select, + squeeze=squeeze, + unsqueeze=unsqueeze, + domain=domain, + ) + except UnknownFileFormatError as error: + fmts.difference_update(("UM",)) + file_format_errors.append(error) + else: + file_format_errors = () + if file_contents or not ignore_read_error: + fmts = set() + ftype = "UM" + + if file_format_errors: + error = "\n".join(map(str, file_format_errors)) + raise UnknownFileFormatError(f"\n{error}") + + if domain: + file_contents = DomainList(file_contents) + + file_contents = FieldList(file_contents) + + ftypes.add(ftype) # -------------------------------------------------------- # Select matching fields (only from netCDF files at # this stage - we'll do UM fields later) # -------------------------------------------------------- - if select and ftypes[-1] == "netCDF": + if select and ftype == "netCDF": file_contents = file_contents.select_by_identity(*select) # -------------------------------------------------------- @@ -712,6 +777,11 @@ def __new__( if aggregate and len(out) > 1: org_len = len(out) # pragma: no cover + if "UM" in ftypes: + # Set defaults specific to UM fields + if "strict_units" not in aggregate_options: + aggregate_options["relaxed_units"] = True + out = cf_aggregate(out, **aggregate_options) n = len(out) # pragma: no cover @@ -756,200 +826,3 @@ def __new__( def _plural(n): # pragma: no cover """Return a suffix which reflects a word's plural.""" return "s" if n != 1 else "" # pragma: no cover - - @classmethod - @_manage_log_level_via_verbosity - def _read_a_file( - cls, - filename, - ftypes=None, - aggregate=True, - aggregate_options=None, - ignore_read_error=False, - verbose=None, - warnings=False, - external=None, - fmt=None, - um=None, - extra=None, - height_at_top_of_model=None, - mask=True, - unpack=True, - 
warn_valid=False, - dask_chunks="storage-aligned", - store_hdf5_chunks=True, - select=None, - domain=False, - cfa=None, - cfa_write=None, - to_memory=None, - netcdf_backend=None, - storage_options=None, - cache=True, - squeeze=False, - unsqueeze=False, - ): - """Read the contents of a single file into a field list. - - :Parameters: - - filename: `str` - See `cf.read` for details. - - ftypes: `str` TODOCFA - The file format to interpret the file. Recognised formats are - ``'netCDF'``, ``'CDL'``, ``'UM'`` and ``'PP'``. - - aggregate_options: `dict`, optional - See `cf.read` for details. - - ignore_read_error: `bool`, optional - See `cf.read` for details. - - mask: `bool`, optional - See `cf.read` for details. - - unpack: `bool`, optional - See `cf.read` for details. - - verbose: `int` or `str` or `None`, optional - See `cf.read` for details. - - select: optional - For `read. Ignored for a netCDF file. - - domain: `bool`, optional - See `cf.read` for details. - - cfa: `dict`, optional - See `cf.read` for details. - - .. versionadded:: 3.15.0 - - storage_options: `dict` or `None`, optional - See `cf.read` for details. - - .. versionadded:: NEXTVERSION - - netcdf_backend: `str` or `None`, optional - See `cf.read` for details. - - .. versionadded:: NEXTVERSION - - cache: `bool`, optional - See `cf.read` for details. - - .. versionadded:: NEXTVERSION - - squeeze: `bool`, optional - Whether or not to remove all size 1 axes from field - construct data arrays. See `cf.read` for details. - - .. versionadded:: NEXTVERSION - - unsqueeze: `bool`, optional - Whether or not to ensure that all size 1 axes are - spanned by field construct data arrays. See - `cf.read` for details. - - .. versionadded:: NEXTVERSION - - :Returns: - - `FieldList` or `DomainList` - The field or domain constructs in the dataset. 
- - """ - if fmt: - if isinstance(fmt, str): - fmt = (fmt,) - - fmt = set(fmt) - else: - fmt = set(("netCDF", "CDL", "UM")) - - file_format_errors = [] - - out = None - if fmt.intersection(("netCDF", "CDL")): - try: - out = super().__new__( - cls, - filename, - external=external, - extra=extra, - verbose=verbose, - warnings=warnings, - mask=mask, - unpack=unpack, - warn_valid=warn_valid, - domain=domain, - storage_options=storage_options, - netcdf_backend=netcdf_backend, - dask_chunks=dask_chunks, - store_hdf5_chunks=store_hdf5_chunks, - cache=cache, - cfa=cfa, - cfa_write=cfa_write, - to_memory=to_memory, - squeeze=squeeze, - unsqueeze=unsqueeze, - fmt=fmt, - ignore_unknown_format=ignore_read_error, - ) - except UnknownFileFormatError as error: - fmt.difference_update(("netCDF", "CDL")) - if fmt: - file_format_errors.append(error) - else: - raise - else: - if out or not ignore_read_error: - # Zero or more fields/domains were successfully read - fmt = set() - file_format_errors = () - ftypes.append("netCDF") - - if fmt.intersection(("UM",)): - if not um: - um = {} - - try: - out = cls.um.read( - filename, - um_version=um.get("version"), - verbose=verbose, - set_standard_name=False, - height_at_top_of_model=height_at_top_of_model, - fmt=um.get("fmt"), - word_size=um.get("word_size"), - endian=um.get("endian"), - select=select, - squeeze=squeeze, - unsqueeze=unsqueeze, - domain=domain, - ) - except UnknownFileFormatError as error: - fmt.difference_update(("UM",)) - file_format_errors.append(error) - else: - if out or not ignore_read_error: - file_format_errors = () - ftypes.append("UM") - - # UM fields are aggregated intrafile prior to - # interfile aggregation - if aggregate: - # Set defaults specific to UM fields - if "strict_units" not in aggregate_options: - aggregate_options["relaxed_units"] = True - - if file_format_errors: - file_format_errors = "\n".join(map(str, file_format_errors)) - raise UnknownFileFormatError(f"\n{file_format_errors}") - - # Return the 
fields/domains - if domain: - return DomainList(out) - - return FieldList(out) From 8b127c1ea34589a41e24f503f2b2ae841169aeac Mon Sep 17 00:00:00 2001 From: David Hassell Date: Sun, 8 Dec 2024 11:55:05 +0000 Subject: [PATCH 38/51] dev --- cf/read_write/read.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/cf/read_write/read.py b/cf/read_write/read.py index c17cd49890..2ee0b21b57 100644 --- a/cf/read_write/read.py +++ b/cf/read_write/read.py @@ -674,13 +674,15 @@ def __new__( # Read the file # ---------------------------------------------------- fmts = fmt.copy() + + # Record unknown file format errors file_format_errors = [] if fmts.intersection(("netCDF", "CDL")): try: file_contents = super().__new__( cls, - filename, + filename=filename, external=external, extra=extra, verbose=verbose, @@ -709,7 +711,9 @@ def __new__( file_format_errors = () if file_contents or not ignore_read_error: # Zero or more fields/domains were - # successfully read + # successfully read. Set 'fmts' to an + # empty set so that no other file formats + # are attempted. fmts = set() ftype = "netCDF" @@ -735,6 +739,10 @@ def __new__( else: file_format_errors = () if file_contents or not ignore_read_error: + # Zero or more fields/domains were + # successfully read. Set 'fmts' to an + # empty set so that no other file formats + # are attempted. 
fmts = set() ftype = "UM" From d6904b00cd0aae8ebed24025fe6f7e3bd99a862c Mon Sep 17 00:00:00 2001 From: David Hassell Date: Sun, 8 Dec 2024 12:50:33 +0000 Subject: [PATCH 39/51] dev --- cf/read_write/read.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cf/read_write/read.py b/cf/read_write/read.py index 2ee0b21b57..af05669532 100644 --- a/cf/read_write/read.py +++ b/cf/read_write/read.py @@ -701,7 +701,7 @@ def __new__( to_memory=to_memory, squeeze=squeeze, unsqueeze=unsqueeze, - fmt=fmt, + file_type=fmt, ignore_unknown_format=ignore_read_error, ) except UnknownFileFormatError as error: From 4c586e3c2c6cf273cd8dc8feeadcbec2de51e394 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Mon, 9 Dec 2024 12:49:02 +0000 Subject: [PATCH 40/51] dev --- cf/read_write/read.py | 162 ++++++++++++++++++++++++------------- cf/read_write/um/umread.py | 12 ++- cf/test/test_read_write.py | 72 ++++++++++++++++- 3 files changed, 183 insertions(+), 63 deletions(-) diff --git a/cf/read_write/read.py b/cf/read_write/read.py index af05669532..b74b06a5a1 100644 --- a/cf/read_write/read.py +++ b/cf/read_write/read.py @@ -6,7 +6,7 @@ from urllib.parse import urlparse import cfdm -from cfdm.read_write.exceptions import UnknownFileFormatError +from cfdm.read_write.exceptions import UnknownFileFormatError as FileTypeError from cfdm.read_write.netcdf import NetCDFRead from ..aggregate import aggregate as cf_aggregate @@ -183,7 +183,7 @@ class read(cfdm.read): are read. Sub-directories are not read unless the *recursive* parameter is True. If any directories contain files that are not valid datasets then an exception will - be raised, unless the *ignore_read_error* parameter is + be raised, unless the *ignore_unknown_type* parameter is True. 
As a special case, if the `cdl_string` parameter is set to @@ -199,18 +199,23 @@ class read(cfdm.read): {{read warnings: `bool`, optional}} - ignore_read_error: `bool`, optional - If True then ignore any file which raises an IOError - whilst being read, as would be the case for an empty file, - unknown file format, etc. By default the IOError is - raised. + {{read ignore_unknown_type: `bool`, optional}} - fmt: `str`, optional - Only read files of the given format, ignoring all other - files. Valid formats are ``'NETCDF'`` for CF-netCDF files, - ``'CFA'`` for CFA-netCDF files, ``'UM'`` for PP or UM - fields files, and ``'CDL'`` for CDL text files. By default - files of any of these formats are read. + .. versionadded:: NEXTVERSION + + {{read file_type: (sequence of) `str`, optional}} + + Valid files types are: + + ============ ============================================ + *file_type* Description + ============ ============================================ + ``'netCDF'`` Binary netCDF-3 or netCDF-4 file + ``'CDL'`` Text CDL representation of a netCDF file + ``'UM'`` UM fields file or PP file + ============ ============================================ + + .. versionadded:: NEXTVERSION cdl_string: `bool`, optional If True and the format to read is CDL, read a string @@ -420,6 +425,12 @@ class read(cfdm.read): chunks: deprecated at version NEXTVERSION Use the *dask_chunks* parameter instead. + fmt: deprecated at version NEXTVERSION + Use the *file_type* parameter instead. + + ignore_read_error: deprecated at version NEXTVERSION + Use the *ignore_unknown_type* parameter instead. 
+ :Returns: `FieldList` or `DomainList` @@ -475,12 +486,12 @@ def __new__( external=None, verbose=None, warnings=False, - ignore_read_error=False, + ignore_unknown_type=False, aggregate=True, nfields=None, squeeze=False, unsqueeze=False, - fmt=None, + file_type=None, cdl_string=False, select=None, extra=None, @@ -505,6 +516,8 @@ def __new__( storage_options=None, cache=True, chunks="auto", + ignore_read_error=False, + fmt=None, ): """Read field or domain constructs from a dataset.""" if field: @@ -556,6 +569,24 @@ def __new__( removed_at="5.0.0", ) # pragma: no cover + if fmt is not None: + _DEPRECATION_ERROR_FUNCTION_KWARGS( + "cf.read", + {"fmt": fmt}, + "Use keyword 'file_type' instead.", + version="NEXTVERSION", + removed_at="5.0.0", + ) # pragma: no cover + + if ignore_read_error: + _DEPRECATION_ERROR_FUNCTION_KWARGS( + "cf.read", + {"ignore_read_error": ignore_read_error}, + "Use keyword 'ignore_unknown_type' instead.", + version="NEXTVERSION", + removed_at="5.0.0", + ) # pragma: no cover + cls.netcdf = NetCDFRead(cls.implementation) cls.um = UMRead(cls.implementation) @@ -566,8 +597,8 @@ def __new__( info = cfdm.is_log_level_info(logger) # Manage input parameters where contradictions are possible: - if cdl_string and fmt: - if fmt == "CDL": + if cdl_string and file_type: + if file_type == "CDL": if info: logger.info( "It is not necessary to set the cf.read fmt as " @@ -601,16 +632,16 @@ def __new__( aggregate_options["copy"] = False - # ------------------------------------------------------------ - # Parse the 'fmt' keyword parameter - # ------------------------------------------------------------ - if fmt: - if isinstance(fmt, str): - fmt = (fmt,) - - fmt = set(fmt) - else: - fmt = set(("netCDF", "CDL", "UM")) + ## ------------------------------------------------------------ + ## Parse the 'fmt' keyword parameter + ## ------------------------------------------------------------ + #if file_type: + # if isinstance(file_type, str): + # file_type = (file_type,) 
+ # + # file_type = set(file_type) + #else: + # file_type = set(("netCDF", "CDL", "UM")) # ------------------------------------------------------------ # Parse the 'um' keyword parameter @@ -643,7 +674,7 @@ def __new__( # Glob files on disk files2 = glob(file_glob) - if not files2 and not ignore_read_error: + if not files2 and not ignore_unknown_type: open(file_glob, "rb") files3 = [] @@ -673,12 +704,14 @@ def __new__( # ---------------------------------------------------- # Read the file # ---------------------------------------------------- - fmts = fmt.copy() + file_types = file_type.copy() + ftype = None + file_contents = None # Record unknown file format errors file_format_errors = [] - - if fmts.intersection(("netCDF", "CDL")): + print ('---------', file_types) + if file_types.intersection(("netCDF", "CDL")): try: file_contents = super().__new__( cls, @@ -701,24 +734,29 @@ def __new__( to_memory=to_memory, squeeze=squeeze, unsqueeze=unsqueeze, - file_type=fmt, - ignore_unknown_format=ignore_read_error, + file_type=file_type, +# ignore_unknown_type=ignore_unknown_type, ) - except UnknownFileFormatError as error: - fmts.difference_update(("netCDF", "CDL")) - file_format_errors.append(error) + except FileTypeError as error: + if file_type is None: + file_format_errors.append(error) + + file_types.difference_update(("netCDF", "CDL")) else: - file_format_errors = () - if file_contents or not ignore_read_error: + file_format_errors = [] +# if file_contents or not ignore_unknown_type: # Zero or more fields/domains were - # successfully read. Set 'fmts' to an - # empty set so that no other file formats - # are attempted. - fmts = set() - ftype = "netCDF" - - if fmts.intersection(("UM",)): + # successfully read. Set 'file_types' to + # an empty set so that no other file + # formats are attempted. 
+ file_types = set() + ftype = "netCDF" + + print ('here yyy',file_types, file_contents, file_format_errors) + if file_types.intersection(("UM",)): + print ('UM', filename) try: + print ('9999') file_contents = cls.um.read( filename, um_version=um.get("version"), @@ -732,30 +770,40 @@ def __new__( squeeze=squeeze, unsqueeze=unsqueeze, domain=domain, +# ignore_unknown_type=ignore_unknown_type, ) - except UnknownFileFormatError as error: - fmts.difference_update(("UM",)) - file_format_errors.append(error) + except FileTypeError as error: + if file_type is None: + file_format_errors.append(error) + +# print (1111111) + file_types.difference_update(("UM",)) +# file_format_errors.append(error) else: - file_format_errors = () - if file_contents or not ignore_read_error: + print (1111155511, file_contents) +# file_format_errors = [] +# if file_contents or not ignore_unknown_type: +# print ('bon') # Zero or more fields/domains were - # successfully read. Set 'fmts' to an - # empty set so that no other file formats - # are attempted. - fmts = set() - ftype = "UM" + # successfully read. Set 'file_types' to + # an empty set so that no other file + # formats are attempted. 
+ file_format_errors = [] + file_types = set() + ftype = "UM" if file_format_errors: + print ('rrrr',file_format_errors, file_contents) error = "\n".join(map(str, file_format_errors)) - raise UnknownFileFormatError(f"\n{error}") + raise FileTypeError(f"\n{error}") if domain: file_contents = DomainList(file_contents) file_contents = FieldList(file_contents) - ftypes.add(ftype) + if ftype: + ftypes.add(ftype) # -------------------------------------------------------- # Select matching fields (only from netCDF files at diff --git a/cf/read_write/um/umread.py b/cf/read_write/um/umread.py index 058bb52d02..94fbd32658 100644 --- a/cf/read_write/um/umread.py +++ b/cf/read_write/um/umread.py @@ -29,9 +29,6 @@ from ...umread_lib.umfile import File from ...units import Units -# import numpy as np - - logger = logging.getLogger(__name__) _cached_runid = {} @@ -3393,6 +3390,7 @@ def read( squeeze=False, unsqueeze=False, domain=False, + ignore_unknown_type=False, ): """Read fields from a PP file or UM fields file. @@ -3526,7 +3524,13 @@ def read( else: byte_ordering = None +# try: f = self.file_open(filename, parse=True) +# except UnknownFileFormatError: +# if not ignore_unknown_type: + # raise +# +# return [] info = is_log_level_info(logger) @@ -3598,7 +3602,7 @@ def _open_um_file( pass raise UnknownFileFormatError( - f"Can't open {filename} as a PP or UM dataset" + f"Can't interpret {filename} as a PP or UM dataset" ) self._um_file = f diff --git a/cf/test/test_read_write.py b/cf/test/test_read_write.py index efd64056d3..7c1197a2af 100644 --- a/cf/test/test_read_write.py +++ b/cf/test/test_read_write.py @@ -9,6 +9,7 @@ import unittest import numpy as np +from cfdm.read_write.exceptions import UnknownFileFormatError faulthandler.enable() # to debug seg faults and timeouts @@ -725,7 +726,7 @@ def test_read_cdl_string(self): self.assertEqual(f_from_str[1], f_from_file[0]) # Check compatibility with the `fmt` kwarg. 
- f0 = cf.read(cdl_string_1, cdl_string=True, fmt="CDL") # fine + f0 = cf.read(cdl_string_1, cdl_string=True, file_type="CDL") self.assertEqual(len(f0), len(f_from_file)) self.assertEqual(f0[0], f_from_file[0]) # If the 'fmt' and 'cdl_string' values contradict each other, @@ -733,7 +734,7 @@ def test_read_cdl_string(self): # it then gets interpreted as NETCDF, so default fmt is fine and # it is tested in f_from_str above where fmt is not set. with self.assertRaises(ValueError): - f0 = cf.read(cdl_string_1, cdl_string=True, fmt="NETCDF") + f0 = cf.read(cdl_string_1, cdl_string=True, file_type="netCDF") # If the user forgets the cdl_string=True argument they will # accidentally attempt to create a file with a very long name @@ -874,6 +875,73 @@ def test_read_url(self): f = cf.read(remote) self.assertEqual(len(f), 1) + def test_read_file_type(self): + """Test the cf.read 'file_type' keyword.""" + # netCDF file + for file_type in ( + None, + "netCDF", + ("netCDF",), + ("netCDF", "CDL"), + ("netCDF", "CDL", "bad value"), + ): + f = cf.read(self.filename, file_type=file_type) + self.assertEqual(len(f), 1) + + for file_type in ("CDL", "bad value"): + f = cf.read(self.filename, file_type=file_type) + self.assertEqual(len(f), 0) + + # CDL file + subprocess.run( + " ".join(["ncdump", self.filename, ">", tmpfile]), + shell=True, + check=True, + ) + for file_type in ( + None, + "CDL", + ("netCDF", "CDL"), + ("netCDF", "CDL", "bad value"), + ): + f = cf.read(tmpfile, file_type=file_type) + self.assertEqual(len(f), 1) + + for file_type in ("netCDF", "bad value"): + f = cf.read(tmpfile, file_type=file_type) + self.assertEqual(len(f), 0) + + # UM file + for file_type in (None, "UM"): + print ('file_type=', file_type) + f = cf.read("umfile.pp", file_type=file_type) + self.assertEqual(len(f), 1) + + for file_type in ("netCDF", "bad value"): + print ('NNNNNNNNNNNNNNNNNN', file_type) + f = cf.read("umfile.pp", file_type=file_type) + self.assertEqual(len(f), 0) + + # Not a netCDF, CDL, 
or UM file + with self.assertRaises(UnknownFileFormatError): + f = cfdm.read("test_read_write.py") + + for file_type in ("netCDF", "CDL", "bad value"): + f = cfdm.read("test_read_write.py", file_type=file_type) + self.assertEqual(len(f), 0) + +# def test_read_ignore_unknown_type(self): +# """Test the cf.read 'ignore_unknown_type' keyword.""" +# # netCDF file +# f = cf.read(self.filename) +# +# # Unresocgnised type +# f = cf.read("test_read_write.py", ignore_unknown_type=True) +# self.assertEqual(len(f), 0) +# +# with self.assertRaises(UnknownFileFormatError): +# cf.read("test_read_write.py") + if __name__ == "__main__": print("Run date:", datetime.datetime.now()) From 24435d92c59c341ff4b7844034bae8b0efb5fa50 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Tue, 10 Dec 2024 22:55:34 +0000 Subject: [PATCH 41/51] dev --- cf/read_write/read.py | 187 ++++++++++++++++--------------------- cf/read_write/um/umread.py | 23 +++-- cf/test/test_read_write.py | 137 +++++++++++---------------- cf/umread_lib/umfile.py | 4 +- 4 files changed, 155 insertions(+), 196 deletions(-) diff --git a/cf/read_write/read.py b/cf/read_write/read.py index b74b06a5a1..2958bae9fe 100644 --- a/cf/read_write/read.py +++ b/cf/read_write/read.py @@ -6,7 +6,7 @@ from urllib.parse import urlparse import cfdm -from cfdm.read_write.exceptions import UnknownFileFormatError as FileTypeError +from cfdm.read_write.exceptions import FileTypeError from cfdm.read_write.netcdf import NetCDFRead from ..aggregate import aggregate as cf_aggregate @@ -199,20 +199,16 @@ class read(cfdm.read): {{read warnings: `bool`, optional}} - {{read ignore_unknown_type: `bool`, optional}} - - .. 
versionadded:: NEXTVERSION - {{read file_type: (sequence of) `str`, optional}} - Valid files types are: + Valid file types are: ============ ============================================ - *file_type* Description + file type Description ============ ============================================ - ``'netCDF'`` Binary netCDF-3 or netCDF-4 file - ``'CDL'`` Text CDL representation of a netCDF file - ``'UM'`` UM fields file or PP file + ``'netCDF'`` Binary netCDF-3 or netCDF-4 files + ``'CDL'`` Text CDL representations of netCDF files + ``'UM'`` UM fields files or PP files ============ ============================================ .. versionadded:: NEXTVERSION @@ -429,7 +425,7 @@ class read(cfdm.read): Use the *file_type* parameter instead. ignore_read_error: deprecated at version NEXTVERSION - Use the *ignore_unknown_type* parameter instead. + Use the *file_type* parameter instead. :Returns: @@ -486,7 +482,6 @@ def __new__( external=None, verbose=None, warnings=False, - ignore_unknown_type=False, aggregate=True, nfields=None, squeeze=False, @@ -582,48 +577,25 @@ def __new__( _DEPRECATION_ERROR_FUNCTION_KWARGS( "cf.read", {"ignore_read_error": ignore_read_error}, - "Use keyword 'ignore_unknown_type' instead.", + "Use keyword 'file_type' instead.", version="NEXTVERSION", removed_at="5.0.0", ) # pragma: no cover + info = cfdm.is_log_level_info(logger) + cls.netcdf = NetCDFRead(cls.implementation) cls.um = UMRead(cls.implementation) - # Parse select + # ------------------------------------------------------------ + # Parse the 'select' keyword parameter + # ------------------------------------------------------------ if isinstance(select, (str, Query, Pattern)): select = (select,) - info = cfdm.is_log_level_info(logger) - - # Manage input parameters where contradictions are possible: - if cdl_string and file_type: - if file_type == "CDL": - if info: - logger.info( - "It is not necessary to set the cf.read fmt as " - "'CDL' when cdl_string is True, since that implies " - "CDL 
is the format." - ) # pragma: no cover - else: - raise ValueError( - "cdl_string can only be True when the format is CDL, " - "though fmt is ignored in that case so there is no " - "need to set it." - ) - - if follow_symlinks and not recursive: - raise ValueError( - f"Can't set follow_symlinks={follow_symlinks!r} " - f"when recursive={recursive!r}" - ) - - # Initialise the output list of fields/domains - if domain: - out = DomainList() - else: - out = FieldList() - + # ------------------------------------------------------------ + # Parse the 'aggregate' keyword parameter + # ------------------------------------------------------------ if isinstance(aggregate, dict): aggregate_options = aggregate.copy() aggregate = True @@ -632,16 +604,16 @@ def __new__( aggregate_options["copy"] = False - ## ------------------------------------------------------------ - ## Parse the 'fmt' keyword parameter - ## ------------------------------------------------------------ - #if file_type: - # if isinstance(file_type, str): - # file_type = (file_type,) - # - # file_type = set(file_type) - #else: - # file_type = set(("netCDF", "CDL", "UM")) + # ------------------------------------------------------------ + # Parse the 'file_type' keyword parameter + # ------------------------------------------------------------ + netCDF_file_types = set(("netCDF", "CDL")) + UM_file_types = set(("UM",)) + if file_type is not None: + if isinstance(file_type, str): + file_type = (file_type,) + + file_type = set(file_type) # ------------------------------------------------------------ # Parse the 'um' keyword parameter @@ -649,6 +621,28 @@ def __new__( if not um: um = {} + # ------------------------------------------------------------ + # Parse the 'cdl_string' keyword parameter + # ------------------------------------------------------------ + if cdl_string and file_type is not None: + raise ValueError("Can't set file_type when cdl_string=True") + + # 
------------------------------------------------------------ + # Parse the 'follow_symlinks' and 'recursive' keyword + # parameters + # ------------------------------------------------------------ + if follow_symlinks and not recursive: + raise ValueError( + f"Can't set follow_symlinks={follow_symlinks!r} " + f"when recursive={recursive!r}" + ) + + # Initialise the output list of fields/domains + if domain: + out = DomainList() + else: + out = FieldList() + # Count the number of fields (in all files) and the number of # files field_counter = -1 @@ -661,6 +655,7 @@ def __new__( files = [ NetCDFRead.string_to_cdl(cdl_string) for cdl_string in files ] + file_type = set(("CDL",)) for file_glob in flat(files): # Expand variables @@ -674,8 +669,9 @@ def __new__( # Glob files on disk files2 = glob(file_glob) - if not files2 and not ignore_unknown_type: - open(file_glob, "rb") + if not files2: + # Trigger a FileNotFoundError error + open(file_glob) files3 = [] for x in files2: @@ -694,7 +690,7 @@ def __new__( files2 = files3 - # How each file was read, as netCDF, or UM, etc. 
+ # The types of all of the input files ftypes = set() for filename in files2: @@ -704,14 +700,19 @@ def __new__( # ---------------------------------------------------- # Read the file # ---------------------------------------------------- - file_types = file_type.copy() + file_contents = [] + + # The type of this file ftype = None - file_contents = None - # Record unknown file format errors + # Record file type errors file_format_errors = [] - print ('---------', file_types) - if file_types.intersection(("netCDF", "CDL")): + + if ftype is None and ( + file_type is None + or file_type.intersection(netCDF_file_types) + ): + # Try to read as netCDF try: file_contents = super().__new__( cls, @@ -735,28 +736,19 @@ def __new__( squeeze=squeeze, unsqueeze=unsqueeze, file_type=file_type, -# ignore_unknown_type=ignore_unknown_type, ) except FileTypeError as error: if file_type is None: file_format_errors.append(error) - - file_types.difference_update(("netCDF", "CDL")) else: file_format_errors = [] -# if file_contents or not ignore_unknown_type: - # Zero or more fields/domains were - # successfully read. Set 'file_types' to - # an empty set so that no other file - # formats are attempted. 
- file_types = set() ftype = "netCDF" - - print ('here yyy',file_types, file_contents, file_format_errors) - if file_types.intersection(("UM",)): - print ('UM', filename) + + if ftype is None and ( + file_type is None or file_type.intersection(UM_file_types) + ): + # Try to read as UM try: - print ('9999') file_contents = cls.um.read( filename, um_version=um.get("version"), @@ -770,30 +762,16 @@ def __new__( squeeze=squeeze, unsqueeze=unsqueeze, domain=domain, -# ignore_unknown_type=ignore_unknown_type, + file_type=file_type, ) except FileTypeError as error: if file_type is None: file_format_errors.append(error) - -# print (1111111) - file_types.difference_update(("UM",)) -# file_format_errors.append(error) else: - print (1111155511, file_contents) -# file_format_errors = [] -# if file_contents or not ignore_unknown_type: -# print ('bon') - # Zero or more fields/domains were - # successfully read. Set 'file_types' to - # an empty set so that no other file - # formats are attempted. file_format_errors = [] - file_types = set() ftype = "UM" if file_format_errors: - print ('rrrr',file_format_errors, file_contents) error = "\n".join(map(str, file_format_errors)) raise FileTypeError(f"\n{error}") @@ -805,28 +783,19 @@ def __new__( if ftype: ftypes.add(ftype) - # -------------------------------------------------------- - # Select matching fields (only from netCDF files at - # this stage - we'll do UM fields later) - # -------------------------------------------------------- + # Select matching fields (only for netCDF files at + # this stage - we'll other it for other file types + # later) if select and ftype == "netCDF": file_contents = file_contents.select_by_identity(*select) - # -------------------------------------------------------- - # Add this file's contents to that already read from other - # files - # -------------------------------------------------------- + # Add this file's contents to that already read from + # other files out.extend(file_contents) 
field_counter = len(out) file_counter += 1 - if info: - logger.info( - f"Read {field_counter} field{cls._plural(field_counter)} " - f"from {file_counter} file{cls._plural(file_counter)}" - ) # pragma: no cover - # ---------------------------------------------------------------- # Aggregate the output fields/domains # ---------------------------------------------------------------- @@ -863,12 +832,18 @@ def __new__( del f._custom["standard_name"] # ---------------------------------------------------------------- - # Select matching fields from UM/PP fields (post setting of + # Select matching fields from UM files (post setting of their # standard names) # ---------------------------------------------------------------- if select and "UM" in ftypes: out = out.select_by_identity(*select) + if info: + logger.info( + f"Read {field_counter} field{cls._plural(field_counter)} " + f"from {file_counter} file{cls._plural(file_counter)}" + ) # pragma: no cover + if nfields is not None and len(out) != nfields: raise ValueError( f"{nfields} field{cls._plural(nfields)} requested but " diff --git a/cf/read_write/um/umread.py b/cf/read_write/um/umread.py index 94fbd32658..0f19ec1380 100644 --- a/cf/read_write/um/umread.py +++ b/cf/read_write/um/umread.py @@ -9,7 +9,7 @@ import dask.array as da import numpy as np from cfdm import Constructs, is_log_level_info -from cfdm.read_write.exceptions import UnknownFileFormatError +from cfdm.read_write.exceptions import FileTypeError from dask.array.core import getter, normalize_chunks from dask.base import tokenize from netCDF4 import date2num as netCDF4_date2num @@ -3390,6 +3390,7 @@ def read( squeeze=False, unsqueeze=False, domain=False, + file_type=None, ignore_unknown_type=False, ): """Read fields from a PP file or UM fields file. 
@@ -3524,13 +3525,19 @@ def read( else: byte_ordering = None -# try: + # ------------------------------------------------------------ + # Parse the 'file_type' keyword parameter + # ------------------------------------------------------------ + if file_type is not None: + if isinstance(file_type, str): + file_type = (file_type,) + + file_type = set(file_type) + if not file_type.intersection(("UM",)): + # Return now if there are valid file types + return [] + f = self.file_open(filename, parse=True) -# except UnknownFileFormatError: -# if not ignore_unknown_type: - # raise -# -# return [] info = is_log_level_info(logger) @@ -3601,7 +3608,7 @@ def _open_um_file( except Exception: pass - raise UnknownFileFormatError( + raise FileTypeError( f"Can't interpret {filename} as a PP or UM dataset" ) diff --git a/cf/test/test_read_write.py b/cf/test/test_read_write.py index 7c1197a2af..2f73b94f52 100644 --- a/cf/test/test_read_write.py +++ b/cf/test/test_read_write.py @@ -9,7 +9,7 @@ import unittest import numpy as np -from cfdm.read_write.exceptions import UnknownFileFormatError +from cfdm.read_write.exceptions import FileTypeError faulthandler.enable() # to debug seg faults and timeouts @@ -43,11 +43,13 @@ def _remove_tmpfiles(): atexit.register(_remove_tmpfiles) +filename = os.path.join( + os.path.dirname(os.path.abspath(__file__)), "test_file.nc" +) + class read_writeTest(unittest.TestCase): - filename = os.path.join( - os.path.dirname(os.path.abspath(__file__)), "test_file.nc" - ) + filename = filename broken_bounds = os.path.join( os.path.dirname(os.path.abspath(__file__)), "broken_bounds.cdl" @@ -59,6 +61,7 @@ class read_writeTest(unittest.TestCase): chunk_sizes = (100000, 300) + f = cf.read(filename)[0] f0 = cf.example_field(0) f1 = cf.example_field(1) @@ -172,9 +175,8 @@ def test_read_directory(self): def test_read_select(self): # select on field list - f = cf.read(self.filename, select="eastward_wind") - g = cf.read(self.filename) - self.assertTrue(f.equals(g, 
verbose=2), "Bad read with select keyword") + f = cf.read(self.filename, select="eastward_wind")[0] + self.assertTrue(f.equals(self.f)) def test_read_squeeze(self): # select on field list @@ -189,7 +191,7 @@ def test_read_aggregate(self): cf.read(self.filename, aggregate={}) def test_read_extra(self): - # Test field keyword of cf.read + # Test 'extra' keyword of cf.read filename = self.filename f = cf.read(filename) @@ -256,7 +258,7 @@ def test_read_write_format(self): def test_write_netcdf_mode(self): """Test the `mode` parameter to `write`, notably append mode.""" - g = cf.read(self.filename) # note 'g' has one field + g = self.f.copy() # Test special case #1: attempt to append fields with groups # (other than 'root') which should be forbidden. Using fmt="NETCDF4" @@ -264,16 +266,16 @@ def test_write_netcdf_mode(self): # # Note: this is not the most natural test to do first, but putting # it before the rest reduces spurious seg faults for me, so... - g[0].nc_set_variable_groups(["forecast", "model"]) + g.nc_set_variable_groups(["forecast", "model"]) cf.write(g, tmpfile, fmt="NETCDF4", mode="w") # 1. 
overwrite to wipe f = cf.read(tmpfile) with self.assertRaises(ValueError): - cf.write(g[0], tmpfile, fmt="NETCDF4", mode="a") + cf.write(g, tmpfile, fmt="NETCDF4", mode="a") # Test special case #2: attempt to append fields with contradictory # featureType to the original file: - g[0].nc_clear_variable_groups() - g[0].nc_set_global_attribute("featureType", "profile") + g.nc_clear_variable_groups() + g.nc_set_global_attribute("featureType", "profile") cf.write( g, tmpfile, @@ -286,20 +288,20 @@ def test_write_netcdf_mode(self): with self.assertRaises(ValueError): cf.write(h, tmpfile, fmt="NETCDF4", mode="a") # Now remove featureType attribute for subsquent tests: - g_attrs = g[0].nc_clear_global_attributes() + g_attrs = g.nc_clear_global_attributes() del g_attrs["featureType"] - g[0].nc_set_global_attributes(g_attrs) + g.nc_set_global_attributes(g_attrs) # Set a non-trivial (i.e. not only 'Conventions') global attribute to # make the global attribute testing more robust: add_global_attr = ["remark", "A global comment."] - original_global_attrs = g[0].nc_global_attributes() + original_global_attrs = g.nc_global_attributes() original_global_attrs[add_global_attr[0]] = None # -> None on fields - g[0].nc_set_global_attribute(*add_global_attr) + g.nc_set_global_attribute(*add_global_attr) # First test a bad mode value: with self.assertRaises(ValueError): - cf.write(g[0], tmpfile, mode="g") + cf.write(g, tmpfile, mode="g") g_copy = g.copy() @@ -318,7 +320,7 @@ def test_write_netcdf_mode(self): new_length = 1 # since 1 == len(g) self.assertEqual(len(f), new_length) # Ignore as 'remark' should be 'None' on the field as tested below - self.assertTrue(f[0].equals(g[0], ignore_properties=["remark"])) + self.assertTrue(f[0].equals(g, ignore_properties=["remark"])) self.assertEqual( f[0].nc_global_attributes(), original_global_attrs ) @@ -536,11 +538,11 @@ def test_write_netcdf_mode(self): cf.write(g, tmpfile, fmt=fmt, mode="w") # 1. 
overwrite to wipe cf.write(g_copy, tmpfile, fmt=fmt, mode="a") # 2. now append f = cf.read(tmpfile) - self.assertEqual(len(f), 2 * len(g)) + self.assertEqual(len(f), 2) self.assertTrue( any( [ - file_field.equals(g[0], ignore_properties=["remark"]) + file_field.equals(g, ignore_properties=["remark"]) for file_field in f ] ) @@ -550,7 +552,7 @@ def test_write_netcdf_mode(self): ) def test_read_write_netCDF4_compress_shuffle(self): - f = cf.read(self.filename)[0] + f = self.f for fmt in ("NETCDF4", "NETCDF4_CLASSIC"): cf.write(f, tmpfile, fmt=fmt, compress=1, shuffle=True) g = cf.read(tmpfile)[0] @@ -560,7 +562,7 @@ def test_read_write_netCDF4_compress_shuffle(self): ) def test_write_datatype(self): - f = cf.read(self.filename)[0] + f = self.f self.assertEqual(f.dtype, np.dtype(float)) cf.write( f, @@ -572,7 +574,6 @@ def test_write_datatype(self): self.assertEqual(g.dtype, np.dtype("float32")) # Keyword single - f = cf.read(self.filename)[0] self.assertEqual(f.dtype, np.dtype(float)) cf.write(f, tmpfile, fmt="NETCDF4", single=True) g = cf.read(tmpfile)[0] @@ -671,7 +672,7 @@ def test_read_CDL(self): check=True, ) - f0 = cf.read(self.filename)[0] + f0 = self.f # Case (1) as above, so read in and check the fields are as should be f = cf.read(tmpfile)[0] @@ -696,6 +697,9 @@ def test_read_CDL(self): def test_read_cdl_string(self): """Test the `cdl_string` keyword of the `read` function.""" + f = self.f0 + cf.write(f, tmpfile0) + # Test CDL in full, header-only and coordinate-only type: tempfile_to_option_mapping = { tmpfile: None, @@ -705,36 +709,23 @@ def test_read_cdl_string(self): for tempf, option in tempfile_to_option_mapping.items(): # Set up the CDL string to test... - command_to_run = ["ncdump", self.filename, ">", tempf] + command_to_run = ["ncdump", tmpfile0, ">", tempf] if option: command_to_run.insert(1, option) + subprocess.run(" ".join(command_to_run), shell=True, check=True) - with open(tempf, "r") as file: - cdl_string_1 = file.read() - - # ... 
and now test it as an individual string input - f_from_str = cf.read(cdl_string_1, cdl_string=True) - f_from_file = cf.read(tempf) # len 1 so only one field to check - self.assertEqual(len(f_from_str), len(f_from_file)) - self.assertEqual(f_from_str[0], f_from_file[0]) - - # ... and test further by inputting it in duplicate as a sequence - f_from_str = cf.read([cdl_string_1, cdl_string_1], cdl_string=True) - f_from_file = cf.read(tempf) # len 1 so only one field to check - self.assertEqual(len(f_from_str), 2 * len(f_from_file)) - self.assertEqual(f_from_str[0], f_from_file[0]) - self.assertEqual(f_from_str[1], f_from_file[0]) - - # Check compatibility with the `fmt` kwarg. - f0 = cf.read(cdl_string_1, cdl_string=True, file_type="CDL") - self.assertEqual(len(f0), len(f_from_file)) - self.assertEqual(f0[0], f_from_file[0]) - # If the 'fmt' and 'cdl_string' values contradict each other, - # alert the user to this. Note that the default fmt is None but - # it then gets interpreted as NETCDF, so default fmt is fine and - # it is tested in f_from_str above where fmt is not set. + with open(tempf, "rt") as fh: + cdl_string_1 = fh.read() + + for cdl_input in (cdl_string_1, (cdl_string_1,)): + f_from_str = cf.read(cdl_input, cdl_string=True) + self.assertEqual(len(f_from_str), 1) + self.assertEqual(f_from_str[0], f) + + # Check compatibility with the 'file_type' kwarg. 
+ for file_type in ("netCDF", "CDL", "UM", ()): with self.assertRaises(ValueError): - f0 = cf.read(cdl_string_1, cdl_string=True, file_type="netCDF") + cf.read(cdl_string_1, cdl_string=True, file_type=file_type) # If the user forgets the cdl_string=True argument they will # accidentally attempt to create a file with a very long name @@ -777,7 +768,7 @@ def test_read_broken_bounds(self): self.assertEqual(len(f), 2) def test_write_coordinates(self): - f = cf.example_field(0) + f = self.f0 cf.write(f, tmpfile, coordinates=True) g = cf.read(tmpfile) @@ -786,7 +777,7 @@ def test_write_coordinates(self): self.assertTrue(g[0].equals(f)) def test_read_write_domain(self): - f = cf.read(self.filename)[0] + f = self.f d = f.domain # 1 domain @@ -833,7 +824,7 @@ def test_read_write_domain(self): def test_write_omit_data(self): """Test the `omit_data` parameter to `write`.""" - f = cf.example_field(1) + f = self.f1 cf.write(f, tmpfile) cf.write(f, tmpfile, omit_data="all") @@ -865,12 +856,12 @@ def test_write_omit_data(self): self.assertTrue(np.ma.count(g.construct("grid_latitude").array)) @unittest.skipUnless( - False, "URL TEST: UNRELIABLE FLAKEY URL DESTINATION. TODO REPLACE URL" + True, "URL TEST: UNRELIABLE FLAKEY URL DESTINATION. 
TODO REPLACE URL" ) def test_read_url(self): """Test reading urls.""" for scheme in ("http", "https"): - remote = f"{scheme}://psl.noaa.gov/thredds/dodsC/Datasets/cru/crutem5/Monthlies/air.mon.anom.nobs.nc" + remote = f"{scheme}:///psl.noaa.gov/thredds/dodsC/Datasets/cru/crutem5/Monthlies/air.mon.anom.nobs.nc" # Check that cf can access it f = cf.read(remote) self.assertEqual(len(f), 1) @@ -883,12 +874,12 @@ def test_read_file_type(self): "netCDF", ("netCDF",), ("netCDF", "CDL"), - ("netCDF", "CDL", "bad value"), + ("netCDF", "bad value"), ): f = cf.read(self.filename, file_type=file_type) self.assertEqual(len(f), 1) - for file_type in ("CDL", "bad value"): + for file_type in ("CDL", "bad value", ()): f = cf.read(self.filename, file_type=file_type) self.assertEqual(len(f), 0) @@ -902,46 +893,32 @@ def test_read_file_type(self): None, "CDL", ("netCDF", "CDL"), - ("netCDF", "CDL", "bad value"), + ("CDL", "bad value"), ): f = cf.read(tmpfile, file_type=file_type) self.assertEqual(len(f), 1) - for file_type in ("netCDF", "bad value"): + for file_type in ("netCDF", "bad value", ()): f = cf.read(tmpfile, file_type=file_type) self.assertEqual(len(f), 0) # UM file - for file_type in (None, "UM"): - print ('file_type=', file_type) + for file_type in (None, "UM", ("UM",), ("UM", "bad value")): f = cf.read("umfile.pp", file_type=file_type) self.assertEqual(len(f), 1) - for file_type in ("netCDF", "bad value"): - print ('NNNNNNNNNNNNNNNNNN', file_type) + for file_type in ("netCDF", "bad value", ()): f = cf.read("umfile.pp", file_type=file_type) self.assertEqual(len(f), 0) # Not a netCDF, CDL, or UM file - with self.assertRaises(UnknownFileFormatError): - f = cfdm.read("test_read_write.py") + with self.assertRaises(FileTypeError): + f = cf.read("test_read_write.py") - for file_type in ("netCDF", "CDL", "bad value"): - f = cfdm.read("test_read_write.py", file_type=file_type) + for file_type in ("netCDF", "CDL", "bad value", ()): + f = cf.read("test_read_write.py", 
file_type=file_type) self.assertEqual(len(f), 0) -# def test_read_ignore_unknown_type(self): -# """Test the cf.read 'ignore_unknown_type' keyword.""" -# # netCDF file -# f = cf.read(self.filename) -# -# # Unresocgnised type -# f = cf.read("test_read_write.py", ignore_unknown_type=True) -# self.assertEqual(len(f), 0) -# -# with self.assertRaises(UnknownFileFormatError): -# cf.read("test_read_write.py") - if __name__ == "__main__": print("Run date:", datetime.datetime.now()) diff --git a/cf/umread_lib/umfile.py b/cf/umread_lib/umfile.py index 166c059ede..5f46e14db2 100644 --- a/cf/umread_lib/umfile.py +++ b/cf/umread_lib/umfile.py @@ -2,7 +2,7 @@ from functools import cmp_to_key import numpy -from cfdm.read_write.exceptions import UnknownFileFormatError +from cfdm.read_write.exceptions import FileTypeError from . import cInterface from .extraData import ExtraDataUnpacker @@ -133,7 +133,7 @@ def _detect_file_type(self): file_type_obj = c.detect_file_type(self.fd) except Exception: self.close_fd() - raise UnknownFileFormatError( + raise FileTypeError( f"Can't open {self.path} as a PP or UM dataset" ) From bd5e5084940c49e1ebe0ded0ac1a13dfa941a0f5 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Fri, 7 Feb 2025 16:43:27 +0000 Subject: [PATCH 42/51] PP unpack --- cf/read_write/read.py | 1 + cf/read_write/um/umread.py | 30 ++++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/cf/read_write/read.py b/cf/read_write/read.py index 9669226fbd..e589f08f6e 100644 --- a/cf/read_write/read.py +++ b/cf/read_write/read.py @@ -763,6 +763,7 @@ def __new__( unsqueeze=unsqueeze, domain=domain, file_type=file_type, + unpack=unpack, ) except DatasetTypeError as error: if file_type is None: diff --git a/cf/read_write/um/umread.py b/cf/read_write/um/umread.py index a0ee16ba2e..4dc3970d55 100644 --- a/cf/read_write/um/umread.py +++ b/cf/read_write/um/umread.py @@ -491,6 +491,7 @@ def __init__( info=False, squeeze=False, unsqueeze=False, + unpack=True, **kwargs, ): 
"""**Initialisation** @@ -559,6 +560,17 @@ def __init__( .. versionadded:: NEXTVERSION + unpack: `bool`, optional + If True, the default, then unpack arrays by convention + when the data is read from disk. + + Unpacking is determined by netCDF conventions for the + following variable attributes ``add_offset`` and + ``scale_factor``, as applied to lookup header entries + BDATUM and BMKS repectively. + + .. versionadded:: NEXTVERSION + kwargs: *optional* Keyword arguments providing extra CF properties for each return field construct. @@ -579,6 +591,7 @@ def __init__( self.height_at_top_of_model = height_at_top_of_model self.byte_ordering = byte_ordering self.word_size = word_size + self.unpack = unpack self.atol = cf_atol() @@ -2025,6 +2038,7 @@ def create_data(self): klass_name = UMArray().__class__.__name__ fmt = self.fmt + unpack = self.unpack if len(recs) == 1: # -------------------------------------------------------- @@ -2050,6 +2064,7 @@ def create_data(self): word_size=self.word_size, byte_ordering=self.byte_ordering, attributes=attributes, + unpack=unpack, ) key = f"{klass_name}-{tokenize(subarray)}" @@ -2103,6 +2118,7 @@ def create_data(self): word_size=word_size, byte_ordering=byte_ordering, attributes=attributes, + unpack=unpack, ) key = f"{klass_name}-{tokenize(subarray)}" @@ -2153,6 +2169,7 @@ def create_data(self): word_size=word_size, byte_ordering=byte_ordering, attributes=attributes, + unpack=unpack, ) key = f"{klass_name}-{tokenize(subarray)}" @@ -3392,6 +3409,7 @@ def read( domain=False, file_type=None, ignore_unknown_type=False, + unpack=True, ): """Read fields from a PP file or UM fields file. @@ -3474,6 +3492,17 @@ def read( .. versionadded:: NEXTVERSION + unpack: `bool`, optional + If True, the default, then unpack arrays by convention + when the data is read from disk. 
+ + Unpacking is determined by netCDF conventions for the + following variable attributes ``add_offset`` and + ``scale_factor``, as applied to lookup header entries + BDATUM and BMKS repectively. + + .. versionadded:: NEXTVERSION + :Returns: `list` @@ -3555,6 +3584,7 @@ def read( implementation=self.implementation, select=select, info=info, + unpack=unpack, ) for var in f.vars ] From 5d282ba1884acaa9097a49302cc43ad2a7acd8aa Mon Sep 17 00:00:00 2001 From: David Hassell Date: Mon, 17 Feb 2025 17:04:12 +0000 Subject: [PATCH 43/51] dev --- cf/data/array/mixin/arraymixin.py | 44 -- cf/data/array/mixin/cfamixin.py | 858 ------------------------------ cf/test/test_Field.py | 2 +- 3 files changed, 1 insertion(+), 903 deletions(-) delete mode 100644 cf/data/array/mixin/arraymixin.py delete mode 100644 cf/data/array/mixin/cfamixin.py diff --git a/cf/data/array/mixin/arraymixin.py b/cf/data/array/mixin/arraymixin.py deleted file mode 100644 index 4f83fa0d98..0000000000 --- a/cf/data/array/mixin/arraymixin.py +++ /dev/null @@ -1,44 +0,0 @@ -import numpy as np - -from ....units import Units - - -class ArrayMixin: - """Mixin class for a container of an array. - - .. versionadded:: 3.14.0 - - """ - - def __array_function__(self, func, types, args, kwargs): - """Implement the `numpy` ``__array_function__`` protocol. - - .. versionadded:: 3.14.0 - - """ - return NotImplemented - - @property - def _meta(self): - """Normalise the array to an appropriate Dask meta object. - - The Dask meta can be thought of as a suggestion to Dask. Dask - uses this meta to generate the task graph until it can infer - the actual metadata from the values. It does not force the - output to have the structure or dtype of the specified meta. - - .. versionadded:: 1.11.2.0 - - .. seealso:: `dask.utils.meta_from_array` - - """ - return np.array((), dtype=self.dtype) - - @property - def Units(self): - """The `cf.Units` object containing the units of the array. - - .. 
versionadded:: 3.14.0 - - """ - return Units(self.get_units(None), self.get_calendar(None)) diff --git a/cf/data/array/mixin/cfamixin.py b/cf/data/array/mixin/cfamixin.py deleted file mode 100644 index ecb1f82c2e..0000000000 --- a/cf/data/array/mixin/cfamixin.py +++ /dev/null @@ -1,858 +0,0 @@ -from copy import deepcopy -from functools import partial -from itertools import accumulate, product - -import numpy as np -from cfdm.data.utils import chunk_locations, chunk_positions - - -class CFAMixin: - """Mixin class for a CFA array. - - .. versionadded:: 1.11.2.0 - - """ - - def __new__(cls, *args, **kwargs): - """Store fragment array classes. - - .. versionadded:: 1.11.2.0 - - """ - # Import fragment array classes. Do this here (as opposed to - # outside the class) to avoid a circular import. - from ...fragment import ( - FullFragmentArray, - NetCDFFragmentArray, - UMFragmentArray, - ) - - instance = super().__new__(cls) - instance._FragmentArray = { - "nc": NetCDFFragmentArray, - "um": UMFragmentArray, - "full": FullFragmentArray, - } - return instance - - def __init__( - self, - filename=None, - address=None, - dtype=None, - mask=True, - unpack=True, - instructions=None, - substitutions=None, - term=None, - attributes=None, - storage_options=None, - source=None, - copy=True, - x=None, - ): - """**Initialisation** - - :Parameters: - - filename: (sequence of) `str`, optional - The name of the CFA file containing the array. If a - sequence then it must contain one element. - - address: (sequence of) `str`, optional - The name of the CFA aggregation variable for the - array. If a sequence then it must contain one element. - - dtype: `numpy.dtype` - The data type of the aggregated data array. May be - `None` if the numpy data-type is not known (which can - be the case for some string types, for example). - - mask: `bool` - If True (the default) then mask by convention when - reading data from disk. 
- - A array is masked depending on the values of any of - the variable attributes ``valid_min``, ``valid_max``, - ``valid_range``, ``_FillValue`` and ``missing_value``. - - {{init unpack: `bool`, optional}} - - .. versionadded:: 1.11.2.0 - - instructions: `str`, optional - The ``aggregated_data`` attribute value as found on - the CFA variable. If set then this will be used to - improve the performance of `__dask_tokenize__`. - - substitutions: `dict`, optional - A dictionary whose key/value pairs define text - substitutions to be applied to the fragment file - names. Each key must be specified with the ``${...}`` - syntax, for instance ``{'${base}': 'sub'}``. - - .. versionadded:: 3.15.0 - - term: `str`, optional - The name of a non-standard aggregation instruction - term from which the array is to be created, instead of - creating the aggregated data in the standard terms. If - set then *address* must be the name of the term's - aggregation instruction variable, which must be - defined on the fragment dimensions and no others. Each - value of the aggregation instruction variable will be - broadcast across the shape of the corresponding - fragment. - - *Parameter example:* - ``address='cfa_tracking_id', term='tracking_id'`` - - .. versionadded:: 3.15.0 - - storage_options: `dict` or `None`, optional - Key/value pairs to be passed on to the creation of - `s3fs.S3FileSystem` file systems to control the - opening of fragment files in S3 object stores. Ignored - for files not in an S3 object store, i.e. those whose - names do not start with ``s3:``. - - By default, or if `None`, then *storage_options* is - taken as ``{}``. - - If the ``'endpoint_url'`` key is not in - *storage_options* or is not in a dictionary defined by - the ``'client_kwargs`` key (which is always the case - when *storage_options* is `None`), then one will be - automatically inserted for accessing a fragment S3 - file. 
For example, for a file name of - ``'s3://store/data/file.nc'``, an ``'endpoint_url'`` - key with value ``'https://store'`` would be created. - - *Parameter example:* - ``{'key: 'scaleway-api-key...', 'secret': - 'scaleway-secretkey...', 'endpoint_url': - 'https://s3.fr-par.scw.cloud', 'client_kwargs': - {'region_name': 'fr-par'}}`` - - .. versionadded:: 1.11.2.0 - - {{init attributes: `dict` or `None`, optional}} - - If *attributes* is `None`, the default, then the - attributes will be set from the netCDF variable during - the first `__getitem__` call. - - .. versionaddedd:: 1.11.2.0 - - {{init source: optional}} - - {{init copy: `bool`, optional}} - - units: `str` or `None`, optional - Deprecated at version 1.11.2.0. Use the - *attributes* parameter instead. - - calendar: `str` or `None`, optional - Deprecated at version 1.11.2.0. Use the - *attributes* parameter instead. - - """ - if source is not None: - super().__init__(source=source, copy=copy) - - try: - fragment_shape = source.get_fragment_shape() - except AttributeError: - fragment_shape = None - - try: - instructions = source._get_component("instructions") - except AttributeError: - instructions = None - - try: - aggregated_data = source.get_aggregated_data(copy=False) - except AttributeError: - aggregated_data = {} - - try: - substitutions = source.get_substitutions() - except AttributeError: - substitutions = None - - try: - term = source.get_term() - except AttributeError: - term = None - - elif filename is not None: - shape, fragment_shape, aggregated_data = self._parse_cfa( - x, term, substitutions - ) - super().__init__( - filename=filename, - address=address, - shape=shape, - dtype=dtype, - mask=mask, - attributes=attributes, - copy=copy, - ) - else: - super().__init__( - filename=filename, - address=address, - dtype=dtype, - mask=mask, - attributes=attributes, - copy=copy, - ) - - fragment_shape = None - aggregated_data = None - instructions = None - term = None - - 
self._set_component("fragment_shape", fragment_shape, copy=False) - self._set_component("aggregated_data", aggregated_data, copy=False) - self._set_component("instructions", instructions, copy=False) - self._set_component("term", term, copy=False) - - if substitutions is not None: - self._set_component( - "substitutions", substitutions.copy(), copy=False - ) - - def _parse_cfa(self, x, term, substitutions): - """Parse the CFA aggregation instructions. - - .. versionadded:: 1.11.2.0 - - :Parameters: - - x: `dict` - - term: `str` or `None` - The name of a non-standard aggregation instruction - term from which the array is to be created, instead of - creating the aggregated data in the standard - terms. Each value of the aggregation instruction - variable will be broadcast across the shape of the - corresponding fragment. - - substitutions: `dict` or `None` - A dictionary whose key/value pairs define text - substitutions to be applied to the fragment file - names. Each key must be specified with the ``${...}`` - syntax, for instance ``{'${base}': 'sub'}``. - - :Returns: - - 3-`tuple` - 1. The shape of the aggregated data. - 2. The shape of the array of fragments. - 3. The parsed aggregation instructions. - - """ - aggregated_data = {} - - location = x["location"] - ndim = location.shape[0] - compressed = np.ma.compressed - chunks = [compressed(i).tolist() for i in location] - shape = [sum(c) for c in chunks] - positions = chunk_positions(chunks) - locations = chunk_locations(chunks) - - if term is not None: - # -------------------------------------------------------- - # Each fragment contains a constant value, not file - # locations. 
- # -------------------------------------------------------- - term = x[term] - fragment_shape = term.shape - aggregated_data = { - frag_loc: { - "location": loc, - "fill_value": term[frag_loc].item(), - "format": "full", - } - for frag_loc, loc in zip(positions, locations) - } - else: - # -------------------------------------------------------- - # Each fragment contains file locations - # -------------------------------------------------------- - a = x["address"] - f = x["file"] - file_fmt = x["format"] - - extra_dimension = f.ndim > ndim - if extra_dimension: - # There is an extra non-fragment dimension - fragment_shape = f.shape[:-1] - else: - fragment_shape = f.shape - - if not a.ndim: - a = (a.item(),) - scalar_address = True - else: - scalar_address = False - - if not file_fmt.ndim: - file_fmt = file_fmt.item() - scalar_fmt = True - else: - scalar_fmt = False - - for frag_loc, location in zip(positions, locations): - if extra_dimension: - filename = compressed(f[frag_loc]).tolist() - if scalar_address: - address = a * len(filename) - else: - address = compressed(a[frag_loc].tolist()) - - if scalar_fmt: - fmt = file_fmt - else: - fmt = compressed(file_fmt[frag_loc]).tolist() - else: - filename = (f[frag_loc].item(),) - if scalar_address: - address = a - else: - address = (a[frag_loc].item(),) - - if scalar_fmt: - fmt = file_fmt - else: - fmt = file_fmt[frag_loc].item() - - aggregated_data[frag_loc] = { - "location": location, - "filename": filename, - "address": address, - "format": fmt, - } - - # Apply string substitutions to the fragment filenames - if substitutions: - for value in aggregated_data.values(): - filenames2 = [] - for filename in value["filename"]: - for base, sub in substitutions.items(): - filename = filename.replace(base, sub) - - filenames2.append(filename) - - value["filename"] = filenames2 - - return shape, fragment_shape, aggregated_data - - def __dask_tokenize__(self): - """Used by `dask.base.tokenize`. - - .. 
versionadded:: 3.14.0 - - """ - out = super().__dask_tokenize__() - aggregated_data = self._get_component("instructions", None) - if aggregated_data is None: - aggregated_data = self.get_aggregated_data(copy=False) - - return out + (aggregated_data,) - - def __getitem__(self, indices): - """x.__getitem__(indices) <==> x[indices]""" - return NotImplemented # pragma: no cover - - def get_aggregated_data(self, copy=True): - """Get the aggregation data dictionary. - - The aggregation data dictionary contains the definitions of - the fragments and the instructions on how to aggregate them. - The keys are indices of the CFA fragment dimensions, - e.g. ``(1, 0, 0 ,0)``. - - .. versionadded:: 3.14.0 - - :Parameters: - - copy: `bool`, optional - Whether or not to return a copy of the aggregation - dictionary. By default a deep copy is returned. - - .. warning:: If False then changing the returned - dictionary in-place will change the - aggregation dictionary stored in the - {{class}} instance, **as well as in any - copies of it**. - - :Returns: - - `dict` - The aggregation data dictionary. - - **Examples** - - >>> a.shape - (12, 1, 73, 144) - >>> a.get_fragment_shape() - (2, 1, 1, 1) - >>> a.get_aggregated_data() - {(0, 0, 0, 0): { - 'file': ('January-June.nc',), - 'address': ('temp',), - 'format': 'nc', - 'location': [(0, 6), (0, 1), (0, 73), (0, 144)]}, - (1, 0, 0, 0): { - 'file': ('July-December.nc',), - 'address': ('temp',), - 'format': 'nc', - 'location': [(6, 12), (0, 1), (0, 73), (0, 144)]}} - - """ - aggregated_data = self._get_component("aggregated_data") - if copy: - aggregated_data = deepcopy(aggregated_data) - - return aggregated_data - - def get_fragmented_dimensions(self): - """Get the positions of dimensions that have two or more fragments. - - .. versionadded:: 3.14.0 - - :Returns: - - `list` - The dimension positions. 
- - **Examples** - - >>> a.get_fragment_shape() - (20, 1, 40, 1) - >>> a.get_fragmented_dimensions() - [0, 2] - - >>> a.get_fragment_shape() - (1, 1, 1) - >>> a.get_fragmented_dimensions() - [] - - """ - return [ - i for i, size in enumerate(self.get_fragment_shape()) if size > 1 - ] - - def get_fragment_shape(self): - """Get the sizes of the fragment dimensions. - - The fragment dimension sizes are given in the same order as - the aggregated dimension sizes given by `shape`. - - .. versionadded:: 3.14.0 - - :Returns: - - `tuple` - The shape of the fragment dimensions. - - """ - return self._get_component("fragment_shape") - - def get_storage_options(self): - """Return `s3fs.S3FileSystem` options for accessing S3 fragment files. - - .. versionadded:: 1.11.2.0 - - :Returns: - - `dict` or `None` - The `s3fs.S3FileSystem` options. - - **Examples** - - >>> f.get_storage_options() - {} - - >>> f.get_storage_options() - {'anon': True} - - >>> f.get_storage_options() - {'key: 'scaleway-api-key...', - 'secret': 'scaleway-secretkey...', - 'endpoint_url': 'https://s3.fr-par.scw.cloud', - 'client_kwargs': {'region_name': 'fr-par'}} - - """ - return super().get_storage_options(create_endpoint_url=False) - - def get_term(self, default=ValueError()): - """The CFA aggregation instruction term for the data, if set. - - .. versionadded:: 3.15.0 - - :Parameters: - - default: optional - Return the value of the *default* parameter if the - term has not been set. If set to an `Exception` - instance then it will be raised instead. - - :Returns: - - `str` - The CFA aggregation instruction term name. - - """ - return self._get_component("term", default=default) - - def subarray_shapes(self, shapes): - """Create the subarray shapes. - - A fragmented dimension (i.e. one spanned by two or more - fragments) will always have a subarray size equal to the - size of each of its fragments, overriding any other size - implied by the *shapes* parameter. - - .. versionadded:: 3.14.0 - - .. 
seealso:: `subarrays` - - :Parameters: - - shapes: `int`, sequence, `dict` or `str`, optional - Define the subarray shapes. - - Any value accepted by the *chunks* parameter of the - `dask.array.from_array` function is allowed. - - The subarray sizes implied by *chunks* for a dimension - that has been fragmented are ignored, so their - specification is arbitrary. - - :Returns: - - `tuple` - The subarray sizes along each dimension. - - **Examples** - - >>> a.shape - (12, 1, 73, 144) - >>> a.get_fragment_shape() - (2, 1, 1, 1) - >>> a.fragmented_dimensions() - [0] - >>> a.subarray_shapes(-1) - ((6, 6), (1,), (73,), (144,)) - >>> a.subarray_shapes(None) - ((6, 6), (1,), (73,), (144,)) - >>> a.subarray_shapes("auto") - ((6, 6), (1,), (73,), (144,)) - >>> a.subarray_shapes((None, 1, 40, 50)) - ((6, 6), (1,), (40, 33), (50, 50, 44)) - >>> a.subarray_shapes((None, None, "auto", 50)) - ((6, 6), (1,), (73,), (50, 50, 44)) - >>> a.subarray_shapes({2: 40}) - ((6, 6), (1,), (40, 33), (144,)) - - """ - from numbers import Number - - from dask.array.core import normalize_chunks - - # Positions of fragmented dimensions (i.e. those spanned by - # two or more fragments) - f_dims = self.get_fragmented_dimensions() - - shape = self.shape - aggregated_data = self.get_aggregated_data(copy=False) - - # Create the base chunks. - chunks = [] - ndim = self.ndim - for dim, (n_fragments, size) in enumerate( - zip(self.get_fragment_shape(), self.shape) - ): - if dim in f_dims: - # This aggregated dimension is spanned by two or more - # fragments => set the chunks to be the same size as - # each fragment. - c = [] - index = [0] * ndim - for j in range(n_fragments): - index[dim] = j - loc = aggregated_data[tuple(index)]["location"][dim] - chunk_size = loc[1] - loc[0] - c.append(chunk_size) - - chunks.append(tuple(c)) - else: - # This aggregated dimension is spanned by exactly one - # fragment => store `None` for now. This will get - # overwritten from 'shapes'. 
- chunks.append(None) - - if isinstance(shapes, (str, Number)) or shapes is None: - chunks = [ - c if i in f_dims else shapes for i, c in enumerate(chunks) - ] - elif isinstance(shapes, dict): - chunks = [ - chunks[i] if i in f_dims else shapes.get(i, "auto") - for i, c in enumerate(chunks) - ] - else: - # chunks is a sequence - if len(shapes) != ndim: - raise ValueError( - f"Wrong number of 'shapes' elements in {shapes}: " - f"Got {len(shapes)}, expected {self.ndim}" - ) - - chunks = [ - c if i in f_dims else shapes[i] for i, c in enumerate(chunks) - ] - - return normalize_chunks(chunks, shape=shape, dtype=self.dtype) - - def subarrays(self, subarray_shapes): - """Return descriptors for every subarray. - - .. versionadded:: 3.14.0 - - .. seealso:: `subarray_shapes` - - :Parameters: - - subarray_shapes: `tuple` - The subarray sizes along each dimension, as returned - by a prior call to `subarray_shapes`. - - :Returns: - - 6-`tuple` of iterators - Each iterator iterates over a particular descriptor - from each subarray. - - 1. The indices of the aggregated array that correspond - to each subarray. - - 2. The shape of each subarray. - - 3. The indices of the fragment that corresponds to each - subarray (some subarrays may be represented by a - part of a fragment). - - 4. The location of each subarray. - - 5. The location on the fragment dimensions of the - fragment that corresponds to each subarray. - - 6. The shape of each fragment that overlaps each chunk. - - **Examples** - - An aggregated array with shape (12, 73, 144) has two - fragments, both with with shape (6, 73, 144). - - >>> a.shape - (12, 73, 144) - >>> a.get_fragment_shape() - (2, 1, 1) - >>> a.fragmented_dimensions() - [0] - >>> subarray_shapes = a.subarray_shapes({1: 40}) - >>> print(subarray_shapes) - ((6, 6), (40, 33), (144,)) - >>> ( - ... u_indices, - ... u_shapes, - ... f_indices, - ... s_locations, - ... f_locations, - ... f_shapes, - ... 
) = a.subarrays(subarray_shapes) - >>> for i in u_indices: - ... print(i) - ... - (slice(0, 6, None), slice(0, 40, None), slice(0, 144, None)) - (slice(0, 6, None), slice(40, 73, None), slice(0, 144, None)) - (slice(6, 12, None), slice(0, 40, None), slice(0, 144, None)) - (slice(6, 12, None), slice(40, 73, None), slice(0, 144, None)) - - >>> for i in u_shapes - ... print(i) - ... - (6, 40, 144) - (6, 33, 144) - (6, 40, 144) - (6, 33, 144) - >>> for i in f_indices: - ... print(i) - ... - (slice(None, None, None), slice(0, 40, None), slice(0, 144, None)) - (slice(None, None, None), slice(40, 73, None), slice(0, 144, None)) - (slice(None, None, None), slice(0, 40, None), slice(0, 144, None)) - (slice(None, None, None), slice(40, 73, None), slice(0, 144, None)) - >>> for i in s_locations: - ... print(i) - ... - (0, 0, 0) - (0, 1, 0) - (1, 0, 0) - (1, 1, 0) - >>> for i in f_locations: - ... print(i) - ... - (0, 0, 0) - (0, 0, 0) - (1, 0, 0) - (1, 0, 0) - >>> for i in f_shapes: - ... print(i) - ... - (6, 73, 144) - (6, 73, 144) - (6, 73, 144) - (6, 73, 144) - - """ - f_dims = self.get_fragmented_dimensions() - - # The indices of the uncompressed array that correspond to - # each subarray, the shape of each uncompressed subarray, and - # the location of each subarray - s_locations = [] - u_shapes = [] - u_indices = [] - f_locations = [] - for dim, c in enumerate(subarray_shapes): - nc = len(c) - s_locations.append(tuple(range(nc))) - u_shapes.append(c) - - if dim in f_dims: - f_locations.append(tuple(range(nc))) - else: - # No fragmentation along this dimension - f_locations.append((0,) * nc) - - c = tuple(accumulate((0,) + c)) - u_indices.append([slice(i, j) for i, j in zip(c[:-1], c[1:])]) - - # For each subarray, the part of the fragment that corresponds - # to it. - f_indices = [ - (slice(None),) * len(u) if dim in f_dims else u - for dim, u in enumerate(u_indices) - ] - - # For each subarray, the shape of the fragment that - # corresponds to it. 
- f_shapes = [ - u_shape if dim in f_dims else (size,) * len(u_shape) - for dim, (u_shape, size) in enumerate(zip(u_shapes, self.shape)) - ] - - return ( - product(*u_indices), - product(*u_shapes), - product(*f_indices), - product(*s_locations), - product(*f_locations), - product(*f_shapes), - ) - - def to_dask_array(self, chunks="auto"): - """Create a dask array with `FragmentArray` chunks. - - .. versionadded:: 3.14.0 - - :Parameters: - - chunks: `int`, `tuple`, `dict` or `str`, optional - Specify the chunking of the returned dask array. - - Any value accepted by the *chunks* parameter of the - `dask.array.from_array` function is allowed. - - The chunk sizes implied by *chunks* for a dimension that - has been fragmented are ignored and replaced with values - that are implied by that dimensions fragment sizes. - - :Returns: - - `dask.array.Array` - - """ - import dask.array as da - from dask.array.core import getter - from dask.base import tokenize - - name = (f"{self.__class__.__name__}-{tokenize(self)}",) - - dtype = self.dtype - units = self.get_units(None) - calendar = self.get_calendar(None) - aggregated_data = self.get_aggregated_data(copy=False) - - # Set the chunk sizes for the dask array - chunks = self.subarray_shapes(chunks) - - fragment_arrays = self._FragmentArray - if not self.get_mask(): - fragment_arrays = fragment_arrays.copy() - fragment_arrays["nc"] = partial(fragment_arrays["nc"], mask=False) - - storage_options = self.get_storage_options() - - dsk = {} - for ( - u_indices, - u_shape, - f_indices, - chunk_location, - fragment_location, - fragment_shape, - ) in zip(*self.subarrays(chunks)): - kwargs = aggregated_data[fragment_location].copy() - kwargs.pop("location", None) - - fragment_format = kwargs.pop("format", None) - try: - FragmentArray = fragment_arrays[fragment_format] - except KeyError: - raise ValueError( - "Can't get FragmentArray class for unknown " - f"fragment dataset format: {fragment_format!r}" - ) - - if storage_options and 
kwargs["address"] == "nc": - # Pass on any file system options - kwargs["storage_options"] = storage_options - - fragment = FragmentArray( - dtype=dtype, - shape=fragment_shape, - aggregated_units=units, - aggregated_calendar=calendar, - **kwargs, - ) - - key = f"{fragment.__class__.__name__}-{tokenize(fragment)}" - dsk[key] = fragment - dsk[name + chunk_location] = ( - getter, - key, - f_indices, - False, - False, - ) - - # Return the dask array - return da.Array(dsk, name[0], chunks=chunks, dtype=dtype) diff --git a/cf/test/test_Field.py b/cf/test/test_Field.py index 1046ddad99..c1122ac338 100644 --- a/cf/test/test_Field.py +++ b/cf/test/test_Field.py @@ -2550,7 +2550,7 @@ def test_Field_percentile(self): # TODO: add loop to check get same shape and close enough data # for every possible axis combo (see also test_Data_percentile). - def test_Field__aaa_grad_xy(self): + def test_Field_grad_xy(self): f = cf.example_field(0) # theta=0 is at the north pole From 62c322d45196a2844d89c9c0217b7e9f0bb3253c Mon Sep 17 00:00:00 2001 From: David Hassell Date: Tue, 18 Feb 2025 16:29:16 +0000 Subject: [PATCH 44/51] fix up redundant files --- cf/data/array/cfah5netcdfarray.py | 10 - cf/data/array/cfanetcdf4array.py | 10 - cf/data/array/h5netcdfarray.py | 7 +- cf/data/array/mixin/activestoragemixin.py | 4 +- cf/data/array/netcdf4array.py | 9 +- cf/data/array/netcdfarray.py | 4 +- cf/data/array/umarray.py | 14 +- cf/data/collapse/collapse_active.py | 4 +- cf/data/collapse/dask_collapse.py | 2 +- cf/data/dask_utils.py | 4 +- cf/data/data.py | 2 +- cf/data/fragment/fragmentfilearray.py | 7 +- cf/data/fragment/fullfragmentarray.py | 91 ------- cf/data/fragment/h5netcdffragmentarray.py | 97 ------- cf/data/fragment/mixin/fragmentarraymixin.py | 258 ------------------- cf/data/fragment/netcdf4fragmentarray.py | 108 -------- cf/data/fragment/netcdffragmentarray.py | 239 ----------------- cf/data/fragment/umfragmentarray.py | 105 -------- 18 files changed, 24 insertions(+), 951 
deletions(-) delete mode 100644 cf/data/array/cfah5netcdfarray.py delete mode 100644 cf/data/array/cfanetcdf4array.py delete mode 100644 cf/data/fragment/fullfragmentarray.py delete mode 100644 cf/data/fragment/h5netcdffragmentarray.py delete mode 100644 cf/data/fragment/mixin/fragmentarraymixin.py delete mode 100644 cf/data/fragment/netcdf4fragmentarray.py delete mode 100644 cf/data/fragment/netcdffragmentarray.py delete mode 100644 cf/data/fragment/umfragmentarray.py diff --git a/cf/data/array/cfah5netcdfarray.py b/cf/data/array/cfah5netcdfarray.py deleted file mode 100644 index 6e73d84bd9..0000000000 --- a/cf/data/array/cfah5netcdfarray.py +++ /dev/null @@ -1,10 +0,0 @@ -from .h5netcdfarray import H5netcdfArray -from .mixin import CFAMixin - - -class CFAH5netcdfArray(CFAMixin, H5netcdfArray): - """A CFA-netCDF array accessed with `h5netcdf` - - .. versionadded:: 1.11.2.0 - - """ diff --git a/cf/data/array/cfanetcdf4array.py b/cf/data/array/cfanetcdf4array.py deleted file mode 100644 index 475bb5fb28..0000000000 --- a/cf/data/array/cfanetcdf4array.py +++ /dev/null @@ -1,10 +0,0 @@ -from .mixin import CFAMixin -from .netcdf4array import NetCDF4Array - - -class CFANetCDF4Array(CFAMixin, NetCDF4Array): - """A CFA-netCDF array accessed with `netCDF4`. - - .. versionadded:: 1.11.2.0 - - """ diff --git a/cf/data/array/h5netcdfarray.py b/cf/data/array/h5netcdfarray.py index e5a1194316..8b7d7e8685 100644 --- a/cf/data/array/h5netcdfarray.py +++ b/cf/data/array/h5netcdfarray.py @@ -11,11 +11,6 @@ class H5netcdfArray( ): """A netCDF array accessed with `h5netcdf`. - **Active storage reductions** - - An active storage reduction may be enabled with the `actify` - method. See `cf.data.collapse.Collapse` for details. - - .. versionadded:: 1.11.2.0 + .. 
versionadded:: 3.16.3 """ diff --git a/cf/data/array/mixin/activestoragemixin.py b/cf/data/array/mixin/activestoragemixin.py index 5666338871..eaf8e5bb99 100644 --- a/cf/data/array/mixin/activestoragemixin.py +++ b/cf/data/array/mixin/activestoragemixin.py @@ -1,7 +1,7 @@ class ActiveStorageMixin: """Mixin class for enabling active storage operations. - .. versionadded:: 1.11.2.0 + .. versionadded:: 3.16.3 """ @@ -12,7 +12,7 @@ def active_storage(self): Currently, active storage operations are allowed unless the data are numerically packed. - .. versionadded:: 1.11.2.0 + .. versionadded:: 3.16.3 :Returns: diff --git a/cf/data/array/netcdf4array.py b/cf/data/array/netcdf4array.py index 19a05beb60..49c2c05c50 100644 --- a/cf/data/array/netcdf4array.py +++ b/cf/data/array/netcdf4array.py @@ -9,11 +9,4 @@ class NetCDF4Array( Container, cfdm.NetCDF4Array, ): - """A netCDF array accessed with `netCDF4`. - - **Active storage reductions** - - An active storage reduction may be enabled with the `actify` - method. See `cf.data.collapse.Collapse` for details. - - """ + """A netCDF array accessed with `netCDF4`.""" diff --git a/cf/data/array/netcdfarray.py b/cf/data/array/netcdfarray.py index 6f56e2d930..67b497a78b 100644 --- a/cf/data/array/netcdfarray.py +++ b/cf/data/array/netcdfarray.py @@ -1,7 +1,7 @@ class NetCDFArray: """A netCDF array accessed with `netCDF4`. - Deprecated at version 1.11.2.0 and is no longer available. Use + Deprecated at version 3.16.3 and is no longer available. Use `cf.NetCDF4Array` instead. """ @@ -11,6 +11,6 @@ def __init__(self, *args, **kwargs): from ...functions import DeprecationError raise DeprecationError( - f"{self.__class__.__name__} was deprecated at version 1.11.2.0 " + f"{self.__class__.__name__} was deprecated at version 3.16.3 " "and is no longer available. Use cf.NetCDF4Array instead." 
) diff --git a/cf/data/array/umarray.py b/cf/data/array/umarray.py index 3b37663240..b71a920f89 100644 --- a/cf/data/array/umarray.py +++ b/cf/data/array/umarray.py @@ -68,7 +68,7 @@ def __init__( already been set will be inferred from the lookup header and cached for future use. - .. versionadded:: 1.11.2.0 + .. versionadded:: 3.16.3 {{init source: optional}} @@ -91,11 +91,11 @@ def __init__( Deprecated at version 3.15.0. units: `str` or `None`, optional - Deprecated at version 1.11.2.0. Use the + Deprecated at version 3.16.3. Use the *attributes* parameter instead. calendar: `str` or `None`, optional - Deprecated at version 1.11.2.0. Use the + Deprecated at version 3.16.3. Use the *attributes* parameter instead. """ @@ -143,7 +143,7 @@ def __init__( def _get_array(self, index=None): """Returns a subspace of the dataset variable. - .. versionadded:: 1.11.2.0 + .. versionadded:: 3.16.3 .. seealso:: `__array__`, `index` @@ -243,7 +243,7 @@ def _get_rec(self, f, header_offset): def _set_FillValue(self, int_hdr, real_hdr, attributes): """Set the ``_FillValue`` attribute. - .. versionadded:: 1.11.2.0 + .. versionadded:: 3.16.3 :Parameters: @@ -281,8 +281,6 @@ def _set_units(self, int_hdr, attributes): .. versionadded:: 3.14.0 - .. versionadded:: 1.11.2.0 - :Parameters: int_hdr: `numpy.ndarray` @@ -339,7 +337,7 @@ def _set_units(self, int_hdr, attributes): def _set_unpack(self, int_hdr, real_hdr, attributes): """Set the ``add_offset`` and ``scale_factor`` attributes. - .. versionadded:: 1.11.2.0 + .. versionadded:: 3.16.3 :Parameters: diff --git a/cf/data/collapse/collapse_active.py b/cf/data/collapse/collapse_active.py index acf5e60b6e..51ac197076 100644 --- a/cf/data/collapse/collapse_active.py +++ b/cf/data/collapse/collapse_active.py @@ -40,7 +40,7 @@ def actify(method): can be done in active storage, or the active storage reduction failed) then the computations will be done locally "as usual". - .. versionadded:: 1.11.2.0 + .. versionadded:: 3.16.3 .. 
seealso:: `active_chunk_function` @@ -92,7 +92,7 @@ def active_chunk_function(method, *args, **kwargs): reduction components, similar to that returned by a ``cf_*_chunk`` method, is returned. - .. versionadded:: 1.11.2.0 + .. versionadded:: 3.16.3 .. seealso:: `actify` diff --git a/cf/data/collapse/dask_collapse.py b/cf/data/collapse/dask_collapse.py index 412c91935f..a9245ff10e 100644 --- a/cf/data/collapse/dask_collapse.py +++ b/cf/data/collapse/dask_collapse.py @@ -1133,7 +1133,7 @@ def cf_sum_of_weights2_chunk( This function is passed to `dask.array.reduction` as its *chunk* parameter. - .. versionadded:: 1.11.2.0 + .. versionadded:: 3.16.3 :Parameters: diff --git a/cf/data/dask_utils.py b/cf/data/dask_utils.py index 4903d2ac2e..4c6923541d 100644 --- a/cf/data/dask_utils.py +++ b/cf/data/dask_utils.py @@ -413,7 +413,7 @@ def cf_units(a, from_units, to_units): def cf_is_masked(a): """Determine whether an array has masked values. - .. versionadded:: 1.11.2.0 + .. versionadded:: 3.16.3 :Parameters: @@ -436,7 +436,7 @@ def cf_is_masked(a): def cf_filled(a, fill_value=None): """Replace masked elements with a fill value. - .. versionadded:: 1.11.2.0 + .. versionadded:: 3.16.3 :Parameters: diff --git a/cf/data/data.py b/cf/data/data.py index 03cdcb33be..8962a9d9e2 100644 --- a/cf/data/data.py +++ b/cf/data/data.py @@ -3660,7 +3660,7 @@ def masked_where(self, condition, inplace=False): `masked_where` causes all delayed operations to be executed. - .. versionadded:: 1.11.2.0 + .. versionadded:: 3.16.3 .. 
seealso:: `mask`, `masked_values`, `where` diff --git a/cf/data/fragment/fragmentfilearray.py b/cf/data/fragment/fragmentfilearray.py index 0cb17b7394..b71aeb2460 100644 --- a/cf/data/fragment/fragmentfilearray.py +++ b/cf/data/fragment/fragmentfilearray.py @@ -1,7 +1,12 @@ import cfdm +from ...mixin_container import Container +from ..array.mixin import ActiveStorageMixin -class FragmentFileArray(cfdm.data.fragment.FragmentFileArray): + +class FragmentFileArray( + ActiveStorageMixin, Container, cfdm.data.fragment.FragmentFileArray +): """Fragment of aggregated data in a file. .. versionadded:: NEXTVERSION diff --git a/cf/data/fragment/fullfragmentarray.py b/cf/data/fragment/fullfragmentarray.py deleted file mode 100644 index 52760f24f7..0000000000 --- a/cf/data/fragment/fullfragmentarray.py +++ /dev/null @@ -1,91 +0,0 @@ -from ..array.fullarray import FullArray -from .mixin import FragmentArrayMixin - - -class FullFragmentArray(FragmentArrayMixin, FullArray): - """A CFA fragment array that is filled with a value. - - .. versionadded:: 3.15.0 - - """ - - def __init__( - self, - fill_value=None, - dtype=None, - shape=None, - aggregated_units=False, - aggregated_calendar=False, - attributes=None, - source=None, - copy=True, - ): - """**Initialisation** - - :Parameters: - - fill_value: scalar - The fill value. - - dtype: `numpy.dtype` - The data type of the aggregated array. May be `None` - if the numpy data-type is not known (which can be the - case for netCDF string types, for example). This may - differ from the data type of the netCDF fragment - variable. - - shape: `tuple` - The shape of the fragment within the aggregated - array. This may differ from the shape of the netCDF - fragment variable in that the latter may have fewer - size 1 dimensions. - - {{init attributes: `dict` or `None`, optional}} - - .. 
versionadded:: 1.11.2.0 - - {{aggregated_units: `str` or `None`, optional}} - - {{aggregated_calendar: `str` or `None`, optional}} - - {{init source: optional}} - - {{init copy: `bool`, optional}} - - units: `str` or `None`, optional - Deprecated at version 1.11.2.0. Use the - *attributes* parameter instead. - - calendar: `str` or `None`, optional - Deprecated at version 1.11.2.0. Use the - *attributes* parameter instead. - - """ - super().__init__( - fill_value=fill_value, - dtype=dtype, - shape=shape, - attributes=attributes, - source=source, - copy=False, - ) - - if source is not None: - try: - aggregated_units = source._get_component( - "aggregated_units", False - ) - except AttributeError: - aggregated_units = False - - try: - aggregated_calendar = source._get_component( - "aggregated_calendar", False - ) - except AttributeError: - aggregated_calendar = False - - self._set_component("aggregated_units", aggregated_units, copy=False) - self._set_component( - "aggregated_calendar", aggregated_calendar, copy=False - ) diff --git a/cf/data/fragment/h5netcdffragmentarray.py b/cf/data/fragment/h5netcdffragmentarray.py deleted file mode 100644 index 99ac398b09..0000000000 --- a/cf/data/fragment/h5netcdffragmentarray.py +++ /dev/null @@ -1,97 +0,0 @@ -from ..array.h5netcdfarray import H5netcdfArray -from .mixin import FragmentArrayMixin - - -class H5netcdfFragmentArray(FragmentArrayMixin, H5netcdfArray): - """A netCDF fragment array accessed with `h5netcdf`. - - .. versionadded:: 1.11.2.0 - - """ - - def __init__( - self, - filename=None, - address=None, - dtype=None, - shape=None, - aggregated_units=False, - aggregated_calendar=False, - attributes=None, - storage_options=None, - source=None, - copy=True, - ): - """**Initialisation** - - :Parameters: - - filename: (sequence of `str`), optional - The names of the netCDF fragment files containing the - array. - - address: (sequence of `str`), optional - The name of the netCDF variable containing the - fragment array. 
Required unless *varid* is set. - - dtype: `numpy.dtype`, optional - The data type of the aggregated array. May be `None` - if the numpy data-type is not known (which can be the - case for netCDF string types, for example). This may - differ from the data type of the netCDF fragment - variable. - - shape: `tuple`, optional - The shape of the fragment within the aggregated - array. This may differ from the shape of the netCDF - fragment variable in that the latter may have fewer - size 1 dimensions. - - {{init attributes: `dict` or `None`, optional}} - - If *attributes* is `None`, the default, then the - attributes will be set from the netCDF variable during - the first `__getitem__` call. - - {{aggregated_units: `str` or `None`, optional}} - - {{aggregated_calendar: `str` or `None`, optional}} - - {{init storage_options: `dict` or `None`, optional}} - - {{init source: optional}} - - {{init copy: `bool`, optional}} - - """ - super().__init__( - filename=filename, - address=address, - dtype=dtype, - shape=shape, - mask=True, - attributes=attributes, - storage_options=storage_options, - source=source, - copy=copy, - ) - - if source is not None: - try: - aggregated_units = source._get_component( - "aggregated_units", False - ) - except AttributeError: - aggregated_units = False - - try: - aggregated_calendar = source._get_component( - "aggregated_calendar", False - ) - except AttributeError: - aggregated_calendar = False - - self._set_component("aggregated_units", aggregated_units, copy=False) - self._set_component( - "aggregated_calendar", aggregated_calendar, copy=False - ) diff --git a/cf/data/fragment/mixin/fragmentarraymixin.py b/cf/data/fragment/mixin/fragmentarraymixin.py deleted file mode 100644 index e43caec626..0000000000 --- a/cf/data/fragment/mixin/fragmentarraymixin.py +++ /dev/null @@ -1,258 +0,0 @@ -from math import prod - -import numpy as np - -from ....units import Units - - -class FragmentArrayMixin: - """Mixin class for a CFA fragment array. - - .. 
versionadded:: 3.15.0 - - """ - - def _get_array(self, index=None): - """Returns a subspace of the dataset variable. - - .. versionadded:: 1.11.2.0 - - .. seealso:: `__array__`, `index` - - :Parameters: - - {{index: `tuple` or `None`, optional}} - - It is important that there is a distinct value for each - fragment dimension, which is guaranteed when the - default of the `index` attribute is being used. - - :Returns: - - `numpy.ndarray` - The subspace. - - """ - if index is None: - index = self.index() - - try: - array = super()._get_array(index) - except ValueError: - # A ValueError is expected to be raised when the fragment - # variable has fewer than 'self.ndim' dimensions (we know - # that this is the case because 'index' has 'self.ndim' - # elements). - axis = self._size_1_axis(index) - if axis is not None: - # There is a unique size 1 index that must correspond - # to the missing dimension => Remove it from the - # indices, get the fragment array with the new - # indices; and then insert the missing size one - # dimension. - index = list(index) - index.pop(axis) - array = super()._get_array(tuple(index)) - array = np.expand_dims(array, axis) - else: - # There are multiple size 1 indices so we don't know - # how many missing dimensions the fragment has, nor - # their positions => Get the full fragment array and - # then reshape it to the shape of the dask compute - # chunk; and then apply the index. - array = super()._get_array(Ellipsis) - if array.size > prod(self.original_shape): - raise ValueError( - f"Can't get CFA fragment data from ({self}) when " - "the fragment has two or more missing size 1 " - "dimensions, whilst also spanning two or more " - "Dask compute chunks." - "\n\n" - "Consider re-creating the data with exactly one " - "Dask compute chunk per fragment (e.g. by setting " - "'chunks=None' as a keyword to cf.read)." 
- ) - - array = array.reshape(self.original_shape) - array = array[index] - - array = self._conform_to_aggregated_units(array) - return array - - def _conform_to_aggregated_units(self, array): - """Conform the array to have the aggregated units. - - .. versionadded:: 3.15.0 - - :Parameters: - - array: `numpy.ndarray` or `dict` - The array to be conformed. If *array* is a `dict` with - `numpy` array values then selected values are - conformed. - - :Returns: - - `numpy.ndarray` or `dict` - The conformed array. The returned array may or may not - be the input array updated in-place, depending on its - data type and the nature of its units and the - aggregated units. - - If *array* is a `dict` then a dictionary of conformed - arrays is returned. - - """ - units = self.Units - if units: - aggregated_units = self.aggregated_Units - if not units.equivalent(aggregated_units): - raise ValueError( - f"Can't convert fragment data with units {units!r} to " - f"have aggregated units {aggregated_units!r}" - ) - - if units != aggregated_units: - if isinstance(array, dict): - # 'array' is a dictionary. - raise ValueError( - "TODOACTIVE. Placeholder notification that " - "we can't yet deal with active " - "storage reductions on CFA fragments." - ) - else: - # 'array' is a numpy array - array = Units.conform( - array, units, aggregated_units, inplace=True - ) - - return array - - def _size_1_axis(self, indices): - """Find the position of a unique size 1 index. - - .. versionadded:: 3.15.0 - - .. seealso:: `_parse_indices`, `__getitem__` - - :Paramealso:: `_parse_indices`, `__getitem__` - - :Parameters: - - indices: sequence of index - The array indices to be parsed, as returned by - `_parse_indices`. - - :Returns: - - `int` or `None` - The position of the unique size 1 index, or `None` if - there are zero or at least two of them. 
- - **Examples** - - >>> a._size_1_axis(([2, 4, 5], slice(0, 1), slice(0, 73))) - 1 - >>> a._size_1_axis(([2, 4, 5], slice(3, 4), slice(0, 73))) - 1 - >>> a._size_1_axis(([2, 4, 5], [0], slice(0, 73))) - 1 - >>> a._size_1_axis(([2, 4, 5], slice(0, 144), slice(0, 73))) - None - >>> a._size_1_axis(([2, 4, 5], slice(3, 7), [0, 1])) - None - >>> a._size_1_axis(([2, 4, 5], slice(0, 1), [0])) - None - - """ - original_shape = self.original_shape - if original_shape.count(1): - return original_shape.index(1) - - return - - @property - def aggregated_Units(self): - """The units of the aggregated data. - - .. versionadded:: 3.15.0 - - :Returns: - - `Units` - The units of the aggregated data. - - """ - return Units( - self.get_aggregated_units(), self.get_aggregated_calendar(None) - ) - - def get_aggregated_calendar(self, default=ValueError()): - """The calendar of the aggregated array. - - If the calendar is `None` then the CF default calendar is - assumed, if applicable. - - .. versionadded:: 3.15.0 - - :Parameters: - - default: optional - Return the value of the *default* parameter if the - aggregated calendar has not been set. If set to an - `Exception` instance then it will be raised instead. - - :Returns: - - `str` or `None` - The calendar value. - - """ - calendar = self._get_component("aggregated_calendar", False) - if calendar is False: - if default is None: - return - - return self._default( - default, - f"{self.__class__.__name__} 'aggregated_calendar' has not " - "been set", - ) - - return calendar - - def get_aggregated_units(self, default=ValueError()): - """The units of the aggregated array. - - If the units are `None` then the aggregated array has no - defined units. - - .. versionadded:: 3.15.0 - - .. seealso:: `get_aggregated_calendar` - - :Parameters: - - default: optional - Return the value of the *default* parameter if the - aggregated units have not been set. If set to an - `Exception` instance then it will be raised instead. 
- - :Returns: - - `str` or `None` - The units value. - - """ - units = self._get_component("aggregated_units", False) - if units is False: - if default is None: - return - - return self._default( - default, - f"{self.__class__.__name__} 'aggregated_units' have not " - "been set", - ) - - return units diff --git a/cf/data/fragment/netcdf4fragmentarray.py b/cf/data/fragment/netcdf4fragmentarray.py deleted file mode 100644 index f93a13dc18..0000000000 --- a/cf/data/fragment/netcdf4fragmentarray.py +++ /dev/null @@ -1,108 +0,0 @@ -from ..array.netcdf4array import NetCDF4Array -from .mixin import FragmentArrayMixin - - -class NetCDF4FragmentArray(FragmentArrayMixin, NetCDF4Array): - """A netCDF fragment array accessed with `netCDF4`. - - .. versionadded:: 1.11.2.0 - - """ - - def __init__( - self, - filename=None, - address=None, - dtype=None, - shape=None, - aggregated_units=False, - aggregated_calendar=False, - attributes=None, - storage_options=None, - source=None, - copy=True, - ): - """**Initialisation** - - :Parameters: - - filename: (sequence of `str`), optional - The names of the netCDF fragment files containing the - array. - - address: (sequence of `str`), optional - The name of the netCDF variable containing the - fragment array. Required unless *varid* is set. - - dtype: `numpy.dtype`, optional - The data type of the aggregated array. May be `None` - if the numpy data-type is not known (which can be the - case for netCDF string types, for example). This may - differ from the data type of the netCDF fragment - variable. - - shape: `tuple`, optional - The shape of the fragment within the aggregated - array. This may differ from the shape of the netCDF - fragment variable in that the latter may have fewer - size 1 dimensions. - - units: `str` or `None`, optional - The units of the fragment data. Set to `None` to - indicate that there are no units. If unset then the - units will be set during the first `__getitem__` call. 
- - calendar: `str` or `None`, optional - The calendar of the fragment data. Set to `None` to - indicate the CF default calendar, if applicable. If - unset then the calendar will be set during the first - `__getitem__` call. - - {{init attributes: `dict` or `None`, optional}} - - If *attributes* is `None`, the default, then the - attributes will be set from the netCDF variable during - the first `__getitem__` call. - - {{aggregated_units: `str` or `None`, optional}} - - {{aggregated_calendar: `str` or `None`, optional}} - - {{init storage_options: `dict` or `None`, optional}} - - {{init source: optional}} - - {{init copy: `bool`, optional}} - - """ - super().__init__( - filename=filename, - address=address, - dtype=dtype, - shape=shape, - mask=True, - attributes=attributes, - storage_options=storage_options, - source=source, - copy=copy, - ) - - if source is not None: - try: - aggregated_units = source._get_component( - "aggregated_units", False - ) - except AttributeError: - aggregated_units = False - - try: - aggregated_calendar = source._get_component( - "aggregated_calendar", False - ) - except AttributeError: - aggregated_calendar = False - - self._set_component("aggregated_units", aggregated_units, copy=False) - self._set_component( - "aggregated_calendar", aggregated_calendar, copy=False - ) diff --git a/cf/data/fragment/netcdffragmentarray.py b/cf/data/fragment/netcdffragmentarray.py deleted file mode 100644 index cd24b07c9d..0000000000 --- a/cf/data/fragment/netcdffragmentarray.py +++ /dev/null @@ -1,239 +0,0 @@ -import cfdm - -from ..array.abstract import Array -from ..array.mixin import FileArrayMixin -from .h5netcdffragmentarray import H5netcdfFragmentArray -from .mixin import FragmentArrayMixin -from .netcdf4fragmentarray import NetCDF4FragmentArray - - -class NetCDFFragmentArray( - FragmentArrayMixin, - cfdm.data.mixin.NetCDFFileMixin, - FileArrayMixin, - cfdm.data.mixin.IndexMixin, - cfdm.data.mixin.FileArrayMixin, - Array, -): - """A netCDF fragment 
array. - - Access will be with either `netCDF4` or `h5netcdf`. - - .. versionadded:: 3.15.0 - - """ - - def __init__( - self, - filename=None, - address=None, - dtype=None, - shape=None, - aggregated_units=False, - aggregated_calendar=False, - attributes=None, - storage_options=None, - source=None, - copy=True, - ): - """**Initialisation** - - :Parameters: - - filename: (sequence of `str`), optional - The names of the netCDF fragment files containing the - array. - - address: (sequence of `str`), optional - The name of the netCDF variable containing the - fragment array. Required unless *varid* is set. - - dtype: `numpy.dtype`, optional - The data type of the aggregated array. May be `None` - if the numpy data-type is not known (which can be the - case for netCDF string types, for example). This may - differ from the data type of the netCDF fragment - variable. - - shape: `tuple`, optional - The shape of the fragment within the aggregated - array. This may differ from the shape of the netCDF - fragment variable in that the latter may have fewer - size 1 dimensions. - - {{init attributes: `dict` or `None`, optional}} - - If *attributes* is `None`, the default, then the - attributes will be set from the netCDF variable during - the first `__getitem__` call. - - .. versionadded:: 1.11.2.0 - - {{aggregated_units: `str` or `None`, optional}} - - {{aggregated_calendar: `str` or `None`, optional}} - - {{init storage_options: `dict` or `None`, optional}} - - .. versionadded:: 1.11.2.0 - - {{init source: optional}} - - {{init copy: `bool`, optional}} - - units: `str` or `None`, optional - Deprecated at version 1.11.2.0. Use the - *attributes* parameter instead. - - calendar: `str` or `None`, optional - Deprecated at version 1.11.2.0. Use the - *attributes* parameter instead. 
- - """ - super().__init__( - source=source, - copy=copy, - ) - - if source is not None: - try: - shape = source._get_component("shape", None) - except AttributeError: - shape = None - - try: - filename = source._get_component("filename", None) - except AttributeError: - filename = None - - try: - address = source._get_component("address", None) - except AttributeError: - address = None - - try: - dtype = source._get_component("dtype", None) - except AttributeError: - dtype = None - - try: - attributes = source._get_component("attributes", None) - except AttributeError: - attributes = None - - try: - aggregated_units = source._get_component( - "aggregated_units", False - ) - except AttributeError: - aggregated_units = False - - try: - aggregated_calendar = source._get_component( - "aggregated_calendar", False - ) - except AttributeError: - aggregated_calendar = False - - try: - storage_options = source._get_component( - "storage_options", None - ) - except AttributeError: - storage_options = None - - if filename is not None: - if isinstance(filename, str): - filename = (filename,) - else: - filename = tuple(filename) - - self._set_component("filename", filename, copy=False) - - if address is not None: - if isinstance(address, int): - address = (address,) - else: - address = tuple(address) - - self._set_component("address", address, copy=False) - - if storage_options is not None: - self._set_component("storage_options", storage_options, copy=False) - - self._set_component("shape", shape, copy=False) - self._set_component("dtype", dtype, copy=False) - self._set_component("attributes", attributes, copy=False) - self._set_component("mask", True, copy=False) - - self._set_component("aggregated_units", aggregated_units, copy=False) - self._set_component( - "aggregated_calendar", aggregated_calendar, copy=False - ) - - # By default, close the file after data array access - self._set_component("close", True, copy=False) - - def _get_array(self, index=None): - """Returns a 
subspace of the dataset variable. - - The method acts as a factory for either a - `NetCDF4FragmentArray` or a `H5netcdfFragmentArray` class, and - it is the result of calling `!_get_array` on the newly created - instance that is returned. - - `H5netcdfFragmentArray` will only be used if - `NetCDF4FragmentArray` returns a `FileNotFoundError` exception. - - .. versionadded:: 1.11.2.0 - - .. seealso:: `__array__`, `index` - - :Parameters: - - {{index: `tuple` or `None`, optional}} - - It is important that there is a distinct value for each - fragment dimension, which is guaranteed when the - default of the `index` attribute is being used. - - :Returns: - - `numpy.ndarray` - The subspace. - - """ - kwargs = { - "dtype": self.dtype, - "shape": self.shape, - "aggregated_units": self.get_aggregated_units(None), - "aggregated_calendar": self.get_aggregated_calendar(None), - "attributes": self.get_attributes(None), - "copy": False, - } - - # Loop round the files, returning as soon as we find one that - # is accessible. - filenames = self.get_filenames() - for filename, address in zip(filenames, self.get_addresses()): - kwargs["filename"] = filename - kwargs["address"] = address - kwargs["storage_options"] = self.get_storage_options( - create_endpoint_url=False - ) - - try: - return NetCDF4FragmentArray(**kwargs)._get_array(index) - except FileNotFoundError: - pass - except Exception: - return H5netcdfFragmentArray(**kwargs)._get_array(index) - - # Still here? 
- if not filenames: - raise FileNotFoundError("No fragment files") - - if len(filenames) == 1: - raise FileNotFoundError(f"No such fragment file: {filenames[0]}") - - raise FileNotFoundError(f"No such fragment files: {filenames}") diff --git a/cf/data/fragment/umfragmentarray.py b/cf/data/fragment/umfragmentarray.py deleted file mode 100644 index 7eed4fd0a4..0000000000 --- a/cf/data/fragment/umfragmentarray.py +++ /dev/null @@ -1,105 +0,0 @@ -from ..array.umarray import UMArray -from .mixin import FragmentArrayMixin - - -class UMFragmentArray(FragmentArrayMixin, UMArray): - """A CFA fragment array stored in a UM or PP file. - - .. versionadded:: 3.14.0 - - """ - - def __init__( - self, - filename=None, - address=None, - dtype=None, - shape=None, - aggregated_units=False, - aggregated_calendar=False, - attributes=None, - storage_options=None, - source=None, - copy=True, - ): - """**Initialisation** - - :Parameters: - - filename: (sequence of `str`), optional - The names of the UM or PP files containing the fragment. - - address: (sequence of `str`), optional - The start words in the files of the header. - - dtype: `numpy.dtype` - The data type of the aggregated array. May be `None` - if the numpy data-type is not known (which can be the - case for netCDF string types, for example). This may - differ from the data type of the netCDF fragment - variable. - - shape: `tuple` - The shape of the fragment within the aggregated - array. This may differ from the shape of the netCDF - fragment variable in that the latter may have fewer - size 1 dimensions. - - {{init attributes: `dict` or `None`, optional}} - - During the first `__getitem__` call, any of the - ``_FillValue``, ``add_offset``, ``scale_factor``, - ``units``, and ``calendar`` attributes which haven't - already been set will be inferred from the lookup - header and cached for future use. - - .. 
versionadded:: 1.11.2.0 - - {{aggregated_units: `str` or `None`, optional}} - - {{aggregated_calendar: `str` or `None`, optional}} - - {{init storage_options: `dict` or `None`, optional}} - - {{init source: optional}} - - {{init copy: `bool`, optional}} - - units: `str` or `None`, optional - Deprecated at version 1.11.2.0. Use the - *attributes* parameter instead. - - calendar: `str` or `None`, optional - Deprecated at version 1.11.2.0. Use the - *attributes* parameter instead. - - """ - super().__init__( - filename=filename, - address=address, - dtype=dtype, - shape=shape, - attributes=attributes, - source=source, - copy=False, - ) - - if source is not None: - try: - aggregated_units = source._get_component( - "aggregated_units", False - ) - except AttributeError: - aggregated_units = False - - try: - aggregated_calendar = source._get_component( - "aggregated_calendar", False - ) - except AttributeError: - aggregated_calendar = False - - self._set_component("aggregated_units", aggregated_units, copy=False) - self._set_component( - "aggregated_calendar", aggregated_calendar, copy=False - ) From 2c4e00dfdeb9eb156f46066c3f34a619543bfd0d Mon Sep 17 00:00:00 2001 From: David Hassell Date: Mon, 24 Feb 2025 07:37:59 +0000 Subject: [PATCH 45/51] Fix typos Co-authored-by: Sadie L. 
Bartholomew --- cf/aggregate.py | 4 ++-- cf/data/data.py | 6 +++--- cf/read_write/um/umread.py | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/cf/aggregate.py b/cf/aggregate.py index 732c0bd4f0..952861b210 100644 --- a/cf/aggregate.py +++ b/cf/aggregate.py @@ -4732,7 +4732,7 @@ def _aggregate_2_fields( anc0["hash_value"] = hash_value0 + hash_value1 # The result of aggregating a promoted amd non-promoted - # field ancillary is a non-promoted fierld ancillary + # field ancillary is a non-promoted field ancillary if ( key0 in m0.promoted_field_ancillaries and key1 not in m1.promoted_field_ancillaries @@ -5011,7 +5011,7 @@ def _fix_promoted_field_ancillaries(output_meta, axes_aggregated): output_meta: `list` The list of `_Meta` objects. If any include promoted field - ancillaries then thses will be updated in-place. + ancillaries then these will be updated in-place. :Returns: diff --git a/cf/data/data.py b/cf/data/data.py index 8962a9d9e2..44d193c1bc 100644 --- a/cf/data/data.py +++ b/cf/data/data.py @@ -2804,7 +2804,7 @@ def _concatenate_conform_units(cls, data1, units0, relaxed_units, copy): """Check and conform the units of data prior to concatenation. This is a helper function for `concatenate` that may be easily - overridden in sublcasses, to allow for customisation of the + overridden in subclasses, to allow for customisation of the concatenation process. .. versionadded:: NEXTVERSION @@ -2853,7 +2853,7 @@ def _concatenate_conform_units(cls, data1, units0, relaxed_units, copy): else: raise ValueError( "Can't concatenate: All the input arrays must have " - "equivalent units. Got {units0!r} and {units1!r}" + f"equivalent units. Got {units0!r} and {units1!r}" ) return data1 @@ -2865,7 +2865,7 @@ def _concatenate_post_process( """Post-process concatenated data. 
This is a helper function for `concatenate` that may be easily - overridden in sublcasses, to allow for customisation of the + overridden in subclasses, to allow for customisation of the concatenation process. .. versionadded:: NEXTVERSION diff --git a/cf/read_write/um/umread.py b/cf/read_write/um/umread.py index 4dc3970d55..112c0b857a 100644 --- a/cf/read_write/um/umread.py +++ b/cf/read_write/um/umread.py @@ -567,7 +567,7 @@ def __init__( Unpacking is determined by netCDF conventions for the following variable attributes ``add_offset`` and ``scale_factor``, as applied to lookup header entries - BDATUM and BMKS repectively. + BDATUM and BMKS respectively. .. versionadded:: NEXTVERSION @@ -3499,7 +3499,7 @@ def read( Unpacking is determined by netCDF conventions for the following variable attributes ``add_offset`` and ``scale_factor``, as applied to lookup header entries - BDATUM and BMKS repectively. + BDATUM and BMKS respectively. .. versionadded:: NEXTVERSION From 32a722bbcde52b8d444f96623c389dfaaf1fbee4 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Mon, 24 Feb 2025 08:01:10 +0000 Subject: [PATCH 46/51] Plurals in message Co-authored-by: Sadie L. Bartholomew --- cf/read_write/read.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cf/read_write/read.py b/cf/read_write/read.py index e589f08f6e..4d268dab6d 100644 --- a/cf/read_write/read.py +++ b/cf/read_write/read.py @@ -848,8 +848,8 @@ def __new__( if nfields is not None and len(out) != nfields: raise ValueError( f"{nfields} field{cls._plural(nfields)} requested but " - f"{len(out)} field/domain constucts found in " - f"file{cls._plural(file_counter)}" + f"{len(out)} field/domain constuct{cls._plural(len(out))}" + f" found in file{cls._plural(file_counter)}" ) return out From 09cf4c717c6ffec3bbc9c977a3c80f4fc4a1f125 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Mon, 24 Feb 2025 08:03:05 +0000 Subject: [PATCH 47/51] Remove dead code Co-authored-by: Sadie L. 
Bartholomew --- cf/data/data.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/cf/data/data.py b/cf/data/data.py index 44d193c1bc..21d0bb8602 100644 --- a/cf/data/data.py +++ b/cf/data/data.py @@ -2905,15 +2905,6 @@ def _concatenate_post_process( ) concatenated_data.cyclic(axes=axis, iscyclic=False) - # # Set whether or not the concatenated name is deterministic - # deterministic = True - # for d in conformed_data: - # if not d.has_deterministic_name(): - # deterministic = False - # break# - # - # concatenated_data._update_deterministic(deterministic) - return concatenated_data @_inplace_enabled(default=False) From d80000598f15231a13ccc48b8b081767f3b06159 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Mon, 24 Feb 2025 08:07:43 +0000 Subject: [PATCH 48/51] h5py>=3.12.0 --- Changelog.rst | 1 + cf/functions.py | 4 ++++ docs/source/installation.rst | 2 +- requirements.txt | 2 +- 4 files changed, 7 insertions(+), 2 deletions(-) diff --git a/Changelog.rst b/Changelog.rst index b092e58d8b..53e8886d84 100644 --- a/Changelog.rst +++ b/Changelog.rst @@ -8,6 +8,7 @@ version 3.17.0 * New keyword parameter to `cf.Field.compute_vertical_coordinates`: ``key`` (https://github.com/NCAS-CMS/cf-python/issues/802) * Changed dependency: ``1.12.0.0<=cfdm<1.12.1.0`` +* Changed dependency: ``h5py>=3.12.0`` ---- diff --git a/cf/functions.py b/cf/functions.py index eed7bebdd0..bc86d41bfd 100644 --- a/cf/functions.py +++ b/cf/functions.py @@ -2754,6 +2754,10 @@ def dirname(path, normalise=False, uri=None, isdir=False, sep=False): dirname.__doc__ = cfdm.dirname.__doc__.replace("cfdm.", "cf.") +from functools import partial +dirname2 = partial(cfdm.dirname) +dirname2.__doc__ = cfdm.dirname.__doc__.replace("cfdm.", "cf.") + def pathjoin(path1, path2): """Join two file path components intelligently. 
diff --git a/docs/source/installation.rst b/docs/source/installation.rst index 505d3578b9..8ea7cd5d11 100644 --- a/docs/source/installation.rst +++ b/docs/source/installation.rst @@ -206,7 +206,7 @@ Required * `h5netcdf `_, version 1.3.0 newer. -* `h5py `_, version 3.10.0 or newer. +* `h5py `_, version 3.12.0 or newer. * `s3fs `_, version 2024.6.0 or newer. diff --git a/requirements.txt b/requirements.txt index 0df237167d..755493c32b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,5 +8,5 @@ dask>=2024.6.0,<=2024.7.1 packaging>=20.0 scipy>=1.10.0 h5netcdf>=1.3.0 -h5py>=3.10.0 +h5py>=3.12.0 s3fs>=2024.6.0 From 7ad74d095f90fce1f17c21ad233894bc3259ff1a Mon Sep 17 00:00:00 2001 From: David Hassell Date: Mon, 24 Feb 2025 08:23:30 +0000 Subject: [PATCH 49/51] Update cf/test/test_docstring.py Co-authored-by: Sadie L. Bartholomew --- cf/test/test_docstring.py | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/cf/test/test_docstring.py b/cf/test/test_docstring.py index 53402e6f83..1f467c36bc 100644 --- a/cf/test/test_docstring.py +++ b/cf/test/test_docstring.py @@ -87,14 +87,21 @@ def test_docstring(self): if name.startswith("__") and not inspect.isfunction(f): continue - if f.__doc__ is not None: - self.assertNotIn( - "{{", - f.__doc__, - f"\nCLASS: {klass}" - f"\nMETHOD NAME: {name}" - f"\nMETHOD: {f}", - ) + self.assertIsNotNone( + f.__doc__, + f"\nCLASS: {klass}" + f"\nMETHOD NAME: {name}" + f"\nMETHOD: {f}" + f"\n__doc__: {f.__doc__}", + ) + + self.assertNotIn( + "{{", + f.__doc__, + f"\nCLASS: {klass}" + f"\nMETHOD NAME: {name}" + f"\nMETHOD: {f}", + ) def test_docstring_package(self): string = f">>> f = {self.package}." 
From 5da6db9f29636b5e6ade7515050bfaff22359c58 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Mon, 24 Feb 2025 14:44:26 +0000 Subject: [PATCH 50/51] dev --- cf/regrid/regrid.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cf/regrid/regrid.py b/cf/regrid/regrid.py index cbe53e616a..e2e0e32787 100644 --- a/cf/regrid/regrid.py +++ b/cf/regrid/regrid.py @@ -1429,6 +1429,9 @@ def spherical_grid( ) set_grid_type(grid) + + print (grid) + print () return grid From 55be346fc969b5b4d7d94bd9d2dd3603d71c50ff Mon Sep 17 00:00:00 2001 From: David Hassell Date: Mon, 24 Feb 2025 14:45:37 +0000 Subject: [PATCH 51/51] dev --- cf/regrid/regrid.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/cf/regrid/regrid.py b/cf/regrid/regrid.py index e2e0e32787..cbe53e616a 100644 --- a/cf/regrid/regrid.py +++ b/cf/regrid/regrid.py @@ -1429,9 +1429,6 @@ def spherical_grid( ) set_grid_type(grid) - - print (grid) - print () return grid