From 89fbb67987a78b9b9f5bec8768613a89d1123aa4 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Mon, 2 Jun 2025 19:20:00 +0100 Subject: [PATCH 1/8] dev --- Changelog.rst | 2 + cf/__init__.py | 1 + cf/cfimplementation.py | 3 + cf/field.py | 3 +- cf/fieldancillary.py | 39 +++++- cf/quantization.py | 59 +++++++++ cf/test/test_Quantization.py | 42 +++++++ cf/test/test_quantization.py | 231 +++++++++++++++++++++++++++++++++++ 8 files changed, 377 insertions(+), 3 deletions(-) create mode 100644 cf/quantization.py create mode 100644 cf/test/test_Quantization.py create mode 100644 cf/test/test_quantization.py diff --git a/Changelog.rst b/Changelog.rst index c8baf9913b..9f6ab30322 100644 --- a/Changelog.rst +++ b/Changelog.rst @@ -3,6 +3,8 @@ Version NEXTVERSION **2025-??-??** +* Implement lossy compression via quantization + (https://github.com/NCAS-CMS/cf-python/issues/870) * Read Zarr datasets with `cf.read` (https://github.com/NCAS-CMS/cf-python/issues/863) * Update CF aggregation keywords diff --git a/cf/__init__.py b/cf/__init__.py index 2df23b25e5..4004955cbb 100644 --- a/cf/__init__.py +++ b/cf/__init__.py @@ -241,6 +241,7 @@ from .nodecountproperties import NodeCountProperties from .partnodecountproperties import PartNodeCountProperties from .interiorring import InteriorRing +from .quantization import Quantization from .tiepointindex import TiePointIndex from .bounds import Bounds diff --git a/cf/cfimplementation.py b/cf/cfimplementation.py index ece90bde2c..543a1fca9b 100644 --- a/cf/cfimplementation.py +++ b/cf/cfimplementation.py @@ -23,6 +23,7 @@ List, NodeCountProperties, PartNodeCountProperties, + Quantization, TiePointIndex, ) from .data import Data @@ -147,6 +148,7 @@ def set_construct(self, parent, construct, axes=None, copy=True, **kwargs): H5netcdfArray=H5netcdfArray, NetCDF4Array=NetCDF4Array, PointTopologyArray=PointTopologyArray, + Quantization=Quantization, RaggedContiguousArray=RaggedContiguousArray, RaggedIndexedArray=RaggedIndexedArray, 
RaggedIndexedContiguousArray=RaggedIndexedContiguousArray, @@ -203,6 +205,7 @@ def implementation(): 'H5netcdfArray': cf.data.array.h5netcdfarray.H5netcdfArray, 'NetCDF4Array': cf.data.array.netcdf4array.NetCDF4Array, 'PointTopologyArray': , + 'Quantization': cf.quantization.Quantization, 'RaggedContiguousArray': cf.data.array.raggedcontiguousarray.RaggedContiguousArray, 'RaggedIndexedArray': cf.data.array.raggedindexedarray.RaggedIndexedArray, 'RaggedIndexedContiguousArray': cf.data.array.raggedindexedcontiguousarray.RaggedIndexedContiguousArray, diff --git a/cf/field.py b/cf/field.py index c9eb0c19bd..c8b2bc9a48 100644 --- a/cf/field.py +++ b/cf/field.py @@ -22,6 +22,7 @@ Flags, Index, List, + Quantization, mixin, ) from .constants import masked as cf_masked @@ -280,7 +281,7 @@ def __new__(cls, *args, **kwargs): instance._Domain = Domain instance._DomainAncillary = DomainAncillary instance._DomainAxis = DomainAxis - # instance._Data = Data + instance._Quantization = Quantization instance._RaggedContiguousArray = RaggedContiguousArray instance._RaggedIndexedArray = RaggedIndexedArray instance._RaggedIndexedContiguousArray = RaggedIndexedContiguousArray diff --git a/cf/fieldancillary.py b/cf/fieldancillary.py index ca32eeda3c..90a1643535 100644 --- a/cf/fieldancillary.py +++ b/cf/fieldancillary.py @@ -1,7 +1,42 @@ import cfdm -from . import mixin +from . import Quantization, mixin class FieldAncillary(mixin.PropertiesData, cfdm.FieldAncillary): - pass + """A field ancillary construct of the CF data model. + + The field ancillary construct provides metadata which are + distributed over the same sampling domain as the field itself. For + example, if a data variable holds a variable retrieved from a + satellite instrument, a related ancillary data variable might + provide the uncertainty estimates for those retrievals (varying + over the same spatiotemporal domain). 
+ + The field ancillary construct consists of an array of the + ancillary data, which is zero-dimensional or which depends on one + or more of the domain axes, and properties to describe the + data. It is assumed that the data do not depend on axes of the + domain which are not spanned by the array, along which the values + are implicitly propagated. CF-netCDF ancillary data variables + correspond to field ancillary constructs. Note that a field + ancillary construct is constrained by the domain definition of the + parent field construct but does not contribute to the domain's + definition, unlike, for instance, an auxiliary coordinate + construct or domain ancillary construct. + + **NetCDF interface** + + {{netCDF variable}} + + {{netCDF dataset chunks}} + + .. versionadded:: 3.0.0 + + """ + + def __new__(cls, *args, **kwargs): + """Store component classes.""" + instance = super().__new__(cls) + instance._Quantization = Quantization + return instance diff --git a/cf/quantization.py b/cf/quantization.py new file mode 100644 index 0000000000..4c36183f85 --- /dev/null +++ b/cf/quantization.py @@ -0,0 +1,59 @@ +import cfdm + + +class Quantization(cfdm.Quantization): + """A quantization variable. + + A quantization variable describes a quantization algorithm via a + collection of parameters. + + The ``algorithm`` parameter names a specific quantization + algorithm via one of the keys in the `algorithm_parameters` + dictionary. + + The ``implementation`` parameter contains unstandardised text that + concisely conveys the algorithm provenance including the name of + the library or client that performed the quantization, the + software version, and any other information required to + disambiguate the source of the algorithm employed. The text must + take the form ``software-name version version-string + [(optional-information)]``. + + The retained precision of the algorthm is defined with either the + ``quantization_nsb`` or ``quantization_nsd`` parameter. 
+ + For instance, the following parameters describe quantization via + the BitRound algorithm, retaining 6 significant bits, and + implemented by libnetcdf:: + + >>> q = {{package}}.{{class}}( + ... parameters={'algorithm': 'bitround', + ... 'quantization_nsb': 6, + ... 'implementation': 'libnetcdf version 4.9.4'} + ... ) + >>> q.parameters() + {'algorithm': 'bitround', + 'quantization_nsb': 6, + 'implementation': 'libnetcdf version 4.9.4'} + + See CF section 8.4. "Lossy Compression via Quantization". + + **NetCDF interface** + + {{netCDF variable}} + + {{netCDF group attributes}} + + .. versionadded:: NEXTVERSION + + """ + + def __repr__(self): + """Called by the `repr` built-in function. + + x.__repr__() <==> repr(x) + + .. versionadded:: NEXTVERSION + + """ + return super().__repr__().replace("<", " Date: Tue, 3 Jun 2025 12:09:19 +0100 Subject: [PATCH 2/8] dev --- docs/source/class.rst | 1 + docs/source/class/cf.Quantization.rst | 120 ++++++++++++++++ docs/source/tutorial.rst | 189 +++++++++++++++++++++++++- 3 files changed, 306 insertions(+), 4 deletions(-) create mode 100644 docs/source/class/cf.Quantization.rst diff --git a/docs/source/class.rst b/docs/source/class.rst index d7941ac067..6031d91c94 100644 --- a/docs/source/class.rst +++ b/docs/source/class.rst @@ -102,6 +102,7 @@ Classes that support the creation and storage of compressed arrays. cf.RaggedIndexedArray cf.RaggedIndexedContiguousArray cf.SubsampledArray + cf.Quantization Data UGRID classes ------------------ diff --git a/docs/source/class/cf.Quantization.rst b/docs/source/class/cf.Quantization.rst new file mode 100644 index 0000000000..50709e93a4 --- /dev/null +++ b/docs/source/class/cf.Quantization.rst @@ -0,0 +1,120 @@ +.. currentmodule:: cf +.. default-role:: obj + +cf.Quantization +=============== + +---- + +.. autoclass:: cf.Quantization + :no-members: + :no-inherited-members: + +Inspection +---------- + +.. rubric:: Methods + +.. 
autosummary:: + :nosignatures: + :toctree: ../method/ + :template: method.rst + + ~cf.Quantization.dump + +Parameter terms +--------------- + +.. rubric:: Methods + +.. autosummary:: + :nosignatures: + :toctree: ../method/ + :template: method.rst + + ~cf.Quantization.del_parameter + ~cf.Quantization.get_parameter + ~cf.Quantization.has_parameter + ~cf.Quantization.set_parameter + ~cf.Quantization.parameters + ~cf.Quantization.set_parameters + ~cf.Quantization.clear_parameters + +Miscellaneous +------------- + +.. rubric:: Methods + +.. autosummary:: + :nosignatures: + :toctree: ../method/ + :template: method.rst + + ~cf.Quantization.algorithm_parameters + ~cf.Quantization.copy + ~cf.Quantization.equals + ~cf.Quantization.get_original_filenames + ~cf.Quantization.creation_commands + +NetCDF +------ + +.. rubric:: Methods + +.. autosummary:: + :nosignatures: + :toctree: ../method/ + :template: method.rst + + ~cf.Quantization.nc_del_variable + ~cf.Quantization.nc_get_variable + ~cf.Quantization.nc_has_variable + ~cf.Quantization.nc_set_variable + ~cf.Quantization.nc_group_attributes + ~cf.Quantization.nc_clear_group_attributes + ~cf.Quantization.nc_set_group_attribute + ~cf.Quantization.nc_set_group_attributes + +Groups +^^^^^^ + +.. rubric:: Methods + +.. autosummary:: + :nosignatures: + :toctree: ../method/ + :template: method.rst + + ~cf.Quantization.nc_variable_groups + ~cf.Quantization.nc_set_variable_groups + ~cf.Quantization.nc_clear_variable_groups + +Special +------- + +.. rubric:: Methods + +.. autosummary:: + :nosignatures: + :toctree: ../method/ + :template: method.rst + + ~cf.Quantization.__bool__ + ~cf.Quantization.__deepcopy__ + ~cf.Quantization.__repr__ + ~cf.Quantization.__str__ + +Docstring substitutions +----------------------- + +.. rubric:: Methods + +.. 
autosummary:: + :nosignatures: + :toctree: ../method/ + :template: method.rst + + ~cf.Quantization._docstring_special_substitutions + ~cf.Quantization._docstring_substitutions + ~cf.Quantization._docstring_package_depth + ~cf.Quantization._docstring_method_exclusions diff --git a/docs/source/tutorial.rst b/docs/source/tutorial.rst index c741aed4e1..21894bac96 100644 --- a/docs/source/tutorial.rst +++ b/docs/source/tutorial.rst @@ -6692,16 +6692,197 @@ The content of the new file is: 4, 0, 5 ; } ----- - .. _Coordinate-subampling: Coordinate subsampling ^^^^^^^^^^^^^^^^^^^^^^ `Lossy compression by coordinate subsampling`_ was introduced into the -CF conventions at CF-1.9, but is not yet available in cfdm. It will be -ready in a future 3.x.0 release. +CF conventions at CF-1.10 for applications for which the coordinates +can require considerably more storage than the data itself. Space may +be saved in the netCDF file by storing a subsample of the coordinates +that describe the data, and the uncompressed coordinate and auxiliary +coordinate variables are reconstituted by interpolation, from the +subsampled coordinate values to the domain of the data + +This is illustrated with the file ``subsampled.nc`` (found in the +:ref:`sample datasets `): + + +.. 
code-block:: console + :caption: *Inspect the compressed dataset with the ncdump command + line tool.* + + $ ncdump -h subsampled.nc + netcdf subsampled { + dimensions: + time = 2 ; + lat = 18 ; + lon = 12 ; + tp_lat = 4 ; + tp_lon = 5 ; + variables: + float time(time) ; + time:standard_name = "time" ; + time:units = "days since 2000-01-01" ; + float lat(tp_lat, tp_lon) ; + lat:standard_name = "latitude" ; + lat:units = "degrees_north" ; + lat:bounds_tie_points = "lat_bounds" ; + float lon(tp_lat, tp_lon) ; + lon:standard_name = "longitude" ; + lon:units = "degrees_east" ; + lon:bounds_tie_points = "lon_bounds" ; + float lat_bounds(tp_lat, tp_lon) ; + float lon_bounds(tp_lat, tp_lon) ; + int lat_indices(tp_lat) ; + lat_indices:long_name = "Tie point indices for latitude dimension" ; + int lon_indices(tp_lon) ; + lon_indices:long_name = "Tie point indices for longitude dimension" ; + int bilinear ; + bilinear:interpolation_name = "bi_linear" ; + bilinear:computational_precision = "64" ; + bilinear:tie_point_mapping = + "lat: lat_indices tp_lat lon: lon_indices tp_lon" ; + float q(time, lat, lon) ; + q:standard_name = "specific_humidity" ; + q:units = "1" ; + q:coordinate_interpolation = "lat: lon: bilinear" ; + + // global attributes: + :Conventions = "CF-1.11" ; + } + + +Reading and inspecting this file shows the latitude and longitude +coordinates in uncompressed form, whilst their underlying arrays are +still in subsampled representation described in the file: + +.. 
code-block:: python + :caption: *Read a field construct from a dataset that has been + compressed by corodinate subsampling, and inspect + coordinates.* + + >>> f = cf.read('subsampled.nc')[0] + >>> print(f) + Field: specific_humidity (ncvar%q) + ---------------------------------- + Data : specific_humidity(time(2), ncdim%lat(18), ncdim%lon(12)) 1 + Dimension coords: time(2) = [2000-01-01 00:00:00, 2000-02-01 00:00:00] + Auxiliary coords: latitude(ncdim%lat(18), ncdim%lon(12)) = [[-85.0, ..., 85.0]] degrees_north + : longitude(ncdim%lat(18), ncdim%lon(12)) = [[15.0, ..., 345.0]] degrees_east + >>> lon = f.construct('longitude') + >>> lon + + >>> lon.data.source() + + >>> print(lon.array) + [[15.0 45.0 75.0 105.0 135.0 165.0 195.0 225.0 255.0 285.0 315.0 345.0] + [15.0 45.0 75.0 105.0 135.0 165.0 195.0 225.0 255.0 285.0 315.0 345.0] + [15.0 45.0 75.0 105.0 135.0 165.0 195.0 225.0 255.0 285.0 315.0 345.0] + [15.0 45.0 75.0 105.0 135.0 165.0 195.0 225.0 255.0 285.0 315.0 345.0] + [15.0 45.0 75.0 105.0 135.0 165.0 195.0 225.0 255.0 285.0 315.0 345.0] + [15.0 45.0 75.0 105.0 135.0 165.0 195.0 225.0 255.0 285.0 315.0 345.0] + [15.0 45.0 75.0 105.0 135.0 165.0 195.0 225.0 255.0 285.0 315.0 345.0] + [15.0 45.0 75.0 105.0 135.0 165.0 195.0 225.0 255.0 285.0 315.0 345.0] + [15.0 45.0 75.0 105.0 135.0 165.0 195.0 225.0 255.0 285.0 315.0 345.0] + [15.0 45.0 75.0 105.0 135.0 165.0 195.0 225.0 255.0 285.0 315.0 345.0] + [15.0 45.0 75.0 105.0 135.0 165.0 195.0 225.0 255.0 285.0 315.0 345.0] + [15.0 45.0 75.0 105.0 135.0 165.0 195.0 225.0 255.0 285.0 315.0 345.0] + [15.0 45.0 75.0 105.0 135.0 165.0 195.0 225.0 255.0 285.0 315.0 345.0] + [15.0 45.0 75.0 105.0 135.0 165.0 195.0 225.0 255.0 285.0 315.0 345.0] + [15.0 45.0 75.0 105.0 135.0 165.0 195.0 225.0 255.0 285.0 315.0 345.0] + [15.0 45.0 75.0 105.0 135.0 165.0 195.0 225.0 255.0 285.0 315.0 345.0] + [15.0 45.0 75.0 105.0 135.0 165.0 195.0 225.0 255.0 285.0 315.0 345.0] + [15.0 45.0 75.0 105.0 135.0 165.0 195.0 225.0 255.0 285.0 
315.0 345.0]] + >>> lon.data.source().source() + + >>> print(lon.data.source().source().array) + [[ 15. 135. 225. 255. 345.] + [ 15. 135. 225. 255. 345.] + [ 15. 135. 225. 255. 345.] + [ 15. 135. 225. 255. 345.]] + +As with all other forms of compression, the field may be treated as if it +were not compressed: + +.. code-block:: python + :caption: *Get subspaces based on indices of the uncompressed + data.* + + >>> g = f[0, 6, :] + >>> print(g) + Field: specific_humidity (ncvar%q) + ---------------------------------- + Data : specific_humidity(time(1), ncdim%lat(1), ncdim%lon(12)) 1 + Dimension coords: time(1) = [2000-01-01 00:00:00] + Auxiliary coords: latitude(ncdim%lat(1), ncdim%lon(12)) = [[-25.0, ..., -25.0]] degrees_north + : longitude(ncdim%lat(1), ncdim%lon(12)) = [[15.0, ..., 345.0]] degrees_east + >>> print(g.construct('longitude').array) + [[15.0 45.0 75.0 105.0 135.0 165.0 195.0 225.0 255.0 285.0 315.0 345.0]] + + +The metadata that define the subsampling are contained within the +coordinate's `Data` object: + +.. code-block:: python + :caption: *Inspect the metadata that define the coordinate + subsampling.* + + >>> lon = f.construct('longitude') + >>> d = lon.data.source() + >>> d.get_tie_point_indices() + {0: , + 1: } + >>> d.get_computational_precision() + '64' + +It is not yet, as of version 1.10.0.0, possible to write to disk a +field construct with compression by coordinate subsampling. + +.. _Lossy-compression-via-quantization: + +Lossy compression via quantization +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +`Lossy compression via quantization`_ eliminates false precision, +usually by rounding the least significant bits of floating-point +mantissas to zeros, so that a subsequent compression on disk is more +efficient. Quantization is described by the following parameters: + +* The ``algorithm`` parameter names a specific quantization algorithm. 
+ +* The ``implementation`` parameter contains unstandardised text that + concisely conveys the algorithm provenance including the name of the + library or client that performed the quantization, the software + version, and any other information required to disambiguate the + source of the algorithm employed. The text must take the form + ``software-name version version-string [(optional-information)]``. + +* The retained precision of the algortqhm is defined with either the + ``quantization_nsb`` or ``quantization_nsd`` parameter. + +If quantization has been applied to the data, then it may be described +within a `Quantization` object, accessed via the construct's +`!get_quantization` method. To apply quantization at the time of +writing the data to disk, use the construct's `!set_quantize_on_write` +method: + +.. code-block:: python + :caption: *Lossy compression via quantization.* + + >>> q, t = cf.read('file.nc') + >>> t.set_quantize_on_write(algorithm='bitgroom', quantization_nsd=6) + >>> cf.write(t, 'quantized.nc') + >>> quantized = cf.read('quantized.nc')[0] + >>> c = quantized.get_quantization() + >>> c + + >>> c.parameters() + {'algorithm': 'bitgroom', + 'implementation': 'libnetcdf version 4.9.4-development', + '_QuantizeBitGroomNumberOfSignificantDigits': np.int32(6), + 'quantization_nsd': np.int64(6)} ---- From db2f87346e21a7cdb7ca39927ce7bbe37f74260d Mon Sep 17 00:00:00 2001 From: David Hassell Date: Wed, 4 Jun 2025 14:10:10 +0100 Subject: [PATCH 3/8] dev --- Changelog.rst | 6 ++++++ README.md | 2 ++ cf/fieldancillary.py | 2 +- setup.py | 5 ++++- 4 files changed, 13 insertions(+), 2 deletions(-) diff --git a/Changelog.rst b/Changelog.rst index 9f6ab30322..1463a9c4e5 100644 --- a/Changelog.rst +++ b/Changelog.rst @@ -5,6 +5,12 @@ Version NEXTVERSION * Implement lossy compression via quantization (https://github.com/NCAS-CMS/cf-python/issues/870) +* New quantization class: `cf.Quantization` + (https://github.com/NCAS-CMS/cf-python/issues/870) +* New 
quantization methods: `cf.Field.get_quantization`, + `cf.Field.get_quantize_on_write`, `cf.Field.set_quantize_on_write`, + `cf.Field.del_quantize_on_write` + (https://github.com/NCAS-CMS/cf-python/issues/870) * Read Zarr datasets with `cf.read` (https://github.com/NCAS-CMS/cf-python/issues/863) * Update CF aggregation keywords diff --git a/README.md b/README.md index 9f5c75a8b6..ce752fd0af 100644 --- a/README.md +++ b/README.md @@ -116,6 +116,8 @@ of its array manipulation and can: * create running means from field constructs, * apply differential operators to field constructs, * create derived quantities (such as relative vorticity). +* read and write data that are quantized to eliminate false + precision. Visualization ============= diff --git a/cf/fieldancillary.py b/cf/fieldancillary.py index 90a1643535..6df6dcaa7a 100644 --- a/cf/fieldancillary.py +++ b/cf/fieldancillary.py @@ -31,7 +31,7 @@ class FieldAncillary(mixin.PropertiesData, cfdm.FieldAncillary): {{netCDF dataset chunks}} - .. versionadded:: 3.0.0 + .. versionadded:: 2.0 """ diff --git a/setup.py b/setup.py index 706d89af63..495dfa08a8 100755 --- a/setup.py +++ b/setup.py @@ -227,7 +227,10 @@ def compile(): * apply differential operators to field constructs, -* create derived quantities (such as relative vorticity). +* create derived quantities (such as relative vorticity), + +* read and write data that are quantized to eliminate false + precision. 
""" From e7b0421af3cffa2e9be8225f632387e664ff833e Mon Sep 17 00:00:00 2001 From: David Hassell Date: Thu, 5 Jun 2025 09:25:35 +0100 Subject: [PATCH 4/8] Zarr and quantization updates --- docs/source/spelling_false_positives.txt | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/docs/source/spelling_false_positives.txt b/docs/source/spelling_false_positives.txt index 6d324f615e..6f7ff66d65 100644 --- a/docs/source/spelling_false_positives.txt +++ b/docs/source/spelling_false_positives.txt @@ -1,6 +1,7 @@ absoluted absolutised abspath +actify addattr aggregatable al @@ -293,6 +294,7 @@ lbproc lbtim le len +libnetcdf linebreaks Lineplot ln @@ -379,6 +381,8 @@ programmatically pseudolevels py pyplot +Quantization +quantization radd RaggedContiguousArray RaggedIndexedArray @@ -521,6 +525,7 @@ unmapped unselected Unselected unsqueeze +unstandardised url uri var @@ -534,8 +539,11 @@ Voronoi vorticity wi wo +xarray xor xy +Zarr +ZarrArray Zimmermann δu δv From 68ac81c828305725706ee5bb353da0dcc73687bd Mon Sep 17 00:00:00 2001 From: David Hassell Date: Thu, 5 Jun 2025 10:09:23 +0100 Subject: [PATCH 5/8] quantisation --- cf/quantization.py | 2 +- docs/source/class/cf.AuxiliaryCoordinate.rst | 13 +++++++++++++ docs/source/class/cf.Bounds.rst | 13 +++++++++++++ docs/source/class/cf.CellConnectivity.rst | 13 +++++++++++++ docs/source/class/cf.CellMeasure.rst | 13 +++++++++++++ docs/source/class/cf.CoordinateConversion.rst | 13 +++++++++++++ docs/source/class/cf.Count.rst | 13 +++++++++++++ docs/source/class/cf.Datum.rst | 13 +++++++++++++ docs/source/class/cf.DimensionCoordinate.rst | 13 +++++++++++++ docs/source/class/cf.DomainAncillary.rst | 13 +++++++++++++ docs/source/class/cf.DomainTopology.rst | 13 +++++++++++++ docs/source/class/cf.Field.rst | 17 +++++++++++++++++ docs/source/class/cf.FieldAncillary.rst | 17 +++++++++++++++++ docs/source/class/cf.Index.rst | 13 +++++++++++++ docs/source/class/cf.List.rst | 13 +++++++++++++ docs/source/tutorial.rst | 14 
+++++++++----- 16 files changed, 200 insertions(+), 6 deletions(-) diff --git a/cf/quantization.py b/cf/quantization.py index 4c36183f85..26e2861ac6 100644 --- a/cf/quantization.py +++ b/cf/quantization.py @@ -19,7 +19,7 @@ class Quantization(cfdm.Quantization): take the form ``software-name version version-string [(optional-information)]``. - The retained precision of the algorthm is defined with either the + The retained precision of the algorithm is defined with either the ``quantization_nsb`` or ``quantization_nsd`` parameter. For instance, the following parameters describe quantization via diff --git a/docs/source/class/cf.AuxiliaryCoordinate.rst b/docs/source/class/cf.AuxiliaryCoordinate.rst index e5af934219..6216de643f 100644 --- a/docs/source/class/cf.AuxiliaryCoordinate.rst +++ b/docs/source/class/cf.AuxiliaryCoordinate.rst @@ -342,6 +342,19 @@ Cyclicity ~cf.AuxiliaryCoordinate.get_original_filenames ~cf.AuxiliaryCoordinate.persist +Quantization +^^^^^^^^^^^^ + +.. rubric:: Methods + +.. autosummary:: + :nosignatures: + :toctree: ../method/ + :template: method.rst + + ~cf.AuxiliaryCoordinate.get_quantization + ~cf.AuxiliaryCoordinate.get_quantize_on_write + Miscellaneous ------------- diff --git a/docs/source/class/cf.Bounds.rst b/docs/source/class/cf.Bounds.rst index 98d8caec77..27a29b1010 100644 --- a/docs/source/class/cf.Bounds.rst +++ b/docs/source/class/cf.Bounds.rst @@ -239,6 +239,19 @@ Data ~cf.Bounds.has_bounds ~cf.Bounds.persist +Quantization +^^^^^^^^^^^^ + +.. rubric:: Methods + +.. 
autosummary:: + :nosignatures: + :toctree: ../method/ + :template: method.rst + + ~cf.Bounds.get_quantization + ~cf.Bounds.get_quantize_on_write + Miscellaneous ------------- diff --git a/docs/source/class/cf.CellConnectivity.rst b/docs/source/class/cf.CellConnectivity.rst index 0723d8f9af..791479de51 100644 --- a/docs/source/class/cf.CellConnectivity.rst +++ b/docs/source/class/cf.CellConnectivity.rst @@ -261,6 +261,19 @@ Data ~cf.CellConnectivity.has_bounds ~cf.CellConnectivity.persist +Quantization +^^^^^^^^^^^^ + +.. rubric:: Methods + +.. autosummary:: + :nosignatures: + :toctree: ../method/ + :template: method.rst + + ~cf.CellConnectivity.get_quantization + ~cf.CellConnectivity.get_quantize_on_write + Miscellaneous ------------- diff --git a/docs/source/class/cf.CellMeasure.rst b/docs/source/class/cf.CellMeasure.rst index bdb3ef502e..8e61434557 100644 --- a/docs/source/class/cf.CellMeasure.rst +++ b/docs/source/class/cf.CellMeasure.rst @@ -262,6 +262,19 @@ Data ~cf.CellMeasure.has_bounds ~cf.CellMeasure.persist +Quantization +^^^^^^^^^^^^ + +.. rubric:: Methods + +.. autosummary:: + :nosignatures: + :toctree: ../method/ + :template: method.rst + + ~cf.CellMeasure.get_quantization + ~cf.CellMeasure.get_quantize_on_write + Miscellaneous ------------- diff --git a/docs/source/class/cf.CoordinateConversion.rst b/docs/source/class/cf.CoordinateConversion.rst index cdc9f293ad..82c178e9e7 100644 --- a/docs/source/class/cf.CoordinateConversion.rst +++ b/docs/source/class/cf.CoordinateConversion.rst @@ -10,6 +10,18 @@ cf.CoordinateConversion :no-members: :no-inherited-members: +Inspection +---------- + +.. rubric:: Methods + +.. 
autosummary:: + :nosignatures: + :toctree: ../method/ + :template: method.rst + + ~cf.CoordinateConversion.dump + Parameter terms --------------- @@ -58,6 +70,7 @@ Miscellaneous ~cf.CoordinateConversion.copy ~cf.CoordinateConversion.equals + ~cf.CoordinateConversion.creation_commands Special ------- diff --git a/docs/source/class/cf.Count.rst b/docs/source/class/cf.Count.rst index 011938c558..27d04c6cf0 100644 --- a/docs/source/class/cf.Count.rst +++ b/docs/source/class/cf.Count.rst @@ -233,6 +233,19 @@ Data ~cf.Count.has_bounds ~cf.Count.persist +Quantization +^^^^^^^^^^^^ + +.. rubric:: Methods + +.. autosummary:: + :nosignatures: + :toctree: ../method/ + :template: method.rst + + ~cf.Count.get_quantization + ~cf.Count.get_quantize_on_write + Miscellaneous ------------- diff --git a/docs/source/class/cf.Datum.rst b/docs/source/class/cf.Datum.rst index 5dff1d8dfe..ca81f442fd 100644 --- a/docs/source/class/cf.Datum.rst +++ b/docs/source/class/cf.Datum.rst @@ -10,6 +10,18 @@ cf.Datum :no-members: :no-inherited-members: +Inspection +---------- + +.. rubric:: Methods + +.. autosummary:: + :nosignatures: + :toctree: ../method/ + :template: method.rst + + ~cf.Datum.dump + Parameters ---------- @@ -57,6 +69,7 @@ Miscellaneous ~cf.Datum.copy ~cf.Datum.get_original_filenames ~cf.Datum.equals + ~cf.Datum.creation_commands Groups ^^^^^^ diff --git a/docs/source/class/cf.DimensionCoordinate.rst b/docs/source/class/cf.DimensionCoordinate.rst index d3e4944747..7defc250e6 100644 --- a/docs/source/class/cf.DimensionCoordinate.rst +++ b/docs/source/class/cf.DimensionCoordinate.rst @@ -349,6 +349,19 @@ Cyclicity ~cf.DimensionCoordinate.anchor ~cf.DimensionCoordinate.persist +Quantization +^^^^^^^^^^^^ + +.. rubric:: Methods + +.. 
autosummary:: + :nosignatures: + :toctree: ../method/ + :template: method.rst + + ~cf.DimensionCoordinate.get_quantization + ~cf.DimensionCoordinate.get_quantize_on_write + Miscellaneous ------------- diff --git a/docs/source/class/cf.DomainAncillary.rst b/docs/source/class/cf.DomainAncillary.rst index 13f4f78f6f..61d8f795e6 100644 --- a/docs/source/class/cf.DomainAncillary.rst +++ b/docs/source/class/cf.DomainAncillary.rst @@ -306,6 +306,19 @@ Data ~cf.DomainAncillary.get_original_filenames ~cf.DomainAncillary.persist +Quantization +^^^^^^^^^^^^ + +.. rubric:: Methods + +.. autosummary:: + :nosignatures: + :toctree: ../method/ + :template: method.rst + + ~cf.DomainAncillary.get_quantization + ~cf.DomainAncillary.get_quantize_on_write + Miscellaneous ------------- diff --git a/docs/source/class/cf.DomainTopology.rst b/docs/source/class/cf.DomainTopology.rst index 9e7e84e22b..a651bd72ad 100644 --- a/docs/source/class/cf.DomainTopology.rst +++ b/docs/source/class/cf.DomainTopology.rst @@ -262,6 +262,19 @@ Data ~cf.DomainTopology.has_bounds ~cf.DomainTopology.persist +Quantization +^^^^^^^^^^^^ + +.. rubric:: Methods + +.. autosummary:: + :nosignatures: + :toctree: ../method/ + :template: method.rst + + ~cf.DomainTopology.get_quantization + ~cf.DomainTopology.get_quantize_on_write + Miscellaneous ------------- diff --git a/docs/source/class/cf.Field.rst b/docs/source/class/cf.Field.rst index 72f9780687..8183b61434 100644 --- a/docs/source/class/cf.Field.rst +++ b/docs/source/class/cf.Field.rst @@ -265,6 +265,23 @@ Miscellaneous data operations ~cf.Field.persist ~cf.Field.persist_metadata +Quantization +^^^^^^^^^^^^ + +.. rubric:: Methods + +.. 
autosummary:: + :nosignatures: + :toctree: ../method/ + :template: method.rst + + ~cf.Field.get_quantization + ~cf.Field._set_quantization + ~cf.Field._del_quantization + ~cf.Field.get_quantize_on_write + ~cf.Field.set_quantize_on_write + ~cf.Field.del_quantize_on_write + Metadata constructs ------------------- diff --git a/docs/source/class/cf.FieldAncillary.rst b/docs/source/class/cf.FieldAncillary.rst index 679574045a..62bdbb2381 100644 --- a/docs/source/class/cf.FieldAncillary.rst +++ b/docs/source/class/cf.FieldAncillary.rst @@ -237,6 +237,23 @@ Data ~cf.FieldAncillary.has_bounds ~cf.FieldAncillary.persist +Quantization +^^^^^^^^^^^^ + +.. rubric:: Methods + +.. autosummary:: + :nosignatures: + :toctree: ../method/ + :template: method.rst + + ~cf.FieldAncillary.get_quantization + ~cf.FieldAncillary._set_quantization + ~cf.FieldAncillary._del_quantization + ~cf.FieldAncillary.get_quantize_on_write + ~cf.FieldAncillary.set_quantize_on_write + ~cf.FieldAncillary.del_quantize_on_write + Miscellaneous ------------- diff --git a/docs/source/class/cf.Index.rst b/docs/source/class/cf.Index.rst index 4a9a431127..2717ebba13 100644 --- a/docs/source/class/cf.Index.rst +++ b/docs/source/class/cf.Index.rst @@ -234,6 +234,19 @@ Data ~cf.Index.has_bounds ~cf.Index.persist +Quantization +^^^^^^^^^^^^ + +.. rubric:: Methods + +.. autosummary:: + :nosignatures: + :toctree: ../method/ + :template: method.rst + + ~cf.Index.get_quantization + ~cf.Index.get_quantize_on_write + Miscellaneous ------------- diff --git a/docs/source/class/cf.List.rst b/docs/source/class/cf.List.rst index 4f343ecc4c..d96f2f48a5 100644 --- a/docs/source/class/cf.List.rst +++ b/docs/source/class/cf.List.rst @@ -234,6 +234,19 @@ Data ~cf.List.has_bounds ~cf.List.persist +Quantization +^^^^^^^^^^^^ + +.. rubric:: Methods + +.. 
autosummary:: + :nosignatures: + :toctree: ../method/ + :template: method.rst + + ~cf.List.get_quantization + ~cf.List.get_quantize_on_write + Miscellaneous ------------- diff --git a/docs/source/tutorial.rst b/docs/source/tutorial.rst index 21894bac96..4b6806b619 100644 --- a/docs/source/tutorial.rst +++ b/docs/source/tutorial.rst @@ -6859,7 +6859,7 @@ efficient. Quantization is described by the following parameters: source of the algorithm employed. The text must take the form ``software-name version version-string [(optional-information)]``. -* The retained precision of the algortqhm is defined with either the +* The retained precision of the algorithm is defined with either the ``quantization_nsb`` or ``quantization_nsd`` parameter. If quantization has been applied to the data, then it may be described @@ -6872,17 +6872,21 @@ method: :caption: *Lossy compression via quantization.* >>> q, t = cf.read('file.nc') - >>> t.set_quantize_on_write(algorithm='bitgroom', quantization_nsd=6) + >>> t.set_quantize_on_write(algorithm='bitgroom', quantization_nsd=1) >>> cf.write(t, 'quantized.nc') >>> quantized = cf.read('quantized.nc')[0] >>> c = quantized.get_quantization() >>> c - + >>> c.parameters() {'algorithm': 'bitgroom', 'implementation': 'libnetcdf version 4.9.4-development', - '_QuantizeBitGroomNumberOfSignificantDigits': np.int32(6), - 'quantization_nsd': np.int64(6)} + '_QuantizeBitGroomNumberOfSignificantDigits': np.int32(1), + 'quantization_nsd': np.int64(1)} + >>> t[0, 0, 0].array + array([[[262.8]]]) + >>> quantized[0, 0, 0].array + array([[[256.]]]) ---- From 11f3f3c7bb81760679200e947a3e086bc9e6fd9c Mon Sep 17 00:00:00 2001 From: David Hassell Date: Thu, 5 Jun 2025 10:30:05 +0100 Subject: [PATCH 6/8] cfdm versions --- RELEASE.md | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index 070b545b3d..b3cf931b4a 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -17,10 +17,11 @@ - [ ] Change the version and date in 
`cf/__init__.py` (`__version__` and `__date__` variables) -- [ ] Ensure that the requirements on dependencies & their versions are - up-to-date and consistent in both the `requirements.txt` and in - `docs/source/installation.rst`; and in the `_requires` list and - `Version` checks in `cf/__init__.py`. +- [ ] Ensure that the requirements on dependencies & their versions + are up-to-date and consistent in both the `requirements.txt` and in + `docs/source/installation.rst` (paying particular attention to + `cfdm`); and in the `_requires` list and `Version` checks in + `cf/__init__.py`. - [ ] Make sure that `README.md` is up to date. From e382d4b4178f39c8dbb67d58076c701dfbbd3453 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Thu, 5 Jun 2025 10:37:24 +0100 Subject: [PATCH 7/8] cfdm versions --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 9d875bb541..1bfcaec175 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ netCDF4>=1.7.2 cftime>=1.6.4 numpy>=2.0.0 -cfdm>=1.12.1.0, <1.12.2.0 +cfdm>=1.12.2.0, <1.12.3.0 psutil>=0.6.0 cfunits>=3.3.7 dask>=2025.5.1 From d892bd23d2a6e2bb38981d5a6e315aba480c8f09 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Thu, 5 Jun 2025 10:37:43 +0100 Subject: [PATCH 8/8] quantisation --- docs/source/introduction.rst | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/docs/source/introduction.rst b/docs/source/introduction.rst index 1e9c66ed0d..635aa81157 100644 --- a/docs/source/introduction.rst +++ b/docs/source/introduction.rst @@ -61,8 +61,7 @@ may nonetheless be modified in memory. 
: longitude(128) = [0.0, ..., 357.1875] degrees_east : height(1) = [2.0] m -The `cf` package uses :ref:`Dask ` for all of its array -manipulation and can: +The `cf` package can: * read :term:`field constructs ` and :term:`domain constructs ` from netCDF, CDL, PP and UM datasets @@ -120,7 +119,10 @@ manipulation and can: * apply differential operators to field constructs, -* create derived quantities (such as relative vorticity). +* create derived quantities (such as relative vorticity), + +* read and write data that are quantized to eliminate false + precision. ---- @@ -146,9 +148,9 @@ of plotting possibilities with example code. **Performance** --------------- -As of version 3.14.0 (released 2023-01-31), cf uses :ref:`Dask -` for all of its data manipulations, which provides lazy, -parallelised, and out-of-core computations of array operations. +The `cf` package uses :ref:`Dask ` for all of its data +manipulations, which provides lazy, parallelised, and out-of-core +computations of array operations. ----