Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ version 3.17.0

**2024-??-??**

* New methods: `cf.Field.pad_missing` and `cf.Data.pad_missing`
(https://github.com/NCAS-CMS/cf-python/issues/717)
* Fix occasional bug when calculating UGRID cell areas when
non-spatial coordinates span the discrete axis
(https://github.com/NCAS-CMS/cf-python/issues/721)
Expand Down
17 changes: 16 additions & 1 deletion cf/aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -3920,7 +3920,22 @@ def _sort_indices(m, canonical_axes):
"""
canonical_axes = [m.id_to_axis[identity] for identity in canonical_axes]
sort_indices = tuple([m.sort_indices[axis] for axis in canonical_axes])
needs_sorting = sort_indices != (slice(None),) * len(sort_indices)

# Whether or not one or more of the axes needs sorting
needs_sorting = False
for sort_index in sort_indices:
# Note: sort_index can only be a slice object or a numpy array
# (see `_create_hash_and_first_values`)
if isinstance(sort_index, slice):
if sort_index != slice(None):
# sort_index is a slice other than slice(None)
needs_sorting = True
break
elif sort_index.size > 1:
# sort_index is an array of 2 or more integers
needs_sorting = True
break

return sort_indices, needs_sorting


Expand Down
103 changes: 103 additions & 0 deletions cf/data/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -2180,6 +2180,109 @@ def mean_of_upper_decile(

return d

@_inplace_enabled(default=False)
def pad_missing(self, axis, pad_width=None, to_size=None, inplace=False):
    """Pad an axis with missing data.

    Exactly one of *pad_width* and *to_size* must be set.

    .. versionadded:: 3.17.0

    :Parameters:

        axis: `int`
            Select the axis for which the padding is to be
            applied, as a non-negative dimension position.
            Negative positions are not accepted.

            *Parameter example:*
              Pad second axis: ``axis=1``.

            *Parameter example:*
              Pad the last axis: ``axis=d.ndim - 1``.

        {{pad_width: sequence of `int`, optional}}

        {{to_size: `int`, optional}}

        {{inplace: `bool`, optional}}

    :Returns:

        `Data` or `None`
            The padded data, or `None` if the operation was
            in-place.

    **Examples**

    >>> d = cf.Data(np.arange(6).reshape(2, 3))
    >>> print(d.array)
    [[0 1 2]
     [3 4 5]]
    >>> e = d.pad_missing(1, (1, 2))
    >>> print(e.array)
    [[-- 0 1 2 -- --]
     [-- 3 4 5 -- --]]
    >>> f = e.pad_missing(0, (0, 1))
    >>> print(f.array)
    [[-- 0 1 2 -- --]
     [-- 3 4 5 -- --]
     [-- -- -- -- -- --]]

    >>> g = d.pad_missing(1, to_size=5)
    >>> print(g.array)
    [[0 1 2 -- --]
     [3 4 5 -- --]]

    """
    # Only non-negative positions are valid; a negative or
    # out-of-range position is rejected outright.
    if not 0 <= axis < self.ndim:
        raise ValueError(
            f"'axis' must be a valid dimension position. Got {axis}"
        )

    if to_size is not None:
        # Set pad_width from to_size
        if pad_width is not None:
            raise ValueError("Can't set both 'pad_width' and 'to_size'")

        # All of the padding goes after the existing values. A
        # 'to_size' smaller than the current size gives a negative
        # width, which is reported further down.
        pad_width = (0, to_size - self.shape[axis])
    elif pad_width is None:
        raise ValueError("Must set either 'pad_width' or 'to_size'")

    pad_width = np.asarray(pad_width)
    if pad_width.shape != (2,) or pad_width.dtype.kind != "i":
        raise ValueError(
            "'pad_width' must be a sequence of two integers. "
            f"Got: {pad_width}"
        )

    # Use built-in ints rather than numpy scalars, so that the
    # values are plain integers both in any error message and when
    # passed on to dask
    pad_width = tuple(pad_width.tolist())
    if any(n < 0 for n in pad_width):
        if to_size is not None:
            raise ValueError(
                f"'to_size' ({to_size}) must not be smaller than the "
                f"original axis size ({self.shape[axis]})"
            )

        raise ValueError(
            f"Can't set a negative number of pad values. Got: {pad_width}"
        )

    d = _inplace_enabled_define_and_cleanup(self)

    dx = d.to_dask_array()

    # Record the original mask before padding the data
    mask0 = da.ma.getmaskarray(dx)

    # No padding on any axis other than the selected one
    pad = [(0, 0)] * dx.ndim
    pad[axis] = pad_width

    # Pad the data with zero. This will lose the original mask.
    dx = da.pad(dx, pad, mode="constant", constant_values=0)

    # Pad the mask with True
    mask = da.pad(mask0, pad, mode="constant", constant_values=True)

    # Set the mask
    dx = da.ma.masked_where(mask, dx)

    d._set_dask(dx)
    return d

@_inplace_enabled(default=False)
def percentile(
self,
Expand Down
8 changes: 8 additions & 0 deletions cf/docstring/docstring.py
Original file line number Diff line number Diff line change
Expand Up @@ -587,6 +587,14 @@
"{{weights auto: `bool`, optional}}": """auto: `bool`, optional
If True then return `False` if weights can't be found,
rather than raising an exception.""",
# pad_width
"{{pad_width: sequence of `int`, optional}}": """pad_width: sequence of `int`, optional
Number of values to pad before and after the edges of
the axis.""",
# to_size
"{{to_size: `int`, optional}}": """to_size: `int`, optional
Pad the axis after so that the new axis has the given
size.""",
# ----------------------------------------------------------------
# Method description substitutions (4 levels of indentation)
# ----------------------------------------------------------------
Expand Down
130 changes: 130 additions & 0 deletions cf/field.py
Original file line number Diff line number Diff line change
Expand Up @@ -11953,6 +11953,136 @@ def halo(

return f

@_inplace_enabled(default=False)
def pad_missing(self, axis, pad_width=None, to_size=None, inplace=False):
    """Pad an axis with missing data.

    The field's data and all metadata constructs that span the
    axis are padded.

    .. versionadded:: 3.17.0

    :Parameters:

        axis: `str` or `int`
            Select the domain axis which is to be padded, defined
            by that which would be selected by passing the given
            axis description to a call of the field construct's
            `domain_axis` method. For example, for a value of
            ``'X'``, the domain axis construct returned by
            ``f.domain_axis('X')`` is selected.

            The selected axis must be spanned by the field's
            data.

        {{pad_width: sequence of `int`, optional}}

        {{to_size: `int`, optional}}

        {{inplace: `bool`, optional}}

    :Returns:

        `Field` or `None`
            The padded field construct, or `None` if the operation
            was in-place.

    **Examples**

    >>> f = cf.example_field(6)
    >>> print(f)
    Field: precipitation_amount (ncvar%pr)
    --------------------------------------
    Data            : precipitation_amount(cf_role=timeseries_id(2), time(4))
    Dimension coords: time(4) = [2000-01-16 12:00:00, ..., 2000-04-15 00:00:00] gregorian
    Auxiliary coords: latitude(cf_role=timeseries_id(2)) = [25.0, 7.0] degrees_north
                    : longitude(cf_role=timeseries_id(2)) = [10.0, 40.0] degrees_east
                    : cf_role=timeseries_id(cf_role=timeseries_id(2)) = [x1, y2]
                    : altitude(cf_role=timeseries_id(2), 3, 4) = [[[1.0, ..., --]]] m
    Coord references: grid_mapping_name:latitude_longitude
    >>> print(f.array)
    [[1. 2. 3. 4.]
     [5. 6. 7. 8.]]
    >>> g = f.pad_missing('T', (0, 5))
    >>> print(g)
    Field: precipitation_amount (ncvar%pr)
    --------------------------------------
    Data            : precipitation_amount(cf_role=timeseries_id(2), time(9))
    Dimension coords: time(9) = [2000-01-16 12:00:00, ..., --] gregorian
    Auxiliary coords: latitude(cf_role=timeseries_id(2)) = [25.0, 7.0] degrees_north
                    : longitude(cf_role=timeseries_id(2)) = [10.0, 40.0] degrees_east
                    : cf_role=timeseries_id(cf_role=timeseries_id(2)) = [x1, y2]
                    : altitude(cf_role=timeseries_id(2), 3, 4) = [[[1.0, ..., --]]] m
    Coord references: grid_mapping_name:latitude_longitude
    >>> print(g.array)
    [[1.0 2.0 3.0 4.0 -- -- -- -- --]
     [5.0 6.0 7.0 8.0 -- -- -- -- --]]
    >>> h = g.pad_missing('cf_role=timeseries_id', (0, 1))
    >>> print(h)
    Field: precipitation_amount (ncvar%pr)
    --------------------------------------
    Data            : precipitation_amount(cf_role=timeseries_id(3), time(9))
    Dimension coords: time(9) = [2000-01-16 12:00:00, ..., --] gregorian
    Auxiliary coords: latitude(cf_role=timeseries_id(3)) = [25.0, 7.0, --] degrees_north
                    : longitude(cf_role=timeseries_id(3)) = [10.0, 40.0, --] degrees_east
                    : cf_role=timeseries_id(cf_role=timeseries_id(3)) = [x1, y2, --]
                    : altitude(cf_role=timeseries_id(3), 3, 4) = [[[1.0, ..., --]]] m
    Coord references: grid_mapping_name:latitude_longitude
    >>> print(h.array)
    [[1.0 2.0 3.0 4.0 -- -- -- -- --]
     [5.0 6.0 7.0 8.0 -- -- -- -- --]
     [ --  --  --  -- -- -- -- -- --]]

    >>> print(f.pad_missing('time', to_size=6))
    Field: precipitation_amount (ncvar%pr)
    --------------------------------------
    Data            : precipitation_amount(cf_role=timeseries_id(2), time(6))
    Dimension coords: time(6) = [2000-01-16 12:00:00, ..., --] gregorian
    Auxiliary coords: latitude(cf_role=timeseries_id(2)) = [25.0, 7.0] degrees_north
                    : longitude(cf_role=timeseries_id(2)) = [10.0, 40.0] degrees_east
                    : cf_role=timeseries_id(cf_role=timeseries_id(2)) = [x1, y2]
                    : altitude(cf_role=timeseries_id(2), 3, 4) = [[[1.0, ..., --]]] m
    Coord references: grid_mapping_name:latitude_longitude

    """
    f = _inplace_enabled_define_and_cleanup(self)

    try:
        axis1 = f._parse_axes(axis)
    except ValueError as error:
        raise ValueError(
            f"Can't pad_missing: Bad axis specification: {axis!r}"
        ) from error

    # Exactly one axis must have been selected
    if len(axis1) != 1:
        raise ValueError(
            f"Can't pad_missing: Bad axis specification: {axis!r}"
        )

    axis_key = axis1[0]

    # Find the position of the axis in the field's data, failing
    # with an explicit message (rather than the bare "not in
    # tuple/list" error from `index`) if the data do not span it
    field_data_axes = f.get_data_axes()
    if axis_key not in field_data_axes:
        raise ValueError(
            f"Can't pad_missing: Axis {axis!r} is not spanned by the "
            "field's data"
        )

    iaxis = field_data_axes.index(axis_key)

    # Pad the field
    super(Field, f).pad_missing(
        iaxis, pad_width=pad_width, to_size=to_size, inplace=True
    )

    # Set new domain axis size
    domain_axis = f.domain_axis(axis_key)
    domain_axis.set_size(f.shape[iaxis])

    # Pad every metadata construct with data that spans the axis
    constructs_data_axes = f.constructs.data_axes()
    for key, construct in f.constructs.filter_by_data(todict=True).items():
        construct_axes = constructs_data_axes[key]
        if axis_key not in construct_axes:
            continue

        # Pad the construct, at the position of the axis within
        # the construct's own data
        construct.pad_missing(
            construct_axes.index(axis_key),
            pad_width=pad_width,
            to_size=to_size,
            inplace=True,
        )

    return f

def percentile(
self,
ranks,
Expand Down
32 changes: 32 additions & 0 deletions cf/mixin/propertiesdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -1888,6 +1888,38 @@ def minimum(self):
"ERROR: Can't get the minimum when there is no data array"
)

@_inplace_enabled(default=False)
def pad_missing(self, axis, pad_width=None, to_size=None, inplace=False):
    """Pad an axis of the data with missing values.

    The work is delegated to the ``pad_missing`` operation via
    `_apply_data_oper`, with all of the arguments passed through
    unchanged.

    .. versionadded:: 3.17.0

    :Parameters:

        axis: `int`
            Select the axis for which the padding is to be
            applied.

        {{pad_width: sequence of `int`, optional}}

        {{to_size: `int`, optional}}

        {{inplace: `bool`, optional}}

    :Returns:

        `{{class}}` or `None`
            The {{class}} with padded data, or `None` if the
            operation was in-place.

    """
    # The object to operate on, as provided by the in-place
    # machinery
    v = _inplace_enabled_define_and_cleanup(self)

    return self._apply_data_oper(
        v,
        "pad_missing",
        axis=axis,
        pad_width=pad_width,
        to_size=to_size,
        inplace=inplace,
    )

def period(self, *value, **config):
"""Return or set the period of the data.

Expand Down
34 changes: 34 additions & 0 deletions cf/mixin/propertiesdatabounds.py
Original file line number Diff line number Diff line change
Expand Up @@ -3874,6 +3874,40 @@ def inspect(self):
"""
print(cf_inspect(self)) # pragma: no cover

@_inplace_enabled(default=False)
def pad_missing(self, axis, pad_width=None, to_size=None, inplace=False):
    """Pad an axis of the data with missing values.

    The work is delegated to the ``pad_missing`` operation via
    `_apply_superclass_data_oper`, with the ``bounds`` and
    ``interior_ring`` options both switched on.

    .. versionadded:: 3.17.0

    :Parameters:

        axis: `int`
            Select the axis for which the padding is to be
            applied.

        {{pad_width: sequence of `int`, optional}}

        {{to_size: `int`, optional}}

        {{inplace: `bool`, optional}}

    :Returns:

        `{{class}}` or `None`
            The {{class}} with padded data, or `None` if the
            operation was in-place.

    """
    # The object to operate on, as provided by the in-place
    # machinery
    v = _inplace_enabled_define_and_cleanup(self)

    # Arguments forwarded unchanged to the "pad_missing" operation
    oper_kwargs = {
        "axis": axis,
        "pad_width": pad_width,
        "to_size": to_size,
        "inplace": inplace,
    }

    # NOTE(review): presumably bounds=True and interior_ring=True
    # make the helper pad any bounds and interior ring as well -
    # confirm against `_apply_superclass_data_oper`
    return self._apply_superclass_data_oper(
        v,
        "pad_missing",
        bounds=True,
        interior_ring=True,
        **oper_kwargs,
    )

def period(self, *value, **config):
"""Return or set the period for cyclic values.

Expand Down
Loading