Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 78 additions & 4 deletions src/climatebenchpress/compressor/scripts/create_error_bounds.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,49 @@
# derived from the ERA5 ensembles.
ERROR_BOUNDS = "https://gist.githubusercontent.com/juntyr/bbe2780256e5f91d8f2cb2f606b7935f/raw/table-raw.csv"

# Bitwise real information for no2 data computed from the CAMS dataset.
# The numbers are taken from the supplementary material of:
# https://www.nature.com/articles/s43588-021-00156-2
# "Compressing atmospheric data into its real information content"
# Milan Klöwer, Miha Razinger, Juan J. Dominguez, Peter D. Düben & Tim N. Palmer
# Exact data was extracted from the CSV file at:
# https://static-content.springer.com/esm/art%3A10.1038%2Fs43588-021-00156-2/MediaObjects/43588_2021_156_MOESM2_ESM.zip
# Accessed on: 29/08/2025.
NO2_REAL_INFORMATION = [
0.0,
0.0,
0.0,
0.82609236,
0.82609236,
0.8401368,
0.7781777,
0.65409344,
0.47173682,
0.2915112,
0.16627097,
0.08410827,
0.03733299,
0.015123328,
0.0058184885,
0.0020212175,
0.00053772517,
9.423521e-5,
8.782874e-6,
5.8530753e-7,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
]


VAR_NAME_TO_ERA5 = {
# NextGEMS Icon Outgoing Longwave Radiation (OLR).
Expand Down Expand Up @@ -60,15 +103,15 @@
ABS_ERROR = "abs_error"
REL_ERROR = "rel_error"
VAR_NAME_TO_ERROR_BOUND = {
"rlut": REL_ERROR,
"rlut": ABS_ERROR,
"agb": REL_ERROR,
"pr": REL_ERROR,
"ta": ABS_ERROR,
"tos": ABS_ERROR,
"10m_u_component_of_wind": REL_ERROR,
"10m_v_component_of_wind": REL_ERROR,
"10m_u_component_of_wind": ABS_ERROR,
"10m_v_component_of_wind": ABS_ERROR,
"mean_sea_level_pressure": ABS_ERROR,
"no2": ABS_ERROR,
"no2": REL_ERROR,
}


Expand Down Expand Up @@ -111,6 +154,10 @@ def create_error_bounds(
low_error_bounds[v], mid_error_bounds[v], high_error_bounds[v] = (
get_agb_bound(datasets, percentiles=[1.00, 0.99, 0.95])
)
elif v == "no2":
low_error_bounds[v], mid_error_bounds[v], high_error_bounds[v] = (
get_no2_bounds(percentiles=[1.00, 0.99, 0.95])
)
else:
data_range: float = (ds[v].max() - ds[v].min()).values.item() # type: ignore
low_error_bounds[v] = {
Expand Down Expand Up @@ -177,6 +224,33 @@ def get_error_bounds(
return var_ebs


def get_no2_bounds(percentiles=[1.00, 0.99, 0.95]) -> list[dict[str, Optional[float]]]:
# First we need to transform the bitwise real information into a cumulative
# distribution function.
real_information_dist = np.cumsum(NO2_REAL_INFORMATION) / np.sum(
NO2_REAL_INFORMATION
)
no2_bounds = []
for p in percentiles:
# Find the first position where cumulative distribution exceeds p.
# Add one for 1-based indexing.
keepbits = np.argmax(real_information_dist > p) + 1
Comment thread
milankl marked this conversation as resolved.
# There are 9 non-mantissa bits in the 32-bit floating point representation.
mantissa_keepbits = max(int(keepbits - 9), 0)
# 2^(-mantissa_keepbits) indicates the spacing between representable values.
# 2^(-mantissa_keepbits) / 2 = 2 ** (-mantissa_keepbits - 1)
# then is the maximum rounding error.
# See: https://en.wikipedia.org/wiki/Machine_epsilon
rel_error = 2 ** (-mantissa_keepbits - 1)
no2_bounds.append(
{
"abs_error": None,
"rel_error": float(rel_error),
}
)
return no2_bounds


def get_agb_bound(
datasets: Path, percentiles=[1.00, 0.99, 0.95]
) -> list[dict[str, Optional[float]]]:
Expand Down