Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ If upgrading from v2.x, see the [v3.0.0 release notes](https://github.com/flixOp
### 💥 Breaking Changes

### ♻️ Changed
- Greatly sped up resampling of a FlowSystem (20–40× faster) by converting to a DataArray internally

### 🗑️ Deprecated

Expand Down
94 changes: 74 additions & 20 deletions flixopt/flow_system.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

import logging
import warnings
from collections import defaultdict
from itertools import chain
from typing import TYPE_CHECKING, Any, Literal, Optional

Expand Down Expand Up @@ -982,6 +983,65 @@ def isel(
selected_dataset = ds.isel(**indexers)
return self.__class__.from_dataset(selected_dataset)

def _resample_by_dimension_groups(
    self,
    time_dataset: xr.Dataset,
    time: str,
    method: str,
    **kwargs: Any,
) -> xr.Dataset:
    """
    Resample a dataset along ``time``, one dimension-group at a time.

    Data variables are bucketed by the set of dimensions they carry besides
    ``'time'``. Dimension names are sorted before being used as the bucket
    key, so the order of dimensions on a variable does not matter. Each
    bucket is selected as its own sub-dataset, resampled independently, and
    the per-bucket results are combined again with ``xr.merge``.

    Variables with different dimension structures therefore never take part
    in the same resample call.
    NOTE(review): the motivation is presumably speed and avoiding NaN-padded
    broadcasting across unrelated dimensions; confirm against benchmarks.

    If the dataset has no data variables at all, it is resampled directly
    and no merge takes place.

    Args:
        time_dataset: Dataset whose data variables are to be resampled along
            the ``time`` dimension.
        time: Resampling frequency (e.g., '2h', '1D', '1M').
        method: Name of the aggregation method to call on the resampler
            (e.g., 'mean', 'sum', 'first'). It is looked up with ``getattr``
            and is not validated here.
        **kwargs: Additional keyword arguments forwarded to ``resample()``.

    Returns:
        Dataset holding the resampled variables of every bucket.
    """

    def _aggregate(subset):
        # Build the resampler and apply the aggregation selected by name.
        resampler = subset.resample(time=time, **kwargs)
        return getattr(resampler, method)()

    # Bucket variable names by their (sorted) non-time dimensions.
    names_by_dims = defaultdict(list)
    for name, data_var in time_dataset.data_vars.items():
        other_dims = tuple(sorted(dim for dim in data_var.dims if dim != 'time'))
        names_by_dims[other_dims].append(name)

    # Nothing to bucket: resample the (variable-less) dataset as it is.
    if not names_by_dims:
        return _aggregate(time_dataset)

    # Resample every bucket on its own, then stitch the results together.
    return xr.merge([_aggregate(time_dataset[names]) for names in names_by_dims.values()])

def resample(
self,
time: str,
Expand All @@ -1007,34 +1067,28 @@ def resample(
if not self.connected_and_transformed:
self.connect_and_transform()

dataset = self.to_dataset()
# Validate method before resampling
available_methods = ['mean', 'sum', 'max', 'min', 'first', 'last', 'std', 'var', 'median', 'count']
if method not in available_methods:
raise ValueError(f'Unsupported resampling method: {method}. Available: {available_methods}')

# Separate variables with and without time dimension
time_vars = {}
non_time_vars = {}
dataset = self.to_dataset()

for var_name, var in dataset.data_vars.items():
if 'time' in var.dims:
time_vars[var_name] = var
else:
non_time_vars[var_name] = var
time_var_names = [v for v in dataset.data_vars if 'time' in dataset[v].dims]
non_time_var_names = [v for v in dataset.data_vars if v not in time_var_names]

# Only resample variables that have time dimension
time_dataset = dataset[list(time_vars.keys())]
resampler = time_dataset.resample(time=time, **kwargs)
time_dataset = dataset[time_var_names]

if hasattr(resampler, method):
resampled_time_data = getattr(resampler, method)()
else:
available_methods = ['mean', 'sum', 'max', 'min', 'first', 'last', 'std', 'var', 'median', 'count']
raise ValueError(f'Unsupported resampling method: {method}. Available: {available_methods}')
# Resample with dimension grouping to avoid broadcasting
resampled_time_dataset = self._resample_by_dimension_groups(time_dataset, time, method, **kwargs)

# Combine resampled time variables with non-time variables
if non_time_vars:
non_time_dataset = dataset[list(non_time_vars.keys())]
resampled_dataset = xr.merge([resampled_time_data, non_time_dataset])
if non_time_var_names:
non_time_dataset = dataset[non_time_var_names]
resampled_dataset = xr.merge([resampled_time_dataset, non_time_dataset])
else:
resampled_dataset = resampled_time_data
resampled_dataset = resampled_time_dataset

# Let FlowSystem recalculate or use explicitly set value
resampled_dataset.attrs['hours_of_last_timestep'] = hours_of_last_timestep
Expand Down