Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 21 additions & 11 deletions flixopt/transform_accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -898,31 +898,42 @@ def _calculate_clustering_weights(ds) -> dict[str, float]:
def _build_cluster_config_with_weights(
cluster: ClusterConfig | None,
auto_weights: dict[str, float],
available_columns: set[str] | None = None,
) -> ClusterConfig:
"""Merge auto-calculated weights into ClusterConfig.

Args:
cluster: Optional user-provided ClusterConfig.
auto_weights: Automatically calculated weights based on data variance.
available_columns: Column names present in the clustering DataFrame.
If provided, weights are filtered to only include these columns.
This prevents tsam errors when some time series are dropped
(e.g., constant arrays removed before clustering).

Returns:
ClusterConfig with weights set (either user-provided or auto-calculated).
"""
from tsam import ClusterConfig

# User provided ClusterConfig with weights - use as-is
# Determine weights: user-provided take priority over auto-calculated
if cluster is not None and cluster.weights is not None:
return cluster
weights = dict(cluster.weights)
else:
weights = auto_weights

# Filter weights to only include columns present in the clustering data
if available_columns is not None:
weights = {name: w for name, w in weights.items() if name in available_columns}

# No ClusterConfig provided - use defaults with auto-calculated weights
# No ClusterConfig provided - use defaults with weights
if cluster is None:
return ClusterConfig(weights=auto_weights)
return ClusterConfig(weights=weights)

# ClusterConfig provided without weights - add auto-calculated weights
# ClusterConfig provided - use its settings with (possibly filtered) weights
return ClusterConfig(
method=cluster.method,
representation=cluster.representation,
weights=auto_weights,
weights=weights,
normalize_column_means=cluster.normalize_column_means,
use_duration_curves=cluster.use_duration_curves,
include_period_sums=cluster.include_period_sums,
Expand Down Expand Up @@ -1762,12 +1773,11 @@ def to_clean_key(period_label, scenario_label) -> tuple:
with warnings.catch_warnings():
warnings.filterwarnings('ignore', category=UserWarning, message='.*minimal value.*exceeds.*')

# Build ClusterConfig with auto-calculated weights
# Build ClusterConfig with auto-calculated weights, filtered to available columns
clustering_weights = self._calculate_clustering_weights(ds_slice)
filtered_weights = {
name: w for name, w in clustering_weights.items() if name in df_for_clustering.columns
}
cluster_config = self._build_cluster_config_with_weights(cluster, filtered_weights)
cluster_config = self._build_cluster_config_with_weights(
cluster, clustering_weights, available_columns=set(df_for_clustering.columns)
)

# Perform clustering based on selected data_vars (or all if not specified)
aggregation_results[key] = tsam.aggregate(
Expand Down
124 changes: 124 additions & 0 deletions tests/test_clustering/test_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,130 @@ def test_tsam_kwargs_passthrough(self, basic_flow_system):
)
assert len(fs_clustered.clusters) == 2

def test_extra_weight_keys_filtered(self, basic_flow_system):
    """Extra keys in ClusterConfig.weights must be silently filtered out.

    Regression test: tsam raises when weights reference columns absent from
    the clustering DataFrame. Such stray keys appear when constant columns
    are dropped prior to clustering, or when a user supplies weights for
    variables the FlowSystem does not contain.
    """
    from tsam import ClusterConfig

    # Start from the genuine clustering columns, then add bogus entries.
    ds = basic_flow_system.transform.clustering_data()
    weight_map = dict.fromkeys(ds.data_vars, 1.0)
    weight_map['nonexistent_variable'] = 0.5
    weight_map['another_missing_col'] = 0.3

    # Clustering must succeed despite the extra weight keys.
    clustered = basic_flow_system.transform.cluster(
        n_clusters=2,
        cluster_duration='1D',
        cluster=ClusterConfig(weights=weight_map),
    )
    assert len(clustered.clusters) == 2

def test_extra_weight_keys_filtered_with_constant_column(self):
    """Weights for constant (dropped) columns must be filtered out.

    A time series that is constant over time is removed before clustering,
    so user-supplied weights naming such columns have to be silently
    discarded rather than forwarded to tsam.
    """
    pytest.importorskip('tsam')
    from tsam import ClusterConfig

    from flixopt import Bus, Flow, Sink, Source
    from flixopt.core import TimeSeriesData

    hours = 168  # one week of hourly timesteps
    system = FlowSystem(timesteps=pd.date_range('2024-01-01', periods=hours, freq='h'))

    profile = np.sin(np.linspace(0, 14 * np.pi, hours)) + 2
    elec = Bus('electricity')
    supply = Flow('grid_in', bus='electricity', size=100)
    # One varying profile and one constant profile (the latter gets dropped).
    varying = Flow(
        'demand_out',
        bus='electricity',
        size=100,
        fixed_relative_profile=TimeSeriesData(profile / 100),
    )
    flat = Flow(
        'constant_out',
        bus='electricity',
        size=50,
        fixed_relative_profile=TimeSeriesData(np.full(hours, 0.8)),
    )
    system.add_elements(
        Source('grid', outputs=[supply]),
        Sink('demand', inputs=[varying]),
        Sink('constant_load', inputs=[flat]),
        elec,
    )

    # Reference every clustering column (constant one included) plus a
    # column that does not exist at all.
    columns = set(system.transform.clustering_data().data_vars)
    weight_map = dict.fromkeys(columns, 1.0)
    weight_map['totally_fake_column'] = 0.5

    # Before the fix this raised inside tsam because of the extra keys.
    clustered = system.transform.cluster(
        n_clusters=2,
        cluster_duration='1D',
        cluster=ClusterConfig(weights=weight_map),
    )
    assert len(clustered.clusters) == 2

def test_extra_weight_keys_filtered_multiperiod(self):
    """Extra weight keys must be filtered in multi-period clustering.

    Every period is clustered on its own data slice, so weight filtering
    has to happen per slice — no stray keys may reach tsam.
    """
    pytest.importorskip('tsam')
    from tsam import ClusterConfig

    from flixopt import Bus, Flow, Sink, Source
    from flixopt.core import TimeSeriesData

    hours = 168  # one week of hourly timesteps
    system = FlowSystem(
        timesteps=pd.date_range('2024-01-01', periods=hours, freq='h'),
        periods=pd.Index([2025, 2030], name='period'),
    )

    profile = np.sin(np.linspace(0, 14 * np.pi, hours)) + 2
    elec = Bus('electricity')
    supply = Flow('grid_in', bus='electricity', size=100)
    load = Flow(
        'demand_out',
        bus='electricity',
        size=100,
        fixed_relative_profile=TimeSeriesData(profile / 100),
    )
    system.add_elements(Source('grid', outputs=[supply]), Sink('demand', inputs=[load]), elec)

    # Weight keys covering every real column plus one that exists nowhere.
    weight_map = dict.fromkeys(system.transform.clustering_data().data_vars, 1.0)
    weight_map['nonexistent_period_var'] = 0.7

    clustered = system.transform.cluster(
        n_clusters=2,
        cluster_duration='1D',
        cluster=ClusterConfig(weights=weight_map),
    )
    assert len(clustered.clusters) == 2
    assert 'period' in clustered.clustering.metrics.dims

def test_metrics_with_periods(self):
"""Test that metrics have period dimension for multi-period FlowSystems."""
pytest.importorskip('tsam')
Expand Down
Loading