Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 21 additions & 11 deletions flixopt/transform_accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -898,31 +898,42 @@ def _calculate_clustering_weights(ds) -> dict[str, float]:
def _build_cluster_config_with_weights(
cluster: ClusterConfig | None,
auto_weights: dict[str, float],
available_columns: set[str] | None = None,
) -> ClusterConfig:
"""Merge auto-calculated weights into ClusterConfig.

Args:
cluster: Optional user-provided ClusterConfig.
auto_weights: Automatically calculated weights based on data variance.
available_columns: Column names present in the clustering DataFrame.
If provided, weights are filtered to only include these columns.
This prevents tsam errors when some time series are dropped
(e.g., constant arrays removed before clustering).

Returns:
ClusterConfig with weights set (either user-provided or auto-calculated).
"""
from tsam import ClusterConfig

# User provided ClusterConfig with weights - use as-is
# Determine weights: user-provided take priority over auto-calculated
if cluster is not None and cluster.weights is not None:
return cluster
weights = dict(cluster.weights)
else:
weights = auto_weights

# Filter weights to only include columns present in the clustering data
if available_columns is not None:
weights = {name: w for name, w in weights.items() if name in available_columns}

# No ClusterConfig provided - use defaults with auto-calculated weights
# No ClusterConfig provided - use defaults with weights
if cluster is None:
return ClusterConfig(weights=auto_weights)
return ClusterConfig(weights=weights)

# ClusterConfig provided without weights - add auto-calculated weights
# ClusterConfig provided - use its settings with (possibly filtered) weights
return ClusterConfig(
method=cluster.method,
representation=cluster.representation,
weights=auto_weights,
weights=weights,
normalize_column_means=cluster.normalize_column_means,
use_duration_curves=cluster.use_duration_curves,
include_period_sums=cluster.include_period_sums,
Expand Down Expand Up @@ -1762,12 +1773,11 @@ def to_clean_key(period_label, scenario_label) -> tuple:
with warnings.catch_warnings():
warnings.filterwarnings('ignore', category=UserWarning, message='.*minimal value.*exceeds.*')

# Build ClusterConfig with auto-calculated weights
# Build ClusterConfig with auto-calculated weights, filtered to available columns
clustering_weights = self._calculate_clustering_weights(ds_slice)
filtered_weights = {
name: w for name, w in clustering_weights.items() if name in df_for_clustering.columns
}
cluster_config = self._build_cluster_config_with_weights(cluster, filtered_weights)
cluster_config = self._build_cluster_config_with_weights(
cluster, clustering_weights, available_columns=set(df_for_clustering.columns)
)

# Perform clustering based on selected data_vars (or all if not specified)
aggregation_results[key] = tsam.aggregate(
Expand Down
124 changes: 124 additions & 0 deletions tests/test_clustering/test_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,130 @@ def test_tsam_kwargs_passthrough(self, basic_flow_system):
)
assert len(fs_clustered.clusters) == 2

def test_extra_weight_keys_filtered(self, basic_flow_system):
    """Extra keys in ClusterConfig.weights must be silently filtered out.

    Regression test: tsam raises when weights reference columns absent from
    the clustering DataFrame. Such stray keys appear when constant columns
    are dropped prior to clustering, or when a user supplies weights for
    variables the FlowSystem does not contain.
    """
    from tsam import ClusterConfig

    # Start from the genuine clustering columns, then add bogus entries.
    ds = basic_flow_system.transform.clustering_data()
    weight_map = dict.fromkeys(ds.data_vars, 1.0)
    weight_map['nonexistent_variable'] = 0.5
    weight_map['another_missing_col'] = 0.3

    # Clustering must succeed despite the extra weight keys.
    clustered = basic_flow_system.transform.cluster(
        n_clusters=2,
        cluster_duration='1D',
        cluster=ClusterConfig(weights=weight_map),
    )
    assert len(clustered.clusters) == 2

def test_extra_weight_keys_filtered_with_constant_column(self):
    """Weights for constant (dropped) columns must be filtered out.

    A time series that is constant over time is removed before clustering,
    so user-supplied weights naming such columns have to be silently
    discarded rather than forwarded to tsam.
    """
    pytest.importorskip('tsam')
    from tsam import ClusterConfig

    from flixopt import Bus, Flow, Sink, Source
    from flixopt.core import TimeSeriesData

    hours = 168  # one week of hourly timesteps
    system = FlowSystem(timesteps=pd.date_range('2024-01-01', periods=hours, freq='h'))

    profile = np.sin(np.linspace(0, 14 * np.pi, hours)) + 2
    elec = Bus('electricity')
    supply = Flow('grid_in', bus='electricity', size=100)
    # One varying profile and one constant profile (the latter gets dropped).
    varying = Flow(
        'demand_out',
        bus='electricity',
        size=100,
        fixed_relative_profile=TimeSeriesData(profile / 100),
    )
    flat = Flow(
        'constant_out',
        bus='electricity',
        size=50,
        fixed_relative_profile=TimeSeriesData(np.full(hours, 0.8)),
    )
    system.add_elements(
        Source('grid', outputs=[supply]),
        Sink('demand', inputs=[varying]),
        Sink('constant_load', inputs=[flat]),
        elec,
    )

    # Reference every clustering column (constant one included) plus a
    # column that does not exist at all.
    columns = set(system.transform.clustering_data().data_vars)
    weight_map = dict.fromkeys(columns, 1.0)
    weight_map['totally_fake_column'] = 0.5

    # Before the fix this raised inside tsam because of the extra keys.
    clustered = system.transform.cluster(
        n_clusters=2,
        cluster_duration='1D',
        cluster=ClusterConfig(weights=weight_map),
    )
    assert len(clustered.clusters) == 2

def test_extra_weight_keys_filtered_multiperiod(self):
    """Extra weight keys must be filtered in multi-period clustering.

    Every period is clustered on its own data slice, so weight filtering
    has to happen per slice — no stray keys may reach tsam.
    """
    pytest.importorskip('tsam')
    from tsam import ClusterConfig

    from flixopt import Bus, Flow, Sink, Source
    from flixopt.core import TimeSeriesData

    hours = 168  # one week of hourly timesteps
    system = FlowSystem(
        timesteps=pd.date_range('2024-01-01', periods=hours, freq='h'),
        periods=pd.Index([2025, 2030], name='period'),
    )

    profile = np.sin(np.linspace(0, 14 * np.pi, hours)) + 2
    elec = Bus('electricity')
    supply = Flow('grid_in', bus='electricity', size=100)
    load = Flow(
        'demand_out',
        bus='electricity',
        size=100,
        fixed_relative_profile=TimeSeriesData(profile / 100),
    )
    system.add_elements(Source('grid', outputs=[supply]), Sink('demand', inputs=[load]), elec)

    # Weight keys covering every real column plus one that exists nowhere.
    weight_map = dict.fromkeys(system.transform.clustering_data().data_vars, 1.0)
    weight_map['nonexistent_period_var'] = 0.7

    clustered = system.transform.cluster(
        n_clusters=2,
        cluster_duration='1D',
        cluster=ClusterConfig(weights=weight_map),
    )
    assert len(clustered.clusters) == 2
    assert 'period' in clustered.clustering.metrics.dims

def test_metrics_with_periods(self):
"""Test that metrics have period dimension for multi-period FlowSystems."""
pytest.importorskip('tsam')
Expand Down
Loading