@@ -23,7 +23,7 @@ def naive_dataset_resample(dataset: xr.Dataset, freq: str, method: str) -> xr.Da
2323 return getattr (dataset .resample (time = freq ), method )()
2424
2525
26- def create_dataset_with_mixed_dimensions (n_timesteps = 100 ):
26+ def create_dataset_with_mixed_dimensions (n_timesteps = 48 ):
2727 """
2828 Create a dataset with variables having different dimension structures.
2929
@@ -37,7 +37,7 @@ def create_dataset_with_mixed_dimensions(n_timesteps=100):
3737 ds = xr .Dataset (
3838 coords = {
3939 'time' : timesteps ,
40- 'component' : ['comp1' , 'comp2' , 'comp3' ],
40+ 'component' : ['comp1' , 'comp2' ],
4141 'bus' : ['bus1' , 'bus2' ],
4242 'scenario' : ['base' , 'alt' ],
4343 }
@@ -51,7 +51,7 @@ def create_dataset_with_mixed_dimensions(n_timesteps=100):
5151
5252 # Variable with time + component
5353 ds ['component_flow' ] = xr .DataArray (
54- np .random .randn (n_timesteps , 3 ),
54+ np .random .randn (n_timesteps , 2 ),
5555 dims = ['time' , 'component' ],
5656 )
5757
@@ -63,7 +63,7 @@ def create_dataset_with_mixed_dimensions(n_timesteps=100):
6363
6464 # Variable with time + component + bus
6565 ds ['flow_on_bus' ] = xr .DataArray (
66- np .random .randn (n_timesteps , 3 , 2 ),
66+ np .random .randn (n_timesteps , 2 , 2 ),
6767 dims = ['time' , 'component' , 'bus' ],
6868 )
6969
@@ -75,7 +75,7 @@ def create_dataset_with_mixed_dimensions(n_timesteps=100):
7575
7676 # Variable with time + component + scenario
7777 ds ['component_scenario_flow' ] = xr .DataArray (
78- np .random .randn (n_timesteps , 3 , 2 ),
78+ np .random .randn (n_timesteps , 2 , 2 ),
7979 dims = ['time' , 'component' , 'scenario' ],
8080 )
8181
@@ -109,12 +109,12 @@ def test_resample_equivalence_single_dimension(method):
109109 """
110110 Test with variables having only time dimension.
111111 """
112- timesteps = pd .date_range ('2020-01-01' , periods = 100 , freq = 'h' )
112+ timesteps = pd .date_range ('2020-01-01' , periods = 48 , freq = 'h' )
113113
114114 ds = xr .Dataset (coords = {'time' : timesteps })
115- ds ['var1' ] = xr .DataArray (np .random .randn (100 ), dims = ['time' ])
116- ds ['var2' ] = xr .DataArray (np .random .randn (100 ) * 10 , dims = ['time' ])
117- ds ['var3' ] = xr .DataArray (np .random .randn (100 ) / 5 , dims = ['time' ])
115+ ds ['var1' ] = xr .DataArray (np .random .randn (48 ), dims = ['time' ])
116+ ds ['var2' ] = xr .DataArray (np .random .randn (48 ) * 10 , dims = ['time' ])
117+ ds ['var3' ] = xr .DataArray (np .random .randn (48 ) / 5 , dims = ['time' ])
118118
119119 # Optimized approach
120120 result_optimized = fx .FlowSystem ._resample_by_dimension_groups (ds , '2h' , method )
@@ -130,7 +130,7 @@ def test_resample_equivalence_empty_dataset():
130130 """
131131 Test with an empty dataset (edge case).
132132 """
133- timesteps = pd .date_range ('2020-01-01' , periods = 100 , freq = 'h' )
133+ timesteps = pd .date_range ('2020-01-01' , periods = 48 , freq = 'h' )
134134 ds = xr .Dataset (coords = {'time' : timesteps })
135135
136136 # Both should handle empty dataset gracefully
@@ -144,9 +144,9 @@ def test_resample_equivalence_single_variable():
144144 """
145145 Test with a single variable.
146146 """
147- timesteps = pd .date_range ('2020-01-01' , periods = 100 , freq = 'h' )
147+ timesteps = pd .date_range ('2020-01-01' , periods = 48 , freq = 'h' )
148148 ds = xr .Dataset (coords = {'time' : timesteps })
149- ds ['single_var' ] = xr .DataArray (np .random .randn (100 ), dims = ['time' ])
149+ ds ['single_var' ] = xr .DataArray (np .random .randn (48 ), dims = ['time' ])
150150
151151 # Test multiple methods
152152 for method in ['mean' , 'sum' , 'max' , 'min' ]:
@@ -160,14 +160,14 @@ def test_resample_equivalence_with_nans():
160160 """
161161 Test with NaN values to ensure they're handled consistently.
162162 """
163- timesteps = pd .date_range ('2020-01-01' , periods = 100 , freq = 'h' )
163+ timesteps = pd .date_range ('2020-01-01' , periods = 48 , freq = 'h' )
164164
165165 ds = xr .Dataset (coords = {'time' : timesteps , 'component' : ['a' , 'b' ]})
166166
167167 # Create variable with some NaN values
168- data = np .random .randn (100 , 2 )
169- data [10 : 20 , 0 ] = np .nan
170- data [50 : 55 , 1 ] = np .nan
168+ data = np .random .randn (48 , 2 )
169+ data [5 : 10 , 0 ] = np .nan
170+ data [20 : 25 , 1 ] = np .nan
171171
172172 ds ['var_with_nans' ] = xr .DataArray (data , dims = ['time' , 'component' ])
173173
@@ -183,31 +183,31 @@ def test_resample_equivalence_different_dimension_orders():
183183 """
184184 Test that dimension order doesn't affect the equivalence.
185185 """
186- timesteps = pd .date_range ('2020-01-01' , periods = 100 , freq = 'h' )
186+ timesteps = pd .date_range ('2020-01-01' , periods = 48 , freq = 'h' )
187187
188188 ds = xr .Dataset (
189189 coords = {
190190 'time' : timesteps ,
191- 'x' : ['x1' , 'x2' , 'x3' ],
191+ 'x' : ['x1' , 'x2' ],
192192 'y' : ['y1' , 'y2' ],
193193 }
194194 )
195195
196196 # Variable with time first
197197 ds ['var_time_first' ] = xr .DataArray (
198- np .random .randn (100 , 3 , 2 ),
198+ np .random .randn (48 , 2 , 2 ),
199199 dims = ['time' , 'x' , 'y' ],
200200 )
201201
202202 # Variable with time in middle
203203 ds ['var_time_middle' ] = xr .DataArray (
204- np .random .randn (3 , 100 , 2 ),
204+ np .random .randn (2 , 48 , 2 ),
205205 dims = ['x' , 'time' , 'y' ],
206206 )
207207
208208 # Variable with time last
209209 ds ['var_time_last' ] = xr .DataArray (
210- np .random .randn (3 , 2 , 100 ),
210+ np .random .randn (2 , 2 , 48 ),
211211 dims = ['x' , 'y' , 'time' ],
212212 )
213213
@@ -225,14 +225,14 @@ def test_resample_equivalence_multiple_variables_same_dims():
225225 This is the key optimization case - variables with same dims should be
226226 grouped and resampled together.
227227 """
228- timesteps = pd .date_range ('2020-01-01' , periods = 100 , freq = 'h' )
228+ timesteps = pd .date_range ('2020-01-01' , periods = 48 , freq = 'h' )
229229
230230 ds = xr .Dataset (coords = {'time' : timesteps , 'location' : ['A' , 'B' , 'C' ]})
231231
232232 # Multiple variables with same dimensions (time, location)
233- for i in range (5 ):
233+ for i in range (3 ):
234234 ds [f'var_{ i } ' ] = xr .DataArray (
235- np .random .randn (100 , 3 ),
235+ np .random .randn (48 , 3 ),
236236 dims = ['time' , 'location' ],
237237 )
238238
@@ -247,21 +247,21 @@ def test_resample_equivalence_large_dataset():
247247 """
248248 Test with a larger, more realistic dataset.
249249 """
250- timesteps = pd .date_range ('2020-01-01' , periods = 8760 , freq = 'h' ) # Full year
250+ timesteps = pd .date_range ('2020-01-01' , periods = 168 , freq = 'h' ) # One week
251251
252252 ds = xr .Dataset (
253253 coords = {
254254 'time' : timesteps ,
255- 'component' : [f'comp_{ i } ' for i in range (10 )],
256- 'bus' : [f'bus_{ i } ' for i in range (5 )],
255+ 'component' : [f'comp_{ i } ' for i in range (5 )],
256+ 'bus' : [f'bus_{ i } ' for i in range (3 )],
257257 }
258258 )
259259
260260 # Various variable types
261- ds ['simple_var' ] = xr .DataArray (np .random .randn (8760 ), dims = ['time' ])
262- ds ['component_var' ] = xr .DataArray (np .random .randn (8760 , 10 ), dims = ['time' , 'component' ])
263- ds ['bus_var' ] = xr .DataArray (np .random .randn (8760 , 5 ), dims = ['time' , 'bus' ])
264- ds ['complex_var' ] = xr .DataArray (np .random .randn (8760 , 10 , 5 ), dims = ['time' , 'component' , 'bus' ])
261+ ds ['simple_var' ] = xr .DataArray (np .random .randn (168 ), dims = ['time' ])
262+ ds ['component_var' ] = xr .DataArray (np .random .randn (168 , 5 ), dims = ['time' , 'component' ])
263+ ds ['bus_var' ] = xr .DataArray (np .random .randn (168 , 3 ), dims = ['time' , 'bus' ])
264+ ds ['complex_var' ] = xr .DataArray (np .random .randn (168 , 5 , 3 ), dims = ['time' , 'component' , 'bus' ])
265265
266266 # Test with a subset of methods (to keep test time reasonable)
267267 for method in ['mean' , 'sum' , 'first' ]:
0 commit comments