@@ -4524,23 +4524,75 @@ def to_array(self, dim="variable", name=None):
45244524 data , coords , dims , attrs = self .attrs , name = name , indexes = indexes
45254525 )
45264526
4527- def _to_dataframe (self , ordered_dims ):
def _normalize_dim_order(
    self, dim_order: List[Hashable] = None
) -> Dict[Hashable, int]:
    """
    Validate an optional dimension order and map each dimension to its size.

    Parameters
    ----------
    dim_order
        Dimension names in the desired order. Any iterable is accepted and
        is consumed exactly once. If None, the iteration order of
        ``self.dims`` is used.

    Returns
    -------
    result
        Mapping from dimension name to size (looked up in ``self.dims``),
        with keys inserted in the requested order.

    Raises
    ------
    ValueError
        If ``dim_order`` is given and its set of names differs from the
        set of names in ``self.dims``.

    """
    if dim_order is None:
        dim_order = list(self.dims)
    else:
        # Materialize once: the set() check below would otherwise exhaust a
        # one-shot iterable (e.g. a generator) and leave nothing for the
        # mapping construction, silently producing an empty result.
        dim_order = list(dim_order)
        if set(dim_order) != set(self.dims):
            raise ValueError(
                "dim_order {} does not match the set of dimensions of this "
                "Dataset: {}".format(dim_order, list(self.dims))
            )

    # Dict insertion order carries the requested dimension order to callers.
    return {k: self.dims[k] for k in dim_order}
4556+
def _to_dataframe(self, ordered_dims: Mapping[Hashable, int]) -> pd.DataFrame:
    """
    Build a pandas.DataFrame from the variables that are not dimensions.

    Parameters
    ----------
    ordered_dims
        Mapping from dimension name to size. Its key order is the order
        passed to ``self.coords.to_index`` and to each variable's
        ``set_dims`` call.

    Returns
    -------
    result
        DataFrame with one column per entry of ``self.variables`` that is
        not a key of ``self.dims``, indexed by
        ``self.coords.to_index([*ordered_dims])``.

    """
    columns = [k for k in self.variables if k not in self.dims]
    # Give every variable the same dimension layout, then flatten it to 1-D
    # so that all columns have the same length and element order.
    data = [
        self._variables[k].set_dims(ordered_dims).values.reshape(-1)
        for k in columns
    ]
    # to_index receives the dimension names as a plain list, not the mapping.
    index = self.coords.to_index([*ordered_dims])
    return pd.DataFrame(dict(zip(columns, data)), index=index)
45354565
def to_dataframe(self, dim_order: List[Hashable] = None) -> pd.DataFrame:
    """Convert this dataset into a pandas.DataFrame.

    Non-index variables in this dataset form the columns of the
    DataFrame. The DataFrame is indexed by the Cartesian product of
    this dataset's indices.

    Parameters
    ----------
    dim_order
        Hierarchical dimension order for the resulting dataframe. All
        arrays are transposed to this order and then written out as flat
        vectors in contiguous order, so the last dimension in this list
        will be contiguous in the resulting DataFrame. This has a major
        influence on which operations are efficient on the resulting
        dataframe.

        If provided, must include all dimensions of this dataset. By
        default, the order of ``self.dims`` is used (described as
        alphabetical in the existing documentation of this method).

    Returns
    -------
    result
        Dataset as a pandas DataFrame.

    Raises
    ------
    ValueError
        If ``dim_order`` is provided and does not match the set of
        dimensions of this dataset (raised by ``_normalize_dim_order``).

    """
    # Validate first so an invalid order fails before any data is reshaped.
    ordered_dims = self._normalize_dim_order(dim_order=dim_order)

    return self._to_dataframe(ordered_dims=ordered_dims)
45444596
45454597 def _set_sparse_data_from_dataframe (
45464598 self , idx : pd .Index , arrays : List [Tuple [Hashable , np .ndarray ]], dims : tuple
@@ -4694,11 +4746,11 @@ def to_dask_dataframe(self, dim_order=None, set_index=False):
46944746 influence on which operations are efficient on the resulting dask
46954747 dataframe.
46964748
4697- If provided, must include all dimensions on this dataset. By
4749+ If provided, must include all dimensions of this dataset. By
46984750 default, dimensions are sorted alphabetically.
46994751 set_index : bool, optional
47004752 If set_index=True, the dask DataFrame is indexed by this dataset's
4701- coordinate. Since dask DataFrames to not support multi-indexes,
4753+ coordinate. Since dask DataFrames do not support multi-indexes,
47024754 set_index only works if the dataset only contains one dimension.
47034755
47044756 Returns
@@ -4709,15 +4761,7 @@ def to_dask_dataframe(self, dim_order=None, set_index=False):
47094761 import dask .array as da
47104762 import dask .dataframe as dd
47114763
4712- if dim_order is None :
4713- dim_order = list (self .dims )
4714- elif set (dim_order ) != set (self .dims ):
4715- raise ValueError (
4716- "dim_order {} does not match the set of dimensions on this "
4717- "Dataset: {}" .format (dim_order , list (self .dims ))
4718- )
4719-
4720- ordered_dims = {k : self .dims [k ] for k in dim_order }
4764+ ordered_dims = self ._normalize_dim_order (dim_order = dim_order )
47214765
47224766 columns = list (ordered_dims )
47234767 columns .extend (k for k in self .coords if k not in self .dims )
@@ -4744,6 +4788,8 @@ def to_dask_dataframe(self, dim_order=None, set_index=False):
47444788 df = dd .concat (series_list , axis = 1 )
47454789
47464790 if set_index :
4791+ dim_order = [* ordered_dims ]
4792+
47474793 if len (dim_order ) == 1 :
47484794 (dim ,) = dim_order
47494795 df = df .set_index (dim )
0 commit comments