diff --git a/.rodare.json b/.rodare.json
index dd52734dea..d405ca4a24 100644
--- a/.rodare.json
+++ b/.rodare.json
@@ -96,6 +96,11 @@
             "name": "Ganyushin, Dmitry",
             "orcid": "0000-0001-7337-2161",
             "type": "Other"
+        },
+        {
+            "affiliation": "NVIDIA",
+            "name": "Kirkham, John",
+            "type": "Other"
         }
     ],
     "title": "C++ & Python API for Scientific I/O with openPMD",
diff --git a/README.md b/README.md
index eb762fa78f..b2e83598b4 100644
--- a/README.md
+++ b/README.md
@@ -401,6 +401,8 @@ Further thanks go to improvements and contributions from:
   report on NVCC warnings
 * [Dmitry Ganyushin (ORNL)](https://github.com/dmitry-ganyushin):
   Dask dataframe support
+* [John Kirkham (NVIDIA)](https://github.com/jakirkham):
+  Dask guidance & reviews
 
 ### Grants
 
diff --git a/src/binding/python/openpmd_api/DaskDataFrame.py b/src/binding/python/openpmd_api/DaskDataFrame.py
index fecdcc27f4..b942fa86fa 100644
--- a/src/binding/python/openpmd_api/DaskDataFrame.py
+++ b/src/binding/python/openpmd_api/DaskDataFrame.py
@@ -2,7 +2,7 @@
 This file is part of the openPMD-api.
 
 Copyright 2021 openPMD contributors
-Authors: Axel Huebl, Dmitry Ganyushin
+Authors: Axel Huebl, Dmitry Ganyushin, John Kirkham
 License: LGPLv3+
 """
 import numpy as np
@@ -19,6 +19,17 @@
     found_pandas = False
 
 
+def read_chunk_to_df(species, chunk):
+    """ Load the rows covered by one chunk via ``species.to_df``.
+
+    ``chunk.extent`` is taken to be the chunk's length rather than its end
+    index, so the slice stops at ``offset + extent``.
+    """
+    begin = chunk.offset[0]
+    stride = np.s_[begin:begin + chunk.extent[0]]
+    return species.to_df(stride)
+
+
 def particles_to_daskdataframe(particle_species):
     """ Load all records of a particle species into a Dask DataFrame.
 
@@ -74,13 +85,9 @@ def particles_to_daskdataframe(particle_species):
         if chunks:
             break
 
-    def read_chunk(species, chunk):
-        stride = np.s_[chunk.offset[0]:chunk.extent[0]]
-        return species.to_df(stride)
-
     # merge DataFrames
     dfs = [
-        delayed(read_chunk)(particle_species, chunk) for chunk in chunks
+        delayed(read_chunk_to_df)(particle_species, chunk) for chunk in chunks
     ]
     df = dd.from_delayed(dfs)
 