From a6eabfb7f677509244c4f53daacdca85a182ce4c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franz=20P=C3=B6schel?= <franz.poeschel@gmail.com>
Date: Thu, 2 Mar 2023 13:34:07 +0100
Subject: [PATCH 01/27] Add chunk distribution algorithms

---
 include/openPMD/ChunkInfo.hpp                 | 237 +++++++++
 include/openPMD/benchmark/mpi/BlockSlicer.hpp |   4 +
 .../mpi/OneDimensionalBlockSlicer.hpp         |   2 +
 src/ChunkInfo.cpp                             | 492 ++++++++++++++++++
 .../mpi/OneDimensionalBlockSlicer.cpp         |   5 +
 5 files changed, 740 insertions(+)
diff --git a/include/openPMD/ChunkInfo.hpp b/include/openPMD/ChunkInfo.hpp
index 9bc6e94972..b44379b2aa 100644
--- a/include/openPMD/ChunkInfo.hpp
+++ b/include/openPMD/ChunkInfo.hpp
@@ -23,6 +23,7 @@
 #include "openPMD/config.hpp"
 
 #include "openPMD/Dataset.hpp" // Offset, Extent
+#include "openPMD/benchmark/mpi/BlockSlicer.hpp"
 
 #if openPMD_HAVE_MPI
 #include <mpi.h>
@@ -84,7 +85,243 @@ using ChunkTable = std::vector<WrittenChunkInfo>;
 
 namespace chunk_assignment
 {
+    constexpr char const *HOSTFILE_VARNAME = "MPI_WRITTEN_HOSTFILE";
+
     using RankMeta = std::map<unsigned int, std::string>;
+
+    using Assignment = std::map<unsigned int, std::vector<WrittenChunkInfo>>;
+
+    struct PartialAssignment
+    {
+        ChunkTable notAssigned; //!< chunks that no reader has been given yet
+        Assignment assigned; //!< chunks already assigned, keyed by reader rank
+
+        explicit PartialAssignment() = default;
+        PartialAssignment(ChunkTable notAssigned); // nothing assigned yet
+        PartialAssignment(ChunkTable notAssigned, Assignment assigned);
+    };
+
+    /**
+     * @brief Interface for a chunk distribution strategy.
+     *
+     * Used for implementing algorithms that read a ChunkTable as produced
+     * by BaseRecordComponent::availableChunks() and produce as result an
+     * Assignment that guides data sinks on how to load data into reading
+     * processes.
+     */
+    struct Strategy
+    {
+        Assignment assign(
+            ChunkTable,
+            RankMeta const &rankMetaIn,
+            RankMeta const &rankMetaOut);
+        /**
+         * @brief Assign chunks to be loaded to reading processes.
+         *
+         * @param partialAssignment A table of unassigned chunks together with
+         *        an Assignment of chunks that might have already been
+         *        assigned previously.
+         *        Merge the unassigned chunks into the partially assigned table.
+         * @param in Meta information on writing processes, e.g. hostnames.
+         * @param out Meta information on reading processes, e.g. hostnames.
+         * @return An Assignment that assigns chunks to reading processes.
+         */
+        virtual Assignment assign(
+            PartialAssignment partialAssignment,
+            RankMeta const &in,
+            RankMeta const &out) = 0;
+
+        virtual std::unique_ptr<Strategy> clone() const = 0;
+
+        virtual ~Strategy() = default;
+    };
+
+    /**
+     * @brief A chunk distribution strategy that does not guarantee a
+     *        complete distribution.
+     *
+     * Combine with a full Strategy using the FromPartialStrategy struct to
+     * obtain a Strategy that works in two phases:
+     * 1. Apply the partial strategy.
+     * 2. Apply the full strategy to assign unassigned leftovers.
+     *
+     */
+    struct PartialStrategy
+    {
+        PartialAssignment
+        assign(ChunkTable table, RankMeta const &in, RankMeta const &out);
+        /**
+         * @brief Assign chunks to be loaded to reading processes.
+         *
+         * @param partialAssignment A table of unassigned chunks together with
+         *        an Assignment of chunks that might have already been
+         *        assigned previously.
+         *        Merge the unassigned chunks into the partially assigned table.
+         * @param in Meta information on writing processes, e.g. hostnames.
+         * @param out Meta information on reading processes, e.g. hostnames.
+         * @return A PartialAssignment: the leftover chunks that were not
+         *         assigned, together with an Assignment of the other chunks
+         *         to reading processes.
+         */
+        virtual PartialAssignment assign(
+            PartialAssignment partialAssignment,
+            RankMeta const &in,
+            RankMeta const &out) = 0;
+
+        virtual std::unique_ptr<PartialStrategy> clone() const = 0;
+
+        virtual ~PartialStrategy() = default;
+    };
+
+    /**
+     * @brief Combine a PartialStrategy and a Strategy to obtain a Strategy
+     *        working in two phases.
+     *
+     * 1. Apply the PartialStrategy to obtain a PartialAssignment.
+     *    This may be a heuristic that will not work under all circumstances,
+     *    e.g. trying to distribute chunks within the same compute node.
+     * 2. Apply the Strategy to assign leftovers.
+     *    This guarantees correctness in case the heuristics in the first phase
+     *    were not applicable e.g. due to a suboptimal setup.
+     * Both phases are given the same meta information on the ranks.
+     */
+    struct FromPartialStrategy : Strategy
+    {
+        FromPartialStrategy(
+            std::unique_ptr<PartialStrategy> firstPass,
+            std::unique_ptr<Strategy> secondPass);
+
+        virtual Assignment assign(
+            PartialAssignment,
+            RankMeta const &in,
+            RankMeta const &out) override;
+
+        virtual std::unique_ptr<Strategy> clone() const override;
+
+    private:
+        std::unique_ptr<PartialStrategy> m_firstPass; //!< phase 1
+        std::unique_ptr<Strategy> m_secondPass; //!< phase 2, takes leftovers
+    };
+
+    /**
+     * @brief Simple strategy that assigns produced chunks to reading processes
+     *        in a round-robin manner.
+     * @throws std::runtime_error If there are no reading processes.
+     */
+    struct RoundRobin : Strategy
+    {
+        Assignment assign(
+            PartialAssignment,
+            RankMeta const &in,
+            RankMeta const &out) override;
+
+        virtual std::unique_ptr<Strategy> clone() const override;
+    };
+
+    /**
+     * @brief Strategy that assigns chunks to be read by processes within
+     *        the same host that produced the chunk.
+     *
+     * The distribution strategy within one such host can be flexibly
+     * chosen.
+     * Chunks without a reader on the writer's host are left unassigned.
+     */
+    struct ByHostname : PartialStrategy
+    {
+        ByHostname(std::unique_ptr<Strategy> withinNode);
+
+        PartialAssignment assign(
+            PartialAssignment,
+            RankMeta const &in,
+            RankMeta const &out) override;
+
+        virtual std::unique_ptr<PartialStrategy> clone() const override;
+
+    private:
+        std::unique_ptr<Strategy> m_withinNode; //!< applied once per host
+    };
+
+    /**
+     * @brief Slice the n-dimensional dataset into hyperslabs and distribute
+     *        chunks according to them.
+     *
+     * This strategy only produces chunks in the returned Assignment for the
+     * calling parallel process.
+     * Incoming chunks are intersected with the hyperslab and assigned to the
+     * current parallel process in case this intersection is non-empty.
+     *
+     */
+    struct ByCuboidSlice : Strategy
+    {
+        ByCuboidSlice(
+            std::unique_ptr<BlockSlicer> blockSlicer,
+            Extent totalExtent,
+            unsigned int mpi_rank,
+            unsigned int mpi_size);
+
+        Assignment assign(
+            PartialAssignment,
+            RankMeta const &in,
+            RankMeta const &out) override;
+
+        virtual std::unique_ptr<Strategy> clone() const override;
+
+    private:
+        std::unique_ptr<BlockSlicer> blockSlicer;
+        Extent totalExtent;
+        unsigned int mpi_rank, mpi_size;
+    };
+
+    /**
+     * @brief Strategy that tries to assign chunks in a balanced manner without
+     *        arbitrarily cutting chunks.
+     *
+     * Idea:
+     * Calculate the ideal amount of data to be loaded per parallel process
+     * and cut chunks s.t. no chunk is larger than that ideal size.
+     * The resulting problem is an instance of the Bin-Packing problem which
+     * can be solved by a factor-2 approximation, meaning that a reading process
+     * will be assigned at worst twice the ideal amount of data, provided
+     * that the chunks can be cut small enough along the split dimension.
+     */
+    struct BinPacking : Strategy
+    {
+        size_t splitAlongDimension = 0;
+
+        /**
+         * @param splitAlongDimension If a chunk needs to be split, split it
+         *        along this dimension.
+         */
+        BinPacking(size_t splitAlongDimension = 0);
+
+        Assignment assign(
+            PartialAssignment,
+            RankMeta const &in,
+            RankMeta const &out) override;
+
+        virtual std::unique_ptr<Strategy> clone() const override;
+    };
+
+    /**
+     * @brief Strategy that purposefully fails when the PartialAssignment has
+     *        leftover chunks.
+     *
+     * Useful as second phase in FromPartialStrategy to assert that the first
+     * pass of the strategy catches all blocks, e.g. to assert that all chunks
+     * can be assigned within the same compute node.
+     * Failing means that assign() throws a std::runtime_error.
+     */
+    struct FailingStrategy : Strategy
+    {
+        explicit FailingStrategy();
+
+        Assignment assign(
+            PartialAssignment,
+            RankMeta const &in,
+            RankMeta const &out) override;
+
+        virtual std::unique_ptr<Strategy> clone() const override;
+    };
 } // namespace chunk_assignment
 
 namespace host_info
diff --git a/include/openPMD/benchmark/mpi/BlockSlicer.hpp b/include/openPMD/benchmark/mpi/BlockSlicer.hpp
index c66716217a..a720793b41 100644
--- a/include/openPMD/benchmark/mpi/BlockSlicer.hpp
+++ b/include/openPMD/benchmark/mpi/BlockSlicer.hpp
@@ -23,6 +23,8 @@
 
 #include "openPMD/Dataset.hpp"
 
+#include <memory>
+
 namespace openPMD
 {
 /**
@@ -42,6 +44,8 @@ class BlockSlicer
     virtual std::pair<Offset, Extent>
     sliceBlock(Extent &totalExtent, int size, int rank) = 0;
 
+    virtual std::unique_ptr<BlockSlicer> clone() const = 0;
+
     /** This class will be derived from
      */
     virtual ~BlockSlicer() = default;
diff --git a/include/openPMD/benchmark/mpi/OneDimensionalBlockSlicer.hpp b/include/openPMD/benchmark/mpi/OneDimensionalBlockSlicer.hpp
index 78f955524b..cb12da9350 100644
--- a/include/openPMD/benchmark/mpi/OneDimensionalBlockSlicer.hpp
+++ b/include/openPMD/benchmark/mpi/OneDimensionalBlockSlicer.hpp
@@ -35,5 +35,7 @@ class OneDimensionalBlockSlicer : public BlockSlicer
 
     std::pair<Offset, Extent>
     sliceBlock(Extent &totalExtent, int size, int rank) override;
+
+    virtual std::unique_ptr<BlockSlicer> clone() const override;
 };
 } // namespace openPMD
diff --git a/src/ChunkInfo.cpp b/src/ChunkInfo.cpp
index 5acb1ea07e..7b6c1e32ca 100644
--- a/src/ChunkInfo.cpp
+++ b/src/ChunkInfo.cpp
@@ -23,6 +23,10 @@
 
 #include "openPMD/auxiliary/Mpi.hpp"
 
+#include <algorithm> // std::sort
+#include <iostream>
+#include <list>
+#include <map>
 #include <utility>
 
 #ifdef _WIN32
@@ -62,6 +66,494 @@ bool WrittenChunkInfo::operator==(WrittenChunkInfo const &other) const
         this->ChunkInfo::operator==(other);
 }
 
+namespace chunk_assignment
+{
+    namespace
+    {
+        std::map<std::string, std::list<unsigned int> >
+        ranksPerHost(RankMeta const &rankMeta)
+        {
+            // invert the rank -> hostname table into hostname -> ranks
+            std::map<std::string, std::list<unsigned int> > byHost;
+            for (auto const &rankAndHost : rankMeta)
+            {
+                byHost[rankAndHost.second].emplace_back(rankAndHost.first);
+            }
+            return byHost;
+        }
+    } // namespace
+
+    Assignment Strategy::assign(
+        ChunkTable table, RankMeta const &rankIn, RankMeta const &rankOut)
+    {
+        if (rankOut.empty())
+        {
+            throw std::runtime_error("[assignChunks] No output ranks defined");
+        }
+        PartialAssignment nothingAssignedYet(std::move(table));
+        return this->assign(std::move(nothingAssignedYet), rankIn, rankOut);
+    }
+
+    PartialAssignment::PartialAssignment(
+        ChunkTable notAssigned_in, Assignment assigned_in)
+        : notAssigned(std::move(notAssigned_in))
+        , assigned(std::move(assigned_in))
+    {}
+
+    PartialAssignment::PartialAssignment(ChunkTable notAssigned_in)
+        : PartialAssignment(std::move(notAssigned_in), Assignment())
+    {}
+
+    PartialAssignment PartialStrategy::assign(
+        ChunkTable table, RankMeta const &rankIn, RankMeta const &rankOut)
+    {
+        PartialAssignment nothingAssignedYet(std::move(table));
+        return this->assign(std::move(nothingAssignedYet), rankIn, rankOut);
+    }
+
+    FromPartialStrategy::FromPartialStrategy(
+        std::unique_ptr<PartialStrategy> firstPass,
+        std::unique_ptr<Strategy> secondPass)
+        : m_firstPass(std::move(firstPass)), m_secondPass(std::move(secondPass))
+    {}
+
+    Assignment FromPartialStrategy::assign(
+        PartialAssignment partialAssignment,
+        RankMeta const &in,
+        RankMeta const &out)
+    {
+        // first pass may leave leftovers, the second pass takes care of them
+        PartialAssignment afterFirstPass =
+            m_firstPass->assign(std::move(partialAssignment), in, out);
+        return m_secondPass->assign(std::move(afterFirstPass), in, out);
+    }
+
+    std::unique_ptr<Strategy> FromPartialStrategy::clone() const
+    {
+        return std::make_unique<FromPartialStrategy>(
+            m_firstPass->clone(), m_secondPass->clone());
+    }
+
+    Assignment RoundRobin::assign(
+        PartialAssignment partialAssignment,
+        RankMeta const &, // ignored parameter
+        RankMeta const &out)
+    {
+        if (out.empty())
+        {
+            throw std::runtime_error(
+                "[RoundRobin] Cannot round-robin to zero ranks.");
+        }
+        auto it = out.begin();
+        auto nextRank = [&it, &out]() {
+            if (it == out.end())
+            {
+                it = out.begin();
+            }
+            return (it++)->first;
+        };
+        ChunkTable &sourceChunks = partialAssignment.notAssigned;
+        Assignment &sinkChunks = partialAssignment.assigned;
+        for (auto &chunk : sourceChunks)
+        {
+            // NOTE(review): sourceID changes from writer rank to reader rank
+            chunk.sourceID = nextRank();
+            sinkChunks[chunk.sourceID].push_back(std::move(chunk));
+        }
+        // moved explicitly: this is a member of the parameter, not a local
+        return std::move(sinkChunks);
+    }
+
+    std::unique_ptr<Strategy> RoundRobin::clone() const
+    {
+        return std::make_unique<RoundRobin>();
+    }
+
+    ByHostname::ByHostname(std::unique_ptr<Strategy> withinNode)
+        : m_withinNode(std::move(withinNode))
+    {}
+
+    PartialAssignment ByHostname::assign(
+        PartialAssignment res, RankMeta const &in, RankMeta const &out)
+    {
+        // collect chunks by hostname
+        std::map<std::string, ChunkTable> chunkGroups;
+        ChunkTable &sourceChunks = res.notAssigned;
+        Assignment &sinkChunks = res.assigned;
+        {
+            ChunkTable leftover;
+            for (auto &chunk : sourceChunks)
+            {
+                auto it = in.find(chunk.sourceID);
+                if (it == in.end())
+                {
+                    // no hostname known for the rank that wrote this chunk
+                    leftover.push_back(std::move(chunk));
+                }
+                else
+                {
+                    chunkGroups[it->second].push_back(std::move(chunk));
+                }
+            }
+            // undistributed chunks will be put back in later on
+            sourceChunks = std::move(leftover);
+        }
+        // chunkGroups will now contain chunks by hostname
+        // the ranks are the source ranks
+
+        // which ranks live on host <string> in the sink?
+        std::map<std::string, std::list<unsigned int> > ranksPerHostSink =
+            ranksPerHost(out);
+        for (auto &chunkGroup : chunkGroups)
+        {
+            std::string const &hostname = chunkGroup.first;
+            // find reading ranks on the sink host with same name
+            auto it = ranksPerHostSink.find(hostname);
+            if (it == ranksPerHostSink.end() || it->second.empty())
+            {
+                /*
+                 * These are leftover, go back to the input.
+                 */
+                for (auto &chunk : chunkGroup.second)
+                {
+                    sourceChunks.push_back(std::move(chunk));
+                }
+            }
+            else
+            {
+                RankMeta ranksOnTargetNode;
+                for (unsigned int rank : it->second)
+                {
+                    ranksOnTargetNode[rank] = hostname;
+                }
+                // Let the inner strategy distribute this host's chunks among
+                // the readers on the same host; the chunks assigned so far
+                // are handed in along with them. The chunk group is not read
+                // again afterwards, so it is moved rather than copied.
+                Assignment swapped;
+                swapped.swap(sinkChunks);
+                sinkChunks = m_withinNode->assign(
+                    PartialAssignment(
+                        std::move(chunkGroup.second), std::move(swapped)),
+                    in,
+                    ranksOnTargetNode);
+            }
+        }
+        return res;
+    }
+
+    std::unique_ptr<PartialStrategy> ByHostname::clone() const
+    {
+        // deep copy: the inner strategy is cloned as well
+        return std::make_unique<ByHostname>(m_withinNode->clone());
+    }
+
+    ByCuboidSlice::ByCuboidSlice(
+        std::unique_ptr<BlockSlicer> blockSlicer_in,
+        Extent totalExtent_in,
+        unsigned int mpi_rank_in,
+        unsigned int mpi_size_in)
+        : blockSlicer(std::move(blockSlicer_in))
+        , totalExtent(std::move(totalExtent_in))
+        , mpi_rank(mpi_rank_in)
+        , mpi_size(mpi_size_in)
+    {}
+
+    namespace
+    {
+        /**
+         * @brief Compute the intersection of two chunks.
+         *
+         * The intersection is computed separately in each dimension. If the
+         * chunks do not overlap in some dimension, the resulting extent in
+         * that dimension is zero, so callers can detect an empty intersection
+         * by looking for a zero in the extent.
+         * All four vectors are indexed up to the dimensionality of @p offset
+         * without bounds checking.
+         *
+         * @param offset Offset of chunk 1, result will be written in place.
+         * @param extent Extent of chunk 1, result will be written in place.
+         * @param withinOffset Offset of chunk 2.
+         * @param withinExtent Extent of chunk 2.
+         */
+        void restrictToSelection(
+            Offset &offset,
+            Extent &extent,
+            Offset const &withinOffset,
+            Extent const &withinExtent)
+        {
+            // shrink an extent by the given amount; if the amount exceeds the
+            // extent, the result is zero
+            auto shrinkBy = [](auto &ext, auto const by) {
+                ext = by > ext ? 0 : ext - by;
+            };
+
+            for (size_t dim = 0; dim < offset.size(); ++dim)
+            {
+                // clip at the lower boundary of the selection
+                if (offset[dim] < withinOffset[dim])
+                {
+                    shrinkBy(extent[dim], withinOffset[dim] - offset[dim]);
+                    offset[dim] = withinOffset[dim];
+                }
+                // clip at the upper boundary of the selection;
+                // both end positions are one past the last element
+                auto const chunkEnd = extent[dim] + offset[dim];
+                auto const selectionEnd = withinExtent[dim] + withinOffset[dim];
+                if (chunkEnd > selectionEnd)
+                {
+                    shrinkBy(extent[dim], chunkEnd - selectionEnd);
+                }
+            }
+        }
+
+        struct SizedChunk
+        {
+            WrittenChunkInfo chunk;
+            size_t dataSize; //!< number of elements in `chunk`
+
+            SizedChunk(WrittenChunkInfo chunk_in, size_t dataSize_in)
+                : chunk(std::move(chunk_in)), dataSize(dataSize_in)
+            {}
+        };
+
+        /**
+         * @brief Slice chunks to a maximum size and sort those by size.
+         *
+         * Chunks are sliced into hyperslabs along a specified dimension.
+         * Returned chunks may be larger than the specified maximum size
+         * if hyperslabs of thickness 1 are larger than that size.
+         * Chunks without any elements are skipped with a warning on stderr.
+         *
+         * @param table Chunks of arbitrary sizes.
+         * @param maxSize The maximum size that returned chunks should have.
+         * @param dimension The dimension along which to create hyperslabs.
+         * @return The sliced chunks together with their number of elements,
+         *         sorted by size in decreasing order.
+         * @throws std::runtime_error If a chunk has no such dimension.
+         */
+        std::vector<SizedChunk> splitToSizeSorted(
+            ChunkTable const &table, size_t maxSize, size_t const dimension = 0)
+        {
+            std::vector<SizedChunk> res;
+            for (auto const &chunk : table)
+            {
+                auto const &extent = chunk.extent;
+                if (dimension >= extent.size())
+                {
+                    // extent[dimension] below would read out of bounds
+                    throw std::runtime_error(
+                        "Chunktable::splitToSizeSorted: split dimension "
+                        "exceeds the dimensionality of a chunk");
+                }
+                // number of elements in a hyperslab of thickness 1
+                size_t sliceSize = 1;
+                for (size_t i = 0; i < extent.size(); ++i)
+                {
+                    if (i != dimension)
+                    {
+                        sliceSize *= extent[i];
+                    }
+                }
+                size_t const slicedDimensionExtent = extent[dimension];
+                if (sliceSize == 0 || slicedDimensionExtent == 0)
+                {
+                    // nothing to hand out, and sliceSize is a divisor below
+                    std::cerr << "Chunktable::splitToSizeSorted: encountered "
+                                 "zero-sized chunk"
+                              << std::endl;
+                    continue;
+                }
+
+                // this many slices go in one packet before it exceeds the max
+                // size; at least one, otherwise the loop below never advances
+                size_t const streakLength =
+                    std::max<size_t>(maxSize / sliceSize, 1);
+
+                for (size_t currentPosition = 0;
+                     currentPosition < slicedDimensionExtent;
+                     currentPosition += streakLength)
+                {
+                    WrittenChunkInfo newChunk = chunk;
+                    newChunk.offset[dimension] += currentPosition;
+                    // the last hyperslab takes whatever is left over
+                    size_t const width = std::min(
+                        streakLength, slicedDimensionExtent - currentPosition);
+                    newChunk.extent[dimension] = width;
+                    res.emplace_back(std::move(newChunk), width * sliceSize);
+                }
+            }
+            // largest chunks first
+            std::sort(
+                res.begin(),
+                res.end(),
+                [](SizedChunk const &left, SizedChunk const &right) {
+                    return right.dataSize < left.dataSize; // decreasing order
+                });
+            return res;
+        }
+    } // namespace
+
+    Assignment ByCuboidSlice::assign(
+        PartialAssignment res, RankMeta const &, RankMeta const &)
+    {
+        ChunkTable &sourceSide = res.notAssigned;
+        Assignment &sinkSide = res.assigned;
+        Offset myOffset;
+        Extent myExtent;
+        std::tie(myOffset, myExtent) =
+            blockSlicer->sliceBlock(totalExtent, mpi_size, mpi_rank);
+
+        for (auto &chunk : sourceSide)
+        {
+            restrictToSelection(chunk.offset, chunk.extent, myOffset, myExtent);
+            // a zero extent in any dimension means: empty intersection
+            bool const isEmpty = std::any_of(
+                chunk.extent.begin(),
+                chunk.extent.end(),
+                [](auto ext) { return ext == 0; });
+            if (!isEmpty)
+            {
+                sinkSide[mpi_rank].push_back(std::move(chunk));
+            }
+        }
+        // moved explicitly: this is a member of the parameter, not a local
+        return std::move(res.assigned);
+    }
+
+    std::unique_ptr<Strategy> ByCuboidSlice::clone() const
+    {
+        return std::make_unique<ByCuboidSlice>(
+            blockSlicer->clone(), totalExtent, mpi_rank, mpi_size);
+    }
+
+    BinPacking::BinPacking(size_t splitAlongDimension_in)
+        : splitAlongDimension(splitAlongDimension_in)
+    {}
+
+    Assignment BinPacking::assign(
+        PartialAssignment res, RankMeta const &, RankMeta const &sinkRanks)
+    {
+        if (sinkRanks.empty())
+        {
+            // guards the division further below, same check as in RoundRobin
+            throw std::runtime_error(
+                "[BinPacking] Cannot distribute chunks to zero ranks.");
+        }
+        ChunkTable &sourceChunks = res.notAssigned;
+        Assignment &sinkChunks = res.assigned;
+        size_t totalExtent = 0;
+        for (auto const &chunk : sourceChunks)
+        {
+            size_t chunkExtent = 1;
+            for (auto ext : chunk.extent)
+            {
+                chunkExtent *= ext;
+            }
+            totalExtent += chunkExtent;
+        }
+        size_t const idealSize = totalExtent / sinkRanks.size();
+        /*
+         * Split chunks into subchunks of size at most idealSize.
+         * The resulting list of chunks is sorted by chunk size in decreasing
+         * order. This is important for the greedy Bin-Packing approximation
+         * algorithm.
+         * Under sub-ideal circumstances, chunks may not be splittable small
+         * enough. This algorithm will still produce results just fine in that
+         * case, but it will not keep the factor-2 approximation.
+         */
+        std::vector<SizedChunk> digestibleChunks =
+            splitToSizeSorted(sourceChunks, idealSize, splitAlongDimension);
+
+        /*
+         * Worker lambda: Iterate the reading processes once and greedily assign
+         * the largest chunks to them without exceeding idealSize amount of
+         * data per process.
+         */
+        auto worker =
+            [&sinkRanks, &digestibleChunks, &sinkChunks, idealSize]() {
+                for (auto const &destRank : sinkRanks)
+                {
+                    /*
+                     * Within the second call of the worker lambda, this will
+                     * not be true any longer, strictly speaking. The trick of
+                     * this algorithm is to pretend that it is.
+                     */
+                    size_t leftoverSize = idealSize;
+                    auto it = digestibleChunks.begin();
+                    while (it != digestibleChunks.end())
+                    {
+                        if (it->dataSize >= idealSize)
+                        {
+                            /*
+                             * This branch is only taken if it was not
+                             * possible to slice chunks small enough -- or
+                             * exactly the right size. In any case, the
+                             * chunk will be the only one assigned to the
+                             * process within this call of the worker
+                             * lambda, so the loop can be broken out of.
+                             */
+                            sinkChunks[destRank.first].push_back(
+                                std::move(it->chunk));
+                            digestibleChunks.erase(it);
+                            break;
+                        }
+                        else if (it->dataSize <= leftoverSize)
+                        {
+                            // assign smaller chunks as long as they fit
+                            sinkChunks[destRank.first].push_back(
+                                std::move(it->chunk));
+                            leftoverSize -= it->dataSize;
+                            it = digestibleChunks.erase(it);
+                        }
+                        else
+                        {
+                            // look for smaller chunks
+                            ++it;
+                        }
+                    }
+                }
+            };
+        // sic! run the worker twice to implement a factor-two approximation
+        // of the bin packing problem
+        worker();
+        worker();
+        /*
+         * By the nature of the greedy approach, each iteration of the outer
+         * for loop in the worker assigns more than half of idealSize to the
+         * current rank (until chunks run out), so two calls distribute all.
+         */
+        // moved explicitly: this is a member of the parameter, not a local
+        return std::move(sinkChunks);
+    }
+
+    std::unique_ptr<Strategy> BinPacking::clone() const
+    {
+        return std::make_unique<BinPacking>(splitAlongDimension);
+    }
+
+    FailingStrategy::FailingStrategy() = default;
+
+    Assignment FailingStrategy::assign(
+        PartialAssignment assignment, RankMeta const &, RankMeta const &)
+    {
+        // any leftover chunk means that the previous pass was incomplete
+        if (!assignment.notAssigned.empty())
+        {
+            throw std::runtime_error(
+                "[FailingStrategy] There are unassigned chunks!");
+        }
+        // A member of a by-value parameter is not moved implicitly on
+        // return, so move it explicitly to avoid copying the whole table.
+        return std::move(assignment.assigned);
+    }
+
+    std::unique_ptr<Strategy> FailingStrategy::clone() const
+    {
+        return std::make_unique<FailingStrategy>();
+    }
+} // namespace chunk_assignment
+
 namespace host_info
 {
     constexpr size_t MAX_HOSTNAME_LENGTH = 256;
diff --git a/src/benchmark/mpi/OneDimensionalBlockSlicer.cpp b/src/benchmark/mpi/OneDimensionalBlockSlicer.cpp
index e494b175de..7fbb734faa 100644
--- a/src/benchmark/mpi/OneDimensionalBlockSlicer.cpp
+++ b/src/benchmark/mpi/OneDimensionalBlockSlicer.cpp
@@ -72,4 +72,9 @@ OneDimensionalBlockSlicer::sliceBlock(Extent &totalExtent, int size, int rank)
     }
     return std::make_pair(std::move(offs), std::move(localExtent));
 }
+
+std::unique_ptr<BlockSlicer> OneDimensionalBlockSlicer::clone() const
+{
+    return std::make_unique<OneDimensionalBlockSlicer>(m_dim);
+}
 } // namespace openPMD

From e2acd494b7830686fd2220c07fb3f3435144b09f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franz=20P=C3=B6schel?= <franz.poeschel@gmail.com>
Date: Thu, 2 Mar 2023 13:46:10 +0100
Subject: [PATCH 02/27] Python bindings: Chunk distribution algorithms

---
 include/openPMD/binding/python/Common.hpp | 10 +++
 src/binding/python/ChunkInfo.cpp          | 89 +++++++++++++++++++++++
 2 files changed, 99 insertions(+)

diff --git a/include/openPMD/binding/python/Common.hpp b/include/openPMD/binding/python/Common.hpp
index c72d72ce83..b21d490070 100644
--- a/include/openPMD/binding/python/Common.hpp
+++ b/include/openPMD/binding/python/Common.hpp
@@ -8,6 +8,7 @@
  */
 #pragma once
 
+#include "openPMD/ChunkInfo.hpp"
 #include "openPMD/Iteration.hpp"
 #include "openPMD/Mesh.hpp"
 #include "openPMD/ParticlePatches.hpp"
@@ -33,6 +34,15 @@
 // not yet used:
 //   pybind11/functional.h  // for std::function
 
+using PyVecChunkInfo = std::vector<openPMD::ChunkInfo>;
+
+PYBIND11_MAKE_OPAQUE(openPMD::ChunkInfo)
+PYBIND11_MAKE_OPAQUE(PyVecChunkInfo)
+PYBIND11_MAKE_OPAQUE(openPMD::WrittenChunkInfo)
+PYBIND11_MAKE_OPAQUE(openPMD::ChunkTable)
+PYBIND11_MAKE_OPAQUE(openPMD::chunk_assignment::Assignment)
+PYBIND11_MAKE_OPAQUE(openPMD::chunk_assignment::PartialAssignment)
+
 // used exclusively in all our Python .cpp files
 namespace py = pybind11;
 using namespace openPMD;
diff --git a/src/binding/python/ChunkInfo.cpp b/src/binding/python/ChunkInfo.cpp
index 3d0837d504..76deca3a10 100644
--- a/src/binding/python/ChunkInfo.cpp
+++ b/src/binding/python/ChunkInfo.cpp
@@ -19,12 +19,14 @@
  * If not, see <http://www.gnu.org/licenses/>.
  */
 #include "openPMD/ChunkInfo.hpp"
+#include "openPMD/benchmark/mpi/OneDimensionalBlockSlicer.hpp"
 #include "openPMD/binding/python/Mpi.hpp"
 
 #include "openPMD/binding/python/Common.hpp"
 
 #include <exception>
 #include <string>
+#include <utility> // std::move
 
 void init_Chunk(py::module &m)
 {
@@ -102,4 +104,91 @@ void init_Chunk(py::module &m)
                 return host_info::byMethod(self);
             })
         .def("available", &host_info::methodAvailable);
+
+
+    using namespace chunk_assignment;
+
+    (void)py::class_<PartialAssignment>(m, "PartialAssignment");
+
+    py::class_<PartialStrategy>(m, "PartialStrategy")
+        .def(
+            "assign",
+            py::overload_cast<ChunkTable, RankMeta const &, RankMeta const &>(
+                &PartialStrategy::assign),
+            py::arg("chunk_table"),
+            py::arg("rank_meta_in") = RankMeta(),
+            py::arg("rank_meta_out") = RankMeta())
+        .def(
+            "assign",
+            py::overload_cast<
+                PartialAssignment,
+                RankMeta const &,
+                RankMeta const &>(&PartialStrategy::assign),
+            py::arg("partial_assignment"),
+            py::arg("rank_meta_in") = RankMeta(),
+            py::arg("rank_meta_out") = RankMeta());
+
+    py::class_<Strategy>(m, "Strategy")
+        .def(
+            "assign",
+            py::overload_cast<ChunkTable, RankMeta const &, RankMeta const &>(
+                &Strategy::assign),
+            py::arg("chunk_table"),
+            py::arg("rank_meta_in") = RankMeta(),
+            py::arg("rank_meta_out") = RankMeta())
+        .def(
+            "assign",
+            py::overload_cast<
+                PartialAssignment,
+                RankMeta const &,
+                RankMeta const &>(&Strategy::assign),
+            py::arg("partial_assignment"),
+            py::arg("rank_meta_in") = RankMeta(),
+            py::arg("rank_meta_out") = RankMeta());
+
+    py::class_<FromPartialStrategy, Strategy>(m, "FromPartialStrategy")
+        .def(py::init([](PartialStrategy const &firstPass,
+                         Strategy const &secondPass) {
+            return FromPartialStrategy(firstPass.clone(), secondPass.clone());
+        }));
+
+    py::class_<RoundRobin, Strategy>(m, "RoundRobin").def(py::init<>());
+
+    py::class_<ByHostname, PartialStrategy>(m, "ByHostname")
+        .def(
+            py::init([](Strategy const &withinNode) {
+                return ByHostname(withinNode.clone());
+            }),
+            py::arg("strategy_within_node"));
+
+    (void)py::class_<BlockSlicer>(m, "BlockSlicer");
+
+    py::class_<OneDimensionalBlockSlicer, BlockSlicer>(
+        m, "OneDimensionalBlockSlicer")
+        .def(py::init<>())
+        .def(py::init<Extent::value_type>(), py::arg("dim"));
+
+    py::class_<ByCuboidSlice, Strategy>(m, "ByCuboidSlice")
+        .def(
+            py::init([](BlockSlicer const &blockSlicer,
+                        Extent totalExtent,
+                        unsigned int mpi_rank,
+                        unsigned int mpi_size) {
+                return ByCuboidSlice(
+                    blockSlicer.clone(),
+                    std::move(totalExtent),
+                    mpi_rank,
+                    mpi_size);
+            }),
+            py::arg("block_slicer"),
+            py::arg("total_extent"),
+            py::arg("mpi_rank"),
+            py::arg("mpi_size"));
+
+    py::class_<BinPacking, Strategy>(m, "BinPacking")
+        .def(py::init<>())
+        .def(py::init<size_t>(), py::arg("split_along_dimension"));
+
+    py::class_<FailingStrategy, Strategy>(m, "FailingStrategy")
+        .def(py::init<>());
 }

From 72da3986b3277a012fa925c4ddec80e77462f17d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franz=20P=C3=B6schel?= <franz.poeschel@gmail.com>
Date: Thu, 2 Mar 2023 13:47:06 +0100
Subject: [PATCH 03/27] Use chunk distribution algorithms in openpmd-pipe

---
 .../python/openpmd_api/pipe/__main__.py       | 145 +++++++++---------
 1 file changed, 70 insertions(+), 75 deletions(-)

diff --git a/src/binding/python/openpmd_api/pipe/__main__.py b/src/binding/python/openpmd_api/pipe/__main__.py
index 7bd5305c56..96802530b0 100644
--- a/src/binding/python/openpmd_api/pipe/__main__.py
+++ b/src/binding/python/openpmd_api/pipe/__main__.py
@@ -10,6 +10,7 @@
 """
 import argparse
 import os  # os.path.basename
+import re
 import sys  # sys.stderr.write
 
 from .. import openpmd_api_cxx as io
@@ -39,8 +40,14 @@ def parse_args(program_name):
    By default, the openPMD-api will be initialized without an MPI communicator
    if the MPI size is 1. This is to simplify the use of the JSON backend
    which is only available in serial openPMD.
-With parallelization enabled, each dataset will be equally sliced along
-the dimension with the largest extent.
+With parallelization enabled, each dataset will be equally sliced according to
+a chunk distribution strategy which may be selected via the environment
+variable OPENPMD_CHUNK_DISTRIBUTION. Options include "roundrobin",
+"binpacking", "slicedataset" and "hostname_<1>_<2>", where <1> should be
+replaced with a strategy to be applied within a compute node and <2> with a
+secondary strategy in case the hostname strategy does not distribute
+all chunks.
+The default is `hostname_binpacking_slicedataset`.
 
 Examples:
     {0} --infile simData.h5 --outfile simData_%T.bp
@@ -99,65 +106,6 @@ def __init__(self):
         self.rank = 0
 
 
-class Chunk:
-    """
-    A Chunk is an n-dimensional hypercube, defined by an offset and an extent.
-    Offset and extent must be of the same dimensionality (Chunk.__len__).
-    """
-    def __init__(self, offset, extent):
-        assert (len(offset) == len(extent))
-        self.offset = offset
-        self.extent = extent
-
-    def __len__(self):
-        return len(self.offset)
-
-    def slice1D(self, mpi_rank, mpi_size, dimension=None):
-        """
-        Slice this chunk into mpi_size hypercubes along one of its
-        n dimensions. The dimension is given through the 'dimension'
-        parameter. If None, the dimension with the largest extent on
-        this hypercube is automatically picked.
-        Returns the mpi_rank'th of the sliced chunks.
-        """
-        if dimension is None:
-            # pick that dimension which has the highest count of items
-            dimension = 0
-            maximum = self.extent[0]
-            for k, v in enumerate(self.extent):
-                if v > maximum:
-                    dimension = k
-        assert (dimension < len(self))
-        # no offset
-        assert (self.offset == [0 for _ in range(len(self))])
-        offset = [0 for _ in range(len(self))]
-        stride = self.extent[dimension] // mpi_size
-        rest = self.extent[dimension] % mpi_size
-
-        # local function f computes the offset of a rank
-        # for more equal balancing, we want the start index
-        # at the upper gaussian bracket of (N/n*rank)
-        # where N the size of the dataset in dimension dim
-        # and n the MPI size
-        # for avoiding integer overflow, this is the same as:
-        # (N div n)*rank + round((N%n)/n*rank)
-        def f(rank):
-            res = stride * rank
-            padDivident = rest * rank
-            pad = padDivident // mpi_size
-            if pad * mpi_size < padDivident:
-                pad += 1
-            return res + pad
-
-        offset[dimension] = f(mpi_rank)
-        extent = self.extent.copy()
-        if mpi_rank >= mpi_size - 1:
-            extent[dimension] -= offset[dimension]
-        else:
-            extent[dimension] = f(mpi_rank + 1) - offset[dimension]
-        return Chunk(offset, extent)
-
-
 class deferred_load:
     def __init__(self, source, dynamicView, offset, extent):
         self.source = source
@@ -166,6 +114,42 @@ def __init__(self, source, dynamicView, offset, extent):
         self.extent = extent
 
 
+def distribution_strategy(dataset_extent,
+                          mpi_rank,
+                          mpi_size,
+                          strategy_identifier=None):
+    if strategy_identifier is None or not strategy_identifier:
+        if 'OPENPMD_CHUNK_DISTRIBUTION' in os.environ:
+            strategy_identifier = os.environ[
+                'OPENPMD_CHUNK_DISTRIBUTION'].lower()
+        else:
+            strategy_identifier = 'hostname_binpacking_slicedataset'  # default
+    match = re.search('hostname_(.*)_(.*)', strategy_identifier)
+    if match is not None:
+        inside_node = distribution_strategy(dataset_extent,
+                                            mpi_rank,
+                                            mpi_size,
+                                            strategy_identifier=match.group(1))
+        second_phase = distribution_strategy(
+            dataset_extent,
+            mpi_rank,
+            mpi_size,
+            strategy_identifier=match.group(2))
+        return io.FromPartialStrategy(io.ByHostname(inside_node), second_phase)
+    elif strategy_identifier == 'roundrobin':
+        return io.RoundRobin()
+    elif strategy_identifier == 'binpacking':
+        return io.BinPacking()
+    elif strategy_identifier == 'slicedataset':
+        return io.ByCuboidSlice(io.OneDimensionalBlockSlicer(), dataset_extent,
+                                mpi_rank, mpi_size)
+    elif strategy_identifier == 'fail':
+        return io.FailingStrategy()
+    else:
+        raise RuntimeError("Unknown distribution strategy: " +
+                           strategy_identifier)
+
+
 class pipe:
     """
     Represents the configuration of one "pipe" pass.
@@ -177,6 +161,11 @@ def __init__(self, infile, outfile, inconfig, outconfig, comm):
         self.outconfig = outconfig
         self.loads = []
         self.comm = comm
+        if HAVE_MPI:
+            hostinfo = io.HostInfo.MPI_PROCESSOR_NAME
+            self.outranks = hostinfo.get_collective(self.comm)
+        else:
+            self.outranks = {i: str(i) for i in range(self.comm.size)}
 
     def run(self):
         if not HAVE_MPI or (args.mpi is None and self.comm.size == 1):
@@ -268,6 +257,9 @@ def __copy(self, src, dest, current_path="/data/"):
                         print("With records:")
                         for r in in_iteration.particles[ps]:
                             print("\t {0}".format(r))
+                # With linear read mode, we can only load the source rank table
+                # inside `read_iterations()` since it's a dataset.
+                self.inranks = src.get_rank_table(collective=True)
                 out_iteration = write_iterations[in_iteration.iteration_index]
                 sys.stdout.flush()
                 self.__copy(
@@ -284,7 +276,6 @@ def __copy(self, src, dest, current_path="/data/"):
         elif isinstance(src, io.Record_Component) and (not is_container
                                                        or src.scalar):
             shape = src.shape
-            offset = [0 for _ in shape]
             dtype = src.dtype
             dest.reset_dataset(io.Dataset(dtype, shape))
             if src.empty:
@@ -294,19 +285,23 @@ def __copy(self, src, dest, current_path="/data/"):
             elif src.constant:
                 dest.make_constant(src.get_attribute("value"))
             else:
-                chunk = Chunk(offset, shape)
-                local_chunk = chunk.slice1D(self.comm.rank, self.comm.size)
-                if debug:
-                    end = local_chunk.offset.copy()
-                    for i in range(len(end)):
-                        end[i] += local_chunk.extent[i]
-                    print("{}\t{}/{}:\t{} -- {}".format(
-                        current_path, self.comm.rank, self.comm.size,
-                        local_chunk.offset, end))
-                span = dest.store_chunk(local_chunk.offset, local_chunk.extent)
-                self.loads.append(
-                    deferred_load(src, span, local_chunk.offset,
-                                  local_chunk.extent))
+                chunk_table = src.available_chunks()
+                strategy = distribution_strategy(shape, self.comm.rank,
+                                                 self.comm.size)
+                my_chunks = strategy.assign(chunk_table, self.inranks,
+                                            self.outranks)
+                for chunk in my_chunks[
+                        self.comm.rank] if self.comm.rank in my_chunks else []:
+                    if debug:
+                        end = chunk.offset.copy()
+                        for i in range(len(end)):
+                            end[i] += chunk.extent[i]
+                        print("{}\t{}/{}:\t{} -- {}".format(
+                            current_path, self.comm.rank, self.comm.size,
+                            chunk.offset, end))
+                    span = dest.store_chunk(chunk.offset, chunk.extent)
+                    self.loads.append(
+                        deferred_load(src, span, chunk.offset, chunk.extent))
         elif isinstance(src, io.Iteration):
             self.__copy(src.meshes, dest.meshes, current_path + "meshes/")
             self.__copy(src.particles, dest.particles,

From 539f61f08a200dbdc259a273d32929b7ab4b78a5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franz=20P=C3=B6schel?= <franz.poeschel@gmail.com>
Date: Thu, 2 Mar 2023 13:49:13 +0100
Subject: [PATCH 04/27] Testing

---
 CMakeLists.txt          |   4 +-
 test/CoreTest.cpp       |  88 +++++++++++
 test/ParallelIOTest.cpp | 331 ++++++++++++++++++++++++++++++++++------
 3 files changed, 376 insertions(+), 47 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index d6153e8e24..b6f01f3d2e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1346,7 +1346,7 @@ if(openPMD_BUILD_TESTING)
                 )
                 add_test(NAME CLI.pipe.py
                     COMMAND sh -c
-                        "${MPI_TEST_EXE} ${Python_EXECUTABLE}                      \
+                        "${MPI_TEST_EXE} -n 2 ${Python_EXECUTABLE}                 \
                             ${openPMD_RUNTIME_OUTPUT_DIRECTORY}/openpmd-pipe       \
                             --infile ../samples/git-sample/data%T.h5               \
                             --outfile ../samples/git-sample/data%T.bp &&           \
@@ -1357,7 +1357,7 @@ if(openPMD_BUILD_TESTING)
                             --outfile                                              \
                                 ../samples/git-sample/single_iteration_%T.bp &&    \
                                                                                    \
-                        ${MPI_TEST_EXE} ${Python_EXECUTABLE}                       \
+                        ${MPI_TEST_EXE} -n 2 ${Python_EXECUTABLE}                  \
                             ${openPMD_RUNTIME_OUTPUT_DIRECTORY}/openpmd-pipe       \
                             --infile ../samples/git-sample/thetaMode/data%T.h5     \
                             --outfile                                              \
diff --git a/test/CoreTest.cpp b/test/CoreTest.cpp
index 6bdefff1fb..b9820d8222 100644
--- a/test/CoreTest.cpp
+++ b/test/CoreTest.cpp
@@ -3,6 +3,8 @@
 #define OPENPMD_private public:
 #define OPENPMD_protected public:
 #endif
+
+#include "openPMD/ChunkInfo.hpp"
 #include "openPMD/openPMD.hpp"
 
 #include "Files_Core/CoreTests.hpp"
@@ -45,6 +47,92 @@ using namespace openPMD;
 
 Dataset globalDataset(Datatype::CHAR, {1});
 
+namespace test_chunk_assignment
+{
+using namespace openPMD::chunk_assignment;
+struct Params
+{
+    ChunkTable table;
+    RankMeta metaSource;
+    RankMeta metaSink;
+
+    void init(
+        size_t sourceRanks,
+        size_t sinkRanks,
+        size_t in_per_host,
+        size_t out_per_host)
+    {
+        for (size_t rank = 0; rank < sourceRanks; ++rank)
+        {
+            table.emplace_back(Offset{rank, rank}, Extent{rank, rank}, rank);
+            table.emplace_back(
+                Offset{rank, 100 * rank}, Extent{rank, 100 * rank}, rank);
+            metaSource.emplace(rank, std::to_string(rank / in_per_host));
+        }
+        for (size_t rank = 0; rank < sinkRanks; ++rank)
+        {
+            metaSink.emplace(rank, std::to_string(rank / out_per_host));
+        }
+    }
+};
+void print(RankMeta const &meta, ChunkTable const &table)
+{
+    for (auto const &chunk : table)
+    {
+        std::cout << "[HOST: " << meta.at(chunk.sourceID)
+                  << ",\tRank: " << chunk.sourceID << ",\tOffset: ";
+        for (auto offset : chunk.offset)
+        {
+            std::cout << offset << ", ";
+        }
+        std::cout << "\tExtent: ";
+        for (auto extent : chunk.extent)
+        {
+            std::cout << extent << ", ";
+        }
+        std::cout << "]" << std::endl;
+    }
+}
+void print(RankMeta const &meta, Assignment const &table)
+{
+    for (auto &[rank, chunkList] : table)
+    {
+        std::cout << "[HOST: " << meta.at(rank) << ",\tRank: " << rank << "]"
+                  << std::endl;
+        for (auto const &chunk : chunkList)
+        {
+            std::cout << "\t[Offset: ";
+            for (auto offset : chunk.offset)
+            {
+                std::cout << offset << ", ";
+            }
+            std::cout << "\tExtent: ";
+            for (auto extent : chunk.extent)
+            {
+                std::cout << extent << ", ";
+            }
+            std::cout << "]" << std::endl;
+        }
+    }
+}
+} // namespace test_chunk_assignment
+
+TEST_CASE("chunk_assignment", "[core]")
+{
+    using namespace chunk_assignment;
+    test_chunk_assignment::Params params;
+    params.init(6, 2, 2, 1);
+    test_chunk_assignment::print(params.metaSource, params.table);
+    ByHostname byHostname(std::make_unique<RoundRobin>());
+    FromPartialStrategy fullStrategy(
+        std::make_unique<ByHostname>(std::move(byHostname)),
+        std::make_unique<BinPacking>());
+    Assignment res =
+        fullStrategy.assign(params.table, params.metaSource, params.metaSink);
+    std::cout << "\nRESULTS:" << std::endl;
+    test_chunk_assignment::print(params.metaSink, res);
+}
+
 TEST_CASE("versions_test", "[core]")
 {
     auto const apiVersion = getVersion();
diff --git a/test/ParallelIOTest.cpp b/test/ParallelIOTest.cpp
index 5f38973bed..cb3d2eea92 100644
--- a/test/ParallelIOTest.cpp
+++ b/test/ParallelIOTest.cpp
@@ -8,6 +8,8 @@
 #include "openPMD/auxiliary/Environment.hpp"
 #include "openPMD/auxiliary/Filesystem.hpp"
 #include "openPMD/openPMD.hpp"
+// @todo change includes
+#include "openPMD/benchmark/mpi/OneDimensionalBlockSlicer.hpp"
 #include <catch2/catch.hpp>
 
 #if !openPMD_HAVE_MPI
@@ -1183,6 +1185,53 @@ TEST_CASE("independent_write_with_collective_flush", "[parallel]")
 }
 #endif
 
+#if openPMD_HAVE_MPI
+TEST_CASE("unavailable_backend", "[core][parallel]")
+{
+#if !openPMD_HAVE_ADIOS2
+    {
+        auto fail = []() {
+            Series(
+                "unavailable.bp",
+                Access::CREATE,
+                MPI_COMM_WORLD,
+                R"({"backend": "ADIOS2"})");
+        };
+        REQUIRE_THROWS_WITH(
+            fail(),
+            "Wrong API usage: openPMD-api built without support for backend "
+            "'ADIOS2'.");
+    }
+#endif
+#if !openPMD_HAVE_ADIOS2
+    {
+        auto fail = []() {
+            Series("unavailable.bp", Access::CREATE, MPI_COMM_WORLD);
+        };
+        REQUIRE_THROWS_WITH(
+            fail(),
+            "Wrong API usage: openPMD-api built without support for backend "
+            "'ADIOS2'.");
+    }
+#endif
+#if !openPMD_HAVE_HDF5
+    {
+        auto fail = []() {
+            Series(
+                "unavailable.h5",
+                Access::CREATE,
+                MPI_COMM_WORLD,
+                R"({"backend": "HDF5"})");
+        };
+        REQUIRE_THROWS_WITH(
+            fail(),
+            "Wrong API usage: openPMD-api built without support for backend "
+            "'HDF5'.");
+    }
+#endif
+}
+#endif
+
 #if openPMD_HAVE_ADIOS2 && openPMD_HAVE_MPI
 
 void adios2_streaming(bool variableBasedLayout)
@@ -1879,51 +1928,6 @@ TEST_CASE("append_mode", "[serial]")
     }
 }
 
-TEST_CASE("unavailable_backend", "[core][parallel]")
-{
-#if !openPMD_HAVE_ADIOS2
-    {
-        auto fail = []() {
-            Series(
-                "unavailable.bp",
-                Access::CREATE,
-                MPI_COMM_WORLD,
-                R"({"backend": "ADIOS2"})");
-        };
-        REQUIRE_THROWS_WITH(
-            fail(),
-            "Wrong API usage: openPMD-api built without support for backend "
-            "'ADIOS2'.");
-    }
-#endif
-#if !openPMD_HAVE_ADIOS2
-    {
-        auto fail = []() {
-            Series("unavailable.bp", Access::CREATE, MPI_COMM_WORLD);
-        };
-        REQUIRE_THROWS_WITH(
-            fail(),
-            "Wrong API usage: openPMD-api built without support for backend "
-            "'ADIOS2'.");
-    }
-#endif
-#if !openPMD_HAVE_HDF5
-    {
-        auto fail = []() {
-            Series(
-                "unavailable.h5",
-                Access::CREATE,
-                MPI_COMM_WORLD,
-                R"({"backend": "HDF5"})");
-        };
-        REQUIRE_THROWS_WITH(
-            fail(),
-            "Wrong API usage: openPMD-api built without support for backend "
-            "'HDF5'.");
-    }
-#endif
-}
-
 void joined_dim(std::string const &ext)
 {
     using type = float;
@@ -2220,6 +2224,243 @@ TEST_CASE("iterate_nonstreaming_series", "[serial][adios2]")
     iterate_nonstreaming_series::iterate_nonstreaming_series();
 }
 
+void adios2_chunk_distribution()
+{
+    /*
+     * This test simulates a multi-node streaming setup in order to test some
+     * of our chunk distribution strategies.
+     * We don't actually stream (but write a .bp file instead) and also we don't
+     * actually run anything on multiple nodes, but we can use this for testing
+     * the distribution strategies anyway.
+     */
+    int mpi_size{-1};
+    int mpi_rank{-1};
+    MPI_Comm_size(MPI_COMM_WORLD, &mpi_size);
+    MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);
+
+    /*
+     * Mappings: MPI rank -> hostname where the rank is executed.
+     * For the writing application as well as for the reading one.
+     */
+    chunk_assignment::RankMeta writingRanksHostnames, readingRanksHostnames;
+    for (int i = 0; i < mpi_size; ++i)
+    {
+        /*
+         * The mapping is intentionally weird. Nodes "node1", "node3", ...
+         * do not have instances of the reading application running on them.
+         * Our distribution strategies will need to deal with that situation.
+         */
+        // 0, 0, 1, 1, 2, 2, 3, 3 ...
+        writingRanksHostnames[i] = "node" + std::to_string(i / 2);
+        // 0, 0, 0, 0, 2, 2, 2, 2 ...
+        readingRanksHostnames[i] = "node" + std::to_string(i / 4 * 2);
+    }
+
+    std::string filename = "../samples/adios2_chunk_distribution.bp";
+    // Simulate a stream: BP4 assigns chunk IDs by subfile (i.e. aggregator).
+    std::stringstream parameters;
+    parameters << R"END(
+{
+    "adios2":
+    {
+        "engine":
+        {
+            "type": "bp4",
+            "parameters":
+            {
+                "NumAggregators":)END"
+               << "\"" << std::to_string(mpi_size) << "\""
+               << R"END(
+            }
+        }
+    }
+}
+)END";
+
+    auto printChunktable = [mpi_rank](
+                               std::string const &strategyName,
+                               ChunkTable const &table,
+                               chunk_assignment::RankMeta const &meta) {
+        if (mpi_rank != 0)
+        {
+            return;
+        }
+        std::cout << "WITH STRATEGY '" << strategyName << "':\n";
+        for (auto const &chunk : table)
+        {
+            std::cout << "[HOST: " << meta.at(chunk.sourceID)
+                      << ",\tRank: " << chunk.sourceID << ",\tOffset: ";
+            for (auto offset : chunk.offset)
+            {
+                std::cout << offset << ", ";
+            }
+            std::cout << "\tExtent: ";
+            for (auto extent : chunk.extent)
+            {
+                std::cout << extent << ", ";
+            }
+            std::cout << "]" << std::endl;
+        }
+    };
+
+    auto printAssignment = [mpi_rank](
+                               std::string const &strategyName,
+                               chunk_assignment::Assignment const &table,
+                               chunk_assignment::RankMeta const &meta) {
+        if (mpi_rank != 0)
+        {
+            return;
+        }
+        std::cout << "WITH STRATEGY '" << strategyName << "':\n";
+        for (auto &[rank, chunkList] : table)
+        {
+            std::cout << "[HOST: " << meta.at(rank) << ",\tRank: " << rank
+                      << "]" << std::endl;
+            for (auto const &chunk : chunkList)
+            {
+                std::cout << "\t[Source rank: " << chunk.sourceID
+                          << "\tOffset: ";
+                for (auto offset : chunk.offset)
+                {
+                    std::cout << offset << ", ";
+                }
+                std::cout << "\tExtent: ";
+                for (auto extent : chunk.extent)
+                {
+                    std::cout << extent << ", ";
+                }
+                std::cout << "]" << std::endl;
+            }
+        }
+    };
+
+    // Create a dataset.
+    {
+        Series series(
+            filename,
+            openPMD::Access::CREATE,
+            MPI_COMM_WORLD,
+            parameters.str());
+        /*
+         * The writing application sets an attribute that tells the reading
+         * application about the "MPI rank -> hostname" mapping.
+         * Each rank only needs to set its own value.
+         * (Some other options like setting all at once or reading from a file
+         * exist as well.)
+         */
+        series.setRankTable(writingRanksHostnames.at(mpi_rank));
+
+        auto E_x = series.iterations[0].meshes["E"]["x"];
+        openPMD::Dataset ds(openPMD::Datatype::INT, {unsigned(mpi_size), 10});
+        E_x.resetDataset(ds);
+        std::vector<int> data(10, 0);
+        std::iota(data.begin(), data.end(), 0);
+        E_x.storeChunk(data, {unsigned(mpi_rank), 0}, {1, 10});
+        series.flush();
+    }
+
+    {
+        Series series(filename, openPMD::Access::READ_ONLY, MPI_COMM_WORLD);
+        /*
+         * Inquire the writing application's "MPI rank -> hostname" mapping.
+         * The reading application needs to know about its own mapping.
+         * Having both of these mappings is the basis for an efficient chunk
+         * distribution since we can use it to figure out which instances
+         * are running on the same nodes.
+         */
+        auto rankMetaIn = series.rankTable(/* collective = */ true);
+        REQUIRE(rankMetaIn == writingRanksHostnames);
+
+        auto E_x = series.iterations[0].meshes["E"]["x"];
+        /*
+         * Ask the backend which chunks are available.
+         */
+        auto const chunkTable = E_x.availableChunks();
+
+        printChunktable("INPUT", chunkTable, rankMetaIn);
+
+        using namespace chunk_assignment;
+
+        /*
+         * Assign the chunks by distributing them one after the other to reading
+         * ranks. Easy, but not particularly efficient.
+         */
+        RoundRobin roundRobinStrategy;
+        auto roundRobinAssignment = roundRobinStrategy.assign(
+            chunkTable, rankMetaIn, readingRanksHostnames);
+        printAssignment(
+            "ROUND ROBIN", roundRobinAssignment, readingRanksHostnames);
+
+        /*
+         * Assign chunks by hostname.
+         * Two difficulties:
+         * * A distribution strategy within one node needs to be picked.
+         *   We pick the BinPacking strategy that tries to assign chunks in a
+         *   balanced manner. Since our chunks have a small extent along
+         *   dimension 0, use dimension 1 for slicing.
+         * * The assignment is partial since some nodes only have instances of
+         *   the writing application. Those chunks remain unassigned.
+         */
+        ByHostname byHostname(
+            std::make_unique<BinPacking>(/* splitAlongDimension = */ 1));
+        auto byHostnamePartialAssignment =
+            byHostname.assign(chunkTable, rankMetaIn, readingRanksHostnames);
+        printAssignment(
+            "HOSTNAME, ASSIGNED",
+            byHostnamePartialAssignment.assigned,
+            readingRanksHostnames);
+        printChunktable(
+            "HOSTNAME, LEFTOVER",
+            byHostnamePartialAssignment.notAssigned,
+            rankMetaIn);
+
+        /*
+         * Assign chunks by hostnames, once more.
+         * This time, apply a secondary distribution strategy to assign
+         * leftovers. We pick BinPacking, once more.
+         * Notice that the BinPacking strategy does not (yet) take into account
+         * chunks that have been assigned by the first round.
+         * Balancing is calculated solely based on the leftover chunks from the
+         * first round.
+         */
+        FromPartialStrategy fromPartialStrategy(
+            std::make_unique<ByHostname>(std::move(byHostname)),
+            std::make_unique<BinPacking>(/* splitAlongDimension = */ 1));
+        auto fromPartialAssignment = fromPartialStrategy.assign(
+            chunkTable, rankMetaIn, readingRanksHostnames);
+        printAssignment(
+            "HOSTNAME WITH SECOND PASS",
+            fromPartialAssignment,
+            readingRanksHostnames);
+
+        /*
+         * Assign chunks by slicing the n-dimensional physical domain and
+         * intersecting those slices with the available chunks from the backend.
+         * Notice that this strategy only returns the chunks that the currently
+         * running rank is supposed to load, whereas the other strategies return
+         * a chunk table containing all chunks that all ranks will load.
+         * In principle, a chunk_assignment::Strategy only needs to return the
+         * chunks that the current rank should load, but is free to emplace the
+         * other chunks for other reading ranks as well.
+         * (Reasoning: In some strategies, calculating everything is necessary,
+         * in others such as this one, it's an unneeded overhead.)
+         */
+        ByCuboidSlice cuboidSliceStrategy(
+            std::make_unique<OneDimensionalBlockSlicer>(1),
+            E_x.getExtent(),
+            mpi_rank,
+            mpi_size);
+        auto cuboidSliceAssignment = cuboidSliceStrategy.assign(
+            chunkTable, rankMetaIn, readingRanksHostnames);
+        printAssignment(
+            "CUBOID SLICE", cuboidSliceAssignment, readingRanksHostnames);
+    }
+}
+
+TEST_CASE("adios2_chunk_distribution", "[parallel][adios2]")
+{
+    adios2_chunk_distribution();
+}
 #endif // openPMD_HAVE_ADIOS2 && openPMD_HAVE_MPI
 
 #if openPMD_HAVE_MPI

From cda9e762f8c4e7c82e2a656835fb68fa0557fe8e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franz=20P=C3=B6schel?= <franz.poeschel@gmail.com>
Date: Fri, 3 Mar 2023 15:09:57 +0100
Subject: [PATCH 05/27] Add DiscardingStrategy

---
 include/openPMD/ChunkInfo.hpp    | 21 +++++++++++++++++++++
 src/ChunkInfo.cpp                | 13 +++++++++++++
 src/binding/python/ChunkInfo.cpp |  3 +++
 3 files changed, 37 insertions(+)

diff --git a/include/openPMD/ChunkInfo.hpp b/include/openPMD/ChunkInfo.hpp
index b44379b2aa..54454b4f13 100644
--- a/include/openPMD/ChunkInfo.hpp
+++ b/include/openPMD/ChunkInfo.hpp
@@ -322,6 +322,27 @@ namespace chunk_assignment
 
         virtual std::unique_ptr<Strategy> clone() const override;
     };
+
+    /**
+     * @brief Strategy that purposefully discards leftover chunks from
+     *        the PartialAssignment.
+     *
+     * Useful as second phase in FromPartialStrategy when knowing that some
+     * chunks will go unassigned, but still wanting to communicate only within
+     * the same node.
+     *
+     */
+    struct DiscardingStrategy : Strategy
+    {
+        explicit DiscardingStrategy();
+
+        Assignment assign(
+            PartialAssignment,
+            RankMeta const &in,
+            RankMeta const &out) override;
+
+        virtual std::unique_ptr<Strategy> clone() const override;
+    };
 } // namespace chunk_assignment
 
 namespace host_info
diff --git a/src/ChunkInfo.cpp b/src/ChunkInfo.cpp
index 7b6c1e32ca..190bc8012c 100644
--- a/src/ChunkInfo.cpp
+++ b/src/ChunkInfo.cpp
@@ -552,6 +552,19 @@ namespace chunk_assignment
     {
         return std::make_unique<FailingStrategy>();
     }
+
+    DiscardingStrategy::DiscardingStrategy() = default;
+
+    Assignment DiscardingStrategy::assign(
+        PartialAssignment assignment, RankMeta const &, RankMeta const &)
+    {
+        return assignment.assigned;
+    }
+
+    std::unique_ptr<Strategy> DiscardingStrategy::clone() const
+    {
+        return std::make_unique<DiscardingStrategy>();
+    }
 } // namespace chunk_assignment
 
 namespace host_info
diff --git a/src/binding/python/ChunkInfo.cpp b/src/binding/python/ChunkInfo.cpp
index 76deca3a10..4c93233f22 100644
--- a/src/binding/python/ChunkInfo.cpp
+++ b/src/binding/python/ChunkInfo.cpp
@@ -191,4 +191,7 @@ void init_Chunk(py::module &m)
 
     py::class_<FailingStrategy, Strategy>(m, "FailingStrategy")
         .def(py::init<>());
+
+    py::class_<DiscardingStrategy, Strategy>(m, "DiscardingStrategy")
+        .def(py::init<>());
 }

From 8125286f8480eac35dd579cf1b764250aa838bb9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franz=20P=C3=B6schel?= <franz.poeschel@gmail.com>
Date: Wed, 15 Mar 2023 14:56:50 +0100
Subject: [PATCH 06/27] Make Strategy class extensible from Python

@todo Why do we need to increase the refcount twice??
---
 .../openPMD/backend/BaseRecordComponent.hpp   |   1 +
 src/binding/python/ChunkInfo.cpp              | 158 +++++++++++++++++-
 .../python/openpmd_api/pipe/__main__.py       |  33 ++++
 3 files changed, 189 insertions(+), 3 deletions(-)

diff --git a/include/openPMD/backend/BaseRecordComponent.hpp b/include/openPMD/backend/BaseRecordComponent.hpp
index fe4490830d..a871d67bcf 100644
--- a/include/openPMD/backend/BaseRecordComponent.hpp
+++ b/include/openPMD/backend/BaseRecordComponent.hpp
@@ -20,6 +20,7 @@
  */
 #pragma once
 
+#include "openPMD/ChunkInfo.hpp"
 #include "openPMD/Dataset.hpp"
 #include "openPMD/Error.hpp"
 #include "openPMD/backend/Attributable.hpp"
diff --git a/src/binding/python/ChunkInfo.cpp b/src/binding/python/ChunkInfo.cpp
index 4c93233f22..9c94329d0e 100644
--- a/src/binding/python/ChunkInfo.cpp
+++ b/src/binding/python/ChunkInfo.cpp
@@ -28,8 +28,139 @@
 #include <string>
 #include <utility> // std::move
 
+/*
+ * PyStrategy and PyPartialStrategy are the C++ representations for objects
+ * created in Python.
+ * One challenge about these classes is that they cannot be easily copied or
+ * moved in memory, as the clone will lose the relation to the Python object.
+ * ClonableTrampoline has a clone_impl() method that child classes can use for
+ * cloning the object and at the same time storing a reference to the original
+ * Python object.
+ * The template parameters ChildCpp and ChildPy implement a CRTP-like pattern,
+ * split into a C++ class and a Python trampoline class as documented here:
+ * https://pybind11.readthedocs.io/en/stable/advanced/classes.html?highlight=trampoline#overriding-virtual-functions-in-python
+ *
+ * A typical child instantiation would look like:
+ * struct ChildPy : ChildCpp, ClonableTrampoline<ChildCpp, ChildPy>;
+ */
+template <typename ChildCpp, typename ChildPy>
+struct ClonableTrampoline
+{
+    struct OriginalInstance
+    {
+        py::handle pythonObject;
+
+        ~OriginalInstance()
+        {
+            pythonObject.dec_ref();
+        }
+    };
+    /*
+     * If the shared pointer is empty, this object is the original object owned
+     * by Python and the Python handle can be acquired by:
+     * py::cast(static_cast<ChildPy const *>(this))
+     *
+     * Copied instances will refer to the Python object handle via this member.
+     * By only storing this member in copied instances, but not in the original
+     * instance, we avoid a memory cycle and ensure clean destruction.
+     */
+    std::shared_ptr<OriginalInstance> m_originalInstance;
+
+    [[nodiscard]] py::handle get_python_handle() const
+    {
+        if (m_originalInstance)
+        {
+            // std::cout << "Refcount "
+            //           << m_originalInstance->pythonObject.ref_count()
+            //           << std::endl;
+            return m_originalInstance->pythonObject;
+        }
+        else
+        {
+            auto self = static_cast<ChildPy const *>(this);
+            return py::cast(self);
+        }
+    }
+
+    template <typename Res, typename... Args>
+    Res call_virtual(std::string const &nameOfPythonMethod, Args &&...args)
+    {
+        py::gil_scoped_acquire gil;
+        auto ptr = get_python_handle().template cast<ChildCpp *>();
+        auto fun = py::get_override(ptr, nameOfPythonMethod.c_str());
+        if (!fun)
+        {
+            throw std::runtime_error(
+                "Virtual method not found. Did you define '" +
+                nameOfPythonMethod + "' as method in Python?");
+        }
+        auto res = fun(std::forward<Args>(args)...);
+        return py::detail::cast_safe<Res>(std::move(res));
+    }
+
+    /*
+     * Copy this object; the copy keeps the original Python object alive
+     * through m_originalInstance and dispatches virtual calls to it.
+     */
+    [[nodiscard]] std::unique_ptr<ChildCpp> clone_impl() const
+    {
+        auto self = static_cast<ChildPy const *>(this);
+        auto res = std::make_unique<ChildPy>(*self);
+        if (!m_originalInstance)
+        {
+            // Build the holder in place and take exactly one reference: a
+            // by-value OriginalInstance local would dec_ref in its destructor
+            // too, which is why two inc_ref() calls used to be needed.
+            auto oi = std::make_shared<OriginalInstance>();
+            oi->pythonObject = py::cast(self);
+            oi->pythonObject.inc_ref();
+            res->m_originalInstance = std::move(oi);
+        }
+        return res;
+    }
+};
+
+struct PyStrategy
+    : chunk_assignment::Strategy
+    , ClonableTrampoline<chunk_assignment::Strategy, PyStrategy>
+{
+    chunk_assignment::Assignment assign(
+        chunk_assignment::PartialAssignment assignment,
+        chunk_assignment::RankMeta const &in,
+        chunk_assignment::RankMeta const &out) override
+    {
+        return call_virtual<chunk_assignment::Assignment>(
+            "assign", std::move(assignment), in, out);
+    }
+
+    [[nodiscard]] std::unique_ptr<Strategy> clone() const override
+    {
+        return clone_impl();
+    }
+};
+
+struct PyPartialStrategy
+    : chunk_assignment::PartialStrategy
+    , ClonableTrampoline<chunk_assignment::PartialStrategy, PyPartialStrategy>
+{
+    chunk_assignment::PartialAssignment assign(
+        chunk_assignment::PartialAssignment assignment,
+        chunk_assignment::RankMeta const &in,
+        chunk_assignment::RankMeta const &out) override
+    {
+        return call_virtual<chunk_assignment::PartialAssignment>(
+            "assign", std::move(assignment), in, out);
+    }
+
+    [[nodiscard]] std::unique_ptr<PartialStrategy> clone() const override
+    {
+        return clone_impl();
+    }
+};
+
 void init_Chunk(py::module &m)
 {
+
     py::class_<ChunkInfo>(m, "ChunkInfo")
         .def(py::init<Offset, Extent>(), py::arg("offset"), py::arg("extent"))
         .def(
@@ -40,6 +171,8 @@ void init_Chunk(py::module &m)
             })
         .def_readwrite("offset", &ChunkInfo::offset)
         .def_readwrite("extent", &ChunkInfo::extent);
+    py::bind_vector<PyVecChunkInfo>(m, "VectorChunkInfo");
+    py::implicitly_convertible<py::list, std::vector<ChunkInfo>>();
     py::class_<WrittenChunkInfo, ChunkInfo>(m, "WrittenChunkInfo")
         .def(py::init<Offset, Extent>(), py::arg("offset"), py::arg("extent"))
         .def(
@@ -105,12 +238,21 @@ void init_Chunk(py::module &m)
             })
         .def("available", &host_info::methodAvailable);
 
+    py::bind_vector<ChunkTable>(m, "ChunkTable");
 
     using namespace chunk_assignment;
 
-    (void)py::class_<PartialStrategy>(m, "PartialStrategy");
+    py::bind_map<Assignment>(m, "Assignment");
+
+    py::class_<PartialAssignment>(m, "PartialAssignment")
+        .def(py::init<>())
+        .def_readwrite("not_assigned", &PartialAssignment::notAssigned)
+        .def_readwrite("assigned", &PartialAssignment::assigned);
+
+    py::bind_map<RankMeta>(m, "RankMeta");
 
-    py::class_<PartialStrategy>(m, "PartialStrategy")
+    py::class_<PartialStrategy, PyPartialStrategy>(m, "PartialStrategy")
+        .def(py::init<>())
         .def(
             "assign",
             py::overload_cast<ChunkTable, RankMeta const &, RankMeta const &>(
@@ -128,7 +270,8 @@ void init_Chunk(py::module &m)
             py::arg("rank_meta_in") = RankMeta(),
             py::arg("rank_meta_out") = RankMeta());
 
-    py::class_<Strategy>(m, "Strategy")
+    py::class_<Strategy, PyStrategy>(m, "Strategy")
+        .def(py::init<>())
         .def(
             "assign",
             py::overload_cast<ChunkTable, RankMeta const &, RankMeta const &>(
@@ -194,4 +337,13 @@ void init_Chunk(py::module &m)
 
     py::class_<DiscardingStrategy, Strategy>(m, "DiscardingStrategy")
         .def(py::init<>());
+
+    // implicit conversions
+    {
+        py::implicitly_convertible<py::list, PyVecChunkInfo>();
+        py::implicitly_convertible<py::list, ChunkTable>();
+        py::implicitly_convertible<ChunkTable, PyVecChunkInfo>();
+        py::implicitly_convertible<py::dict, Assignment>();
+        py::implicitly_convertible<py::dict, RankMeta>();
+    }
 }
diff --git a/src/binding/python/openpmd_api/pipe/__main__.py b/src/binding/python/openpmd_api/pipe/__main__.py
index 96802530b0..40e616e3bd 100644
--- a/src/binding/python/openpmd_api/pipe/__main__.py
+++ b/src/binding/python/openpmd_api/pipe/__main__.py
@@ -114,6 +114,37 @@ def __init__(self, source, dynamicView, offset, extent):
         self.extent = extent
 
 
+# Example how to implement a simple partial strategy in Python
+class LoadOne(io.PartialStrategy):
+    def __init__(self, rank):
+        super().__init__()
+        self.rank = rank
+
+    def assign(self, assignment, *_):
+        pending = assignment.not_assigned
+        if len(pending) > 0:  # nothing to hand out for an empty table
+            if self.rank not in assignment.assigned:
+                assignment.assigned[self.rank] = []
+            assignment.assigned[self.rank].append(pending.pop())
+        return assignment
+
+
+# Example how to implement a simple strategy in Python
+class LoadAll(io.Strategy):
+
+    def __init__(self, rank):
+        super().__init__()
+        self.rank = rank
+
+    def assign(self, assignment, *_):
+        res = assignment.assigned
+        if self.rank not in res:
+            res[self.rank] = assignment.not_assigned
+        else:
+            res[self.rank].extend(assignment.not_assigned)
+        return res
+
+
 def distribution_strategy(dataset_extent,
                           mpi_rank,
                           mpi_size,
@@ -136,6 +167,8 @@ def distribution_strategy(dataset_extent,
             mpi_size,
             strategy_identifier=match.group(2))
         return io.FromPartialStrategy(io.ByHostname(inside_node), second_phase)
+    elif strategy_identifier == 'all':
+        return io.FromPartialStrategy(LoadOne(mpi_rank), LoadAll(mpi_rank))
     elif strategy_identifier == 'roundrobin':
         return io.RoundRobin()
     elif strategy_identifier == 'binpacking':

From a66f6562e8432be7a1dbcae3b366ea1322332f3d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franz=20P=C3=B6schel?= <franz.poeschel@gmail.com>
Date: Tue, 14 Mar 2023 17:02:20 +0100
Subject: [PATCH 07/27] Make mergeChunks function public

---
 include/openPMD/ChunkInfo.hpp     |   3 +
 src/ChunkInfo.cpp                 | 110 ++++++++++++++++++++++++++++++
 src/IO/JSON/JSONIOHandlerImpl.cpp | 103 +---------------------------
 src/binding/python/ChunkInfo.cpp  |   7 +-
 4 files changed, 118 insertions(+), 105 deletions(-)

diff --git a/include/openPMD/ChunkInfo.hpp b/include/openPMD/ChunkInfo.hpp
index 54454b4f13..976fac955e 100644
--- a/include/openPMD/ChunkInfo.hpp
+++ b/include/openPMD/ChunkInfo.hpp
@@ -91,6 +91,9 @@ namespace chunk_assignment
 
     using Assignment = std::map<unsigned int, std::vector<WrittenChunkInfo>>;
 
+    template <typename Chunk_t>
+    void mergeChunks(std::vector<Chunk_t> &);
+
     struct PartialAssignment
     {
         ChunkTable notAssigned;
diff --git a/src/ChunkInfo.cpp b/src/ChunkInfo.cpp
index 190bc8012c..d0e10a4e11 100644
--- a/src/ChunkInfo.cpp
+++ b/src/ChunkInfo.cpp
@@ -27,6 +27,7 @@
 #include <iostream>
 #include <list>
 #include <map>
+#include <optional>
 #include <utility>
 
 #ifdef _WIN32
@@ -68,6 +69,115 @@ bool WrittenChunkInfo::operator==(WrittenChunkInfo const &other) const
 
 namespace chunk_assignment
 {
+    namespace
+    {
+        /*
+         * Check whether two chunks can be merged to form a larger one
+         * and optionally return that larger chunk
+         */
+        template <typename Chunk_t>
+        std::optional<Chunk_t>
+        mergeChunks(Chunk_t const &chunk1, Chunk_t const &chunk2)
+        {
+            /*
+             * Idea:
+             * If two chunks can be merged into one, they agree on offsets and
+             * extents in all but exactly one dimension dim.
+             * At dimension dim, the offset of chunk 2 is equal to the offset
+             * of chunk 1 plus its extent -- or vice versa.
+             */
+            unsigned dimensionality = chunk1.extent.size();
+            for (unsigned dim = 0; dim < dimensionality; ++dim)
+            {
+                Chunk_t const *c1(&chunk1), *c2(&chunk2);
+                // check if one chunk is the extension of the other at
+                // dimension dim
+                // first, let's put things in order
+                if (c1->offset[dim] > c2->offset[dim])
+                {
+                    std::swap(c1, c2);
+                }
+                // now, c1 begins at the lower of both offsets
+                // next check, that both chunks border one another exactly
+                if (c2->offset[dim] != c1->offset[dim] + c1->extent[dim])
+                {
+                    continue;
+                }
+                // we've got a candidate
+                // verify that all other dimensions have equal values
+                auto equalValues = [dimensionality, dim, c1, c2]() {
+                    for (unsigned j = 0; j < dimensionality; ++j)
+                    {
+                        if (j == dim)
+                        {
+                            continue;
+                        }
+                        if (c1->offset[j] != c2->offset[j] ||
+                            c1->extent[j] != c2->extent[j])
+                        {
+                            return false;
+                        }
+                    }
+                    return true;
+                };
+                if (!equalValues())
+                {
+                    continue;
+                }
+                // merge: the result carries only the joint offset and extent
+                Offset offset(c1->offset);
+                Extent extent(c1->extent);
+                extent[dim] += c2->extent[dim];
+                return std::make_optional(Chunk_t(offset, extent));
+            }
+            return std::optional<Chunk_t>();
+        }
+    } // namespace
+
+    /*
+     * Merge chunks in the chunktable until no chunks are left that can be
+     * merged.
+     */
+    template <typename Chunk_t>
+    void mergeChunks(std::vector<Chunk_t> &table)
+    {
+        bool stillChanging;
+        do
+        {
+            stillChanging = false;
+            auto innerLoops = [&table]() {
+                /*
+                 * Iterate over pairs of chunks in the table.
+                 * When a pair that can be merged is found, merge it,
+                 * delete the original two chunks from the table,
+                 * put the new one in and return.
+                 */
+                for (auto i = table.begin(); i < table.end(); ++i)
+                {
+                    for (auto j = i + 1; j < table.end(); ++j)
+                    {
+                        std::optional<Chunk_t> merged = mergeChunks(*i, *j);
+                        if (merged)
+                        {
+                            // erase order is important due to iterator
+                            // invalidation
+                            table.erase(j);
+                            table.erase(i);
+                            table.emplace_back(std::move(merged.value()));
+                            return true;
+                        }
+                    }
+                }
+                return false;
+            };
+            stillChanging = innerLoops();
+        } while (stillChanging);
+    }
+
+    template void mergeChunks<ChunkInfo>(std::vector<ChunkInfo> &);
+    template void
+    mergeChunks<WrittenChunkInfo>(std::vector<WrittenChunkInfo> &);
+
     namespace
     {
         std::map<std::string, std::list<unsigned int> >
diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp
index 2287522d92..0a80e53321 100644
--- a/src/IO/JSON/JSONIOHandlerImpl.cpp
+++ b/src/IO/JSON/JSONIOHandlerImpl.cpp
@@ -875,107 +875,6 @@ namespace
         }
         return res;
     }
-
-    /*
-     * Check whether two chunks can be merged to form a large one
-     * and optionally return that larger chunk
-     */
-    std::optional<WrittenChunkInfo>
-    mergeChunks(WrittenChunkInfo const &chunk1, WrittenChunkInfo const &chunk2)
-    {
-        /*
-         * Idea:
-         * If two chunks can be merged into one, they agree on offsets and
-         * extents in all but exactly one dimension dim.
-         * At dimension dim, the offset of chunk 2 is equal to the offset
-         * of chunk 1 plus its extent -- or vice versa.
-         */
-        unsigned dimensionality = chunk1.extent.size();
-        for (unsigned dim = 0; dim < dimensionality; ++dim)
-        {
-            WrittenChunkInfo const *c1(&chunk1), *c2(&chunk2);
-            // check if one chunk is the extension of the other at
-            // dimension dim
-            // first, let's put things in order
-            if (c1->offset[dim] > c2->offset[dim])
-            {
-                std::swap(c1, c2);
-            }
-            // now, c1 begins at the lower of both offsets
-            // next check, that both chunks border one another exactly
-            if (c2->offset[dim] != c1->offset[dim] + c1->extent[dim])
-            {
-                continue;
-            }
-            // we've got a candidate
-            // verify that all other dimensions have equal values
-            auto equalValues = [dimensionality, dim, c1, c2]() {
-                for (unsigned j = 0; j < dimensionality; ++j)
-                {
-                    if (j == dim)
-                    {
-                        continue;
-                    }
-                    if (c1->offset[j] != c2->offset[j] ||
-                        c1->extent[j] != c2->extent[j])
-                    {
-                        return false;
-                    }
-                }
-                return true;
-            };
-            if (!equalValues())
-            {
-                continue;
-            }
-            // we can merge the chunks
-            Offset offset(c1->offset);
-            Extent extent(c1->extent);
-            extent[dim] += c2->extent[dim];
-            return std::make_optional(WrittenChunkInfo(offset, extent));
-        }
-        return std::optional<WrittenChunkInfo>();
-    }
-
-    /*
-     * Merge chunks in the chunktable until no chunks are left that can be
-     * merged.
-     */
-    void mergeChunks(ChunkTable &table)
-    {
-        bool stillChanging;
-        do
-        {
-            stillChanging = false;
-            auto innerLoops = [&table]() {
-                /*
-                 * Iterate over pairs of chunks in the table.
-                 * When a pair that can be merged is found, merge it,
-                 * delete the original two chunks from the table,
-                 * put the new one in and return.
-                 */
-                for (auto i = table.begin(); i < table.end(); ++i)
-                {
-                    for (auto j = i + 1; j < table.end(); ++j)
-                    {
-                        std::optional<WrittenChunkInfo> merged =
-                            mergeChunks(*i, *j);
-                        if (merged)
-                        {
-                            // erase order is important due to iterator
-                            // invalidation
-                            table.erase(j);
-                            table.erase(i);
-                            table.emplace_back(std::move(merged.value()));
-                            return true;
-                        }
-                    }
-                }
-                return false;
-            };
-            stillChanging = innerLoops();
-        } while (stillChanging);
-    }
 } // namespace
 
 void JSONIOHandlerImpl::availableChunks(
@@ -985,7 +884,7 @@ void JSONIOHandlerImpl::availableChunks(
     auto filePosition = setAndGetFilePosition(writable);
     auto &j = obtainJsonContents(writable)["data"];
     *parameters.chunks = chunksInJSON(j);
-    mergeChunks(*parameters.chunks);
+    chunk_assignment::mergeChunks(*parameters.chunks);
 }
 
 void JSONIOHandlerImpl::openFile(
diff --git a/src/binding/python/ChunkInfo.cpp b/src/binding/python/ChunkInfo.cpp
index 9c94329d0e..c34b7cea1b 100644
--- a/src/binding/python/ChunkInfo.cpp
+++ b/src/binding/python/ChunkInfo.cpp
@@ -171,8 +171,8 @@ void init_Chunk(py::module &m)
             })
         .def_readwrite("offset", &ChunkInfo::offset)
         .def_readwrite("extent", &ChunkInfo::extent);
-    py::bind_vector<PyVecChunkInfo>(m, "VectorChunkInfo");
-    py::implicitly_convertible<py::list, std::vector<ChunkInfo>>();
+    py::bind_vector<PyVecChunkInfo>(m, "VectorChunkInfo")
+        .def("merge_chunks", &chunk_assignment::mergeChunks<ChunkInfo>);
     py::class_<WrittenChunkInfo, ChunkInfo>(m, "WrittenChunkInfo")
         .def(py::init<Offset, Extent>(), py::arg("offset"), py::arg("extent"))
         .def(
@@ -238,7 +238,8 @@ void init_Chunk(py::module &m)
             })
         .def("available", &host_info::methodAvailable);
 
-    py::bind_vector<ChunkTable>(m, "ChunkTable");
+    py::bind_vector<ChunkTable>(m, "ChunkTable")
+        .def("merge_chunks", &chunk_assignment::mergeChunks<WrittenChunkInfo>);
 
     using namespace chunk_assignment;
 

From 8fd934a17d4d8aee2d5791dbcd52dde0138e0b3f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franz=20P=C3=B6schel?= <franz.poeschel@gmail.com>
Date: Fri, 9 Feb 2024 13:18:33 +0100
Subject: [PATCH 08/27] Add mergeChunksFromSameSourceID

---
 include/openPMD/ChunkInfo.hpp    |  3 +++
 src/ChunkInfo.cpp                | 15 +++++++++++++++
 src/binding/python/ChunkInfo.cpp |  5 ++++-
 3 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/include/openPMD/ChunkInfo.hpp b/include/openPMD/ChunkInfo.hpp
index 976fac955e..6787b49eb8 100644
--- a/include/openPMD/ChunkInfo.hpp
+++ b/include/openPMD/ChunkInfo.hpp
@@ -94,6 +94,9 @@ namespace chunk_assignment
     template <typename Chunk_t>
     void mergeChunks(std::vector<Chunk_t> &);
 
+    auto mergeChunksFromSameSourceID(std::vector<WrittenChunkInfo> const &)
+        -> std::map<unsigned int, std::vector<ChunkInfo>>;
+
     struct PartialAssignment
     {
         ChunkTable notAssigned;
diff --git a/src/ChunkInfo.cpp b/src/ChunkInfo.cpp
index d0e10a4e11..0aa6fb4653 100644
--- a/src/ChunkInfo.cpp
+++ b/src/ChunkInfo.cpp
@@ -174,6 +174,21 @@ namespace chunk_assignment
         } while (stillChanging);
     }
 
+    auto mergeChunksFromSameSourceID(std::vector<WrittenChunkInfo> const &table)
+        -> std::map<unsigned int, std::vector<ChunkInfo>>
+    {
+        std::map<unsigned int, std::vector<ChunkInfo>> sortedBySourceID;
+        for (auto const &chunk : table)
+        {
+            sortedBySourceID[chunk.sourceID].emplace_back(chunk);
+        }
+        for (auto &pair : sortedBySourceID)
+        {
+            mergeChunks(pair.second);
+        }
+        return sortedBySourceID;
+    }
+
     template void mergeChunks<ChunkInfo>(std::vector<ChunkInfo> &);
     template void
     mergeChunks<WrittenChunkInfo>(std::vector<WrittenChunkInfo> &);
diff --git a/src/binding/python/ChunkInfo.cpp b/src/binding/python/ChunkInfo.cpp
index c34b7cea1b..76c7120a9b 100644
--- a/src/binding/python/ChunkInfo.cpp
+++ b/src/binding/python/ChunkInfo.cpp
@@ -239,7 +239,10 @@ void init_Chunk(py::module &m)
         .def("available", &host_info::methodAvailable);
 
     py::bind_vector<ChunkTable>(m, "ChunkTable")
-        .def("merge_chunks", &chunk_assignment::mergeChunks<WrittenChunkInfo>);
+        .def("merge_chunks", &chunk_assignment::mergeChunks<WrittenChunkInfo>)
+        .def(
+            "merge_chunks_from_same_sourceID",
+            &chunk_assignment::mergeChunksFromSameSourceID);
 
     using namespace chunk_assignment;
 

From b27716901e4fd3d2298bfccea22b4d16b17e00f4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franz=20P=C3=B6schel?= <franz.poeschel@gmail.com>
Date: Thu, 8 Feb 2024 17:02:58 +0100
Subject: [PATCH 09/27] Add RoundRobinOfSourceRanks strategy

---
 include/openPMD/ChunkInfo.hpp    | 11 ++++++++
 src/ChunkInfo.cpp                | 47 ++++++++++++++++++++++++++++++--
 src/binding/python/ChunkInfo.cpp |  2 ++
 test/ParallelIOTest.cpp          | 23 ++++++++++++++--
 4 files changed, 78 insertions(+), 5 deletions(-)

diff --git a/include/openPMD/ChunkInfo.hpp b/include/openPMD/ChunkInfo.hpp
index 6787b49eb8..2193a6c691 100644
--- a/include/openPMD/ChunkInfo.hpp
+++ b/include/openPMD/ChunkInfo.hpp
@@ -24,6 +24,7 @@
 
 #include "openPMD/Dataset.hpp" // Offset, Extent
 #include "openPMD/benchmark/mpi/BlockSlicer.hpp"
+#include <memory>
 
 #if openPMD_HAVE_MPI
 #include <mpi.h>
@@ -224,6 +225,16 @@ namespace chunk_assignment
         virtual std::unique_ptr<Strategy> clone() const override;
     };
 
+    struct RoundRobinOfSourceRanks : Strategy
+    {
+        Assignment assign(
+            PartialAssignment,
+            RankMeta const &in,
+            RankMeta const &out) override;
+
+        virtual std::unique_ptr<Strategy> clone() const override;
+    };
+
     /**
      * @brief Strategy that assigns chunks to be read by processes within
      *        the same host that produced the chunk.
diff --git a/src/ChunkInfo.cpp b/src/ChunkInfo.cpp
index 0aa6fb4653..6c5cff166f 100644
--- a/src/ChunkInfo.cpp
+++ b/src/ChunkInfo.cpp
@@ -24,10 +24,13 @@
 #include "openPMD/auxiliary/Mpi.hpp"
 
 #include <algorithm> // std::sort
+#include <deque>
 #include <iostream>
+#include <iterator>
 #include <list>
 #include <map>
 #include <optional>
+#include <set>
 #include <utility>
 
 #ifdef _WIN32
@@ -195,10 +198,10 @@ namespace chunk_assignment
 
     namespace
     {
-        std::map<std::string, std::list<unsigned int> >
+        std::map<std::string, std::list<unsigned int>>
         ranksPerHost(RankMeta const &rankMeta)
         {
-            std::map<std::string, std::list<unsigned int> > res;
+            std::map<std::string, std::list<unsigned int>> res;
             for (auto const &pair : rankMeta)
             {
                 auto &list = res[pair.second];
@@ -294,6 +297,44 @@ namespace chunk_assignment
         return std::unique_ptr<Strategy>(new RoundRobin);
     }
 
+    Assignment RoundRobinOfSourceRanks::assign(
+        PartialAssignment partialAssignment,
+        RankMeta const &, // ignored parameter
+        RankMeta const &out)
+    {
+        if (out.empty() && !partialAssignment.notAssigned.empty())
+        {
+            throw std::runtime_error(
+                "[RoundRobinOfSourceRanks] Cannot round-robin to zero ranks.");
+        }
+        // Group by writing rank: one source rank maps to exactly one sink rank
+        std::map<unsigned int, std::deque<WrittenChunkInfo>> bySourceRank;
+        for (auto &chunk : partialAssignment.notAssigned)
+        {
+            auto sourceID = chunk.sourceID;
+            bySourceRank[sourceID].push_back(std::move(chunk));
+        }
+        partialAssignment.notAssigned.clear();
+        auto sink_it = out.begin();
+        for (auto &src : bySourceRank)
+        {
+            auto &sink = partialAssignment.assigned[sink_it->first];
+            // reserve relative to what this sink rank already holds
+            sink.reserve(sink.size() + src.second.size());
+            std::move(
+                src.second.begin(), src.second.end(), std::back_inserter(sink));
+            // advance to the next sink rank, wrapping around after the last
+            sink_it = std::next(sink_it);
+            sink_it = sink_it == out.end() ? out.begin() : sink_it;
+        }
+        return partialAssignment.assigned;
+    }
+
+    std::unique_ptr<Strategy> RoundRobinOfSourceRanks::clone() const
+    {
+        return std::unique_ptr<Strategy>(new RoundRobinOfSourceRanks);
+    }
+
     ByHostname::ByHostname(std::unique_ptr<Strategy> withinNode)
         : m_withinNode(std::move(withinNode))
     {}
@@ -332,7 +373,7 @@ namespace chunk_assignment
         // the ranks are the source ranks
 
         // which ranks live on host <string> in the sink?
-        std::map<std::string, std::list<unsigned int> > ranksPerHostSink =
+        std::map<std::string, std::list<unsigned int>> ranksPerHostSink =
             ranksPerHost(out);
         for (auto &chunkGroup : chunkGroups)
         {
diff --git a/src/binding/python/ChunkInfo.cpp b/src/binding/python/ChunkInfo.cpp
index 76c7120a9b..6f94cdd8aa 100644
--- a/src/binding/python/ChunkInfo.cpp
+++ b/src/binding/python/ChunkInfo.cpp
@@ -300,6 +300,8 @@ void init_Chunk(py::module &m)
         }));
 
     py::class_<RoundRobin, Strategy>(m, "RoundRobin").def(py::init<>());
+    py::class_<RoundRobinOfSourceRanks, Strategy>(m, "RoundRobinOfSourceRanks")
+        .def(py::init<>());
 
     py::class_<ByHostname, PartialStrategy>(m, "ByHostname")
         .def(
diff --git a/test/ParallelIOTest.cpp b/test/ParallelIOTest.cpp
index cb3d2eea92..ee2923b178 100644
--- a/test/ParallelIOTest.cpp
+++ b/test/ParallelIOTest.cpp
@@ -2351,11 +2351,13 @@ void adios2_chunk_distribution()
         series.setRankTable(writingRanksHostnames.at(mpi_rank));
 
         auto E_x = series.iterations[0].meshes["E"]["x"];
-        openPMD::Dataset ds(openPMD::Datatype::INT, {unsigned(mpi_size), 10});
+        openPMD::Dataset ds(
+            openPMD::Datatype::INT, {unsigned(mpi_size * 2), 10});
         E_x.resetDataset(ds);
         std::vector<int> data(10, 0);
         std::iota(data.begin(), data.end(), 0);
-        E_x.storeChunk(data, {unsigned(mpi_rank), 0}, {1, 10});
+        E_x.storeChunk(data, {unsigned(mpi_rank * 2), 0}, {1, 10});
+        E_x.storeChunk(data, {unsigned(mpi_rank * 2 + 1), 0}, {1, 10});
         series.flush();
     }
 
@@ -2414,6 +2416,23 @@ void adios2_chunk_distribution()
             byHostnamePartialAssignment.notAssigned,
             rankMetaIn);
 
+        /*
+         * Same as above, but use RoundRobinOfSourceRanks this time, a strategy
+         * which ensures that each source rank's data is uniquely mapped to one
+         * sink rank. Needed in some domains.
+         */
+        ByHostname byHostname2(std::make_unique<RoundRobinOfSourceRanks>());
+        auto byHostnamePartialAssignment2 =
+            byHostname2.assign(chunkTable, rankMetaIn, readingRanksHostnames);
+        printAssignment(
+            "HOSTNAME2, ASSIGNED",
+            byHostnamePartialAssignment2.assigned,
+            readingRanksHostnames);
+        printChunktable(
+            "HOSTNAME2, LEFTOVER",
+            byHostnamePartialAssignment2.notAssigned,
+            rankMetaIn);
+
         /*
          * Assign chunks by hostnames, once more.
          * This time, apply a secondary distribution strategy to assign

From 79cec4160e560a0960d269fd9856fcd6ead2b3d3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franz=20P=C3=B6schel?= <franz.poeschel@gmail.com>
Date: Fri, 16 Aug 2024 12:05:39 +0200
Subject: [PATCH 10/27] Add Blocks distribution strategy

---
 include/openPMD/ChunkInfo.hpp                 | 16 ++++++
 .../mpi/OneDimensionalBlockSlicer.hpp         |  3 ++
 src/ChunkInfo.cpp                             | 26 ++++++++++
 .../mpi/OneDimensionalBlockSlicer.cpp         | 52 +++++++++++--------
 src/binding/python/ChunkInfo.cpp              |  5 ++
 test/ParallelIOTest.cpp                       |  5 ++
 6 files changed, 85 insertions(+), 22 deletions(-)

diff --git a/include/openPMD/ChunkInfo.hpp b/include/openPMD/ChunkInfo.hpp
index 2193a6c691..868478f8f6 100644
--- a/include/openPMD/ChunkInfo.hpp
+++ b/include/openPMD/ChunkInfo.hpp
@@ -235,6 +235,22 @@ namespace chunk_assignment
         virtual std::unique_ptr<Strategy> clone() const override;
     };
 
+    struct Blocks : Strategy
+    {
+    private:
+        unsigned int mpi_size, mpi_rank;
+
+    public:
+        Blocks(unsigned int mpi_rank, unsigned int mpi_size);
+
+        Assignment assign(
+            PartialAssignment,
+            RankMeta const &in,
+            RankMeta const &out) override;
+
+        [[nodiscard]] std::unique_ptr<Strategy> clone() const override;
+    };
+
     /**
      * @brief Strategy that assigns chunks to be read by processes within
      *        the same host that produced the chunk.
diff --git a/include/openPMD/benchmark/mpi/OneDimensionalBlockSlicer.hpp b/include/openPMD/benchmark/mpi/OneDimensionalBlockSlicer.hpp
index cb12da9350..f0d943d972 100644
--- a/include/openPMD/benchmark/mpi/OneDimensionalBlockSlicer.hpp
+++ b/include/openPMD/benchmark/mpi/OneDimensionalBlockSlicer.hpp
@@ -33,6 +33,9 @@ class OneDimensionalBlockSlicer : public BlockSlicer
 
     explicit OneDimensionalBlockSlicer(Extent::value_type dim = 0);
 
+    static std::pair<size_t, size_t>
+    n_th_block_inside(size_t length, size_t rank, size_t size);
+
     std::pair<Offset, Extent>
     sliceBlock(Extent &totalExtent, int size, int rank) override;
 
diff --git a/src/ChunkInfo.cpp b/src/ChunkInfo.cpp
index 6c5cff166f..ceb074b887 100644
--- a/src/ChunkInfo.cpp
+++ b/src/ChunkInfo.cpp
@@ -22,6 +22,7 @@
 #include "openPMD/ChunkInfo_internal.hpp"
 
 #include "openPMD/auxiliary/Mpi.hpp"
+#include "openPMD/benchmark/mpi/OneDimensionalBlockSlicer.hpp"
 
 #include <algorithm> // std::sort
 #include <deque>
@@ -29,6 +30,7 @@
 #include <iterator>
 #include <list>
 #include <map>
+#include <memory>
 #include <optional>
 #include <set>
 #include <utility>
@@ -335,6 +337,30 @@ namespace chunk_assignment
         return std::unique_ptr<Strategy>(new RoundRobinOfSourceRanks);
     }
 
+    Blocks::Blocks(unsigned int mpi_rank_in, unsigned int mpi_size_in)
+        : mpi_size(mpi_size_in), mpi_rank(mpi_rank_in)
+    {}
+
+    Assignment
+    Blocks::assign(PartialAssignment pa, RankMeta const &, RankMeta const &)
+    {
+        auto [notAssigned, res] = std::move(pa);
+        auto [myChunksFrom, myChunksTo] =
+            OneDimensionalBlockSlicer::n_th_block_inside(
+                notAssigned.size(), mpi_rank, mpi_size);
+        std::transform(
+            notAssigned.begin() + myChunksFrom,
+            notAssigned.begin() + (myChunksFrom + myChunksTo),
+            std::back_inserter(res[mpi_rank]),
+            [](WrittenChunkInfo &chunk) { return std::move(chunk); });
+        return res;
+    }
+
+    std::unique_ptr<Strategy> Blocks::clone() const
+    {
+        return std::unique_ptr<Strategy>(new Blocks(*this));
+    }
+
     ByHostname::ByHostname(std::unique_ptr<Strategy> withinNode)
         : m_withinNode(std::move(withinNode))
     {}
diff --git a/src/benchmark/mpi/OneDimensionalBlockSlicer.cpp b/src/benchmark/mpi/OneDimensionalBlockSlicer.cpp
index 7fbb734faa..bb71cc29db 100644
--- a/src/benchmark/mpi/OneDimensionalBlockSlicer.cpp
+++ b/src/benchmark/mpi/OneDimensionalBlockSlicer.cpp
@@ -29,29 +29,23 @@ OneDimensionalBlockSlicer::OneDimensionalBlockSlicer(Extent::value_type dim)
     : m_dim{dim}
 {}
 
-std::pair<Offset, Extent>
-OneDimensionalBlockSlicer::sliceBlock(Extent &totalExtent, int size, int rank)
+std::pair<size_t, size_t> OneDimensionalBlockSlicer::n_th_block_inside(
+    size_t length, size_t rank, size_t size)
 {
-    Offset offs(totalExtent.size(), 0);
-
     if (rank >= size)
     {
-        Extent extent(totalExtent.size(), 0);
-        return std::make_pair(std::move(offs), std::move(extent));
+        return {length, 0};
     }
 
-    auto dim = this->m_dim;
-
     // for more equal balancing, we want the start index
     // at the upper gaussian bracket of (N/n*rank)
     // where N the size of the dataset in dimension dim
     // and n the MPI size
     // for avoiding integer overflow, this is the same as:
     // (N div n)*rank + round((N%n)/n*rank)
-    auto f = [&totalExtent, size, dim](int threadRank) {
-        auto N = totalExtent[dim];
-        auto res = (N / size) * threadRank;
-        auto padDivident = (N % size) * threadRank;
+    auto f = [length, size](size_t rank_lambda) {
+        auto res = (length / size) * rank_lambda;
+        auto padDivident = (length % size) * rank_lambda;
         auto pad = padDivident / size;
         if (pad * size < padDivident)
         {
@@ -60,17 +54,31 @@ OneDimensionalBlockSlicer::sliceBlock(Extent &totalExtent, int size, int rank)
         return res + pad;
     };
 
-    offs[dim] = f(rank);
+    size_t offset = f(rank);
+    size_t extent = [&]() {
+        if (rank >= size - 1)
+        {
+            return length - offset;
+        }
+        else
+        {
+            return f(rank + 1) - offset;
+        }
+    }();
+    return {offset, extent};
+}
+
+std::pair<Offset, Extent>
+OneDimensionalBlockSlicer::sliceBlock(Extent &totalExtent, int size, int rank)
+{
+    Offset localOffset(totalExtent.size(), 0);
     Extent localExtent{totalExtent};
-    if (rank >= size - 1)
-    {
-        localExtent[dim] -= offs[dim];
-    }
-    else
-    {
-        localExtent[dim] = f(rank + 1) - offs[dim];
-    }
-    return std::make_pair(std::move(offs), std::move(localExtent));
+
+    auto [offset_dim, extent_dim] =
+        n_th_block_inside(totalExtent.at(this->m_dim), rank, size);
+    localOffset[m_dim] = offset_dim;
+    localExtent[m_dim] = extent_dim;
+    return std::make_pair(std::move(localOffset), std::move(localExtent));
 }
 
 std::unique_ptr<BlockSlicer> OneDimensionalBlockSlicer::clone() const
diff --git a/src/binding/python/ChunkInfo.cpp b/src/binding/python/ChunkInfo.cpp
index 6f94cdd8aa..5727e75403 100644
--- a/src/binding/python/ChunkInfo.cpp
+++ b/src/binding/python/ChunkInfo.cpp
@@ -302,6 +302,11 @@ void init_Chunk(py::module &m)
     py::class_<RoundRobin, Strategy>(m, "RoundRobin").def(py::init<>());
     py::class_<RoundRobinOfSourceRanks, Strategy>(m, "RoundRobinOfSourceRanks")
         .def(py::init<>());
+    py::class_<Blocks, Strategy>(m, "Blocks")
+        .def(
+            py::init<unsigned int, unsigned int>(),
+            py::arg("mpi_rank"),
+            py::arg("mpi_size"));
 
     py::class_<ByHostname, PartialStrategy>(m, "ByHostname")
         .def(
diff --git a/test/ParallelIOTest.cpp b/test/ParallelIOTest.cpp
index ee2923b178..8204326b94 100644
--- a/test/ParallelIOTest.cpp
+++ b/test/ParallelIOTest.cpp
@@ -2473,6 +2473,11 @@ void adios2_chunk_distribution()
             chunkTable, rankMetaIn, readingRanksHostnames);
         printAssignment(
             "CUBOID SLICE", cuboidSliceAssignment, readingRanksHostnames);
+
+        Blocks blocksStrategy(mpi_rank, mpi_size);
+        auto blocksAssignment = blocksStrategy.assign(
+            chunkTable, rankMetaIn, readingRanksHostnames);
+        printAssignment("BLOCKS", blocksAssignment, readingRanksHostnames);
     }
 }
 

From 46213d7ff777a9ecfa03ead0762d378a0331ca53 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franz=20P=C3=B6schel?= <franz.poeschel@gmail.com>
Date: Fri, 16 Aug 2024 14:15:20 +0200
Subject: [PATCH 11/27] Add BlocksOfSourceRanks strategy

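---
Notes (not part of the commit message):
 * BlocksOfSourceRanks groups the unassigned chunks by sourceID in a
   std::map and hands this rank one contiguous run of those groups, as
   computed by OneDimensionalBlockSlicer::n_th_block_inside over the
   number of distinct source ranks.
 * Here [myChunksFrom, myChunksTo] therefore count source ranks, not
   chunks: myChunksFrom groups are skipped and the next myChunksTo groups
   are moved into res[mpi_rank].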
 include/openPMD/ChunkInfo.hpp    | 16 +++++++++++++
 src/ChunkInfo.cpp                | 41 ++++++++++++++++++++++++++++++++
 src/binding/python/ChunkInfo.cpp |  5 ++++
 test/ParallelIOTest.cpp          |  8 +++++++
 4 files changed, 70 insertions(+)

diff --git a/include/openPMD/ChunkInfo.hpp b/include/openPMD/ChunkInfo.hpp
index 868478f8f6..daa4cc2c5c 100644
--- a/include/openPMD/ChunkInfo.hpp
+++ b/include/openPMD/ChunkInfo.hpp
@@ -251,6 +251,22 @@ namespace chunk_assignment
         [[nodiscard]] std::unique_ptr<Strategy> clone() const override;
     };
 
+    struct BlocksOfSourceRanks : Strategy
+    {
+    private:
+        unsigned int mpi_size, mpi_rank;
+
+    public:
+        BlocksOfSourceRanks(unsigned int mpi_rank, unsigned int mpi_size);
+
+        Assignment assign(
+            PartialAssignment,
+            RankMeta const &in,
+            RankMeta const &out) override;
+
+        [[nodiscard]] std::unique_ptr<Strategy> clone() const override;
+    };
+
     /**
      * @brief Strategy that assigns chunks to be read by processes within
      *        the same host that produced the chunk.
diff --git a/src/ChunkInfo.cpp b/src/ChunkInfo.cpp
index ceb074b887..6162a49f3a 100644
--- a/src/ChunkInfo.cpp
+++ b/src/ChunkInfo.cpp
@@ -361,6 +361,47 @@ namespace chunk_assignment
         return std::unique_ptr<Strategy>(new Blocks(*this));
     }
 
+    BlocksOfSourceRanks::BlocksOfSourceRanks(
+        unsigned int mpi_rank_in, unsigned int mpi_size_in)
+        : mpi_size(mpi_size_in), mpi_rank(mpi_rank_in)
+    {}
+
+    Assignment BlocksOfSourceRanks::assign(
+        PartialAssignment pa, RankMeta const &, RankMeta const &)
+    {
+        auto [notAssigned, res] = std::move(pa);
+        std::map<unsigned int, std::deque<WrittenChunkInfo>>
+            sortSourceChunksBySourceRank;
+        for (auto &chunk : notAssigned)
+        {
+            auto sourceID = chunk.sourceID;
+            sortSourceChunksBySourceRank[sourceID].push_back(std::move(chunk));
+        }
+        notAssigned.clear();
+        auto [myChunksFrom, myChunksTo] =
+            OneDimensionalBlockSlicer::n_th_block_inside(
+                sortSourceChunksBySourceRank.size(), mpi_rank, mpi_size);
+        auto it = sortSourceChunksBySourceRank.begin();
+        for (size_t i = 0; i < myChunksFrom; ++i)
+        {
+            ++it;
+        }
+        for (size_t i = 0; i < myChunksTo; ++i, ++it)
+        {
+            std::transform(
+                it->second.begin(),
+                it->second.end(),
+                std::back_inserter(res[mpi_rank]),
+                [](WrittenChunkInfo &chunk) { return std::move(chunk); });
+        }
+        return res;
+    }
+
+    std::unique_ptr<Strategy> BlocksOfSourceRanks::clone() const
+    {
+        return std::unique_ptr<Strategy>(new BlocksOfSourceRanks(*this));
+    }
+
     ByHostname::ByHostname(std::unique_ptr<Strategy> withinNode)
         : m_withinNode(std::move(withinNode))
     {}
diff --git a/src/binding/python/ChunkInfo.cpp b/src/binding/python/ChunkInfo.cpp
index 5727e75403..75e200f5ea 100644
--- a/src/binding/python/ChunkInfo.cpp
+++ b/src/binding/python/ChunkInfo.cpp
@@ -307,6 +307,11 @@ void init_Chunk(py::module &m)
             py::init<unsigned int, unsigned int>(),
             py::arg("mpi_rank"),
             py::arg("mpi_size"));
+    py::class_<BlocksOfSourceRanks, Strategy>(m, "BlocksOfSourceRanks")
+        .def(
+            py::init<unsigned int, unsigned int>(),
+            py::arg("mpi_rank"),
+            py::arg("mpi_size"));
 
     py::class_<ByHostname, PartialStrategy>(m, "ByHostname")
         .def(
diff --git a/test/ParallelIOTest.cpp b/test/ParallelIOTest.cpp
index 8204326b94..3d495a8b95 100644
--- a/test/ParallelIOTest.cpp
+++ b/test/ParallelIOTest.cpp
@@ -2478,6 +2478,14 @@ void adios2_chunk_distribution()
         auto blocksAssignment = blocksStrategy.assign(
             chunkTable, rankMetaIn, readingRanksHostnames);
         printAssignment("BLOCKS", blocksAssignment, readingRanksHostnames);
+
+        BlocksOfSourceRanks blocksOfSourceRanksStrategy(mpi_rank, mpi_size);
+        auto blocksOfSourceRanksAssignment = blocksOfSourceRanksStrategy.assign(
+            chunkTable, rankMetaIn, readingRanksHostnames);
+        printAssignment(
+            "BLOCKS OF SOURCE RANKS",
+            blocksOfSourceRanksAssignment,
+            readingRanksHostnames);
     }
 }
 

From 7024c3f6891966c3382a17a34c80ba1afe11928b Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 4 Apr 2025 08:33:00 +0000
Subject: [PATCH 12/27] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
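---
Notes (not part of the commit message):
 * Formatting only: the FromPartialStrategy py::init lambda is re-wrapped
   and re-indented; no tokens change.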
 src/binding/python/ChunkInfo.cpp | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/binding/python/ChunkInfo.cpp b/src/binding/python/ChunkInfo.cpp
index 75e200f5ea..55d5c824e5 100644
--- a/src/binding/python/ChunkInfo.cpp
+++ b/src/binding/python/ChunkInfo.cpp
@@ -294,10 +294,12 @@ void init_Chunk(py::module &m)
             py::arg("rank_meta_out") = RankMeta());
 
     py::class_<FromPartialStrategy, Strategy>(m, "FromPartialStrategy")
-        .def(py::init([](PartialStrategy const &firstPass,
-                         Strategy const &secondPass) {
-            return FromPartialStrategy(firstPass.clone(), secondPass.clone());
-        }));
+        .def(
+            py::init([](PartialStrategy const &firstPass,
+                        Strategy const &secondPass) {
+                return FromPartialStrategy(
+                    firstPass.clone(), secondPass.clone());
+            }));
 
     py::class_<RoundRobin, Strategy>(m, "RoundRobin").def(py::init<>());
     py::class_<RoundRobinOfSourceRanks, Strategy>(m, "RoundRobinOfSourceRanks")

From 3ac9533a0d9b5bc3c884d3f53c2c942afd15c4b4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franz=20P=C3=B6schel?= <franz.poeschel@gmail.com>
Date: Tue, 13 May 2025 15:58:58 +0200
Subject: [PATCH 13/27] Add rank info to assign() params

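---
Notes (not part of the commit message):
 * The reading rank and the number of reading ranks move from constructor
   arguments (Blocks, BlocksOfSourceRanks, ByCuboidSlice) to two new
   trailing parameters, my_rank and num_ranks, on every assign() overload
   of Strategy and PartialStrategy. The pure virtual signatures change, so
   existing subclasses and callers must be updated.
 * The Python bindings of assign() default to my_rank = 0, num_ranks = 1.
 * RoundRobin::assign no longer overwrites chunk.sourceID with the sink
   rank; the sink rank is only used as the key of the assignment.
 * ByHostname::assign passes a host-local index and the host-local rank
   count to the within-node strategy; that index stays 0 when my_rank is
   not among the ranks of the host being processed. Blocks and
   BlocksOfSourceRanks use my_rank as the key into the Assignment
   (res[my_rank]). NOTE(review): confirm that keying by the host-local
   index instead of the global rank is intended when these strategies are
   nested inside ByHostname.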
 include/openPMD/ChunkInfo.hpp                 |  78 ++++++----
 src/ChunkInfo.cpp                             | 137 ++++++++++++------
 src/binding/python/ChunkInfo.cpp              |  77 +++++-----
 .../python/openpmd_api/pipe/__main__.py       |  39 ++---
 test/CoreTest.cpp                             |   6 +-
 test/ParallelIOTest.cpp                       |  27 ++--
 6 files changed, 212 insertions(+), 152 deletions(-)

diff --git a/include/openPMD/ChunkInfo.hpp b/include/openPMD/ChunkInfo.hpp
index daa4cc2c5c..2994a9e4e7 100644
--- a/include/openPMD/ChunkInfo.hpp
+++ b/include/openPMD/ChunkInfo.hpp
@@ -121,7 +121,9 @@ namespace chunk_assignment
         Assignment assign(
             ChunkTable,
             RankMeta const &rankMetaIn,
-            RankMeta const &rankMetaOut);
+            RankMeta const &rankMetaOut,
+            size_t my_rank,
+            size_t num_ranks);
         /**
          * @brief Assign chunks to be loaded to reading processes.
          *
@@ -136,7 +138,9 @@ namespace chunk_assignment
         virtual Assignment assign(
             PartialAssignment partialAssignment,
             RankMeta const &in,
-            RankMeta const &out) = 0;
+            RankMeta const &out,
+            size_t my_rank,
+            size_t num_ranks) = 0;
 
         virtual std::unique_ptr<Strategy> clone() const = 0;
 
@@ -155,8 +159,12 @@ namespace chunk_assignment
      */
     struct PartialStrategy
     {
-        PartialAssignment
-        assign(ChunkTable table, RankMeta const &in, RankMeta const &out);
+        PartialAssignment assign(
+            ChunkTable table,
+            RankMeta const &in,
+            RankMeta const &out,
+            size_t my_rank,
+            size_t num_ranks);
         /**
          * @brief Assign chunks to be loaded to reading processes.
          *
@@ -173,7 +181,9 @@ namespace chunk_assignment
         virtual PartialAssignment assign(
             PartialAssignment partialAssignment,
             RankMeta const &in,
-            RankMeta const &out) = 0;
+            RankMeta const &out,
+            size_t my_rank,
+            size_t num_ranks) = 0;
 
         virtual std::unique_ptr<PartialStrategy> clone() const = 0;
 
@@ -201,7 +211,9 @@ namespace chunk_assignment
         virtual Assignment assign(
             PartialAssignment,
             RankMeta const &in,
-            RankMeta const &out) override;
+            RankMeta const &out,
+            size_t my_rank,
+            size_t num_ranks) override;
 
         virtual std::unique_ptr<Strategy> clone() const override;
 
@@ -220,7 +232,9 @@ namespace chunk_assignment
         Assignment assign(
             PartialAssignment,
             RankMeta const &in,
-            RankMeta const &out) override;
+            RankMeta const &out,
+            size_t my_rank,
+            size_t num_ranks) override;
 
         virtual std::unique_ptr<Strategy> clone() const override;
     };
@@ -230,39 +244,33 @@ namespace chunk_assignment
         Assignment assign(
             PartialAssignment,
             RankMeta const &in,
-            RankMeta const &out) override;
+            RankMeta const &out,
+            size_t my_rank,
+            size_t num_ranks) override;
 
         virtual std::unique_ptr<Strategy> clone() const override;
     };
 
     struct Blocks : Strategy
     {
-    private:
-        unsigned int mpi_size, mpi_rank;
-
-    public:
-        Blocks(unsigned int mpi_rank, unsigned int mpi_size);
-
         Assignment assign(
             PartialAssignment,
             RankMeta const &in,
-            RankMeta const &out) override;
+            RankMeta const &out,
+            size_t my_rank,
+            size_t num_ranks) override;
 
         [[nodiscard]] std::unique_ptr<Strategy> clone() const override;
     };
 
     struct BlocksOfSourceRanks : Strategy
     {
-    private:
-        unsigned int mpi_size, mpi_rank;
-
-    public:
-        BlocksOfSourceRanks(unsigned int mpi_rank, unsigned int mpi_size);
-
         Assignment assign(
             PartialAssignment,
             RankMeta const &in,
-            RankMeta const &out) override;
+            RankMeta const &out,
+            size_t my_rank,
+            size_t num_ranks) override;
 
         [[nodiscard]] std::unique_ptr<Strategy> clone() const override;
     };
@@ -282,7 +290,9 @@ namespace chunk_assignment
         PartialAssignment assign(
             PartialAssignment,
             RankMeta const &in,
-            RankMeta const &out) override;
+            RankMeta const &out,
+            size_t my_rank,
+            size_t num_ranks) override;
 
         virtual std::unique_ptr<PartialStrategy> clone() const override;
 
@@ -303,22 +313,20 @@ namespace chunk_assignment
     struct ByCuboidSlice : Strategy
     {
         ByCuboidSlice(
-            std::unique_ptr<BlockSlicer> blockSlicer,
-            Extent totalExtent,
-            unsigned int mpi_rank,
-            unsigned int mpi_size);
+            std::unique_ptr<BlockSlicer> blockSlicer, Extent totalExtent);
 
         Assignment assign(
             PartialAssignment,
             RankMeta const &in,
-            RankMeta const &out) override;
+            RankMeta const &out,
+            size_t my_rank,
+            size_t num_ranks) override;
 
         virtual std::unique_ptr<Strategy> clone() const override;
 
     private:
         std::unique_ptr<BlockSlicer> blockSlicer;
         Extent totalExtent;
-        unsigned int mpi_rank, mpi_size;
     };
 
     /**
@@ -346,7 +354,9 @@ namespace chunk_assignment
         Assignment assign(
             PartialAssignment,
             RankMeta const &in,
-            RankMeta const &out) override;
+            RankMeta const &out,
+            size_t my_rank,
+            size_t num_ranks) override;
 
         virtual std::unique_ptr<Strategy> clone() const override;
     };
@@ -367,7 +377,9 @@ namespace chunk_assignment
         Assignment assign(
             PartialAssignment,
             RankMeta const &in,
-            RankMeta const &out) override;
+            RankMeta const &out,
+            size_t my_rank,
+            size_t num_ranks) override;
 
         virtual std::unique_ptr<Strategy> clone() const override;
     };
@@ -388,7 +400,9 @@ namespace chunk_assignment
         Assignment assign(
             PartialAssignment,
             RankMeta const &in,
-            RankMeta const &out) override;
+            RankMeta const &out,
+            size_t my_rank,
+            size_t num_ranks) override;
 
         virtual std::unique_ptr<Strategy> clone() const override;
     };
diff --git a/src/ChunkInfo.cpp b/src/ChunkInfo.cpp
index 6162a49f3a..64b9d7f421 100644
--- a/src/ChunkInfo.cpp
+++ b/src/ChunkInfo.cpp
@@ -33,6 +33,7 @@
 #include <memory>
 #include <optional>
 #include <set>
+#include <stdexcept>
 #include <utility>
 
 #ifdef _WIN32
@@ -214,14 +215,22 @@ namespace chunk_assignment
     } // namespace
 
     Assignment Strategy::assign(
-        ChunkTable table, RankMeta const &rankIn, RankMeta const &rankOut)
+        ChunkTable table,
+        RankMeta const &rankIn,
+        RankMeta const &rankOut,
+        size_t my_rank,
+        size_t num_ranks)
     {
         if (rankOut.size() == 0)
         {
             throw std::runtime_error("[assignChunks] No output ranks defined");
         }
         return this->assign(
-            PartialAssignment(std::move(table)), rankIn, rankOut);
+            PartialAssignment(std::move(table)),
+            rankIn,
+            rankOut,
+            my_rank,
+            num_ranks);
     }
 
     PartialAssignment::PartialAssignment(
@@ -235,10 +244,18 @@ namespace chunk_assignment
     {}
 
     PartialAssignment PartialStrategy::assign(
-        ChunkTable table, RankMeta const &rankIn, RankMeta const &rankOut)
+        ChunkTable table,
+        RankMeta const &rankIn,
+        RankMeta const &rankOut,
+        size_t my_rank,
+        size_t num_ranks)
     {
         return this->assign(
-            PartialAssignment(std::move(table)), rankIn, rankOut);
+            PartialAssignment(std::move(table)),
+            rankIn,
+            rankOut,
+            my_rank,
+            num_ranks);
     }
 
     FromPartialStrategy::FromPartialStrategy(
@@ -250,12 +267,17 @@ namespace chunk_assignment
     Assignment FromPartialStrategy::assign(
         PartialAssignment partialAssignment,
         RankMeta const &in,
-        RankMeta const &out)
+        RankMeta const &out,
+        size_t my_rank,
+        size_t num_ranks)
     {
         return m_secondPass->assign(
-            m_firstPass->assign(std::move(partialAssignment), in, out),
+            m_firstPass->assign(
+                std::move(partialAssignment), in, out, my_rank, num_ranks),
             in,
-            out);
+            out,
+            my_rank,
+            num_ranks);
     }
 
     std::unique_ptr<Strategy> FromPartialStrategy::clone() const
@@ -267,7 +289,9 @@ namespace chunk_assignment
     Assignment RoundRobin::assign(
         PartialAssignment partialAssignment,
         RankMeta const &, // ignored parameter
-        RankMeta const &out)
+        RankMeta const &out,
+        size_t /* my_rank */,
+        size_t /* num_ranks */)
     {
         if (out.size() == 0)
         {
@@ -288,8 +312,8 @@ namespace chunk_assignment
         Assignment &sinkChunks = partialAssignment.assigned;
         for (auto &chunk : sourceChunks)
         {
-            chunk.sourceID = nextRank();
-            sinkChunks[chunk.sourceID].push_back(std::move(chunk));
+            auto rank = nextRank();
+            sinkChunks[rank].push_back(std::move(chunk));
         }
         return sinkChunks;
     }
@@ -302,7 +326,9 @@ namespace chunk_assignment
     Assignment RoundRobinOfSourceRanks::assign(
         PartialAssignment partialAssignment,
         RankMeta const &, // ignored parameter
-        RankMeta const &out)
+        RankMeta const &out,
+        size_t /* my_rank */,
+        size_t /* num_ranks */)
     {
         std::map<unsigned int, std::deque<WrittenChunkInfo>>
             sortSourceChunksBySourceRank;
@@ -337,21 +363,21 @@ namespace chunk_assignment
         return std::unique_ptr<Strategy>(new RoundRobinOfSourceRanks);
     }
 
-    Blocks::Blocks(unsigned int mpi_rank_in, unsigned int mpi_size_in)
-        : mpi_size(mpi_size_in), mpi_rank(mpi_rank_in)
-    {}
-
-    Assignment
-    Blocks::assign(PartialAssignment pa, RankMeta const &, RankMeta const &)
+    Assignment Blocks::assign(
+        PartialAssignment pa,
+        RankMeta const &,
+        RankMeta const &,
+        size_t my_rank,
+        size_t num_ranks)
     {
         auto [notAssigned, res] = std::move(pa);
         auto [myChunksFrom, myChunksTo] =
             OneDimensionalBlockSlicer::n_th_block_inside(
-                notAssigned.size(), mpi_rank, mpi_size);
+                notAssigned.size(), my_rank, num_ranks);
         std::transform(
             notAssigned.begin() + myChunksFrom,
             notAssigned.begin() + (myChunksFrom + myChunksTo),
-            std::back_inserter(res[mpi_rank]),
+            std::back_inserter(res[my_rank]),
             [](WrittenChunkInfo &chunk) { return std::move(chunk); });
         return res;
     }
@@ -361,13 +387,12 @@ namespace chunk_assignment
         return std::unique_ptr<Strategy>(new Blocks(*this));
     }
 
-    BlocksOfSourceRanks::BlocksOfSourceRanks(
-        unsigned int mpi_rank_in, unsigned int mpi_size_in)
-        : mpi_size(mpi_size_in), mpi_rank(mpi_rank_in)
-    {}
-
     Assignment BlocksOfSourceRanks::assign(
-        PartialAssignment pa, RankMeta const &, RankMeta const &)
+        PartialAssignment pa,
+        RankMeta const &,
+        RankMeta const &,
+        size_t my_rank,
+        size_t num_ranks)
     {
         auto [notAssigned, res] = std::move(pa);
         std::map<unsigned int, std::deque<WrittenChunkInfo>>
@@ -380,7 +405,7 @@ namespace chunk_assignment
         notAssigned.clear();
         auto [myChunksFrom, myChunksTo] =
             OneDimensionalBlockSlicer::n_th_block_inside(
-                sortSourceChunksBySourceRank.size(), mpi_rank, mpi_size);
+                sortSourceChunksBySourceRank.size(), my_rank, num_ranks);
         auto it = sortSourceChunksBySourceRank.begin();
         for (size_t i = 0; i < myChunksFrom; ++i)
         {
@@ -391,7 +416,7 @@ namespace chunk_assignment
             std::transform(
                 it->second.begin(),
                 it->second.end(),
-                std::back_inserter(res[mpi_rank]),
+                std::back_inserter(res[my_rank]),
                 [](WrittenChunkInfo &chunk) { return std::move(chunk); });
         }
         return res;
@@ -407,7 +432,11 @@ namespace chunk_assignment
     {}
 
     PartialAssignment ByHostname::assign(
-        PartialAssignment res, RankMeta const &in, RankMeta const &out)
+        PartialAssignment res,
+        RankMeta const &in,
+        RankMeta const &out,
+        size_t my_rank,
+        size_t /* num_ranks */)
     {
         // collect chunks by hostname
         std::map<std::string, ChunkTable> chunkGroups;
@@ -460,16 +489,25 @@ namespace chunk_assignment
             else
             {
                 RankMeta ranksOnTargetNode;
-                for (unsigned int rank : it->second)
+                size_t local_rank = 0;
+                size_t counter = 0;
+                for (auto rank : it->second)
                 {
                     ranksOnTargetNode[rank] = hostname;
+                    if (rank == my_rank)
+                    {
+                        local_rank = counter;
+                    }
+                    ++counter;
                 }
                 Assignment swapped;
                 swapped.swap(sinkChunks);
                 sinkChunks = m_withinNode->assign(
                     PartialAssignment(chunkGroup.second, std::move(swapped)),
                     in,
-                    ranksOnTargetNode);
+                    ranksOnTargetNode,
+                    local_rank,
+                    it->second.size());
             }
         }
         return res;
@@ -482,14 +520,9 @@ namespace chunk_assignment
     }
 
     ByCuboidSlice::ByCuboidSlice(
-        std::unique_ptr<BlockSlicer> blockSlicer_in,
-        Extent totalExtent_in,
-        unsigned int mpi_rank_in,
-        unsigned int mpi_size_in)
+        std::unique_ptr<BlockSlicer> blockSlicer_in, Extent totalExtent_in)
         : blockSlicer(std::move(blockSlicer_in))
         , totalExtent(std::move(totalExtent_in))
-        , mpi_rank(mpi_rank_in)
-        , mpi_size(mpi_size_in)
     {}
 
     namespace
@@ -628,14 +661,18 @@ namespace chunk_assignment
     } // namespace
 
     Assignment ByCuboidSlice::assign(
-        PartialAssignment res, RankMeta const &, RankMeta const &)
+        PartialAssignment res,
+        RankMeta const &,
+        RankMeta const &,
+        size_t my_rank,
+        size_t num_ranks)
     {
         ChunkTable &sourceSide = res.notAssigned;
         Assignment &sinkSide = res.assigned;
         Offset myOffset;
         Extent myExtent;
         std::tie(myOffset, myExtent) =
-            blockSlicer->sliceBlock(totalExtent, mpi_size, mpi_rank);
+            blockSlicer->sliceBlock(totalExtent, num_ranks, my_rank);
 
         for (auto &chunk : sourceSide)
         {
@@ -647,7 +684,7 @@ namespace chunk_assignment
                     goto outer_loop;
                 }
             }
-            sinkSide[mpi_rank].push_back(std::move(chunk));
+            sinkSide[my_rank].push_back(std::move(chunk));
         outer_loop:;
         }
 
@@ -656,8 +693,8 @@ namespace chunk_assignment
 
     std::unique_ptr<Strategy> ByCuboidSlice::clone() const
     {
-        return std::unique_ptr<Strategy>(new ByCuboidSlice(
-            blockSlicer->clone(), totalExtent, mpi_rank, mpi_size));
+        return std::unique_ptr<Strategy>(
+            new ByCuboidSlice(blockSlicer->clone(), totalExtent));
     }
 
     BinPacking::BinPacking(size_t splitAlongDimension_in)
@@ -665,7 +702,11 @@ namespace chunk_assignment
     {}
 
     Assignment BinPacking::assign(
-        PartialAssignment res, RankMeta const &, RankMeta const &sinkRanks)
+        PartialAssignment res,
+        RankMeta const &,
+        RankMeta const &sinkRanks,
+        size_t /* my_rank */,
+        size_t /* num_ranks */)
     {
         ChunkTable &sourceChunks = res.notAssigned;
         Assignment &sinkChunks = res.assigned;
@@ -768,7 +809,11 @@ namespace chunk_assignment
     FailingStrategy::FailingStrategy() = default;
 
     Assignment FailingStrategy::assign(
-        PartialAssignment assignment, RankMeta const &, RankMeta const &)
+        PartialAssignment assignment,
+        RankMeta const &,
+        RankMeta const &,
+        size_t /* my_rank */,
+        size_t /* num_ranks */)
     {
         if (assignment.notAssigned.empty())
         {
@@ -789,7 +834,11 @@ namespace chunk_assignment
     DiscardingStrategy::DiscardingStrategy() = default;
 
     Assignment DiscardingStrategy::assign(
-        PartialAssignment assignment, RankMeta const &, RankMeta const &)
+        PartialAssignment assignment,
+        RankMeta const &,
+        RankMeta const &,
+        size_t /* my_rank */,
+        size_t /* num_ranks */)
     {
         return assignment.assigned;
     }
diff --git a/src/binding/python/ChunkInfo.cpp b/src/binding/python/ChunkInfo.cpp
index 55d5c824e5..35cf798e7a 100644
--- a/src/binding/python/ChunkInfo.cpp
+++ b/src/binding/python/ChunkInfo.cpp
@@ -127,10 +127,12 @@ struct PyStrategy
     chunk_assignment::Assignment assign(
         chunk_assignment::PartialAssignment assignment,
         chunk_assignment::RankMeta const &in,
-        chunk_assignment::RankMeta const &out) override
+        chunk_assignment::RankMeta const &out,
+        size_t my_rank,
+        size_t num_ranks) override
     {
         return call_virtual<chunk_assignment::Assignment>(
-            "assign", std::move(assignment), in, out);
+            "assign", std::move(assignment), in, out, my_rank, num_ranks);
     }
 
     [[nodiscard]] std::unique_ptr<Strategy> clone() const override
@@ -146,10 +148,12 @@ struct PyPartialStrategy
     chunk_assignment::PartialAssignment assign(
         chunk_assignment::PartialAssignment assignment,
         chunk_assignment::RankMeta const &in,
-        chunk_assignment::RankMeta const &out) override
+        chunk_assignment::RankMeta const &out,
+        size_t my_rank,
+        size_t num_ranks) override
     {
         return call_virtual<chunk_assignment::PartialAssignment>(
-            "assign", std::move(assignment), in, out);
+            "assign", std::move(assignment), in, out, my_rank, num_ranks);
     }
 
     [[nodiscard]] std::unique_ptr<PartialStrategy> clone() const override
@@ -259,39 +263,59 @@ void init_Chunk(py::module &m)
         .def(py::init<>())
         .def(
             "assign",
-            py::overload_cast<ChunkTable, RankMeta const &, RankMeta const &>(
-                &PartialStrategy::assign),
+            py::overload_cast<
+                ChunkTable,
+                RankMeta const &,
+                RankMeta const &,
+                size_t,
+                size_t>(&PartialStrategy::assign),
             py::arg("chunk_table"),
             py::arg("rank_meta_in") = RankMeta(),
-            py::arg("rank_meta_out") = RankMeta())
+            py::arg("rank_meta_out") = RankMeta(),
+            py::arg("my_rank") = 0,
+            py::arg("num_ranks") = 1)
         .def(
             "assign",
             py::overload_cast<
                 PartialAssignment,
                 RankMeta const &,
-                RankMeta const &>(&PartialStrategy::assign),
+                RankMeta const &,
+                size_t,
+                size_t>(&PartialStrategy::assign),
             py::arg("partial_assignment"),
             py::arg("rank_meta_in") = RankMeta(),
-            py::arg("rank_meta_out") = RankMeta());
+            py::arg("rank_meta_out") = RankMeta(),
+            py::arg("my_rank") = 0,
+            py::arg("num_ranks") = 1);
 
     py::class_<Strategy, PyStrategy>(m, "Strategy")
         .def(py::init<>())
         .def(
             "assign",
-            py::overload_cast<ChunkTable, RankMeta const &, RankMeta const &>(
-                &Strategy::assign),
+            py::overload_cast<
+                ChunkTable,
+                RankMeta const &,
+                RankMeta const &,
+                size_t,
+                size_t>(&Strategy::assign),
             py::arg("chunk_table"),
             py::arg("rank_meta_in") = RankMeta(),
-            py::arg("rank_meta_out") = RankMeta())
+            py::arg("rank_meta_out") = RankMeta(),
+            py::arg("my_rank") = 0,
+            py::arg("num_ranks") = 1)
         .def(
             "assign",
             py::overload_cast<
                 PartialAssignment,
                 RankMeta const &,
-                RankMeta const &>(&Strategy::assign),
+                RankMeta const &,
+                size_t,
+                size_t>(&Strategy::assign),
             py::arg("partial_assignment"),
             py::arg("rank_meta_in") = RankMeta(),
-            py::arg("rank_meta_out") = RankMeta());
+            py::arg("rank_meta_out") = RankMeta(),
+            py::arg("my_rank") = 0,
+            py::arg("num_ranks") = 1);
 
     py::class_<FromPartialStrategy, Strategy>(m, "FromPartialStrategy")
         .def(
@@ -304,16 +328,9 @@ void init_Chunk(py::module &m)
     py::class_<RoundRobin, Strategy>(m, "RoundRobin").def(py::init<>());
     py::class_<RoundRobinOfSourceRanks, Strategy>(m, "RoundRobinOfSourceRanks")
         .def(py::init<>());
-    py::class_<Blocks, Strategy>(m, "Blocks")
-        .def(
-            py::init<unsigned int, unsigned int>(),
-            py::arg("mpi_rank"),
-            py::arg("mpi_size"));
+    py::class_<Blocks, Strategy>(m, "Blocks").def(py::init<>());
     py::class_<BlocksOfSourceRanks, Strategy>(m, "BlocksOfSourceRanks")
-        .def(
-            py::init<unsigned int, unsigned int>(),
-            py::arg("mpi_rank"),
-            py::arg("mpi_size"));
+        .def(py::init<>());
 
     py::class_<ByHostname, PartialStrategy>(m, "ByHostname")
         .def(
@@ -331,20 +348,12 @@ void init_Chunk(py::module &m)
 
     py::class_<ByCuboidSlice, Strategy>(m, "ByCuboidSlice")
         .def(
-            py::init([](BlockSlicer const &blockSlicer,
-                        Extent totalExtent,
-                        unsigned int mpi_rank,
-                        unsigned int mpi_size) {
+            py::init([](BlockSlicer const &blockSlicer, Extent totalExtent) {
                 return ByCuboidSlice(
-                    blockSlicer.clone(),
-                    std::move(totalExtent),
-                    mpi_rank,
-                    mpi_size);
+                    blockSlicer.clone(), std::move(totalExtent));
             }),
             py::arg("block_slicer"),
-            py::arg("total_extent"),
-            py::arg("mpi_rank"),
-            py::arg("mpi_size"));
+            py::arg("total_extent"));
 
     py::class_<BinPacking, Strategy>(m, "BinPacking")
         .def(py::init<>())
diff --git a/src/binding/python/openpmd_api/pipe/__main__.py b/src/binding/python/openpmd_api/pipe/__main__.py
index 40e616e3bd..ad9c31c48e 100644
--- a/src/binding/python/openpmd_api/pipe/__main__.py
+++ b/src/binding/python/openpmd_api/pipe/__main__.py
@@ -116,38 +116,34 @@ def __init__(self, source, dynamicView, offset, extent):
 
 # Example how to implement a simple partial strategy in Python
 class LoadOne(io.PartialStrategy):
-    def __init__(self, rank):
+    def __init__(self):
         super().__init__()
-        self.rank = rank
 
-    def assign(self, assignment, *_):
+    def assign(self, assignment, ranks_in, ranks_out, my_rank, num_ranks):
         element = assignment.not_assigned.pop()
-        if self.rank not in assignment.assigned:
-            assignment.assigned[self.rank] = [element]
+        if my_rank not in assignment.assigned:
+            assignment.assigned[my_rank] = [element]
         else:
-            assignment.assigned[self.rank].append(element)
+            assignment.assigned[my_rank].append(element)
         return assignment
 
 
 # Example how to implement a simple strategy in Python
 class LoadAll(io.Strategy):
 
-    def __init__(self, rank):
+    def __init__(self):
         super().__init__()
-        self.rank = rank
 
-    def assign(self, assignment, *_):
+    def assign(self, assignment, ranks_in, ranks_out, my_rank, num_ranks):
         res = assignment.assigned
-        if self.rank not in res:
-            res[self.rank] = assignment.not_assigned
+        if my_rank not in res:
+            res[my_rank] = assignment.not_assigned
         else:
-            res[self.rank].extend(assignment.not_assigned)
+            res[my_rank].extend(assignment.not_assigned)
         return res
 
 
 def distribution_strategy(dataset_extent,
-                          mpi_rank,
-                          mpi_size,
                           strategy_identifier=None):
     if strategy_identifier is None or not strategy_identifier:
         if 'OPENPMD_CHUNK_DISTRIBUTION' in os.environ:
@@ -158,24 +154,19 @@ def distribution_strategy(dataset_extent,
     match = re.search('hostname_(.*)_(.*)', strategy_identifier)
     if match is not None:
         inside_node = distribution_strategy(dataset_extent,
-                                            mpi_rank,
-                                            mpi_size,
                                             strategy_identifier=match.group(1))
         second_phase = distribution_strategy(
             dataset_extent,
-            mpi_rank,
-            mpi_size,
             strategy_identifier=match.group(2))
         return io.FromPartialStrategy(io.ByHostname(inside_node), second_phase)
     elif strategy_identifier == 'all':
-        return io.FromPartialStrategy(LoadOne(mpi_rank), LoadAll(mpi_rank))
+        return io.FromPartialStrategy(LoadOne(), LoadAll())
     elif strategy_identifier == 'roundrobin':
         return io.RoundRobin()
     elif strategy_identifier == 'binpacking':
         return io.BinPacking()
     elif strategy_identifier == 'slicedataset':
-        return io.ByCuboidSlice(io.OneDimensionalBlockSlicer(), dataset_extent,
-                                mpi_rank, mpi_size)
+        return io.ByCuboidSlice(io.OneDimensionalBlockSlicer(), dataset_extent)
     elif strategy_identifier == 'fail':
         return io.FailingStrategy()
     else:
@@ -319,10 +310,10 @@ def __copy(self, src, dest, current_path="/data/"):
                 dest.make_constant(src.get_attribute("value"))
             else:
                 chunk_table = src.available_chunks()
-                strategy = distribution_strategy(shape, self.comm.rank,
-                                                 self.comm.size)
+                strategy = distribution_strategy(shape)
                 my_chunks = strategy.assign(chunk_table, self.inranks,
-                                            self.outranks)
+                                            self.outranks,
+                                            self.comm.rank, self.comm.size)
                 for chunk in my_chunks[
                         self.comm.rank] if self.comm.rank in my_chunks else []:
                     if debug:
diff --git a/test/CoreTest.cpp b/test/CoreTest.cpp
index b9820d8222..b1e82b7498 100644
--- a/test/CoreTest.cpp
+++ b/test/CoreTest.cpp
@@ -101,7 +101,7 @@ void print(RankMeta const &meta, Assignment const &table)
                   << std::endl;
         for (auto const &chunk : chunkList)
         {
-            std::cout << "\t[Offset: ";
+            std::cout << "\t[From " << chunk.sourceID << "\tOffset: ";
             for (auto offset : chunk.offset)
             {
                 std::cout << offset << ", ";
@@ -127,8 +127,8 @@ TEST_CASE("chunk_assignment", "[core]")
     FromPartialStrategy fullStrategy(
         std::make_unique<ByHostname>(std::move(byHostname)),
         std::make_unique<BinPacking>());
-    Assignment res =
-        fullStrategy.assign(params.table, params.metaSource, params.metaSink);
+    Assignment res = fullStrategy.assign(
+        params.table, params.metaSource, params.metaSink, 0, 2);
     std::cout << "\nRESULTS:" << std::endl;
     test_chunk_assignment::print(params.metaSink, res);
 }
diff --git a/test/ParallelIOTest.cpp b/test/ParallelIOTest.cpp
index 3d495a8b95..c8f60e14fe 100644
--- a/test/ParallelIOTest.cpp
+++ b/test/ParallelIOTest.cpp
@@ -2389,7 +2389,7 @@ void adios2_chunk_distribution()
          */
         RoundRobin roundRobinStrategy;
         auto roundRobinAssignment = roundRobinStrategy.assign(
-            chunkTable, rankMetaIn, readingRanksHostnames);
+            chunkTable, rankMetaIn, readingRanksHostnames, mpi_rank, mpi_size);
         printAssignment(
             "ROUND ROBIN", roundRobinAssignment, readingRanksHostnames);
 
@@ -2405,8 +2405,8 @@ void adios2_chunk_distribution()
          */
         ByHostname byHostname(
             std::make_unique<BinPacking>(/* splitAlongDimension = */ 1));
-        auto byHostnamePartialAssignment =
-            byHostname.assign(chunkTable, rankMetaIn, readingRanksHostnames);
+        auto byHostnamePartialAssignment = byHostname.assign(
+            chunkTable, rankMetaIn, readingRanksHostnames, mpi_rank, mpi_size);
         printAssignment(
             "HOSTNAME, ASSIGNED",
             byHostnamePartialAssignment.assigned,
@@ -2422,8 +2422,8 @@ void adios2_chunk_distribution()
          * sink rank. Needed in some domains.
          */
         ByHostname byHostname2(std::make_unique<RoundRobinOfSourceRanks>());
-        auto byHostnamePartialAssignment2 =
-            byHostname2.assign(chunkTable, rankMetaIn, readingRanksHostnames);
+        auto byHostnamePartialAssignment2 = byHostname2.assign(
+            chunkTable, rankMetaIn, readingRanksHostnames, mpi_rank, mpi_size);
         printAssignment(
             "HOSTNAME2, ASSIGNED",
             byHostnamePartialAssignment2.assigned,
@@ -2446,7 +2446,7 @@ void adios2_chunk_distribution()
             std::make_unique<ByHostname>(std::move(byHostname)),
             std::make_unique<BinPacking>(/* splitAlongDimension = */ 1));
         auto fromPartialAssignment = fromPartialStrategy.assign(
-            chunkTable, rankMetaIn, readingRanksHostnames);
+            chunkTable, rankMetaIn, readingRanksHostnames, mpi_rank, mpi_size);
         printAssignment(
             "HOSTNAME WITH SECOND PASS",
             fromPartialAssignment,
@@ -2465,23 +2465,20 @@ void adios2_chunk_distribution()
          * in others such as this one, it's an unneeded overhead.)
          */
         ByCuboidSlice cuboidSliceStrategy(
-            std::make_unique<OneDimensionalBlockSlicer>(1),
-            E_x.getExtent(),
-            mpi_rank,
-            mpi_size);
+            std::make_unique<OneDimensionalBlockSlicer>(1), E_x.getExtent());
         auto cuboidSliceAssignment = cuboidSliceStrategy.assign(
-            chunkTable, rankMetaIn, readingRanksHostnames);
+            chunkTable, rankMetaIn, readingRanksHostnames, mpi_rank, mpi_size);
         printAssignment(
             "CUBOID SLICE", cuboidSliceAssignment, readingRanksHostnames);
 
-        Blocks blocksStrategy(mpi_rank, mpi_size);
+        Blocks blocksStrategy;
         auto blocksAssignment = blocksStrategy.assign(
-            chunkTable, rankMetaIn, readingRanksHostnames);
+            chunkTable, rankMetaIn, readingRanksHostnames, mpi_rank, mpi_size);
         printAssignment("BLOCKS", blocksAssignment, readingRanksHostnames);
 
-        BlocksOfSourceRanks blocksOfSourceRanksStrategy(mpi_rank, mpi_size);
+        BlocksOfSourceRanks blocksOfSourceRanksStrategy;
         auto blocksOfSourceRanksAssignment = blocksOfSourceRanksStrategy.assign(
-            chunkTable, rankMetaIn, readingRanksHostnames);
+            chunkTable, rankMetaIn, readingRanksHostnames, mpi_rank, mpi_size);
         printAssignment(
             "BLOCKS OF SOURCE RANKS",
             blocksOfSourceRanksAssignment,

From 337d21befcc504190d49951d68f9d2e43b5a0673 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franz=20P=C3=B6schel?= <franz.poeschel@gmail.com>
Date: Tue, 15 Jul 2025 12:01:14 +0200
Subject: [PATCH 14/27] Fix the Python trampoline logic

---
 src/binding/python/ChunkInfo.cpp | 37 ++++++++++----------------------
 1 file changed, 11 insertions(+), 26 deletions(-)

diff --git a/src/binding/python/ChunkInfo.cpp b/src/binding/python/ChunkInfo.cpp
index 35cf798e7a..ed31202c82 100644
--- a/src/binding/python/ChunkInfo.cpp
+++ b/src/binding/python/ChunkInfo.cpp
@@ -25,6 +25,7 @@
 #include "openPMD/binding/python/Common.hpp"
 
 #include <exception>
+#include <pybind11/pytypes.h>
 #include <string>
 #include <utility> // std::move
 
@@ -46,15 +47,6 @@
 template <typename ChildCpp, typename ChildPy>
 struct ClonableTrampoline
 {
-    struct OriginalInstance
-    {
-        py::handle pythonObject;
-
-        ~OriginalInstance()
-        {
-            pythonObject.dec_ref();
-        }
-    };
     /*
      * If the shared pointer is empty, this object is the original object owned
      * by Python and the Python handle can be acquired by:
@@ -64,16 +56,13 @@ struct ClonableTrampoline
      * By only storing this member in copied instances, but not in the original
      * instance, we avoid a memory cycle and ensure clean destruction.
      */
-    std::shared_ptr<OriginalInstance> m_originalInstance;
+    std::shared_ptr<py::object> m_originalInstance;
 
-    [[nodiscard]] py::handle get_python_handle() const
+    [[nodiscard]] py::object get_python_handle() const
     {
         if (m_originalInstance)
         {
-            // std::cout << "Refcount "
-            //           << m_originalInstance->pythonObject.ref_count()
-            //           << std::endl;
-            return m_originalInstance->pythonObject;
+            return *m_originalInstance;
         }
         else
         {
@@ -86,7 +75,7 @@ struct ClonableTrampoline
     Res call_virtual(std::string const &nameOfPythonMethod, Args &&...args)
     {
         py::gil_scoped_acquire gil;
-        auto ptr = get_python_handle().template cast<ChildCpp *>();
+        auto ptr = get_python_handle().template cast<ChildPy *>();
         auto fun = py::get_override(ptr, nameOfPythonMethod.c_str());
         if (!fun)
         {
@@ -107,14 +96,9 @@ struct ClonableTrampoline
         }
         else
         {
-            OriginalInstance oi;
-            oi.pythonObject = py::cast(self);
-            // no idea why we would need this twice, but we do
-            oi.pythonObject.inc_ref();
-            oi.pythonObject.inc_ref();
             auto res = std::make_unique<ChildPy>(*self);
             res->m_originalInstance =
-                std::make_shared<OriginalInstance>(std::move(oi));
+                std::make_shared<py::object>(py::cast(self));
             return res;
         }
     }
@@ -259,8 +243,7 @@ void init_Chunk(py::module &m)
 
     py::bind_map<RankMeta>(m, "RankMeta");
 
-    py::class_<PartialStrategy, PyPartialStrategy>(m, "PartialStrategy")
-        .def(py::init<>())
+    py::class_<PartialStrategy>(m, "PartialStrategyCpp")
         .def(
             "assign",
             py::overload_cast<
@@ -287,9 +270,10 @@ void init_Chunk(py::module &m)
             py::arg("rank_meta_out") = RankMeta(),
             py::arg("my_rank") = 0,
             py::arg("num_ranks") = 1);
+    py::class_<PyPartialStrategy, PartialStrategy>(m, "PartialStrategy")
+        .def(py::init<>());
 
-    py::class_<Strategy, PyStrategy>(m, "Strategy")
-        .def(py::init<>())
+    py::class_<Strategy>(m, "StrategyCpp")
         .def(
             "assign",
             py::overload_cast<
@@ -316,6 +300,7 @@ void init_Chunk(py::module &m)
             py::arg("rank_meta_out") = RankMeta(),
             py::arg("my_rank") = 0,
             py::arg("num_ranks") = 1);
+    py::class_<PyStrategy, Strategy>(m, "Strategy").def(py::init<>());
 
     py::class_<FromPartialStrategy, Strategy>(m, "FromPartialStrategy")
         .def(

From 14b2559c5bfd3b8e16b180ca03367f4a3d2d9458 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franz=20P=C3=B6schel?= <franz.poeschel@gmail.com>
Date: Fri, 14 Jul 2023 14:55:10 +0200
Subject: [PATCH 15/27] Use discard strategy as second run, only consider my
 own hostname

---
 src/binding/python/openpmd_api/pipe/__main__.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/binding/python/openpmd_api/pipe/__main__.py b/src/binding/python/openpmd_api/pipe/__main__.py
index ad9c31c48e..0f3348918b 100644
--- a/src/binding/python/openpmd_api/pipe/__main__.py
+++ b/src/binding/python/openpmd_api/pipe/__main__.py
@@ -169,6 +169,8 @@ def distribution_strategy(dataset_extent,
         return io.ByCuboidSlice(io.OneDimensionalBlockSlicer(), dataset_extent)
     elif strategy_identifier == 'fail':
         return io.FailingStrategy()
+    elif strategy_identifier == 'discard':
+        return io.DiscardingStrategy()
     else:
         raise RuntimeError("Unknown distribution strategy: " +
                            strategy_identifier)

From 941fe8191f1cd822b12710a1f1b49d33def75fa2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franz=20P=C3=B6schel?= <franz.poeschel@gmail.com>
Date: Tue, 7 Mar 2023 16:47:47 +0100
Subject: [PATCH 16/27] IncreaseGranularity strategy

supports different granularities at write and read sides
---
 .../python/openpmd_api/pipe/__main__.py       | 125 +++++++++++++++++-
 1 file changed, 123 insertions(+), 2 deletions(-)

diff --git a/src/binding/python/openpmd_api/pipe/__main__.py b/src/binding/python/openpmd_api/pipe/__main__.py
index 0f3348918b..0ec5c91599 100644
--- a/src/binding/python/openpmd_api/pipe/__main__.py
+++ b/src/binding/python/openpmd_api/pipe/__main__.py
@@ -127,8 +127,145 @@ def assign(self, assignment, ranks_in, ranks_out, my_rank, num_ranks):
             assignment.assigned[my_rank].append(element)
         return assignment
 
+class IncreaseGranularity(io.PartialStrategy):
+    def __init__(
+        self,
+        granularity_in,
+        granularity_out,
+        inner_distribution,
+    ):
+        """
+        Wrap `inner_distribution` so that it sees groups of hosts.
+
+        :param granularity_in: Number of consecutive writer hosts that are
+            merged into one host group.
+        :param granularity_out: Number of consecutive reader hosts that are
+            merged into one host group.
+        :param inner_distribution: Strategy whose `assign` is called with
+            the host-group names in place of the real hostnames.
+        """
+        super().__init__()
+        self.inner_distribution = inner_distribution
+        self.granularity_in = granularity_in
+        self.granularity_out = granularity_out
+
+    def assign(self, assignment, in_ranks, out_ranks, my_rank, num_ranks):
+        """
+        Replace hostnames by host-group names and delegate to the inner
+        distribution. The translated rank tables are computed on the first
+        call and reused by later calls.
+        """
+        if hasattr(self, "in_ranks_inner"):
+            return self.inner_distribution.assign(
+                assignment, self.in_ranks_inner, self.out_ranks_inner,
+                my_rank, num_ranks)
+
+        def hosts_in_order(rank_assignment):
+            already_seen = set()
+            res = []
+            for (_, hostname) in rank_assignment.items():
+                if hostname not in already_seen:
+                    already_seen.add(hostname)
+                    res.append(hostname)
+            return res
+
+        in_hosts_in_order = hosts_in_order(in_ranks)
+        out_hosts_in_order = hosts_in_order(out_ranks)
+
+        # Creates names "0", "1", "2", ... for the meta hosts and maps the real
+        # host names to the meta host names
+        def hostname_to_hostgroup(ordered_hosts, granularity):
+            res = {}  # real host -> host group
+            current_meta_host = 0
+            granularity_counter = 0
+            for host in ordered_hosts:
+                res[host] = str(current_meta_host)
+                granularity_counter += 1
+                if granularity_counter >= granularity:
+                    granularity_counter = 0
+                    current_meta_host += 1
+            return res
+
+        in_hostname_to_hostgroup = hostname_to_hostgroup(
+            in_hosts_in_order, self.granularity_in
+        )
+        out_hostname_to_hostgroup = hostname_to_hostgroup(
+            out_hosts_in_order, self.granularity_out
+        )
+
+        # Creates `in_ranks` and `out_ranks` for the inner call, based on the
+        # meta hosts created above
+        def inner_rank_assignment(outer_rank_assignment, hostname_to_hostgroup):
+            res = {}
+            for (rank, hostname) in outer_rank_assignment.items():
+                res[rank] = hostname_to_hostgroup[hostname]
+            return res
+
+        self.in_ranks_inner = inner_rank_assignment(in_ranks, in_hostname_to_hostgroup)
+        self.out_ranks_inner = inner_rank_assignment(
+            out_ranks, out_hostname_to_hostgroup
+        )
+
+        # # we only care about the local host (why tho?)
+        # local_host = self.out_ranks_inner[my_rank]
+        # # restrict out_ranks_inner to those ranks
+        # # that run on the current meta host
+        # self.out_ranks_inner = {
+        #     rank: host
+        #     for rank, host in self.out_ranks_inner.items()
+        #     if host == local_host
+        # }
+
+        return self.inner_distribution.assign(
+            assignment, self.in_ranks_inner, self.out_ranks_inner, my_rank, num_ranks
+        )
+
+class MergingStrategy(io.Strategy):
+    def __init__(self, inner_strategy):
+        super().__init__()
+        self.inner_strategy = inner_strategy
+
+    def assign(self, assignment, in_ranks, out_ranks,
+               my_rank=0, num_ranks=1):
+        """
+        Run the inner strategy, then rewrite each reader's chunk list from
+        the result of merge_chunks_from_same_sourceID().
+
+        `my_rank` and `num_ranks` are forwarded unchanged; their defaults
+        mirror those of the bound `assign` overloads.
+        """
+        res = self.inner_strategy.assign(
+            assignment, in_ranks, out_ranks, my_rank, num_ranks)
+        for out_rank, assignment in res.items():
+            merged = assignment.merge_chunks_from_same_sourceID()
+            assignment.clear()
+            for in_rank, chunks in merged.items():
+                for chunk in chunks:
+                    assignment.append(
+                        io.WrittenChunkInfo(chunk.offset, chunk.extent, in_rank)
+                    )
+        return res
+
 
-# Example how to implement a simple strategy in Python
+# strategy = IncreaseGranularity(2, 1)
+# assignment = [
+#     io.WrittenChunkInfo([0], [1], 0),
+#     io.WrittenChunkInfo([1], [1], 1),
+#     io.WrittenChunkInfo([2], [1], 2),
+#     io.WrittenChunkInfo([3], [1], 3),
+# ]
+# in_ranks = {0: "host0", 1: "host1", 2: "host3", 3: "host4"}
+# out_ranks = {0: "host2", 1: "host5"}
+# res = strategy.assign(assignment, in_ranks, out_ranks)
+# print(f"NOT ASSIGNED: {len(res.not_assigned)} chunks")
+# print("ASSIGNED:")
+# for rank, chunks in res.assigned.items():
+#     print(f"\tRANK {rank}:", end='')
+#     for chunk in chunks:
+#         print(f" [{chunk.offset}-{chunk.extent}]", end='')
+#     print()
+
+#Example how to implement a simple strategy in Python
 class LoadAll(io.Strategy):
 
     def __init__(self):
@@ -159,8 +272,16 @@ def distribution_strategy(dataset_extent,
             dataset_extent,
             strategy_identifier=match.group(2))
         return io.FromPartialStrategy(io.ByHostname(inside_node), second_phase)
+    elif strategy_identifier == 'fan_in':
+        # OPENPMD_FAN_IN: number of writer hosts merged into one host group
+        granularity = os.environ['OPENPMD_FAN_IN']
+        granularity = int(granularity)
+        return IncreaseGranularity(
+            granularity, 1,
+            io.FromPartialStrategy(io.ByHostname(io.RoundRobin()),
+                                   io.DiscardingStrategy()))
     elif strategy_identifier == 'all':
         return io.FromPartialStrategy(LoadOne(), LoadAll())
     elif strategy_identifier == 'roundrobin':
         return io.RoundRobin()
     elif strategy_identifier == 'binpacking':
@@ -312,6 +432,7 @@ def __copy(self, src, dest, current_path="/data/"):
                 dest.make_constant(src.get_attribute("value"))
             else:
                 chunk_table = src.available_chunks()
+                # todo buffer the strategy
                 strategy = distribution_strategy(shape)
                 my_chunks = strategy.assign(chunk_table, self.inranks,
                                             self.outranks,

From 1e6889927a865e7b318611144aeb977adce4a8a2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franz=20P=C3=B6schel?= <franz.poeschel@gmail.com>
Date: Wed, 7 May 2025 17:00:03 +0200
Subject: [PATCH 17/27] Add blocksofsourcerank to pipe script

---
 src/binding/python/openpmd_api/pipe/__main__.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/binding/python/openpmd_api/pipe/__main__.py b/src/binding/python/openpmd_api/pipe/__main__.py
index 0ec5c91599..79e3c730ec 100644
--- a/src/binding/python/openpmd_api/pipe/__main__.py
+++ b/src/binding/python/openpmd_api/pipe/__main__.py
@@ -291,6 +291,8 @@ def distribution_strategy(dataset_extent,
         return io.FailingStrategy()
     elif strategy_identifier == 'discard':
         return io.DiscardingStrategy()
+    elif strategy_identifier == 'blocksofsourceranks':
+        return io.BlocksOfSourceRanks()
     else:
         raise RuntimeError("Unknown distribution strategy: " +
                            strategy_identifier)

From 2511f6c1ef4aecefaf0474c6918c654c2bdc16ee Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franz=20P=C3=B6schel?= <franz.poeschel@gmail.com>
Date: Tue, 15 Jul 2025 14:33:16 +0200
Subject: [PATCH 18/27] CI fixes

---
 include/openPMD/ChunkInfo.hpp                 | 20 ++++++++++++++++++
 .../python/openpmd_api/pipe/__main__.py       | 21 ++++++++++++-------
 2 files changed, 34 insertions(+), 7 deletions(-)

diff --git a/include/openPMD/ChunkInfo.hpp b/include/openPMD/ChunkInfo.hpp
index 2994a9e4e7..96733c85d2 100644
--- a/include/openPMD/ChunkInfo.hpp
+++ b/include/openPMD/ChunkInfo.hpp
@@ -133,6 +133,16 @@ namespace chunk_assignment
          *        Merge the unassigned chunks into the partially assigned table.
          * @param in Meta information on writing processes, e.g. hostnames.
          * @param out Meta information on reading processes, e.g. hostnames.
+         * @param my_rank Rank identifier for the current process. Will be
+         *        considered by some distribution strategies that may be called
+         *        for only a subselection of the data space (e.g. for
+         *        distributing data within processes on the same compute node
+         *        in a cluster).
+         * @param num_ranks Number of processes among which chunks are to be
+         *        distributed. Will be considered by some distribution
+         *        strategies that may be called for only a subselection of the
+         *        data space (e.g. for distributing data within processes on the
+         *        same compute node in a cluster).
          * @return ChunkTable A table that assigns chunks to reading processes.
          */
         virtual Assignment assign(
@@ -174,6 +184,16 @@ namespace chunk_assignment
          *        Merge the unassigned chunks into the partially assigned table.
          * @param in Meta information on writing processes, e.g. hostnames.
          * @param out Meta information on reading processes, e.g. hostnames.
+         * @param my_rank Rank identifier for the current process. Will be
+         *        considered by some distribution strategies that may be called
+         *        for only a subselection of the data space (e.g. for
+         *        distributing data within processes on the same compute node
+         *        in a cluster).
+         * @param num_ranks Number of processes among which chunks are to be
+         *        distributed. Will be considered by some distribution
+         *        strategies that may be called for only a subselection of the
+         *        data space (e.g. for distributing data within processes on the
+         *        same compute node in a cluster).
          * @return PartialAssignment Two chunktables, one of leftover chunks
          *         that were not assigned and one that assigns chunks to
          *         reading processes.
diff --git a/src/binding/python/openpmd_api/pipe/__main__.py b/src/binding/python/openpmd_api/pipe/__main__.py
index 79e3c730ec..9eae8085d1 100644
--- a/src/binding/python/openpmd_api/pipe/__main__.py
+++ b/src/binding/python/openpmd_api/pipe/__main__.py
@@ -127,6 +127,7 @@ def assign(self, assignment, ranks_in, ranks_out, my_rank, num_ranks):
             assignment.assigned[my_rank].append(element)
         return assignment
 
+
 class IncreaseGranularity(io.PartialStrategy):
     def __init__(
         self,
@@ -180,13 +181,15 @@ def hostname_to_hostgroup(ordered_hosts, granularity):
 
         # Creates `in_ranks` and `out_ranks` for the inner call, based on the
         # meta hosts created above
-        def inner_rank_assignment(outer_rank_assignment, hostname_to_hostgroup):
+        def inner_rank_assignment(
+                outer_rank_assignment, hostname_to_hostgroup):
             res = {}
             for (rank, hostname) in outer_rank_assignment.items():
                 res[rank] = hostname_to_hostgroup[hostname]
             return res
 
-        self.in_ranks_inner = inner_rank_assignment(in_ranks, in_hostname_to_hostgroup)
+        self.in_ranks_inner = \
+            inner_rank_assignment(in_ranks, in_hostname_to_hostgroup)
         self.out_ranks_inner = inner_rank_assignment(
             out_ranks, out_hostname_to_hostgroup
         )
@@ -202,9 +205,12 @@ def inner_rank_assignment(outer_rank_assignment, hostname_to_hostgroup):
         # }
 
         return self.inner_distribution.assign(
-            assignment, self.in_ranks_inner, self.out_ranks_inner, my_rank, num_ranks
+            assignment,
+            self.in_ranks_inner, self.out_ranks_inner,
+            my_rank, num_ranks
         )
 
+
 class MergingStrategy(io.Strategy):
     def __init__(self, inner_strategy):
         super().__init__()
@@ -218,7 +224,8 @@ def assign(self, assignment, in_ranks, out_ranks):
             for in_rank, chunks in merged.items():
                 for chunk in chunks:
                     assignment.append(
-                        io.WrittenChunkInfo(chunk.offset, chunk.extent, in_rank)
+                        io.WrittenChunkInfo(
+                            chunk.offset, chunk.extent, in_rank)
                     )
         return res
 
@@ -241,7 +248,7 @@ def assign(self, assignment, in_ranks, out_ranks):
 #         print(f" [{chunk.offset}-{chunk.extent}]", end='')
 #     print()
 
-#Example how to implement a simple strategy in Python
+# Example how to implement a simple strategy in Python
 class LoadAll(io.Strategy):
 
     def __init__(self):
@@ -266,8 +273,8 @@ def distribution_strategy(dataset_extent,
             strategy_identifier = 'hostname_binpacking_slicedataset'  # default
     match = re.search('hostname_(.*)_(.*)', strategy_identifier)
     if match is not None:
-        inside_node = distribution_strategy(dataset_extent,
-                                            strategy_identifier=match.group(1))
+        inside_node = distribution_strategy(
+            dataset_extent, strategy_identifier=match.group(1))
         second_phase = distribution_strategy(
             dataset_extent,
             strategy_identifier=match.group(2))

From 409b2dd74d09579b6ce0a11cc38b33fd1ba89168 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franz=20P=C3=B6schel?= <franz.poeschel@gmail.com>
Date: Tue, 15 Jul 2025 16:51:21 +0200
Subject: [PATCH 19/27] Move BlockSlicer somewhere else

---
 CMakeLists.txt                                |  2 +-
 include/openPMD/ChunkInfo.hpp                 |  9 ++--
 include/openPMD/Streaming.hpp                 |  9 ----
 include/openPMD/auxiliary/BlockSlicer.hpp     | 53 +++++++++++++++++++
 .../auxiliary/OneDimensionalBlockSlicer.hpp   | 44 +++++++++++++++
 include/openPMD/benchmark/mpi/BlockSlicer.hpp | 34 +++---------
 .../mpi/OneDimensionalBlockSlicer.hpp         | 24 +++------
 src/ChunkInfo.cpp                             |  9 ++--
 .../OneDimensionalBlockSlicer.cpp             |  8 ++-
 src/binding/python/ChunkInfo.cpp              | 10 ++--
 test/ParallelIOTest.cpp                       |  5 +-
 11 files changed, 131 insertions(+), 76 deletions(-)
 create mode 100644 include/openPMD/auxiliary/BlockSlicer.hpp
 create mode 100644 include/openPMD/auxiliary/OneDimensionalBlockSlicer.hpp
 rename src/{benchmark/mpi => auxiliary}/OneDimensionalBlockSlicer.cpp (95%)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index b6f01f3d2e..49b62a8bbd 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -422,7 +422,7 @@ set(CORE_SOURCE
         src/backend/PatchRecord.cpp
         src/backend/PatchRecordComponent.cpp
         src/backend/Writable.cpp
-        src/benchmark/mpi/OneDimensionalBlockSlicer.cpp
+        src/auxiliary/OneDimensionalBlockSlicer.cpp
         src/helper/list_series.cpp
         src/snapshots/ContainerImpls.cpp
         src/snapshots/ContainerTraits.cpp
diff --git a/include/openPMD/ChunkInfo.hpp b/include/openPMD/ChunkInfo.hpp
index 96733c85d2..b0b9e549f3 100644
--- a/include/openPMD/ChunkInfo.hpp
+++ b/include/openPMD/ChunkInfo.hpp
@@ -23,14 +23,14 @@
 #include "openPMD/config.hpp"
 
 #include "openPMD/Dataset.hpp" // Offset, Extent
-#include "openPMD/benchmark/mpi/BlockSlicer.hpp"
-#include <memory>
+#include "openPMD/auxiliary/BlockSlicer.hpp"
 
 #if openPMD_HAVE_MPI
 #include <mpi.h>
 #endif
 
 #include <map>
+#include <memory>
 #include <string>
 #include <vector>
 
@@ -333,7 +333,8 @@ namespace chunk_assignment
     struct ByCuboidSlice : Strategy
     {
         ByCuboidSlice(
-            std::unique_ptr<BlockSlicer> blockSlicer, Extent totalExtent);
+            std::unique_ptr<auxiliary::BlockSlicer> blockSlicer,
+            Extent totalExtent);
 
         Assignment assign(
             PartialAssignment,
@@ -345,7 +346,7 @@ namespace chunk_assignment
         virtual std::unique_ptr<Strategy> clone() const override;
 
     private:
-        std::unique_ptr<BlockSlicer> blockSlicer;
+        std::unique_ptr<auxiliary::BlockSlicer> blockSlicer;
         Extent totalExtent;
     };
 
diff --git a/include/openPMD/Streaming.hpp b/include/openPMD/Streaming.hpp
index 8d1a283761..47e7c39681 100644
--- a/include/openPMD/Streaming.hpp
+++ b/include/openPMD/Streaming.hpp
@@ -1,14 +1,5 @@
 #pragma once
 
-#include <list>
-#include <string>
-#include <utility>
-#include <vector>
-
-#include "openPMD/Dataset.hpp"
-#include "openPMD/benchmark/mpi/BlockSlicer.hpp"
-#include <unordered_map>
-
 namespace openPMD
 {
 /**
diff --git a/include/openPMD/auxiliary/BlockSlicer.hpp b/include/openPMD/auxiliary/BlockSlicer.hpp
new file mode 100644
index 0000000000..a2569aa002
--- /dev/null
+++ b/include/openPMD/auxiliary/BlockSlicer.hpp
@@ -0,0 +1,53 @@
+/* Copyright 2018-2021 Franz Poeschel
+ *
+ * This file is part of openPMD-api.
+ *
+ * openPMD-api is free software: you can redistribute it and/or modify
+ * it under the terms of of either the GNU General Public License or
+ * the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * openPMD-api is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License and the GNU Lesser General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * and the GNU Lesser General Public License along with openPMD-api.
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "openPMD/Dataset.hpp"
+
+#include <memory>
+
+namespace openPMD::auxiliary
+{
+/**
+ * Abstract class to associate a thread with its local cuboid in the total
+ * cuboid.
+ */
+class BlockSlicer
+{
+public:
+    /**
+     * Associate the current thread with its cuboid.
+     * @param totalExtent The total extent of the cuboid.
+     * @param size The number of threads to be used (not greater than MPI size).
+     * @param rank The MPI rank.
+     * @return A pair of the cuboid's offset and extent.
+     */
+    virtual std::pair<Offset, Extent>
+    sliceBlock(Extent &totalExtent, int size, int rank) = 0;
+
+    virtual std::unique_ptr<BlockSlicer> clone() const = 0;
+
+    /** This class will be derived from
+     */
+    virtual ~BlockSlicer() = default;
+};
+} // namespace openPMD::auxiliary
diff --git a/include/openPMD/auxiliary/OneDimensionalBlockSlicer.hpp b/include/openPMD/auxiliary/OneDimensionalBlockSlicer.hpp
new file mode 100644
index 0000000000..79fb68dcdf
--- /dev/null
+++ b/include/openPMD/auxiliary/OneDimensionalBlockSlicer.hpp
@@ -0,0 +1,44 @@
+/* Copyright 2018-2021 Franz Poeschel
+ *
+ * This file is part of openPMD-api.
+ *
+ * openPMD-api is free software: you can redistribute it and/or modify
+ * it under the terms of of either the GNU General Public License or
+ * the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * openPMD-api is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License and the GNU Lesser General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * and the GNU Lesser General Public License along with openPMD-api.
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "openPMD/Dataset.hpp"
+#include "openPMD/auxiliary/BlockSlicer.hpp"
+
+namespace openPMD::auxiliary
+{
+class OneDimensionalBlockSlicer : public BlockSlicer
+{
+public:
+    Extent::value_type m_dim;
+
+    explicit OneDimensionalBlockSlicer(Extent::value_type dim = 0);
+
+    static std::pair<size_t, size_t>
+    n_th_block_inside(size_t length, size_t rank, size_t size);
+
+    std::pair<Offset, Extent>
+    sliceBlock(Extent &totalExtent, int size, int rank) override;
+
+    virtual std::unique_ptr<BlockSlicer> clone() const override;
+};
+} // namespace openPMD::auxiliary
diff --git a/include/openPMD/benchmark/mpi/BlockSlicer.hpp b/include/openPMD/benchmark/mpi/BlockSlicer.hpp
index a720793b41..0670bb91a2 100644
--- a/include/openPMD/benchmark/mpi/BlockSlicer.hpp
+++ b/include/openPMD/benchmark/mpi/BlockSlicer.hpp
@@ -1,4 +1,4 @@
-/* Copyright 2018-2021 Franz Poeschel
+/* Copyright 2025 Franz Poeschel
  *
  * This file is part of openPMD-api.
  *
@@ -19,35 +19,13 @@
  * If not, see <http://www.gnu.org/licenses/>.
  */
 
-#pragma once
+/* Legacy header for backward compatibility */
 
-#include "openPMD/Dataset.hpp"
+#pragma once
 
-#include <memory>
+#include "openPMD/auxiliary/BlockSlicer.hpp"
 
 namespace openPMD
 {
-/**
- * Abstract class to associate a thread with its local cuboid in the total
- * cuboid.
- */
-class BlockSlicer
-{
-public:
-    /**
-     * Associate the current thread with its cuboid.
-     * @param totalExtent The total extent of the cuboid.
-     * @param size The number of threads to be used (not greater than MPI size).
-     * @param rank The MPI rank.
-     * @return A pair of the cuboid's offset and extent.
-     */
-    virtual std::pair<Offset, Extent>
-    sliceBlock(Extent &totalExtent, int size, int rank) = 0;
-
-    virtual std::unique_ptr<BlockSlicer> clone() const = 0;
-
-    /** This class will be derived from
-     */
-    virtual ~BlockSlicer() = default;
-};
-} // namespace openPMD
+using auxiliary::BlockSlicer;
+}
diff --git a/include/openPMD/benchmark/mpi/OneDimensionalBlockSlicer.hpp b/include/openPMD/benchmark/mpi/OneDimensionalBlockSlicer.hpp
index f0d943d972..510bdc2731 100644
--- a/include/openPMD/benchmark/mpi/OneDimensionalBlockSlicer.hpp
+++ b/include/openPMD/benchmark/mpi/OneDimensionalBlockSlicer.hpp
@@ -1,4 +1,4 @@
-/* Copyright 2018-2021 Franz Poeschel
+/* Copyright 2025 Franz Poeschel
  *
  * This file is part of openPMD-api.
  *
@@ -19,26 +19,14 @@
  * If not, see <http://www.gnu.org/licenses/>.
  */
 
+/* Legacy header for backward compatibility */
+
 #pragma once
 
-#include "openPMD/Dataset.hpp"
+#include "openPMD/auxiliary/OneDimensionalBlockSlicer.hpp"
 #include "openPMD/benchmark/mpi/BlockSlicer.hpp"
 
 namespace openPMD
 {
-class OneDimensionalBlockSlicer : public BlockSlicer
-{
-public:
-    Extent::value_type m_dim;
-
-    explicit OneDimensionalBlockSlicer(Extent::value_type dim = 0);
-
-    static std::pair<size_t, size_t>
-    n_th_block_inside(size_t length, size_t rank, size_t size);
-
-    std::pair<Offset, Extent>
-    sliceBlock(Extent &totalExtent, int size, int rank) override;
-
-    virtual std::unique_ptr<BlockSlicer> clone() const override;
-};
-} // namespace openPMD
+using auxiliary::OneDimensionalBlockSlicer;
+}
diff --git a/src/ChunkInfo.cpp b/src/ChunkInfo.cpp
index 64b9d7f421..4461eb0227 100644
--- a/src/ChunkInfo.cpp
+++ b/src/ChunkInfo.cpp
@@ -22,7 +22,7 @@
 #include "openPMD/ChunkInfo_internal.hpp"
 
 #include "openPMD/auxiliary/Mpi.hpp"
-#include "openPMD/benchmark/mpi/OneDimensionalBlockSlicer.hpp"
+#include "openPMD/auxiliary/OneDimensionalBlockSlicer.hpp"
 
 #include <algorithm> // std::sort
 #include <deque>
@@ -372,7 +372,7 @@ namespace chunk_assignment
     {
         auto [notAssigned, res] = std::move(pa);
         auto [myChunksFrom, myChunksTo] =
-            OneDimensionalBlockSlicer::n_th_block_inside(
+            auxiliary::OneDimensionalBlockSlicer::n_th_block_inside(
                 notAssigned.size(), my_rank, num_ranks);
         std::transform(
             notAssigned.begin() + myChunksFrom,
@@ -404,7 +404,7 @@ namespace chunk_assignment
         }
         notAssigned.clear();
         auto [myChunksFrom, myChunksTo] =
-            OneDimensionalBlockSlicer::n_th_block_inside(
+            auxiliary::OneDimensionalBlockSlicer::n_th_block_inside(
                 sortSourceChunksBySourceRank.size(), my_rank, num_ranks);
         auto it = sortSourceChunksBySourceRank.begin();
         for (size_t i = 0; i < myChunksFrom; ++i)
@@ -520,7 +520,8 @@ namespace chunk_assignment
     }
 
     ByCuboidSlice::ByCuboidSlice(
-        std::unique_ptr<BlockSlicer> blockSlicer_in, Extent totalExtent_in)
+        std::unique_ptr<auxiliary::BlockSlicer> blockSlicer_in,
+        Extent totalExtent_in)
         : blockSlicer(std::move(blockSlicer_in))
         , totalExtent(std::move(totalExtent_in))
     {}
diff --git a/src/benchmark/mpi/OneDimensionalBlockSlicer.cpp b/src/auxiliary/OneDimensionalBlockSlicer.cpp
similarity index 95%
rename from src/benchmark/mpi/OneDimensionalBlockSlicer.cpp
rename to src/auxiliary/OneDimensionalBlockSlicer.cpp
index bb71cc29db..5520ee9cd3 100644
--- a/src/benchmark/mpi/OneDimensionalBlockSlicer.cpp
+++ b/src/auxiliary/OneDimensionalBlockSlicer.cpp
@@ -19,11 +19,9 @@
  * If not, see <http://www.gnu.org/licenses/>.
  */
 
-#include "openPMD/benchmark/mpi/OneDimensionalBlockSlicer.hpp"
+#include "openPMD/auxiliary/OneDimensionalBlockSlicer.hpp"
 
-#include <algorithm>
-
-namespace openPMD
+namespace openPMD::auxiliary
 {
 OneDimensionalBlockSlicer::OneDimensionalBlockSlicer(Extent::value_type dim)
     : m_dim{dim}
@@ -85,4 +83,4 @@ std::unique_ptr<BlockSlicer> OneDimensionalBlockSlicer::clone() const
 {
     return std::unique_ptr<BlockSlicer>(new OneDimensionalBlockSlicer(m_dim));
 }
-} // namespace openPMD
+} // namespace openPMD::auxiliary
diff --git a/src/binding/python/ChunkInfo.cpp b/src/binding/python/ChunkInfo.cpp
index ed31202c82..b95e7caf56 100644
--- a/src/binding/python/ChunkInfo.cpp
+++ b/src/binding/python/ChunkInfo.cpp
@@ -19,12 +19,11 @@
  * If not, see <http://www.gnu.org/licenses/>.
  */
 #include "openPMD/ChunkInfo.hpp"
-#include "openPMD/benchmark/mpi/OneDimensionalBlockSlicer.hpp"
+#include "openPMD/auxiliary/OneDimensionalBlockSlicer.hpp"
 #include "openPMD/binding/python/Mpi.hpp"
 
 #include "openPMD/binding/python/Common.hpp"
 
-#include <exception>
 #include <pybind11/pytypes.h>
 #include <string>
 #include <utility> // std::move
@@ -324,16 +323,17 @@ void init_Chunk(py::module &m)
             }),
             py::arg("strategy_within_node"));
 
-    (void)py::class_<BlockSlicer>(m, "BlockSlicer");
+    (void)py::class_<auxiliary::BlockSlicer>(m, "BlockSlicer");
 
-    py::class_<OneDimensionalBlockSlicer, BlockSlicer>(
+    py::class_<auxiliary::OneDimensionalBlockSlicer, auxiliary::BlockSlicer>(
         m, "OneDimensionalBlockSlicer")
         .def(py::init<>())
         .def(py::init<Extent::value_type>(), py::arg("dim"));
 
     py::class_<ByCuboidSlice, Strategy>(m, "ByCuboidSlice")
         .def(
-            py::init([](BlockSlicer const &blockSlicer, Extent totalExtent) {
+            py::init([](auxiliary::BlockSlicer const &blockSlicer,
+                        Extent totalExtent) {
                 return ByCuboidSlice(
                     blockSlicer.clone(), std::move(totalExtent));
             }),
diff --git a/test/ParallelIOTest.cpp b/test/ParallelIOTest.cpp
index c8f60e14fe..79cbfb7a82 100644
--- a/test/ParallelIOTest.cpp
+++ b/test/ParallelIOTest.cpp
@@ -9,7 +9,7 @@
 #include "openPMD/auxiliary/Filesystem.hpp"
 #include "openPMD/openPMD.hpp"
 // @todo change includes
-#include "openPMD/benchmark/mpi/OneDimensionalBlockSlicer.hpp"
+#include "openPMD/auxiliary/OneDimensionalBlockSlicer.hpp"
 #include <catch2/catch.hpp>
 
 #if !openPMD_HAVE_MPI
@@ -2465,7 +2465,8 @@ void adios2_chunk_distribution()
          * in others such as this one, it's an unneeded overhead.)
          */
         ByCuboidSlice cuboidSliceStrategy(
-            std::make_unique<OneDimensionalBlockSlicer>(1), E_x.getExtent());
+            std::make_unique<auxiliary::OneDimensionalBlockSlicer>(1),
+            E_x.getExtent());
         auto cuboidSliceAssignment = cuboidSliceStrategy.assign(
             chunkTable, rankMetaIn, readingRanksHostnames, mpi_rank, mpi_size);
         printAssignment(

From 01bcc0f2cdc8e63f65ad96194f3f72b600c3c081 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franz=20P=C3=B6schel?= <franz.poeschel@gmail.com>
Date: Tue, 15 Jul 2025 17:36:17 +0200
Subject: [PATCH 20/27] More thorough documentation

---
 include/openPMD/ChunkInfo.hpp | 147 +++++++++++++++++++++++++++++++---
 1 file changed, 134 insertions(+), 13 deletions(-)

diff --git a/include/openPMD/ChunkInfo.hpp b/include/openPMD/ChunkInfo.hpp
index b0b9e549f3..06522fade2 100644
--- a/include/openPMD/ChunkInfo.hpp
+++ b/include/openPMD/ChunkInfo.hpp
@@ -86,18 +86,53 @@ using ChunkTable = std::vector<WrittenChunkInfo>;
 
 namespace chunk_assignment
 {
-    constexpr char const *HOSTFILE_VARNAME = "MPI_WRITTEN_HOSTFILE";
-
+    /** @brief Meta information on processes by ID (MPI rank).
+     *         Typically a hostname.
+     */
     using RankMeta = std::map<unsigned int, std::string>;
 
+    /** @brief Result type for chunk assignment strategies.
+     *
+     * Chunks sorted by the destination process ID (MPI rank). Distribution
+     * strategies will in general only need to fill the chunks for the current
+     * (calling) process ID, but some (such as RoundRobin) will fill the
+     * information for other processes as well.
+     *
+     * The chunks have type WrittenChunkInfo, hence carrying information on the
+     * sourceID.
+     */
     using Assignment = std::map<unsigned int, std::vector<WrittenChunkInfo>>;
 
+    /**
+     * @brief Pairwise merge all chunks if they can be merged into a larger one.
+     *
+     * Note that this function is in no way optimized, but follows a naive
+     * O(n^2) implementation. Use a more sophisticated method for large numbers
+     * of chunks.
+     *
+     * @param chunks A list of chunks. Merging will occur in-place.
+     */
     template <typename Chunk_t>
-    void mergeChunks(std::vector<Chunk_t> &);
+    void mergeChunks(std::vector<Chunk_t> &chunks);
 
-    auto mergeChunksFromSameSourceID(std::vector<WrittenChunkInfo> const &)
+    /**
+     * @brief Pairwise merge all chunks from the same source ID if they can be
+     *        merged into a larger one.
+     *
+     * @param chunks A list of chunks.
+     * @return Ordered by source ID, lists of merged chunks for each source ID.
+     */
+    auto
+    mergeChunksFromSameSourceID(std::vector<WrittenChunkInfo> const &chunks)
         -> std::map<unsigned int, std::vector<ChunkInfo>>;
 
+    /**
+     * @brief Return type for partial chunk assignment strategies.
+     *
+     * A typical partial assignment strategy is ByHostname, which can assign
+     * chunks only within one compute node and will fail if there is no consumer
+     * in that same compute node.
+     */
     struct PartialAssignment
     {
         ChunkTable notAssigned;
@@ -118,14 +153,39 @@ namespace chunk_assignment
      */
     struct Strategy
     {
+        /**
+         * @brief Assign chunks to be loaded to reading processes.
+         *
+         * @param chunkTable Chunk table obtained by
+         *        BaseRecordComponent::availableChunks().
+         * @param in Meta information on writing processes, e.g. hostnames.
+         * @param out Meta information on reading processes, e.g. hostnames.
+         * @param my_rank Rank identifier for the current process. Will be
+         *        considered by some distribution strategies that may be called
+         *        for only a subselection of the data space (e.g. for
+         *        distributing data within processes on the same compute node
+         *        in a cluster).
+         * @param num_ranks Number of processes among which chunks are to be
+         *        distributed. Will be considered by some distribution
+         *        strategies that may be called for only a subselection of the
+         *        data space (e.g. for distributing data within processes on the
+         *        same compute node in a cluster).
+         * @return A table that assigns chunks to reading processes. Chunks are
+         *        sorted by the destination process ID (MPI rank). Distribution
+         *        strategies will in general only need to fill the chunks for
+         *        the current (calling) process ID, but some (such as
+         *        RoundRobin) will fill the information for other processes
+         *        as well.
+         */
         Assignment assign(
-            ChunkTable,
-            RankMeta const &rankMetaIn,
-            RankMeta const &rankMetaOut,
+            ChunkTable chunkTable,
+            RankMeta const &in,
+            RankMeta const &out,
             size_t my_rank,
             size_t num_ranks);
         /**
-         * @brief Assign chunks to be loaded to reading processes.
+         * @brief Assign chunks to be loaded to reading processes. To be defined
+         *        by implementors.
          *
          * @param partialAssignment Two chunktables, one of unassigned chunks
          *        and one of chunks that might have already been assigned
@@ -143,7 +203,12 @@ namespace chunk_assignment
          *        strategies that may be called for only a subselection of the
          *        data space (e.g. for distributing data within processes on the
          *        same compute node in a cluster).
-         * @return ChunkTable A table that assigns chunks to reading processes.
+         * @return A table that assigns chunks to reading processes. Chunks are
+         *        sorted by the destination process ID (MPI rank). Distribution
+         *        strategies will in general only need to fill the chunks for
+         *        the current (calling) process ID, but some (such as
+         *        RoundRobin) will fill the information for other processes
+         *        as well.
          */
         virtual Assignment assign(
             PartialAssignment partialAssignment,
@@ -166,9 +231,38 @@ namespace chunk_assignment
      * 1. Apply the partial strategy.
      * 2. Apply the full strategy to assign unassigned leftovers.
      *
+     * A typical partial assignment strategy is ByHostname, which can assign
+     * chunks only within one compute node and will fail if there is no consumer
+     * in that same compute node.
      */
     struct PartialStrategy
     {
+        /**
+         * @brief Assign chunks to be loaded to reading processes.
+         *
+         * @param table Chunk table obtained by
+         *        BaseRecordComponent::availableChunks().
+         *        Merge the unassigned chunks into the partially assigned table.
+         * @param in Meta information on writing processes, e.g. hostnames.
+         * @param out Meta information on reading processes, e.g. hostnames.
+         * @param my_rank Rank identifier for the current process. Will be
+         *        considered by some distribution strategies that may be called
+         *        for only a subselection of the data space (e.g. for
+         *        distributing data within processes on the same compute node
+         *        in a cluster).
+         * @param num_ranks Number of processes among which chunks are to be
+         *        distributed. Will be considered by some distribution
+         *        strategies that may be called for only a subselection of the
+         *        data space (e.g. for distributing data within processes on the
+         *        same compute node in a cluster).
+         * @return Two chunktables, one of leftover chunks that were not
+         *        assigned and one that assigns chunks to reading processes.
+         *        Assigned chunks are sorted by the destination process ID
+         *        (MPI rank). Distribution strategies will in general only need
+         *        to fill the chunks for the current (calling) process ID.
+         *        Chunks assigned to other destination processes may be
+         *        silently dropped.
+         */
         PartialAssignment assign(
             ChunkTable table,
             RankMeta const &in,
@@ -176,7 +270,8 @@ namespace chunk_assignment
             size_t my_rank,
             size_t num_ranks);
         /**
-         * @brief Assign chunks to be loaded to reading processes.
+         * @brief Assign chunks to be loaded to reading processes. To be defined
+         *        by implementors.
          *
          * @param partialAssignment Two chunktables, one of unassigned chunks
          *        and one of chunks that might have already been assigned
@@ -194,9 +289,13 @@ namespace chunk_assignment
          *        strategies that may be called for only a subselection of the
          *        data space (e.g. for distributing data within processes on the
          *        same compute node in a cluster).
-         * @return PartialAssignment Two chunktables, one of leftover chunks
-         *         that were not assigned and one that assigns chunks to
-         *         reading processes.
+         * @return Two chunktables, one of leftover chunks that were not
+         *        assigned and one that assigns chunks to reading processes.
+         *        Assigned chunks are sorted by the destination process ID
+         *        (MPI rank). Distribution strategies will in general only need
+         *        to fill the chunks for the current (calling) process ID.
+         *        Chunks assigned to other destination processes may be
+         *        silently dropped.
          */
         virtual PartialAssignment assign(
             PartialAssignment partialAssignment,
@@ -259,6 +358,12 @@ namespace chunk_assignment
         virtual std::unique_ptr<Strategy> clone() const override;
     };
 
+    /**
+     * @brief Round-Robin at process level.
+     *
+     * Assign all chunks from the first source rank to the first reader rank,
+     * all from the second source rank to the second reader, and so on.
+     */
     struct RoundRobinOfSourceRanks : Strategy
     {
         Assignment assign(
@@ -271,6 +376,16 @@ namespace chunk_assignment
         virtual std::unique_ptr<Strategy> clone() const override;
     };
 
+    /**
+     * @brief Alternative to RoundRobin, but instead gives every reader a
+     *        sequential range of blocks.
+     *
+     * Sequential in here means the order as returned by
+     * BaseRecordComponent::availableChunks().
+     * E.g. 6 blocks distributed to 2 processes will result in:
+     * The first three blocks go to the first process, the last three blocks to
+     * the second.
+     */
     struct Blocks : Strategy
     {
         Assignment assign(
@@ -283,6 +398,12 @@ namespace chunk_assignment
         [[nodiscard]] std::unique_ptr<Strategy> clone() const override;
     };
 
+    /**
+     * @brief Blocks at process level.
+     *
+     * Assign writer processes to reader processes, instead of assigning blocks
+     * to processes.
+     */
     struct BlocksOfSourceRanks : Strategy
     {
         Assignment assign(

From 4385fc79e2b3baee5e8af5ce5f38df68f53b0ee6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franz=20P=C3=B6schel?= <franz.poeschel@gmail.com>
Date: Thu, 17 Jul 2025 11:51:12 +0200
Subject: [PATCH 21/27] Cleanup, documentation, proper use of rank
 specification

---
 src/ChunkInfo.cpp                             | 90 +++++++++++--------
 .../python/openpmd_api/pipe/__main__.py       | 74 ++++++---------
 2 files changed, 81 insertions(+), 83 deletions(-)

diff --git a/src/ChunkInfo.cpp b/src/ChunkInfo.cpp
index 4461eb0227..785527867c 100644
--- a/src/ChunkInfo.cpp
+++ b/src/ChunkInfo.cpp
@@ -134,7 +134,13 @@ namespace chunk_assignment
                 Offset offset(c1->offset);
                 Extent extent(c1->extent);
                 extent[dim] += c2->extent[dim];
-                return std::make_optional(Chunk_t(offset, extent));
+                // Copy from chunk1 in order to initialize with meta information
+                // from instantiations of Chunk_t that we cannot generically
+                // state here (such as the source ID)
+                Chunk_t res = chunk1;
+                res.offset = offset;
+                res.extent = extent;
+                return std::make_optional<Chunk_t>(std::move(res));
             }
             return std::optional<Chunk_t>();
         }
@@ -289,23 +295,19 @@ namespace chunk_assignment
     Assignment RoundRobin::assign(
         PartialAssignment partialAssignment,
         RankMeta const &, // ignored parameter
-        RankMeta const &out,
+        RankMeta const &,
         size_t /* my_rank */,
-        size_t /* num_ranks */)
+        size_t num_ranks)
     {
-        if (out.size() == 0)
+        if (num_ranks == 0)
         {
             throw std::runtime_error(
                 "[RoundRobin] Cannot round-robin to zero ranks.");
         }
-        auto it = out.begin();
-        auto nextRank = [&it, &out]() {
-            if (it == out.end())
-            {
-                it = out.begin();
-            }
-            auto res = it->first;
-            it++;
+        size_t it = 0;
+        auto nextRank = [&it, num_ranks]() {
+            auto res = it;
+            it = (it + 1) % num_ranks;
             return res;
         };
         ChunkTable &sourceChunks = partialAssignment.notAssigned;
@@ -326,9 +328,9 @@ namespace chunk_assignment
     Assignment RoundRobinOfSourceRanks::assign(
         PartialAssignment partialAssignment,
         RankMeta const &, // ignored parameter
-        RankMeta const &out,
+        RankMeta const &,
         size_t /* my_rank */,
-        size_t /* num_ranks */)
+        size_t num_ranks)
     {
         std::map<unsigned int, std::deque<WrittenChunkInfo>>
             sortSourceChunksBySourceRank;
@@ -339,15 +341,12 @@ namespace chunk_assignment
         }
         partialAssignment.notAssigned.clear();
         auto source_it = sortSourceChunksBySourceRank.begin();
-        auto sink_it = out.begin();
+        size_t sink_it = 0;
         for (; source_it != sortSourceChunksBySourceRank.end();
              ++source_it, ++sink_it)
         {
-            if (sink_it == out.end())
-            {
-                sink_it = out.begin();
-            }
-            auto &chunks_go_here = partialAssignment.assigned[sink_it->first];
+            sink_it %= num_ranks;
+            auto &chunks_go_here = partialAssignment.assigned[sink_it];
             chunks_go_here.reserve(
                 partialAssignment.assigned.size() + source_it->second.size());
             for (auto &chunk : source_it->second)
@@ -436,8 +435,14 @@ namespace chunk_assignment
         RankMeta const &in,
         RankMeta const &out,
         size_t my_rank,
-        size_t /* num_ranks */)
+        size_t num_ranks)
     {
+        if (out.size() != num_ranks)
+        {
+            throw std::runtime_error(
+                "[ByHostname] Invalid call: Rank meta information (hostnames) "
+                "incomplete.");
+        }
         // collect chunks by hostname
         std::map<std::string, ChunkTable> chunkGroups;
         ChunkTable &sourceChunks = res.notAssigned;
@@ -447,6 +452,8 @@ namespace chunk_assignment
             for (auto &chunk : sourceChunks)
             {
                 auto it = in.find(chunk.sourceID);
+                // If the writer rank has no meta information, move its chunk
+                // back to the leftover
                 if (it == in.end())
                 {
                     leftover.push_back(std::move(chunk));
@@ -489,24 +496,33 @@ namespace chunk_assignment
             else
             {
                 RankMeta ranksOnTargetNode;
-                size_t local_rank = 0;
+                std::optional<size_t> local_rank; // empty until my_rank found
                 size_t counter = 0;
                 for (auto rank : it->second)
                 {
-                    ranksOnTargetNode[rank] = hostname;
+                    ranksOnTargetNode[counter] = hostname;
                     if (rank == my_rank)
                     {
                         local_rank = counter;
                     }
                     ++counter;
                 }
+                if (!local_rank.has_value())
+                {
+                    /*
+                     * We are running on another compute node. This is fine, we
+                     * have ensured above that some other process will take care
+                     * of these chunks, they need not go back to the leftover.
+                     */
+                    continue;
+                }
                 Assignment swapped;
                 swapped.swap(sinkChunks);
                 sinkChunks = m_withinNode->assign(
                     PartialAssignment(chunkGroup.second, std::move(swapped)),
                     in,
                     ranksOnTargetNode,
-                    local_rank,
+                    *local_rank,
                     it->second.size());
             }
         }
@@ -678,15 +694,13 @@ namespace chunk_assignment
         for (auto &chunk : sourceSide)
         {
             restrictToSelection(chunk.offset, chunk.extent, myOffset, myExtent);
-            for (auto ext : chunk.extent)
+            if (std::all_of(
+                    chunk.extent.begin(), chunk.extent.end(), [](auto const e) {
+                        return e > 0;
+                    }))
             {
-                if (ext == 0)
-                {
-                    goto outer_loop;
-                }
+                sinkSide[my_rank].push_back(std::move(chunk));
             }
-            sinkSide[my_rank].push_back(std::move(chunk));
-        outer_loop:;
         }
 
         return res.assigned;
@@ -705,9 +719,9 @@ namespace chunk_assignment
     Assignment BinPacking::assign(
         PartialAssignment res,
         RankMeta const &,
-        RankMeta const &sinkRanks,
+        RankMeta const &,
         size_t /* my_rank */,
-        size_t /* num_ranks */)
+        size_t num_ranks)
     {
         ChunkTable &sourceChunks = res.notAssigned;
         Assignment &sinkChunks = res.assigned;
@@ -721,7 +735,7 @@ namespace chunk_assignment
             }
             totalExtent += chunkExtent;
         }
-        size_t const idealSize = totalExtent / sinkRanks.size();
+        size_t const idealSize = totalExtent / num_ranks;
         /*
          * Split chunks into subchunks of size at most idealSize.
          * The resulting list of chunks is sorted by chunk size in decreasing
@@ -740,8 +754,8 @@ namespace chunk_assignment
          * data per process.
          */
         auto worker =
-            [&sinkRanks, &digestibleChunks, &sinkChunks, idealSize]() {
-                for (auto const &destRank : sinkRanks)
+            [&num_ranks, &digestibleChunks, &sinkChunks, idealSize]() {
+                for (size_t destRank = 0; destRank < num_ranks; ++destRank)
                 {
                     /*
                      * Within the second call of the worker lambda, this will
@@ -763,7 +777,7 @@ namespace chunk_assignment
                                  * process within this call of the worker
                                  * lambda, so the loop can be broken out of.
                                  */
-                                sinkChunks[destRank.first].push_back(
+                                sinkChunks[destRank].push_back(
                                     std::move(it->chunk));
                                 digestibleChunks.erase(it);
                                 break;
@@ -771,7 +785,7 @@ namespace chunk_assignment
                             else if (it->dataSize <= leftoverSize)
                             {
                                 // assign smaller chunks as long as they fit
-                                sinkChunks[destRank.first].push_back(
+                                sinkChunks[destRank].push_back(
                                     std::move(it->chunk));
                                 leftoverSize -= it->dataSize;
                                 it = digestibleChunks.erase(it);
diff --git a/src/binding/python/openpmd_api/pipe/__main__.py b/src/binding/python/openpmd_api/pipe/__main__.py
index 9eae8085d1..2cdffeae9e 100644
--- a/src/binding/python/openpmd_api/pipe/__main__.py
+++ b/src/binding/python/openpmd_api/pipe/__main__.py
@@ -113,6 +113,11 @@ def __init__(self, source, dynamicView, offset, extent):
         self.offset = offset
         self.extent = extent
 
+# Find below a couple of examples on how to define chunk distribution
+# strategies in Python by extending classes PartialStrategy or Strategy.
+# These strategies may then be used inside composing strategies
+# such as ByHostname. They may also call other strategies, as in
+# IncreaseGranularity defined below.
 
 # Example how to implement a simple partial strategy in Python
 class LoadOne(io.PartialStrategy):
@@ -127,7 +132,26 @@ def assign(self, assignment, ranks_in, ranks_out, my_rank, num_ranks):
             assignment.assigned[my_rank].append(element)
         return assignment
 
+# Example how to implement a simple strategy in Python
+class LoadAll(io.Strategy):
+
+    def __init__(self):
+        super().__init__()
+
+    def assign(self, assignment, ranks_in, ranks_out, my_rank, num_ranks):
+        res = assignment.assigned
+        if my_rank not in res:
+            res[my_rank] = assignment.not_assigned
+        else:
+            res[my_rank].extend(assignment.not_assigned)
+        return res
 
+# A more complex distribution strategy. This creates supergroups of hostnames,
+# separately for the writer and reader ranks.
+# Every `granularity_in` writer hostnames are merged into one new hostname
+# each, same for every `granularity_out` reader hostnames.
+# An example usage is defining granularity_in=32, granularity_out=1 for a
+# 32-to-1 fan-in pattern.
 class IncreaseGranularity(io.PartialStrategy):
     def __init__(
         self,
@@ -194,16 +218,6 @@ def inner_rank_assignment(
             out_ranks, out_hostname_to_hostgroup
         )
 
-        # # we only care about the local host (why tho?)
-        # local_host = self.out_ranks_inner[my_rank]
-        # # restrict out_ranks_inner to those ranks
-        # # that run on the current meta host
-        # self.out_ranks_inner = {
-        #     rank: host
-        #     for rank, host in self.out_ranks_inner.items()
-        #     if host == local_host
-        # }
-
         return self.inner_distribution.assign(
             assignment,
             self.in_ranks_inner, self.out_ranks_inner,
@@ -211,13 +225,16 @@ def inner_rank_assignment(
         )
 
 
+# Merge chunks into larger chunks as much as possible within
+# each source process for reducing the number of load requests.
 class MergingStrategy(io.Strategy):
     def __init__(self, inner_strategy):
         super().__init__()
         self.inner_strategy = inner_strategy
 
-    def assign(self, assignment, in_ranks, out_ranks):
-        res = self.inner_strategy.assign(assignment, in_ranks, out_ranks)
+    def assign(self, assignment, in_ranks, out_ranks, my_rank, num_ranks):
+        res = self.inner_strategy.assign(
+            assignment, in_ranks, out_ranks, my_rank, num_ranks)
         for out_rank, assignment in res.items():
             merged = assignment.merge_chunks_from_same_sourceID()
             assignment.clear()
@@ -230,39 +247,6 @@ def assign(self, assignment, in_ranks, out_ranks):
         return res
 
 
-# strategy = IncreaseGranularity(2, 1)
-# assignment = [
-#     io.WrittenChunkInfo([0], [1], 0),
-#     io.WrittenChunkInfo([1], [1], 1),
-#     io.WrittenChunkInfo([2], [1], 2),
-#     io.WrittenChunkInfo([3], [1], 3),
-# ]
-# in_ranks = {0: "host0", 1: "host1", 2: "host3", 3: "host4"}
-# out_ranks = {0: "host2", 1: "host5"}
-# res = strategy.assign(assignment, in_ranks, out_ranks)
-# print(f"NOT ASSIGNED: {len(res.not_assigned)} chunks")
-# print("ASSIGNED:")
-# for rank, chunks in res.assigned.items():
-#     print(f"\tRANK {rank}:", end='')
-#     for chunk in chunks:
-#         print(f" [{chunk.offset}-{chunk.extent}]", end='')
-#     print()
-
-# Example how to implement a simple strategy in Python
-class LoadAll(io.Strategy):
-
-    def __init__(self):
-        super().__init__()
-
-    def assign(self, assignment, ranks_in, ranks_out, my_rank, num_ranks):
-        res = assignment.assigned
-        if my_rank not in res:
-            res[my_rank] = assignment.not_assigned
-        else:
-            res[my_rank].extend(assignment.not_assigned)
-        return res
-
-
 def distribution_strategy(dataset_extent,
                           strategy_identifier=None):
     if strategy_identifier is None or not strategy_identifier:

From e72e1b425ec43112543b182318b513ef161857d2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franz=20P=C3=B6schel?= <franz.poeschel@gmail.com>
Date: Thu, 17 Jul 2025 14:17:33 +0200
Subject: [PATCH 22/27] Actual testing

---
 test/ParallelIOTest.cpp | 190 +++++++++++++++++++++++++++++++++++++---
 1 file changed, 176 insertions(+), 14 deletions(-)

diff --git a/test/ParallelIOTest.cpp b/test/ParallelIOTest.cpp
index 79cbfb7a82..f8516baf49 100644
--- a/test/ParallelIOTest.cpp
+++ b/test/ParallelIOTest.cpp
@@ -3,10 +3,12 @@
  */
 #include "Files_ParallelIO/ParallelIOTests.hpp"
 
+#include "openPMD/ChunkInfo.hpp"
 #include "openPMD/IO/ADIOS/macros.hpp"
 #include "openPMD/IO/Access.hpp"
 #include "openPMD/auxiliary/Environment.hpp"
 #include "openPMD/auxiliary/Filesystem.hpp"
+#include "openPMD/auxiliary/Mpi.hpp"
 #include "openPMD/openPMD.hpp"
 // @todo change includes
 #include "openPMD/auxiliary/OneDimensionalBlockSlicer.hpp"
@@ -2224,7 +2226,121 @@ TEST_CASE("iterate_nonstreaming_series", "[serial][adios2]")
     iterate_nonstreaming_series::iterate_nonstreaming_series();
 }
 
-void adios2_chunk_distribution()
+namespace adios2_chunk_distribution
+{
+static auto add = [](size_t left, size_t right) { return left + right; };
+auto mergeTable(ChunkTable const &chunkTable) -> ChunkTable const &
+{
+    return chunkTable;
+}
+// Flatten a per-rank Assignment into one ChunkTable, keeping rank order.
+auto mergeTable(chunk_assignment::Assignment const &assignment) -> ChunkTable
+{
+    ChunkTable merged;
+    // Pre-size the result. The init value must be a size_t: with `0u` the
+    // reduction would accumulate (and truncate) in unsigned int.
+    merged.reserve(
+        std::transform_reduce(
+            assignment.begin(),
+            assignment.end(),
+            size_t{0},
+            add,
+            [](chunk_assignment::Assignment::value_type const &pair) {
+                return pair.second.size();
+            }));
+    for (auto const &pair : assignment)
+    {
+        merged.insert(merged.end(), pair.second.begin(), pair.second.end());
+    }
+    return merged;
+}
+auto mergeTable(chunk_assignment::PartialAssignment const &assignment)
+{
+    auto const &[not_assigned, assigned] = assignment;
+    ChunkTable merged = mergeTable(assigned);
+    merged.reserve(merged.size() + not_assigned.size());
+    for (auto const &chunk : not_assigned)
+    {
+        merged.insert(merged.end(), chunk);
+    }
+    return merged;
+}
+
+template <typename ChunkTable1, typename ChunkTable2>
+auto equalTables(ChunkTable1 &&availableChunks, ChunkTable2 &&assignedChunks)
+{
+    return chunk_assignment::mergeChunksFromSameSourceID(
+               mergeTable(availableChunks)) ==
+        chunk_assignment::mergeChunksFromSameSourceID(
+               mergeTable(assignedChunks));
+}
+
+// Number of elements in one chunk. The init value is a size_t on purpose:
+// std::reduce accumulates in the init's type, and `1` would make that int.
+auto totalVolume(ChunkInfo const &chunk) -> size_t
+{
+    auto const mul = [](size_t left, size_t right) { return left * right; };
+    return std::reduce(
+        chunk.extent.begin(), chunk.extent.end(), size_t{1}, mul);
+}
+
+auto totalVolume(ChunkTable const &chunkTable) -> size_t
+{
+    return std::transform_reduce(
+        chunkTable.begin(),
+        chunkTable.end(),
+        size_t{0}, // sum in size_t; `0u` would truncate to unsigned int
+        add,
+        static_cast<size_t (*)(ChunkInfo const &)>(&totalVolume));
+}
+
+auto totalVolume(chunk_assignment::Assignment const &assignment) -> size_t
+{
+    return std::transform_reduce(
+        assignment.begin(),
+        assignment.end(),
+        size_t{0}, // sum in size_t; `0u` would truncate to unsigned int
+        add,
+        [](chunk_assignment::Assignment::value_type const &pair) {
+            return totalVolume(pair.second);
+        });
+}
+
+template <typename Assignment_t>
+auto parallelDisjointVolume(Assignment_t &&assignment, MPI_Comm communicator)
+    -> size_t
+{
+    size_t myVolume = totalVolume(assignment);
+    size_t summedVolume = 0;
+    MPI_Allreduce(
+        &myVolume,
+        &summedVolume,
+        1,
+        auxiliary::openPMD_MPI_type<size_t>(),
+        MPI_SUM,
+        communicator);
+    return summedVolume;
+}
+
+template <typename Assignment_t>
+auto equalDisjointByVolume(
+    ChunkTable const &availableChunks,
+    Assignment_t &&assignment,
+    std::optional<ChunkTable> const &leftover,
+    MPI_Comm communicator) -> bool
+{
+    size_t targetVolume = totalVolume(availableChunks);
+    if (leftover.has_value())
+    {
+        targetVolume -= totalVolume(*leftover);
+    }
+    size_t summarizedVolume = parallelDisjointVolume(assignment, communicator);
+    // std::cout << "Left: " << targetVolume << ", right: " << summarizedVolume
+    //           << std::endl;
+    return targetVolume == summarizedVolume;
+}
+
+void run_test()
 {
     /*
      * This test simulates a multi-node streaming setup in order to test some
@@ -2276,12 +2392,13 @@ void adios2_chunk_distribution()
     }
 }
 )END";
+    constexpr bool verbose = true;
 
     auto printChunktable = [mpi_rank](
                                std::string const &strategyName,
                                ChunkTable const &table,
                                chunk_assignment::RankMeta const &meta) {
-        if (mpi_rank != 0)
+        if (!verbose || mpi_rank != 0)
         {
             return;
         }
@@ -2307,7 +2424,7 @@ void adios2_chunk_distribution()
                                std::string const &strategyName,
                                chunk_assignment::Assignment const &table,
                                chunk_assignment::RankMeta const &meta) {
-        if (mpi_rank != 0)
+        if (!verbose || mpi_rank != 0)
         {
             return;
         }
@@ -2392,6 +2509,7 @@ void adios2_chunk_distribution()
             chunkTable, rankMetaIn, readingRanksHostnames, mpi_rank, mpi_size);
         printAssignment(
             "ROUND ROBIN", roundRobinAssignment, readingRanksHostnames);
+        REQUIRE(equalTables(chunkTable, roundRobinAssignment));
 
         /*
          * Assign chunks by hostname.
@@ -2415,6 +2533,17 @@ void adios2_chunk_distribution()
             "HOSTNAME, LEFTOVER",
             byHostnamePartialAssignment.notAssigned,
             rankMetaIn);
+        REQUIRE(equalDisjointByVolume(
+            chunkTable,
+            // Must restrict assignment to current rank, since
+            // ByHostname strategy output *may* also contain chunks from
+            // other ranks, but only partially. This is due to two
+            // effects: (1) Other processes are considered only as long
+            // as they live on the same node. (2) The within-node distribution
+            // is subject to a secondary distribution strategy.
+            byHostnamePartialAssignment.assigned[mpi_rank],
+            byHostnamePartialAssignment.notAssigned,
+            MPI_COMM_WORLD));
 
         /*
          * Same as above, but use RoundRobinOfSourceRanks this time, a strategy
@@ -2432,6 +2561,17 @@ void adios2_chunk_distribution()
             "HOSTNAME2, LEFTOVER",
             byHostnamePartialAssignment2.notAssigned,
             rankMetaIn);
+        REQUIRE(equalDisjointByVolume(
+            chunkTable,
+            // Must restrict assignment to current rank, since
+            // ByHostname strategy output *may* also contain chunks from
+            // other ranks, but only partially. This is due to two
+            // effects: (1) Other processes are considered only as long
+            // as they live on the same node. (2) The within-node distribution
+            // is subject to a secondary distribution strategy.
+            byHostnamePartialAssignment2.assigned[mpi_rank],
+            byHostnamePartialAssignment2.notAssigned,
+            MPI_COMM_WORLD));
 
         /*
          * Assign chunks by hostnames, once more.
@@ -2444,25 +2584,39 @@ void adios2_chunk_distribution()
          */
         FromPartialStrategy fromPartialStrategy(
             std::make_unique<ByHostname>(std::move(byHostname)),
-            std::make_unique<BinPacking>(/* splitAlongDimension = */ 1));
+            std::make_unique<Blocks>());
         auto fromPartialAssignment = fromPartialStrategy.assign(
             chunkTable, rankMetaIn, readingRanksHostnames, mpi_rank, mpi_size);
         printAssignment(
             "HOSTNAME WITH SECOND PASS",
             fromPartialAssignment,
             readingRanksHostnames);
+        REQUIRE(equalDisjointByVolume(
+            chunkTable,
+            // Must restrict assignment to current rank, since
+            // ByHostname strategy output *may* also contain chunks from
+            // other ranks, but only partially. This is due to two
+            // effects: (1) Other processes are considered only as long
+            // as they live on the same node. (2) The within-node and
+            // leftover distributions are each subject to a secondary
+            // distribution strategy.
+            fromPartialAssignment[mpi_rank],
+            std::nullopt,
+            MPI_COMM_WORLD));
 
         /*
-         * Assign chunks by slicing the n-dimensional physical domain and
-         * intersecting those slices with the available chunks from the backend.
-         * Notice that this strategy only returns the chunks that the currently
-         * running rank is supposed to load, whereas the other strategies return
-         * a chunk table containing all chunks that all ranks will load.
-         * In principle, a chunk_assignment::Strategy only needs to return the
-         * chunks that the current rank should load, but is free to emplace the
+         * Assign chunks by slicing the n-dimensional physical domain
+         * and intersecting those slices with the available chunks from
+         * the backend. Notice that this strategy only returns the
+         * chunks that the currently running rank is supposed to load,
+         * whereas the other strategies return a chunk table containing
+         * all chunks that all ranks will load. In principle, a
+         * chunk_assignment::Strategy only needs to return the chunks
+         * that the current rank should load, but is free to emplace the
          * other chunks for other reading ranks as well.
-         * (Reasoning: In some strategies, calculating everything is necessary,
-         * in others such as this one, it's an unneeded overhead.)
+         * (Reasoning: In some strategies, calculating everything is
+         * necessary, in others such as this one, it's an unneeded
+         * overhead.)
          */
         ByCuboidSlice cuboidSliceStrategy(
             std::make_unique<auxiliary::OneDimensionalBlockSlicer>(1),
@@ -2476,6 +2630,8 @@ void adios2_chunk_distribution()
         auto blocksAssignment = blocksStrategy.assign(
             chunkTable, rankMetaIn, readingRanksHostnames, mpi_rank, mpi_size);
         printAssignment("BLOCKS", blocksAssignment, readingRanksHostnames);
+        REQUIRE(equalDisjointByVolume(
+            chunkTable, blocksAssignment, std::nullopt, MPI_COMM_WORLD));
 
         BlocksOfSourceRanks blocksOfSourceRanksStrategy;
         auto blocksOfSourceRanksAssignment = blocksOfSourceRanksStrategy.assign(
@@ -2484,12 +2640,18 @@ void adios2_chunk_distribution()
             "BLOCKS OF SOURCE RANKS",
             blocksOfSourceRanksAssignment,
             readingRanksHostnames);
+        REQUIRE(equalDisjointByVolume(
+            chunkTable,
+            blocksOfSourceRanksAssignment,
+            std::nullopt,
+            MPI_COMM_WORLD));
     }
 }
+} // namespace adios2_chunk_distribution
 
 TEST_CASE("adios2_chunk_distribution", "[parallel][adios2]")
 {
-    adios2_chunk_distribution();
+    adios2_chunk_distribution::run_test();
 }
 #endif // openPMD_HAVE_ADIOS2 && openPMD_HAVE_MPI
 

From 10cbdd8bc3e79236cc74443f0bcebc15e0f56d45 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franz=20P=C3=B6schel?= <franz.poeschel@gmail.com>
Date: Thu, 17 Jul 2025 14:54:11 +0200
Subject: [PATCH 23/27] Test and fix ByHostname strategy

---
 src/ChunkInfo.cpp       | 35 +++++++++++++++++++++++++----------
 test/ParallelIOTest.cpp | 31 +++++++++++++++++++++++++++++++
 2 files changed, 56 insertions(+), 10 deletions(-)

diff --git a/src/ChunkInfo.cpp b/src/ChunkInfo.cpp
index 785527867c..646aa2a74c 100644
--- a/src/ChunkInfo.cpp
+++ b/src/ChunkInfo.cpp
@@ -19,8 +19,9 @@
  * If not, see <http://www.gnu.org/licenses/>.
  */
 #include "openPMD/ChunkInfo.hpp"
-#include "openPMD/ChunkInfo_internal.hpp"
 
+#include "openPMD/ChunkInfo_internal.hpp"
+#include "openPMD/Error.hpp"
 #include "openPMD/auxiliary/Mpi.hpp"
 #include "openPMD/auxiliary/OneDimensionalBlockSlicer.hpp"
 
@@ -32,8 +33,8 @@
 #include <map>
 #include <memory>
 #include <optional>
-#include <set>
 #include <stdexcept>
+#include <string>
 #include <utility>
 
 #ifdef _WIN32
@@ -496,18 +497,21 @@ namespace chunk_assignment
             else
             {
                 RankMeta ranksOnTargetNode;
-                std::optional<size_t> local_rank = 0;
+                std::vector<size_t> mapLocalRanksBackToGlobal;
+                mapLocalRanksBackToGlobal.reserve(it->second.size());
+                std::optional<size_t> my_rank_local; // unset: not on this node
                 size_t counter = 0;
                 for (auto rank : it->second)
                 {
+                    mapLocalRanksBackToGlobal.emplace_back(rank);
                     ranksOnTargetNode[counter] = hostname;
                     if (rank == my_rank)
                     {
-                        local_rank = counter;
+                        my_rank_local = counter;
                     }
                     ++counter;
                 }
-                if (!local_rank.has_value())
+                if (!my_rank_local.has_value())
                 {
                     /*
                      * We are running on another compute node. This is fine, we
@@ -516,14 +520,25 @@ namespace chunk_assignment
                      */
                     continue;
                 }
-                Assignment swapped;
-                swapped.swap(sinkChunks);
-                sinkChunks = m_withinNode->assign(
-                    PartialAssignment(chunkGroup.second, std::move(swapped)),
+                auto newlyAssigned = m_withinNode->assign(
+                    PartialAssignment(chunkGroup.second, {}),
                     in,
                     ranksOnTargetNode,
-                    *local_rank,
+                    *my_rank_local,
                     it->second.size());
+                for (auto &[local_rank, chunks] : newlyAssigned)
+                {
+                    size_t global_rank = mapLocalRanksBackToGlobal[local_rank];
+                    auto it_sinkChunks = sinkChunks.find(global_rank);
+                    if (it_sinkChunks != sinkChunks.end())
+                    {
+                        throw error::Internal(
+                            "Target rank " + std::to_string(global_rank) +
+                            " assigned multiple times?");
+                    }
+                    sinkChunks.emplace_hint(
+                        it_sinkChunks, global_rank, std::move(chunks));
+                }
             }
         }
         return res;
diff --git a/test/ParallelIOTest.cpp b/test/ParallelIOTest.cpp
index f8516baf49..1cffd47a79 100644
--- a/test/ParallelIOTest.cpp
+++ b/test/ParallelIOTest.cpp
@@ -2340,6 +2340,33 @@ auto equalDisjointByVolume(
     return targetVolume == summarizedVolume;
 }
 
+void verifyHostnameAssignment(
+    chunk_assignment::PartialAssignment const &assignment,
+    chunk_assignment::RankMeta const &in,
+    chunk_assignment::RankMeta const &out)
+{
+    REQUIRE(!assignment.assigned.empty());
+    for (auto const &[out_rank, chunks] : assignment.assigned)
+    {
+        for (auto const &chunk : chunks)
+        {
+            REQUIRE(in.at(chunk.sourceID) == out.at(out_rank));
+        }
+    }
+    for (auto const &chunk : assignment.notAssigned)
+    {
+        auto const &hostname = in.at(chunk.sourceID);
+        REQUIRE(
+            std::none_of(
+                out.begin(),
+                out.end(),
+                [&hostname](
+                    chunk_assignment::RankMeta::value_type const &pair) {
+                    return pair.second == hostname;
+                }));
+    }
+}
+
 void run_test()
 {
     /*
@@ -2544,6 +2571,8 @@ void run_test()
             byHostnamePartialAssignment.assigned[mpi_rank],
             byHostnamePartialAssignment.notAssigned,
             MPI_COMM_WORLD));
+        verifyHostnameAssignment(
+            byHostnamePartialAssignment, rankMetaIn, readingRanksHostnames);
 
         /*
          * Same as above, but use RoundRobinOfSourceRanks this time, a strategy
@@ -2572,6 +2601,8 @@ void run_test()
             byHostnamePartialAssignment2.assigned[mpi_rank],
             byHostnamePartialAssignment2.notAssigned,
             MPI_COMM_WORLD));
+        verifyHostnameAssignment(
+            byHostnamePartialAssignment2, rankMetaIn, readingRanksHostnames);
 
         /*
          * Assign chunks by hostnames, once more.

From 83efecbff2ac233e7456ca8efd330864e8f3bc85 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franz=20P=C3=B6schel?= <franz.poeschel@gmail.com>
Date: Thu, 17 Jul 2025 14:58:04 +0200
Subject: [PATCH 24/27] CI fixes

---
 src/binding/python/openpmd_api/pipe/__main__.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/binding/python/openpmd_api/pipe/__main__.py b/src/binding/python/openpmd_api/pipe/__main__.py
index 2cdffeae9e..f5cdc30996 100644
--- a/src/binding/python/openpmd_api/pipe/__main__.py
+++ b/src/binding/python/openpmd_api/pipe/__main__.py
@@ -119,6 +119,7 @@ def __init__(self, source, dynamicView, offset, extent):
 # such as ByHostname. They may also call other strategies, as in
 # IncreaseGranularity defined below.
 
+
 # Example how to implement a simple partial strategy in Python
 class LoadOne(io.PartialStrategy):
     def __init__(self):
@@ -132,6 +133,7 @@ def assign(self, assignment, ranks_in, ranks_out, my_rank, num_ranks):
             assignment.assigned[my_rank].append(element)
         return assignment
 
+
 # Example how to implement a simple strategy in Python
 class LoadAll(io.Strategy):
 
@@ -146,6 +148,7 @@ def assign(self, assignment, ranks_in, ranks_out, my_rank, num_ranks):
             res[my_rank].extend(assignment.not_assigned)
         return res
 
+
 # A more complex distribution strategy. This creates supergroups of hostnames,
 # separately for the writer and reader ranks.
 # Every `granularity_in` writer hostnames are merged into one new hostname

From 206341bddf8137adaa5bce3cc82cbfa55d104845 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franz=20P=C3=B6schel?= <franz.poeschel@gmail.com>
Date: Fri, 18 Jul 2025 10:58:21 +0200
Subject: [PATCH 25/27] Automate CoreTest

---
 src/ChunkInfo.cpp |   6 +++
 test/CoreTest.cpp | 109 ++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 112 insertions(+), 3 deletions(-)

diff --git a/src/ChunkInfo.cpp b/src/ChunkInfo.cpp
index 646aa2a74c..0a98b2ddb5 100644
--- a/src/ChunkInfo.cpp
+++ b/src/ChunkInfo.cpp
@@ -444,6 +444,12 @@ namespace chunk_assignment
                 "[ByHostname] Invalid call: Rank meta information (hostnames) "
                 "incomplete.");
         }
+        if (!res.assigned.empty())
+        {
+            throw std::runtime_error(
+                "[ByHostname] No support for merging into partial "
+                "assignments.");
+        }
         // collect chunks by hostname
         std::map<std::string, ChunkTable> chunkGroups;
         ChunkTable &sourceChunks = res.notAssigned;
diff --git a/test/CoreTest.cpp b/test/CoreTest.cpp
index b1e82b7498..0e1034edf1 100644
--- a/test/CoreTest.cpp
+++ b/test/CoreTest.cpp
@@ -22,6 +22,7 @@
 #include <cstddef>
 #include <cstdint>
 #include <iostream>
+#include <numeric>
 #include <sstream>
 // cstdlib does not have setenv
 #include <stdlib.h> // NOLINT(modernize-deprecated-headers)
@@ -75,8 +76,14 @@ struct Params
         }
     }
 };
+
+static constexpr bool verbose = false;
 void print(RankMeta const &meta, ChunkTable const &table)
 {
+    if (!verbose)
+    {
+        return;
+    }
     for (auto const &chunk : table)
     {
         std::cout << "[HOST: " << meta.at(chunk.sourceID)
@@ -95,6 +102,10 @@ void print(RankMeta const &meta, ChunkTable const &table)
 }
 void print(RankMeta const &meta, Assignment const &table)
 {
+    if (!verbose)
+    {
+        return;
+    }
     for (auto &[rank, chunkList] : table)
     {
         std::cout << "[HOST: " << meta.at(rank) << ",\tRank: " << rank << "]"
@@ -115,6 +126,80 @@ void print(RankMeta const &meta, Assignment const &table)
         }
     }
 }
+
+static auto add = [](size_t left, size_t right) { return left + right; };
+auto mergeTable(ChunkTable const &chunkTable) -> ChunkTable const &
+{
+    return chunkTable;
+}
+// Flatten a per-rank Assignment into one ChunkTable, keeping rank order.
+auto mergeTable(chunk_assignment::Assignment const &assignment) -> ChunkTable
+{
+    ChunkTable merged;
+    // Pre-size the result. The init value must be a size_t: with `0u` the
+    // reduction would accumulate (and truncate) in unsigned int.
+    merged.reserve(
+        std::transform_reduce(
+            assignment.begin(),
+            assignment.end(),
+            size_t{0},
+            add,
+            [](chunk_assignment::Assignment::value_type const &pair) {
+                return pair.second.size();
+            }));
+    for (auto const &pair : assignment)
+    {
+        merged.insert(merged.end(), pair.second.begin(), pair.second.end());
+    }
+    return merged;
+}
+auto mergeTable(chunk_assignment::PartialAssignment const &assignment)
+{
+    auto const &[not_assigned, assigned] = assignment;
+    ChunkTable merged = mergeTable(assigned);
+    merged.reserve(merged.size() + not_assigned.size());
+    for (auto const &chunk : not_assigned)
+    {
+        merged.insert(merged.end(), chunk);
+    }
+    return merged;
+}
+
+template <typename ChunkTable1, typename ChunkTable2>
+auto equalTables(ChunkTable1 &&availableChunks, ChunkTable2 &&assignedChunks)
+{
+    return chunk_assignment::mergeChunksFromSameSourceID(
+               mergeTable(availableChunks)) ==
+        chunk_assignment::mergeChunksFromSameSourceID(
+               mergeTable(assignedChunks));
+}
+
+void verifyHostnameAssignment(
+    chunk_assignment::PartialAssignment const &assignment,
+    chunk_assignment::RankMeta const &in,
+    chunk_assignment::RankMeta const &out)
+{
+    REQUIRE(!assignment.assigned.empty());
+    for (auto const &[out_rank, chunks] : assignment.assigned)
+    {
+        for (auto const &chunk : chunks)
+        {
+            REQUIRE(in.at(chunk.sourceID) == out.at(out_rank));
+        }
+    }
+    for (auto const &chunk : assignment.notAssigned)
+    {
+        auto const &hostname = in.at(chunk.sourceID);
+        REQUIRE(
+            std::none_of(
+                out.begin(),
+                out.end(),
+                [&hostname](
+                    chunk_assignment::RankMeta::value_type const &pair) {
+                    return pair.second == hostname;
+                }));
+    }
+}
 } // namespace test_chunk_assignment
 
 TEST_CASE("chunk_assignment", "[core]")
@@ -124,13 +209,31 @@ TEST_CASE("chunk_assignment", "[core]")
     params.init(6, 2, 2, 1);
     test_chunk_assignment::print(params.metaSource, params.table);
     ByHostname byHostname(std::make_unique<RoundRobin>());
+
+    PartialAssignment partial_res0 = byHostname.assign(
+        params.table, params.metaSource, params.metaSink, 0, 2);
+    PartialAssignment partial_res1 = byHostname.assign(
+        params.table, params.metaSource, params.metaSink, 0, 2);
+
+    REQUIRE(partial_res0.notAssigned == partial_res1.notAssigned);
+    PartialAssignment partial_res{
+        partial_res0.notAssigned,
+        {{0, partial_res0.assigned[0]}, {1, partial_res1.assigned[1]}}};
+    test_chunk_assignment::verifyHostnameAssignment(
+        partial_res, params.metaSource, params.metaSink);
+
     FromPartialStrategy fullStrategy(
         std::make_unique<ByHostname>(std::move(byHostname)),
         std::make_unique<BinPacking>());
-    Assignment res = fullStrategy.assign(
+    Assignment res0 = fullStrategy.assign(
         params.table, params.metaSource, params.metaSink, 0, 2);
-    std::cout << "\nRESULTS:" << std::endl;
-    test_chunk_assignment::print(params.metaSink, res);
+    Assignment res1 = fullStrategy.assign(
+        params.table, params.metaSource, params.metaSink, 1, 2);
+    Assignment res = {{0, res0[0]}, {1, res1[1]}};
+
+    REQUIRE(test_chunk_assignment::equalTables(params.table, res));
+
+    test_chunk_assignment::print(params.metaSink, res1);
 }
 
 TEST_CASE("versions_test", "[core]")

From d82045cdb7f0f9f72fbf52d3f125b3cd1c22f2b1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franz=20P=C3=B6schel?= <franz.poeschel@gmail.com>
Date: Fri, 18 Jul 2025 11:23:37 +0200
Subject: [PATCH 26/27] Guard against unprintableString issue on Windows

---
 test/CoreTest.cpp       |  8 +++++---
 test/ParallelIOTest.cpp | 35 ++++++++++++++++++++++++++---------
 2 files changed, 31 insertions(+), 12 deletions(-)

diff --git a/test/CoreTest.cpp b/test/CoreTest.cpp
index 0e1034edf1..6c62e1d82e 100644
--- a/test/CoreTest.cpp
+++ b/test/CoreTest.cpp
@@ -190,7 +190,7 @@ void verifyHostnameAssignment(
     for (auto const &chunk : assignment.notAssigned)
     {
         auto const &hostname = in.at(chunk.sourceID);
-        REQUIRE(
+        OPENPMD_REQUIRE_GUARD_WINDOWS(
             std::none_of(
                 out.begin(),
                 out.end(),
@@ -215,7 +215,8 @@ TEST_CASE("chunk_assignment", "[core]")
     PartialAssignment partial_res1 = byHostname.assign(
         params.table, params.metaSource, params.metaSink, 0, 2);
 
-    REQUIRE(partial_res0.notAssigned == partial_res1.notAssigned);
+    OPENPMD_REQUIRE_GUARD_WINDOWS(
+        partial_res0.notAssigned == partial_res1.notAssigned);
     PartialAssignment partial_res{
         partial_res0.notAssigned,
         {{0, partial_res0.assigned[0]}, {1, partial_res1.assigned[1]}}};
@@ -231,7 +232,8 @@ TEST_CASE("chunk_assignment", "[core]")
         params.table, params.metaSource, params.metaSink, 1, 2);
     Assignment res = {{0, res0[0]}, {1, res1[1]}};
 
-    REQUIRE(test_chunk_assignment::equalTables(params.table, res));
+    OPENPMD_REQUIRE_GUARD_WINDOWS(
+        test_chunk_assignment::equalTables(params.table, res));
 
     test_chunk_assignment::print(params.metaSink, res1);
 }
diff --git a/test/ParallelIOTest.cpp b/test/ParallelIOTest.cpp
index 1cffd47a79..0bf5b972c5 100644
--- a/test/ParallelIOTest.cpp
+++ b/test/ParallelIOTest.cpp
@@ -41,6 +41,21 @@ TEST_CASE("none", "[parallel]")
 #include <tuple>
 #include <vector>
 
+// On Windows, REQUIRE() may fail to link when it has to print complex types:
+// CoreTest.obj : error LNK2001: unresolved external symbol
+// "class std::string const Catch::Detail::unprintableString" (...)
+// Hence evaluate to a plain bool first; no trailing ';' so call sites add it.
+#ifdef _WIN32
+#define OPENPMD_REQUIRE_GUARD_WINDOWS(...)                                     \
+    do                                                                         \
+    {                                                                          \
+        bool guarded_require_boolean = __VA_ARGS__;                            \
+        REQUIRE(guarded_require_boolean);                                      \
+    } while (0)
+#else
+#define OPENPMD_REQUIRE_GUARD_WINDOWS(...) REQUIRE(__VA_ARGS__)
+#endif
+
 using namespace openPMD;
 
 TEST_CASE("parallel_multi_series_test", "[parallel]")
@@ -2350,13 +2365,14 @@ void verifyHostnameAssignment(
     {
         for (auto const &chunk : chunks)
         {
-            REQUIRE(in.at(chunk.sourceID) == out.at(out_rank));
+            OPENPMD_REQUIRE_GUARD_WINDOWS(
+                in.at(chunk.sourceID) == out.at(out_rank));
         }
     }
     for (auto const &chunk : assignment.notAssigned)
     {
         auto const &hostname = in.at(chunk.sourceID);
-        REQUIRE(
+        OPENPMD_REQUIRE_GUARD_WINDOWS(
             std::none_of(
                 out.begin(),
                 out.end(),
@@ -2515,7 +2531,7 @@ void run_test()
          * are running on the same nodes.
          */
         auto rankMetaIn = series.rankTable(/* collective = */ true);
-        REQUIRE(rankMetaIn == writingRanksHostnames);
+        OPENPMD_REQUIRE_GUARD_WINDOWS(rankMetaIn == writingRanksHostnames);
 
         auto E_x = series.iterations[0].meshes["E"]["x"];
         /*
@@ -2536,7 +2552,8 @@ void run_test()
             chunkTable, rankMetaIn, readingRanksHostnames, mpi_rank, mpi_size);
         printAssignment(
             "ROUND ROBIN", roundRobinAssignment, readingRanksHostnames);
-        REQUIRE(equalTables(chunkTable, roundRobinAssignment));
+        OPENPMD_REQUIRE_GUARD_WINDOWS(
+            equalTables(chunkTable, roundRobinAssignment));
 
         /*
          * Assign chunks by hostname.
@@ -2560,7 +2577,7 @@ void run_test()
             "HOSTNAME, LEFTOVER",
             byHostnamePartialAssignment.notAssigned,
             rankMetaIn);
-        REQUIRE(equalDisjointByVolume(
+        OPENPMD_REQUIRE_GUARD_WINDOWS(equalDisjointByVolume(
             chunkTable,
             // Must restrict assignment to current rank, since
             // ByHostname strategy output *may* also contain chunks from
@@ -2590,7 +2607,7 @@ void run_test()
             "HOSTNAME2, LEFTOVER",
             byHostnamePartialAssignment2.notAssigned,
             rankMetaIn);
-        REQUIRE(equalDisjointByVolume(
+        OPENPMD_REQUIRE_GUARD_WINDOWS(equalDisjointByVolume(
             chunkTable,
             // Must restrict assignment to current rank, since
             // ByHostname strategy output *may* also contain chunks from
@@ -2622,7 +2639,7 @@ void run_test()
             "HOSTNAME WITH SECOND PASS",
             fromPartialAssignment,
             readingRanksHostnames);
-        REQUIRE(equalDisjointByVolume(
+        OPENPMD_REQUIRE_GUARD_WINDOWS(equalDisjointByVolume(
             chunkTable,
             // Must restrict assignment to current rank, since
             // ByHostname strategy output *may* also contain chunks from
@@ -2661,7 +2678,7 @@ void run_test()
         auto blocksAssignment = blocksStrategy.assign(
             chunkTable, rankMetaIn, readingRanksHostnames, mpi_rank, mpi_size);
         printAssignment("BLOCKS", blocksAssignment, readingRanksHostnames);
-        REQUIRE(equalDisjointByVolume(
+        OPENPMD_REQUIRE_GUARD_WINDOWS(equalDisjointByVolume(
             chunkTable, blocksAssignment, std::nullopt, MPI_COMM_WORLD));
 
         BlocksOfSourceRanks blocksOfSourceRanksStrategy;
@@ -2671,7 +2688,7 @@ void run_test()
             "BLOCKS OF SOURCE RANKS",
             blocksOfSourceRanksAssignment,
             readingRanksHostnames);
-        REQUIRE(equalDisjointByVolume(
+        OPENPMD_REQUIRE_GUARD_WINDOWS(equalDisjointByVolume(
             chunkTable,
             blocksOfSourceRanksAssignment,
             std::nullopt,

From 62e3bf05cd8ad36c8422e7fefdd137a8653fb622 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franz=20P=C3=B6schel?= <franz.poeschel@gmail.com>
Date: Thu, 7 Aug 2025 18:11:54 +0200
Subject: [PATCH 27/27] Use generic flag instead of -n 2

---
 CMakeLists.txt | 48 ++++++++++++++++++++++++------------------------
 1 file changed, 24 insertions(+), 24 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 49b62a8bbd..ddd2cfd716 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1346,30 +1346,30 @@ if(openPMD_BUILD_TESTING)
                 )
                 add_test(NAME CLI.pipe.py
                     COMMAND sh -c
-                        "${MPI_TEST_EXE} -n 2 ${Python_EXECUTABLE}                 \
-                            ${openPMD_RUNTIME_OUTPUT_DIRECTORY}/openpmd-pipe       \
-                            --infile ../samples/git-sample/data%T.h5               \
-                            --outfile ../samples/git-sample/data%T.bp &&           \
-                                                                                   \
-                        ${MPI_TEST_EXE} ${Python_EXECUTABLE}                       \
-                            ${openPMD_RUNTIME_OUTPUT_DIRECTORY}/openpmd-pipe       \
-                            --infile ../samples/git-sample/data00000100.h5         \
-                            --outfile                                              \
-                                ../samples/git-sample/single_iteration_%T.bp &&    \
-                                                                                   \
-                        ${MPI_TEST_EXE} -n 2 ${Python_EXECUTABLE}                  \
-                            ${openPMD_RUNTIME_OUTPUT_DIRECTORY}/openpmd-pipe       \
-                            --infile ../samples/git-sample/thetaMode/data%T.h5     \
-                            --outfile                                              \
-                                ../samples/git-sample/thetaMode/data_%T.bp &&      \
-                                                                                   \
-                        ${MPI_TEST_EXE} ${Python_EXECUTABLE}                       \
-                            ${openPMD_RUNTIME_OUTPUT_DIRECTORY}/openpmd-pipe       \
-                            --infile ../samples/git-sample/thetaMode/data_%T.bp    \
-                            --outfile ../samples/git-sample/thetaMode/data%T.json  \
-                            --outconfig '                                          \
-                                json.attribute.mode = \"short\"                  \n\
-                                json.dataset.mode = \"template_no_warn\"'          \
+                        "${MPI_TEST_EXE} ${MPIEXEC_NUMPROC_FLAG} 2 ${Python_EXECUTABLE} \
+                            ${openPMD_RUNTIME_OUTPUT_DIRECTORY}/openpmd-pipe            \
+                            --infile ../samples/git-sample/data%T.h5                    \
+                            --outfile ../samples/git-sample/data%T.bp &&                \
+                                                                                        \
+                        ${MPI_TEST_EXE} ${Python_EXECUTABLE}                            \
+                            ${openPMD_RUNTIME_OUTPUT_DIRECTORY}/openpmd-pipe            \
+                            --infile ../samples/git-sample/data00000100.h5              \
+                            --outfile                                                   \
+                                ../samples/git-sample/single_iteration_%T.bp &&         \
+                                                                                        \
+                        ${MPI_TEST_EXE} ${MPIEXEC_NUMPROC_FLAG} 2 ${Python_EXECUTABLE}  \
+                            ${openPMD_RUNTIME_OUTPUT_DIRECTORY}/openpmd-pipe            \
+                            --infile ../samples/git-sample/thetaMode/data%T.h5          \
+                            --outfile                                                   \
+                                ../samples/git-sample/thetaMode/data_%T.bp &&           \
+                                                                                        \
+                        ${MPI_TEST_EXE} ${Python_EXECUTABLE}                            \
+                            ${openPMD_RUNTIME_OUTPUT_DIRECTORY}/openpmd-pipe            \
+                            --infile ../samples/git-sample/thetaMode/data_%T.bp         \
+                            --outfile ../samples/git-sample/thetaMode/data%T.json       \
+                            --outconfig '                                               \
+                                json.attribute.mode = \"short\"                       \n\
+                                json.dataset.mode = \"template_no_warn\"'               \
                         "
                     WORKING_DIRECTORY ${openPMD_RUNTIME_OUTPUT_DIRECTORY}
                 )