From a6eabfb7f677509244c4f53daacdca85a182ce4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Thu, 2 Mar 2023 13:34:07 +0100 Subject: [PATCH 01/27] Add chunk distribution algorithms --- include/openPMD/ChunkInfo.hpp | 237 +++++++++ include/openPMD/benchmark/mpi/BlockSlicer.hpp | 4 + .../mpi/OneDimensionalBlockSlicer.hpp | 2 + src/ChunkInfo.cpp | 492 ++++++++++++++++++ .../mpi/OneDimensionalBlockSlicer.cpp | 5 + 5 files changed, 740 insertions(+) diff --git a/include/openPMD/ChunkInfo.hpp b/include/openPMD/ChunkInfo.hpp index 9bc6e94972..b44379b2aa 100644 --- a/include/openPMD/ChunkInfo.hpp +++ b/include/openPMD/ChunkInfo.hpp @@ -23,6 +23,7 @@ #include "openPMD/config.hpp" #include "openPMD/Dataset.hpp" // Offset, Extent +#include "openPMD/benchmark/mpi/BlockSlicer.hpp" #if openPMD_HAVE_MPI #include @@ -84,7 +85,243 @@ using ChunkTable = std::vector; namespace chunk_assignment { + constexpr char const *HOSTFILE_VARNAME = "MPI_WRITTEN_HOSTFILE"; + using RankMeta = std::map; + + using Assignment = std::map>; + + struct PartialAssignment + { + ChunkTable notAssigned; + Assignment assigned; + + explicit PartialAssignment() = default; + PartialAssignment(ChunkTable notAssigned); + PartialAssignment(ChunkTable notAssigned, Assignment assigned); + }; + + /** + * @brief Interface for a chunk distribution strategy. + * + * Used for implementing algorithms that read a ChunkTable as produced + * by BaseRecordComponent::availableChunks() and produce as result a + * ChunkTable that guides data sinks on how to load data into reading + * processes. + */ + struct Strategy + { + Assignment assign( + ChunkTable, + RankMeta const &rankMetaIn, + RankMeta const &rankMetaOut); + /** + * @brief Assign chunks to be loaded to reading processes. + * + * @param partialAssignment Two chunktables, one of unassigned chunks + * and one of chunks that might have already been assigned + * previously. + * Merge the unassigned chunks into the partially assigned table. 
+ * @param in Meta information on writing processes, e.g. hostnames. + * @param out Meta information on reading processes, e.g. hostnames. + * @return ChunkTable A table that assigns chunks to reading processes. + */ + virtual Assignment assign( + PartialAssignment partialAssignment, + RankMeta const &in, + RankMeta const &out) = 0; + + virtual std::unique_ptr clone() const = 0; + + virtual ~Strategy() = default; + }; + + /** + * @brief A chunk distribution strategy that guarantees no complete + * distribution. + * + * Combine with a full Strategy using the FromPartialStrategy struct to + * obtain a Strategy that works in two phases: + * 1. Apply the partial strategy. + * 2. Apply the full strategy to assign unassigned leftovers. + * + */ + struct PartialStrategy + { + PartialAssignment + assign(ChunkTable table, RankMeta const &in, RankMeta const &out); + /** + * @brief Assign chunks to be loaded to reading processes. + * + * @param partialAssignment Two chunktables, one of unassigned chunks + * and one of chunks that might have already been assigned + * previously. + * Merge the unassigned chunks into the partially assigned table. + * @param in Meta information on writing processes, e.g. hostnames. + * @param out Meta information on reading processes, e.g. hostnames. + * @return PartialAssignment Two chunktables, one of leftover chunks + * that were not assigned and one that assigns chunks to + * reading processes. + */ + virtual PartialAssignment assign( + PartialAssignment partialAssignment, + RankMeta const &in, + RankMeta const &out) = 0; + + virtual std::unique_ptr clone() const = 0; + + virtual ~PartialStrategy() = default; + }; + + /** + * @brief Combine a PartialStrategy and a Strategy to obtain a Strategy + * working in two phases. + * + * 1. Apply the PartialStrategy to obtain a PartialAssignment. + * This may be a heuristic that will not work under all circumstances, + * e.g. trying to distribute chunks within the same compute node. + * 2. 
Apply the Strategy to assign leftovers. + * This guarantees correctness in case the heuristics in the first phase + * were not applicable e.g. due to a suboptimal setup. + * + */ + struct FromPartialStrategy : Strategy + { + FromPartialStrategy( + std::unique_ptr firstPass, + std::unique_ptr secondPass); + + virtual Assignment assign( + PartialAssignment, + RankMeta const &in, + RankMeta const &out) override; + + virtual std::unique_ptr clone() const override; + + private: + std::unique_ptr m_firstPass; + std::unique_ptr m_secondPass; + }; + + /** + * @brief Simple strategy that assigns produced chunks to reading processes + * in a round-Robin manner. + * + */ + struct RoundRobin : Strategy + { + Assignment assign( + PartialAssignment, + RankMeta const &in, + RankMeta const &out) override; + + virtual std::unique_ptr clone() const override; + }; + + /** + * @brief Strategy that assigns chunks to be read by processes within + * the same host that produced the chunk. + * + * The distribution strategy within one such chunk can be flexibly + * chosen. + * + */ + struct ByHostname : PartialStrategy + { + ByHostname(std::unique_ptr withinNode); + + PartialAssignment assign( + PartialAssignment, + RankMeta const &in, + RankMeta const &out) override; + + virtual std::unique_ptr clone() const override; + + private: + std::unique_ptr m_withinNode; + }; + + /** + * @brief Slice the n-dimensional dataset into hyperslabs and distribute + * chunks according to them. + * + * This strategy only produces chunks in the returned ChunkTable for the + * calling parallel process. + * Incoming chunks are intersected with the hyperslab and assigned to the + * current parallel process in case this intersection is non-empty. 
+ * + */ + struct ByCuboidSlice : Strategy + { + ByCuboidSlice( + std::unique_ptr blockSlicer, + Extent totalExtent, + unsigned int mpi_rank, + unsigned int mpi_size); + + Assignment assign( + PartialAssignment, + RankMeta const &in, + RankMeta const &out) override; + + virtual std::unique_ptr clone() const override; + + private: + std::unique_ptr blockSlicer; + Extent totalExtent; + unsigned int mpi_rank, mpi_size; + }; + + /** + * @brief Strategy that tries to assign chunks in a balanced manner without + * arbitrarily cutting chunks. + * + * Idea: + * Calculate the ideal amount of data to be loaded per parallel process + * and cut chunks s.t. no chunk is larger than that ideal size. + * The resulting problem is an instance of the Bin-Packing problem which + * can be solved by a factor-2 approximation, meaning that a reading process + * will be assigned at worst twice the ideal amount of data. + * + */ + struct BinPacking : Strategy + { + size_t splitAlongDimension = 0; + + /** + * @param splitAlongDimension If a chunk needs to be split, split it + * along this dimension. + */ + BinPacking(size_t splitAlongDimension = 0); + + Assignment assign( + PartialAssignment, + RankMeta const &in, + RankMeta const &out) override; + + virtual std::unique_ptr clone() const override; + }; + + /** + * @brief Strategy that purposefully fails when the PartialAssignment has + * leftover chunks. + * + * Useful as second phase in FromPartialStrategy to assert that the first + * pass of the strategy catches all blocks, e.g. to assert that all chunks + * can be assigned within the same compute node. 
+ * + */ + struct FailingStrategy : Strategy + { + explicit FailingStrategy(); + + Assignment assign( + PartialAssignment, + RankMeta const &in, + RankMeta const &out) override; + + virtual std::unique_ptr clone() const override; + }; } // namespace chunk_assignment namespace host_info diff --git a/include/openPMD/benchmark/mpi/BlockSlicer.hpp b/include/openPMD/benchmark/mpi/BlockSlicer.hpp index c66716217a..a720793b41 100644 --- a/include/openPMD/benchmark/mpi/BlockSlicer.hpp +++ b/include/openPMD/benchmark/mpi/BlockSlicer.hpp @@ -23,6 +23,8 @@ #include "openPMD/Dataset.hpp" +#include + namespace openPMD { /** @@ -42,6 +44,8 @@ class BlockSlicer virtual std::pair sliceBlock(Extent &totalExtent, int size, int rank) = 0; + virtual std::unique_ptr clone() const = 0; + /** This class will be derived from */ virtual ~BlockSlicer() = default; diff --git a/include/openPMD/benchmark/mpi/OneDimensionalBlockSlicer.hpp b/include/openPMD/benchmark/mpi/OneDimensionalBlockSlicer.hpp index 78f955524b..cb12da9350 100644 --- a/include/openPMD/benchmark/mpi/OneDimensionalBlockSlicer.hpp +++ b/include/openPMD/benchmark/mpi/OneDimensionalBlockSlicer.hpp @@ -35,5 +35,7 @@ class OneDimensionalBlockSlicer : public BlockSlicer std::pair sliceBlock(Extent &totalExtent, int size, int rank) override; + + virtual std::unique_ptr clone() const override; }; } // namespace openPMD diff --git a/src/ChunkInfo.cpp b/src/ChunkInfo.cpp index 5acb1ea07e..7b6c1e32ca 100644 --- a/src/ChunkInfo.cpp +++ b/src/ChunkInfo.cpp @@ -23,6 +23,10 @@ #include "openPMD/auxiliary/Mpi.hpp" +#include // std::sort +#include +#include +#include #include #ifdef _WIN32 @@ -62,6 +66,494 @@ bool WrittenChunkInfo::operator==(WrittenChunkInfo const &other) const this->ChunkInfo::operator==(other); } +namespace chunk_assignment +{ + namespace + { + std::map > + ranksPerHost(RankMeta const &rankMeta) + { + std::map > res; + for (auto const &pair : rankMeta) + { + auto &list = res[pair.second]; + list.emplace_back(pair.first); 
+ } + return res; + } + } // namespace + + Assignment Strategy::assign( + ChunkTable table, RankMeta const &rankIn, RankMeta const &rankOut) + { + if (rankOut.size() == 0) + { + throw std::runtime_error("[assignChunks] No output ranks defined"); + } + return this->assign( + PartialAssignment(std::move(table)), rankIn, rankOut); + } + + PartialAssignment::PartialAssignment( + ChunkTable notAssigned_in, Assignment assigned_in) + : notAssigned(std::move(notAssigned_in)) + , assigned(std::move(assigned_in)) + {} + + PartialAssignment::PartialAssignment(ChunkTable notAssigned_in) + : PartialAssignment(std::move(notAssigned_in), Assignment()) + {} + + PartialAssignment PartialStrategy::assign( + ChunkTable table, RankMeta const &rankIn, RankMeta const &rankOut) + { + return this->assign( + PartialAssignment(std::move(table)), rankIn, rankOut); + } + + FromPartialStrategy::FromPartialStrategy( + std::unique_ptr firstPass, + std::unique_ptr secondPass) + : m_firstPass(std::move(firstPass)), m_secondPass(std::move(secondPass)) + {} + + Assignment FromPartialStrategy::assign( + PartialAssignment partialAssignment, + RankMeta const &in, + RankMeta const &out) + { + return m_secondPass->assign( + m_firstPass->assign(std::move(partialAssignment), in, out), + in, + out); + } + + std::unique_ptr FromPartialStrategy::clone() const + { + return std::unique_ptr(new FromPartialStrategy( + m_firstPass->clone(), m_secondPass->clone())); + } + + Assignment RoundRobin::assign( + PartialAssignment partialAssignment, + RankMeta const &, // ignored parameter + RankMeta const &out) + { + if (out.size() == 0) + { + throw std::runtime_error( + "[RoundRobin] Cannot round-robin to zero ranks."); + } + auto it = out.begin(); + auto nextRank = [&it, &out]() { + if (it == out.end()) + { + it = out.begin(); + } + auto res = it->first; + it++; + return res; + }; + ChunkTable &sourceChunks = partialAssignment.notAssigned; + Assignment &sinkChunks = partialAssignment.assigned; + for (auto &chunk : 
sourceChunks) + { + chunk.sourceID = nextRank(); + sinkChunks[chunk.sourceID].push_back(std::move(chunk)); + } + return sinkChunks; + } + + std::unique_ptr RoundRobin::clone() const + { + return std::unique_ptr(new RoundRobin); + } + + ByHostname::ByHostname(std::unique_ptr withinNode) + : m_withinNode(std::move(withinNode)) + {} + + PartialAssignment ByHostname::assign( + PartialAssignment res, RankMeta const &in, RankMeta const &out) + { + // collect chunks by hostname + std::map chunkGroups; + ChunkTable &sourceChunks = res.notAssigned; + Assignment &sinkChunks = res.assigned; + { + ChunkTable leftover; + for (auto &chunk : sourceChunks) + { + auto it = in.find(chunk.sourceID); + if (it == in.end()) + { + leftover.push_back(std::move(chunk)); + } + else + { + std::string const &hostname = it->second; + ChunkTable &chunksOnHost = chunkGroups[hostname]; + chunksOnHost.push_back(std::move(chunk)); + } + } + // undistributed chunks will be put back in later on + sourceChunks.clear(); + for (auto &chunk : leftover) + { + sourceChunks.push_back(std::move(chunk)); + } + } + // chunkGroups will now contain chunks by hostname + // the ranks are the source ranks + + // which ranks live on host in the sink? + std::map > ranksPerHostSink = + ranksPerHost(out); + for (auto &chunkGroup : chunkGroups) + { + std::string const &hostname = chunkGroup.first; + // find reading ranks on the sink host with same name + auto it = ranksPerHostSink.find(hostname); + if (it == ranksPerHostSink.end() || it->second.empty()) + { + /* + * These are leftover, go back to the input. 
+ */ + for (auto &chunk : chunkGroup.second) + { + sourceChunks.push_back(std::move(chunk)); + } + } + else + { + RankMeta ranksOnTargetNode; + for (unsigned int rank : it->second) + { + ranksOnTargetNode[rank] = hostname; + } + Assignment swapped; + swapped.swap(sinkChunks); + sinkChunks = m_withinNode->assign( + PartialAssignment(chunkGroup.second, std::move(swapped)), + in, + ranksOnTargetNode); + } + } + return res; + } + + std::unique_ptr ByHostname::clone() const + { + return std::unique_ptr( + new ByHostname(m_withinNode->clone())); + } + + ByCuboidSlice::ByCuboidSlice( + std::unique_ptr blockSlicer_in, + Extent totalExtent_in, + unsigned int mpi_rank_in, + unsigned int mpi_size_in) + : blockSlicer(std::move(blockSlicer_in)) + , totalExtent(std::move(totalExtent_in)) + , mpi_rank(mpi_rank_in) + , mpi_size(mpi_size_in) + {} + + namespace + { + /** + * @brief Compute the intersection of two chunks. + * + * @param offset Offset of chunk 1, result will be written in place. + * @param extent Extent of chunk 1, result will be written in place. + * @param withinOffset Offset of chunk 2. + * @param withinExtent Extent of chunk 2. 
+ */ + void restrictToSelection( + Offset &offset, + Extent &extent, + Offset const &withinOffset, + Extent const &withinExtent) + { + for (size_t i = 0; i < offset.size(); ++i) + { + if (offset[i] < withinOffset[i]) + { + auto delta = withinOffset[i] - offset[i]; + offset[i] = withinOffset[i]; + if (delta > extent[i]) + { + extent[i] = 0; + } + else + { + extent[i] -= delta; + } + } + auto totalExtent = extent[i] + offset[i]; + auto totalWithinExtent = withinExtent[i] + withinOffset[i]; + if (totalExtent > totalWithinExtent) + { + auto delta = totalExtent - totalWithinExtent; + if (delta > extent[i]) + { + extent[i] = 0; + } + else + { + extent[i] -= delta; + } + } + } + } + + struct SizedChunk + { + WrittenChunkInfo chunk; + size_t dataSize; + + SizedChunk(WrittenChunkInfo chunk_in, size_t dataSize_in) + : chunk(std::move(chunk_in)), dataSize(dataSize_in) + {} + }; + + /** + * @brief Slice chunks to a maximum size and sort those by size. + * + * Chunks are sliced into hyperslabs along a specified dimension. + * Returned chunks may be larger than the specified maximum size + * if hyperslabs of thickness 1 are larger than that size. + * + * @param table Chunks of arbitrary sizes. + * @param maxSize The maximum size that returned chunks should have. + * @param dimension The dimension along which to create hyperslabs. 
+ */ + std::vector splitToSizeSorted( + ChunkTable const &table, size_t maxSize, size_t const dimension = 0) + { + std::vector res; + for (auto const &chunk : table) + { + auto const &extent = chunk.extent; + size_t sliceSize = 1; + for (size_t i = 0; i < extent.size(); ++i) + { + if (i == dimension) + { + continue; + } + sliceSize *= extent[i]; + } + if (sliceSize == 0) + { + std::cerr << "Chunktable::splitToSizeSorted: encountered " + "zero-sized chunk" + << std::endl; + continue; + } + + // this many slices go in one packet before it exceeds the max + // size + size_t streakLength = maxSize / sliceSize; + if (streakLength == 0) + { + // otherwise we get caught in an endless loop + ++streakLength; + } + size_t const slicedDimensionExtent = extent[dimension]; + + for (size_t currentPosition = 0;; + currentPosition += streakLength) + { + WrittenChunkInfo newChunk = chunk; + newChunk.offset[dimension] += currentPosition; + if (currentPosition + streakLength >= slicedDimensionExtent) + { + newChunk.extent[dimension] = + slicedDimensionExtent - currentPosition; + size_t chunkSize = + newChunk.extent[dimension] * sliceSize; + res.emplace_back(std::move(newChunk), chunkSize); + break; + } + else + { + newChunk.extent[dimension] = streakLength; + res.emplace_back( + std::move(newChunk), streakLength * sliceSize); + } + } + } + std::sort( + res.begin(), + res.end(), + [](SizedChunk const &left, SizedChunk const &right) { + return right.dataSize < left.dataSize; // decreasing order + }); + return res; + } + } // namespace + + Assignment ByCuboidSlice::assign( + PartialAssignment res, RankMeta const &, RankMeta const &) + { + ChunkTable &sourceSide = res.notAssigned; + Assignment &sinkSide = res.assigned; + Offset myOffset; + Extent myExtent; + std::tie(myOffset, myExtent) = + blockSlicer->sliceBlock(totalExtent, mpi_size, mpi_rank); + + for (auto &chunk : sourceSide) + { + restrictToSelection(chunk.offset, chunk.extent, myOffset, myExtent); + for (auto ext : chunk.extent) 
+ { + if (ext == 0) + { + goto outer_loop; + } + } + sinkSide[mpi_rank].push_back(std::move(chunk)); + outer_loop:; + } + + return res.assigned; + } + + std::unique_ptr ByCuboidSlice::clone() const + { + return std::unique_ptr(new ByCuboidSlice( + blockSlicer->clone(), totalExtent, mpi_rank, mpi_size)); + } + + BinPacking::BinPacking(size_t splitAlongDimension_in) + : splitAlongDimension(splitAlongDimension_in) + {} + + Assignment BinPacking::assign( + PartialAssignment res, RankMeta const &, RankMeta const &sinkRanks) + { + ChunkTable &sourceChunks = res.notAssigned; + Assignment &sinkChunks = res.assigned; + size_t totalExtent = 0; + for (auto const &chunk : sourceChunks) + { + size_t chunkExtent = 1; + for (auto ext : chunk.extent) + { + chunkExtent *= ext; + } + totalExtent += chunkExtent; + } + size_t const idealSize = totalExtent / sinkRanks.size(); + /* + * Split chunks into subchunks of size at most idealSize. + * The resulting list of chunks is sorted by chunk size in decreasing + * order. This is important for the greedy Bin-Packing approximation + * algorithm. + * Under sub-ideal circumstances, chunks may not be splittable small + * enough. This algorithm will still produce results just fine in that + * case, but it will not keep the factor-2 approximation. + */ + std::vector digestibleChunks = + splitToSizeSorted(sourceChunks, idealSize, splitAlongDimension); + + /* + * Worker lambda: Iterate the reading processes once and greedily assign + * the largest chunks to them without exceeding idealSize amount of + * data per process. + */ + auto worker = + [&sinkRanks, &digestibleChunks, &sinkChunks, idealSize]() { + for (auto const &destRank : sinkRanks) + { + /* + * Within the second call of the worker lambda, this will + * not be true any longer, strictly speaking. The trick of + * this algorithm is to pretend that it is. 
+ */ + size_t leftoverSize = idealSize; + { + auto it = digestibleChunks.begin(); + while (it != digestibleChunks.end()) + { + if (it->dataSize >= idealSize) + { + /* + * This branch is only taken if it was not + * possible to slice chunks small enough -- or + * exactly the right size. In any case, the + * chunk will be the only one assigned to the + * process within this call of the worker + * lambda, so the loop can be broken out of. + */ + sinkChunks[destRank.first].push_back( + std::move(it->chunk)); + digestibleChunks.erase(it); + break; + } + else if (it->dataSize <= leftoverSize) + { + // assign smaller chunks as long as they fit + sinkChunks[destRank.first].push_back( + std::move(it->chunk)); + leftoverSize -= it->dataSize; + it = digestibleChunks.erase(it); + } + else + { + // look for smaller chunks + ++it; + } + } + } + } + }; + // sic! + // run the worker twice to implement a factor-two approximation + // of the bin packing problem + worker(); + worker(); + /* + * By the nature of the greedy approach, each iteration of the outer + * for loop in the worker assigns chunks to the current rank that sum + * up to at least more than half of the allowed idealSize. (Until it + * runs out of chunks). + * This means that calling the worker twice guarantees a full + * distribution. 
+ */ + + return sinkChunks; + } + + std::unique_ptr BinPacking::clone() const + { + return std::unique_ptr(new BinPacking(splitAlongDimension)); + } + + FailingStrategy::FailingStrategy() = default; + + Assignment FailingStrategy::assign( + PartialAssignment assignment, RankMeta const &, RankMeta const &) + { + if (assignment.notAssigned.empty()) + { + return assignment.assigned; + } + else + { + throw std::runtime_error( + "[FailingStrategy] There are unassigned chunks!"); + } + } + + std::unique_ptr FailingStrategy::clone() const + { + return std::make_unique(); + } +} // namespace chunk_assignment + namespace host_info { constexpr size_t MAX_HOSTNAME_LENGTH = 256; diff --git a/src/benchmark/mpi/OneDimensionalBlockSlicer.cpp b/src/benchmark/mpi/OneDimensionalBlockSlicer.cpp index e494b175de..7fbb734faa 100644 --- a/src/benchmark/mpi/OneDimensionalBlockSlicer.cpp +++ b/src/benchmark/mpi/OneDimensionalBlockSlicer.cpp @@ -72,4 +72,9 @@ OneDimensionalBlockSlicer::sliceBlock(Extent &totalExtent, int size, int rank) } return std::make_pair(std::move(offs), std::move(localExtent)); } + +std::unique_ptr OneDimensionalBlockSlicer::clone() const +{ + return std::unique_ptr(new OneDimensionalBlockSlicer(m_dim)); +} } // namespace openPMD From e2acd494b7830686fd2220c07fb3f3435144b09f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Thu, 2 Mar 2023 13:46:10 +0100 Subject: [PATCH 02/27] Python bindings: Chunk distribution algorithms --- include/openPMD/binding/python/Common.hpp | 10 +++ src/binding/python/ChunkInfo.cpp | 89 +++++++++++++++++++++++ 2 files changed, 99 insertions(+) diff --git a/include/openPMD/binding/python/Common.hpp b/include/openPMD/binding/python/Common.hpp index c72d72ce83..b21d490070 100644 --- a/include/openPMD/binding/python/Common.hpp +++ b/include/openPMD/binding/python/Common.hpp @@ -8,6 +8,7 @@ */ #pragma once +#include "openPMD/ChunkInfo.hpp" #include "openPMD/Iteration.hpp" #include "openPMD/Mesh.hpp" #include 
"openPMD/ParticlePatches.hpp" @@ -33,6 +34,15 @@ // not yet used: // pybind11/functional.h // for std::function +using PyVecChunkInfo = std::vector; + +PYBIND11_MAKE_OPAQUE(openPMD::ChunkInfo) +PYBIND11_MAKE_OPAQUE(PyVecChunkInfo) +PYBIND11_MAKE_OPAQUE(openPMD::WrittenChunkInfo) +PYBIND11_MAKE_OPAQUE(openPMD::ChunkTable) +PYBIND11_MAKE_OPAQUE(openPMD::chunk_assignment::Assignment) +PYBIND11_MAKE_OPAQUE(openPMD::chunk_assignment::PartialAssignment) + // used exclusively in all our Python .cpp files namespace py = pybind11; using namespace openPMD; diff --git a/src/binding/python/ChunkInfo.cpp b/src/binding/python/ChunkInfo.cpp index 3d0837d504..76deca3a10 100644 --- a/src/binding/python/ChunkInfo.cpp +++ b/src/binding/python/ChunkInfo.cpp @@ -19,12 +19,14 @@ * If not, see . */ #include "openPMD/ChunkInfo.hpp" +#include "openPMD/benchmark/mpi/OneDimensionalBlockSlicer.hpp" #include "openPMD/binding/python/Mpi.hpp" #include "openPMD/binding/python/Common.hpp" #include #include +#include // std::move void init_Chunk(py::module &m) { @@ -102,4 +104,91 @@ void init_Chunk(py::module &m) return host_info::byMethod(self); }) .def("available", &host_info::methodAvailable); + + + using namespace chunk_assignment; + + (void)py::class_(m, "PartialStrategy"); + + py::class_(m, "PartialStrategy") + .def( + "assign", + py::overload_cast( + &PartialStrategy::assign), + py::arg("chunk_table"), + py::arg("rank_meta_in") = RankMeta(), + py::arg("rank_meta_out") = RankMeta()) + .def( + "assign", + py::overload_cast< + PartialAssignment, + RankMeta const &, + RankMeta const &>(&PartialStrategy::assign), + py::arg("partial_assignment"), + py::arg("rank_meta_in") = RankMeta(), + py::arg("rank_meta_out") = RankMeta()); + + py::class_(m, "Strategy") + .def( + "assign", + py::overload_cast( + &Strategy::assign), + py::arg("chunk_table"), + py::arg("rank_meta_in") = RankMeta(), + py::arg("rank_meta_out") = RankMeta()) + .def( + "assign", + py::overload_cast< + PartialAssignment, + RankMeta 
const &, + RankMeta const &>(&Strategy::assign), + py::arg("partial_assignment"), + py::arg("rank_meta_in") = RankMeta(), + py::arg("rank_meta_out") = RankMeta()); + + py::class_(m, "FromPartialStrategy") + .def(py::init([](PartialStrategy const &firstPass, + Strategy const &secondPass) { + return FromPartialStrategy(firstPass.clone(), secondPass.clone()); + })); + + py::class_(m, "RoundRobin").def(py::init<>()); + + py::class_(m, "ByHostname") + .def( + py::init([](Strategy const &withinNode) { + return ByHostname(withinNode.clone()); + }), + py::arg("strategy_within_node")); + + (void)py::class_(m, "BlockSlicer"); + + py::class_( + m, "OneDimensionalBlockSlicer") + .def(py::init<>()) + .def(py::init(), py::arg("dim")); + + py::class_(m, "ByCuboidSlice") + .def( + py::init([](BlockSlicer const &blockSlicer, + Extent totalExtent, + unsigned int mpi_rank, + unsigned int mpi_size) { + return ByCuboidSlice( + blockSlicer.clone(), + std::move(totalExtent), + mpi_rank, + mpi_size); + }), + py::arg("block_slicer"), + py::arg("total_extent"), + py::arg("mpi_rank"), + py::arg("mpi_size")); + + py::class_(m, "BinPacking") + .def(py::init<>()) + .def(py::init(), py::arg("split_along_dimension")); + + py::class_(m, "FailingStrategy") + .def(py::init<>()); } From 72da3986b3277a012fa925c4ddec80e77462f17d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Thu, 2 Mar 2023 13:47:06 +0100 Subject: [PATCH 03/27] Use chunk distribtion algorithms in openpmd-pipe --- .../python/openpmd_api/pipe/__main__.py | 145 +++++++++--------- 1 file changed, 70 insertions(+), 75 deletions(-) diff --git a/src/binding/python/openpmd_api/pipe/__main__.py b/src/binding/python/openpmd_api/pipe/__main__.py index 7bd5305c56..96802530b0 100644 --- a/src/binding/python/openpmd_api/pipe/__main__.py +++ b/src/binding/python/openpmd_api/pipe/__main__.py @@ -10,6 +10,7 @@ """ import argparse import os # os.path.basename +import re import sys # sys.stderr.write from .. 
import openpmd_api_cxx as io @@ -39,8 +40,14 @@ def parse_args(program_name): By default, the openPMD-api will be initialized without an MPI communicator if the MPI size is 1. This is to simplify the use of the JSON backend which is only available in serial openPMD. -With parallelization enabled, each dataset will be equally sliced along -the dimension with the largest extent. +With parallelization enabled, each dataset will be equally sliced according to +a chunk distribution strategy which may be selected via the environment +variable OPENPMD_CHUNK_DISTRIBUTION. Options include "roundrobin", +"binpacking", "slicedataset" and "hostname_<1>_<2>", where <1> should be +replaced with a strategy to be applied within a compute node and <2> with a +secondary strategy in case the hostname strategy does not distribute +all chunks. +The default is `hostname_binpacking_slicedataset`. Examples: {0} --infile simData.h5 --outfile simData_%T.bp @@ -99,65 +106,6 @@ def __init__(self): self.rank = 0 -class Chunk: - """ - A Chunk is an n-dimensional hypercube, defined by an offset and an extent. - Offset and extent must be of the same dimensionality (Chunk.__len__). - """ - def __init__(self, offset, extent): - assert (len(offset) == len(extent)) - self.offset = offset - self.extent = extent - - def __len__(self): - return len(self.offset) - - def slice1D(self, mpi_rank, mpi_size, dimension=None): - """ - Slice this chunk into mpi_size hypercubes along one of its - n dimensions. The dimension is given through the 'dimension' - parameter. If None, the dimension with the largest extent on - this hypercube is automatically picked. - Returns the mpi_rank'th of the sliced chunks. 
- """ - if dimension is None: - # pick that dimension which has the highest count of items - dimension = 0 - maximum = self.extent[0] - for k, v in enumerate(self.extent): - if v > maximum: - dimension = k - assert (dimension < len(self)) - # no offset - assert (self.offset == [0 for _ in range(len(self))]) - offset = [0 for _ in range(len(self))] - stride = self.extent[dimension] // mpi_size - rest = self.extent[dimension] % mpi_size - - # local function f computes the offset of a rank - # for more equal balancing, we want the start index - # at the upper gaussian bracket of (N/n*rank) - # where N the size of the dataset in dimension dim - # and n the MPI size - # for avoiding integer overflow, this is the same as: - # (N div n)*rank + round((N%n)/n*rank) - def f(rank): - res = stride * rank - padDivident = rest * rank - pad = padDivident // mpi_size - if pad * mpi_size < padDivident: - pad += 1 - return res + pad - - offset[dimension] = f(mpi_rank) - extent = self.extent.copy() - if mpi_rank >= mpi_size - 1: - extent[dimension] -= offset[dimension] - else: - extent[dimension] = f(mpi_rank + 1) - offset[dimension] - return Chunk(offset, extent) - - class deferred_load: def __init__(self, source, dynamicView, offset, extent): self.source = source @@ -166,6 +114,42 @@ def __init__(self, source, dynamicView, offset, extent): self.extent = extent +def distribution_strategy(dataset_extent, + mpi_rank, + mpi_size, + strategy_identifier=None): + if strategy_identifier is None or not strategy_identifier: + if 'OPENPMD_CHUNK_DISTRIBUTION' in os.environ: + strategy_identifier = os.environ[ + 'OPENPMD_CHUNK_DISTRIBUTION'].lower() + else: + strategy_identifier = 'hostname_binpacking_slicedataset' # default + match = re.search('hostname_(.*)_(.*)', strategy_identifier) + if match is not None: + inside_node = distribution_strategy(dataset_extent, + mpi_rank, + mpi_size, + strategy_identifier=match.group(1)) + second_phase = distribution_strategy( + dataset_extent, + mpi_rank, + 
mpi_size, + strategy_identifier=match.group(2)) + return io.FromPartialStrategy(io.ByHostname(inside_node), second_phase) + elif strategy_identifier == 'roundrobin': + return io.RoundRobin() + elif strategy_identifier == 'binpacking': + return io.BinPacking() + elif strategy_identifier == 'slicedataset': + return io.ByCuboidSlice(io.OneDimensionalBlockSlicer(), dataset_extent, + mpi_rank, mpi_size) + elif strategy_identifier == 'fail': + return io.FailingStrategy() + else: + raise RuntimeError("Unknown distribution strategy: " + + strategy_identifier) + + class pipe: """ Represents the configuration of one "pipe" pass. @@ -177,6 +161,11 @@ def __init__(self, infile, outfile, inconfig, outconfig, comm): self.outconfig = outconfig self.loads = [] self.comm = comm + if HAVE_MPI: + hostinfo = io.HostInfo.MPI_PROCESSOR_NAME + self.outranks = hostinfo.get_collective(self.comm) + else: + self.outranks = {i: str(i) for i in range(self.comm.size)} def run(self): if not HAVE_MPI or (args.mpi is None and self.comm.size == 1): @@ -268,6 +257,9 @@ def __copy(self, src, dest, current_path="/data/"): print("With records:") for r in in_iteration.particles[ps]: print("\t {0}".format(r)) + # With linear read mode, we can only load the source rank table + # inside `read_iterations()` since it's a dataset. 
+ self.inranks = src.get_rank_table(collective=True) out_iteration = write_iterations[in_iteration.iteration_index] sys.stdout.flush() self.__copy( @@ -284,7 +276,6 @@ def __copy(self, src, dest, current_path="/data/"): elif isinstance(src, io.Record_Component) and (not is_container or src.scalar): shape = src.shape - offset = [0 for _ in shape] dtype = src.dtype dest.reset_dataset(io.Dataset(dtype, shape)) if src.empty: @@ -294,19 +285,23 @@ def __copy(self, src, dest, current_path="/data/"): elif src.constant: dest.make_constant(src.get_attribute("value")) else: - chunk = Chunk(offset, shape) - local_chunk = chunk.slice1D(self.comm.rank, self.comm.size) - if debug: - end = local_chunk.offset.copy() - for i in range(len(end)): - end[i] += local_chunk.extent[i] - print("{}\t{}/{}:\t{} -- {}".format( - current_path, self.comm.rank, self.comm.size, - local_chunk.offset, end)) - span = dest.store_chunk(local_chunk.offset, local_chunk.extent) - self.loads.append( - deferred_load(src, span, local_chunk.offset, - local_chunk.extent)) + chunk_table = src.available_chunks() + strategy = distribution_strategy(shape, self.comm.rank, + self.comm.size) + my_chunks = strategy.assign(chunk_table, self.inranks, + self.outranks) + for chunk in my_chunks[ + self.comm.rank] if self.comm.rank in my_chunks else []: + if debug: + end = chunk.offset.copy() + for i in range(len(end)): + end[i] += chunk.extent[i] + print("{}\t{}/{}:\t{} -- {}".format( + current_path, self.comm.rank, self.comm.size, + chunk.offset, end)) + span = dest.store_chunk(chunk.offset, chunk.extent) + self.loads.append( + deferred_load(src, span, chunk.offset, chunk.extent)) elif isinstance(src, io.Iteration): self.__copy(src.meshes, dest.meshes, current_path + "meshes/") self.__copy(src.particles, dest.particles, From 539f61f08a200dbdc259a273d32929b7ab4b78a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Thu, 2 Mar 2023 13:49:13 +0100 Subject: [PATCH 04/27] Testing --- CMakeLists.txt | 4 +- 
test/CoreTest.cpp | 88 +++++++++++ test/ParallelIOTest.cpp | 331 ++++++++++++++++++++++++++++++++++------ 3 files changed, 376 insertions(+), 47 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d6153e8e24..b6f01f3d2e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1346,7 +1346,7 @@ if(openPMD_BUILD_TESTING) ) add_test(NAME CLI.pipe.py COMMAND sh -c - "${MPI_TEST_EXE} ${Python_EXECUTABLE} \ + "${MPI_TEST_EXE} -n 2 ${Python_EXECUTABLE} \ ${openPMD_RUNTIME_OUTPUT_DIRECTORY}/openpmd-pipe \ --infile ../samples/git-sample/data%T.h5 \ --outfile ../samples/git-sample/data%T.bp && \ @@ -1357,7 +1357,7 @@ if(openPMD_BUILD_TESTING) --outfile \ ../samples/git-sample/single_iteration_%T.bp && \ \ - ${MPI_TEST_EXE} ${Python_EXECUTABLE} \ + ${MPI_TEST_EXE} -n 2 ${Python_EXECUTABLE} \ ${openPMD_RUNTIME_OUTPUT_DIRECTORY}/openpmd-pipe \ --infile ../samples/git-sample/thetaMode/data%T.h5 \ --outfile \ diff --git a/test/CoreTest.cpp b/test/CoreTest.cpp index 6bdefff1fb..b9820d8222 100644 --- a/test/CoreTest.cpp +++ b/test/CoreTest.cpp @@ -3,6 +3,8 @@ #define OPENPMD_private public: #define OPENPMD_protected public: #endif + +#include "openPMD/ChunkInfo.hpp" #include "openPMD/openPMD.hpp" #include "Files_Core/CoreTests.hpp" @@ -45,6 +47,92 @@ using namespace openPMD; Dataset globalDataset(Datatype::CHAR, {1}); +namespace test_chunk_assignment +{ +using namespace openPMD::chunk_assignment; +struct Params +{ + ChunkTable table; + RankMeta metaSource; + RankMeta metaSink; + + void init( + size_t sourceRanks, + size_t sinkRanks, + size_t in_per_host, + size_t out_per_host) + { + for (size_t rank = 0; rank < sourceRanks; ++rank) + { + table.emplace_back(Offset{rank, rank}, Extent{rank, rank}, rank); + table.emplace_back( + Offset{rank, 100 * rank}, Extent{rank, 100 * rank}, rank); + metaSource.emplace(rank, std::to_string(rank / in_per_host)); + } + for (size_t rank = 0; rank < sinkRanks; ++rank) + { + metaSink.emplace(rank, std::to_string(rank / out_per_host)); + } + } 
+}; +void print(RankMeta const &meta, ChunkTable const &table) +{ + for (auto const &chunk : table) + { + std::cout << "[HOST: " << meta.at(chunk.sourceID) + << ",\tRank: " << chunk.sourceID << ",\tOffset: "; + for (auto offset : chunk.offset) + { + std::cout << offset << ", "; + } + std::cout << "\tExtent: "; + for (auto extent : chunk.extent) + { + std::cout << extent << ", "; + } + std::cout << "]" << std::endl; + } +} +void print(RankMeta const &meta, Assignment const &table) +{ + for (auto &[rank, chunkList] : table) + { + std::cout << "[HOST: " << meta.at(rank) << ",\tRank: " << rank << "]" + << std::endl; + for (auto const &chunk : chunkList) + { + std::cout << "\t[Offset: "; + for (auto offset : chunk.offset) + { + std::cout << offset << ", "; + } + std::cout << "\tExtent: "; + for (auto extent : chunk.extent) + { + std::cout << extent << ", "; + } + std::cout << "]" << std::endl; + } + } +} +} // namespace test_chunk_assignment + +TEST_CASE("chunk_assignment", "[core]") +{ + using namespace chunk_assignment; + test_chunk_assignment::Params params; + params.init(6, 2, 2, 1); + test_chunk_assignment::print(params.metaSource, params.table); + ByHostname byHostname(std::make_unique()); + FromPartialStrategy fullStrategy( + std::make_unique(std::move(byHostname)), + std::make_unique()); + Assignment res = + fullStrategy.assign(params.table, params.metaSource, params.metaSink); + std::cout << "\nRESULTS:" << std::endl; + test_chunk_assignment::print(params.metaSink, res); +} + TEST_CASE("versions_test", "[core]") { auto const apiVersion = getVersion(); diff --git a/test/ParallelIOTest.cpp b/test/ParallelIOTest.cpp index 5f38973bed..cb3d2eea92 100644 --- a/test/ParallelIOTest.cpp +++ b/test/ParallelIOTest.cpp @@ -8,6 +8,8 @@ #include "openPMD/auxiliary/Environment.hpp" #include "openPMD/auxiliary/Filesystem.hpp" #include "openPMD/openPMD.hpp" +// @todo change includes +#include "openPMD/benchmark/mpi/OneDimensionalBlockSlicer.hpp" #include #if !openPMD_HAVE_MPI 
@@ -1183,6 +1185,53 @@ TEST_CASE("independent_write_with_collective_flush", "[parallel]") } #endif +#if openPMD_HAVE_MPI +TEST_CASE("unavailable_backend", "[core][parallel]") +{ +#if !openPMD_HAVE_ADIOS2 + { + auto fail = []() { + Series( + "unavailable.bp", + Access::CREATE, + MPI_COMM_WORLD, + R"({"backend": "ADIOS2"})"); + }; + REQUIRE_THROWS_WITH( + fail(), + "Wrong API usage: openPMD-api built without support for backend " + "'ADIOS2'."); + } +#endif +#if !openPMD_HAVE_ADIOS2 + { + auto fail = []() { + Series("unavailable.bp", Access::CREATE, MPI_COMM_WORLD); + }; + REQUIRE_THROWS_WITH( + fail(), + "Wrong API usage: openPMD-api built without support for backend " + "'ADIOS2'."); + } +#endif +#if !openPMD_HAVE_HDF5 + { + auto fail = []() { + Series( + "unavailable.h5", + Access::CREATE, + MPI_COMM_WORLD, + R"({"backend": "HDF5"})"); + }; + REQUIRE_THROWS_WITH( + fail(), + "Wrong API usage: openPMD-api built without support for backend " + "'HDF5'."); + } +#endif +} +#endif + #if openPMD_HAVE_ADIOS2 && openPMD_HAVE_MPI void adios2_streaming(bool variableBasedLayout) @@ -1879,51 +1928,6 @@ TEST_CASE("append_mode", "[serial]") } } -TEST_CASE("unavailable_backend", "[core][parallel]") -{ -#if !openPMD_HAVE_ADIOS2 - { - auto fail = []() { - Series( - "unavailable.bp", - Access::CREATE, - MPI_COMM_WORLD, - R"({"backend": "ADIOS2"})"); - }; - REQUIRE_THROWS_WITH( - fail(), - "Wrong API usage: openPMD-api built without support for backend " - "'ADIOS2'."); - } -#endif -#if !openPMD_HAVE_ADIOS2 - { - auto fail = []() { - Series("unavailable.bp", Access::CREATE, MPI_COMM_WORLD); - }; - REQUIRE_THROWS_WITH( - fail(), - "Wrong API usage: openPMD-api built without support for backend " - "'ADIOS2'."); - } -#endif -#if !openPMD_HAVE_HDF5 - { - auto fail = []() { - Series( - "unavailable.h5", - Access::CREATE, - MPI_COMM_WORLD, - R"({"backend": "HDF5"})"); - }; - REQUIRE_THROWS_WITH( - fail(), - "Wrong API usage: openPMD-api built without support for backend " - "'HDF5'."); - 
} -#endif -} - void joined_dim(std::string const &ext) { using type = float; @@ -2220,6 +2224,243 @@ TEST_CASE("iterate_nonstreaming_series", "[serial][adios2]") iterate_nonstreaming_series::iterate_nonstreaming_series(); } +void adios2_chunk_distribution() +{ + /* + * This test simulates a multi-node streaming setup in order to test some + * of our chunk distribution strategies. + * We don't actually stream (but write a .bp file instead) and also we don't + * actually run anything on multiple nodes, but we can use this for testing + * the distribution strategies anyway. + */ + int mpi_size{-1}; + int mpi_rank{-1}; + MPI_Comm_size(MPI_COMM_WORLD, &mpi_size); + MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank); + + /* + * Mappings: MPI rank -> hostname where the rank is executed. + * For the writing application as well as for the reading one. + */ + chunk_assignment::RankMeta writingRanksHostnames, readingRanksHostnames; + for (int i = 0; i < mpi_size; ++i) + { + /* + * The mapping is intentionally weird. Nodes "node1", "node3", ... + * do not have instances of the reading application running on them. + * Our distribution strategies will need to deal with that situation. + */ + // 0, 0, 1, 1, 2, 2, 3, 3 ... + writingRanksHostnames[i] = "node" + std::to_string(i / 2); + // 0, 0, 0, 0, 2, 2, 2, 2 ... + readingRanksHostnames[i] = "node" + std::to_string(i / 4 * 2); + } + + std::string filename = "../samples/adios2_chunk_distribution.bp"; + // Simulate a stream: BP4 assigns chunk IDs by subfile (i.e. aggregator). 
+ std::stringstream parameters; + parameters << R"END( +{ + "adios2": + { + "engine": + { + "type": "bp4", + "parameters": + { + "NumAggregators":)END" + << "\"" << std::to_string(mpi_size) << "\"" + << R"END( + } + } + } +} +)END"; + + auto printChunktable = [mpi_rank]( + std::string const &strategyName, + ChunkTable const &table, + chunk_assignment::RankMeta const &meta) { + if (mpi_rank != 0) + { + return; + } + std::cout << "WITH STRATEGY '" << strategyName << "':\n"; + for (auto const &chunk : table) + { + std::cout << "[HOST: " << meta.at(chunk.sourceID) + << ",\tRank: " << chunk.sourceID << ",\tOffset: "; + for (auto offset : chunk.offset) + { + std::cout << offset << ", "; + } + std::cout << "\tExtent: "; + for (auto extent : chunk.extent) + { + std::cout << extent << ", "; + } + std::cout << "]" << std::endl; + } + }; + + auto printAssignment = [mpi_rank]( + std::string const &strategyName, + chunk_assignment::Assignment const &table, + chunk_assignment::RankMeta const &meta) { + if (mpi_rank != 0) + { + return; + } + std::cout << "WITH STRATEGY '" << strategyName << "':\n"; + for (auto &[rank, chunkList] : table) + { + std::cout << "[HOST: " << meta.at(rank) << ",\tRank: " << rank + << "]" << std::endl; + for (auto const &chunk : chunkList) + { + std::cout << "\t[Source rank: " << chunk.sourceID + << "\tOffset: "; + for (auto offset : chunk.offset) + { + std::cout << offset << ", "; + } + std::cout << "\tExtent: "; + for (auto extent : chunk.extent) + { + std::cout << extent << ", "; + } + std::cout << "]" << std::endl; + } + } + }; + + // Create a dataset. + { + Series series( + filename, + openPMD::Access::CREATE, + MPI_COMM_WORLD, + parameters.str()); + /* + * The writing application sets an attribute that tells the reading + * application about the "MPI rank -> hostname" mapping. + * Each rank only needs to set its own value. + * (Some other options like setting all at once or reading from a file + * exist as well.) 
+ */ + series.setRankTable(writingRanksHostnames.at(mpi_rank)); + + auto E_x = series.iterations[0].meshes["E"]["x"]; + openPMD::Dataset ds(openPMD::Datatype::INT, {unsigned(mpi_size), 10}); + E_x.resetDataset(ds); + std::vector data(10, 0); + std::iota(data.begin(), data.end(), 0); + E_x.storeChunk(data, {unsigned(mpi_rank), 0}, {1, 10}); + series.flush(); + } + + { + Series series(filename, openPMD::Access::READ_ONLY, MPI_COMM_WORLD); + /* + * Inquire the writing application's "MPI rank -> hostname" mapping. + * The reading application needs to know about its own mapping. + * Having both of these mappings is the basis for an efficient chunk + * distribution since we can use it to figure out which instances + * are running on the same nodes. + */ + auto rankMetaIn = series.rankTable(/* collective = */ true); + REQUIRE(rankMetaIn == writingRanksHostnames); + + auto E_x = series.iterations[0].meshes["E"]["x"]; + /* + * Ask the backend which chunks are available. + */ + auto const chunkTable = E_x.availableChunks(); + + printChunktable("INPUT", chunkTable, rankMetaIn); + + using namespace chunk_assignment; + + /* + * Assign the chunks by distributing them one after the other to reading + * ranks. Easy, but not particularly efficient. + */ + RoundRobin roundRobinStrategy; + auto roundRobinAssignment = roundRobinStrategy.assign( + chunkTable, rankMetaIn, readingRanksHostnames); + printAssignment( + "ROUND ROBIN", roundRobinAssignment, readingRanksHostnames); + + /* + * Assign chunks by hostname. + * Two difficulties: + * * A distribution strategy within one node needs to be picked. + * We pick the BinPacking strategy that tries to assign chunks in a + * balanced manner. Since our chunks have a small extent along + * dimension 0, use dimension 1 for slicing. + * * The assignment is partial since some nodes only have instances of + * the writing application. Those chunks remain unassigned. 
+ */ + ByHostname byHostname( + std::make_unique(/* splitAlongDimension = */ 1)); + auto byHostnamePartialAssignment = + byHostname.assign(chunkTable, rankMetaIn, readingRanksHostnames); + printAssignment( + "HOSTNAME, ASSIGNED", + byHostnamePartialAssignment.assigned, + readingRanksHostnames); + printChunktable( + "HOSTNAME, LEFTOVER", + byHostnamePartialAssignment.notAssigned, + rankMetaIn); + + /* + * Assign chunks by hostnames, once more. + * This time, apply a secondary distribution strategy to assign + * leftovers. We pick BinPacking, once more. + * Notice that the BinPacking strategy does not (yet) take into account + * chunks that have been assigned by the first round. + * Balancing is calculated solely based on the leftover chunks from the + * first round. + */ + FromPartialStrategy fromPartialStrategy( + std::make_unique(std::move(byHostname)), + std::make_unique(/* splitAlongDimension = */ 1)); + auto fromPartialAssignment = fromPartialStrategy.assign( + chunkTable, rankMetaIn, readingRanksHostnames); + printAssignment( + "HOSTNAME WITH SECOND PASS", + fromPartialAssignment, + readingRanksHostnames); + + /* + * Assign chunks by slicing the n-dimensional physical domain and + * intersecting those slices with the available chunks from the backend. + * Notice that this strategy only returns the chunks that the currently + * running rank is supposed to load, whereas the other strategies return + * a chunk table containing all chunks that all ranks will load. + * In principle, a chunk_assignment::Strategy only needs to return the + * chunks that the current rank should load, but is free to emplace the + * other chunks for other reading ranks as well. + * (Reasoning: In some strategies, calculating everything is necessary, + * in others such as this one, it's an unneeded overhead.) 
+ */ + ByCuboidSlice cuboidSliceStrategy( + std::make_unique(1), + E_x.getExtent(), + mpi_rank, + mpi_size); + auto cuboidSliceAssignment = cuboidSliceStrategy.assign( + chunkTable, rankMetaIn, readingRanksHostnames); + printAssignment( + "CUBOID SLICE", cuboidSliceAssignment, readingRanksHostnames); + } +} + +TEST_CASE("adios2_chunk_distribution", "[parallel][adios2]") +{ + adios2_chunk_distribution(); +} #endif // openPMD_HAVE_ADIOS2 && openPMD_HAVE_MPI #if openPMD_HAVE_MPI From cda9e762f8c4e7c82e2a656835fb68fa0557fe8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Fri, 3 Mar 2023 15:09:57 +0100 Subject: [PATCH 05/27] Add DiscardingStrategy --- include/openPMD/ChunkInfo.hpp | 21 +++++++++++++++++++++ src/ChunkInfo.cpp | 13 +++++++++++++ src/binding/python/ChunkInfo.cpp | 3 +++ 3 files changed, 37 insertions(+) diff --git a/include/openPMD/ChunkInfo.hpp b/include/openPMD/ChunkInfo.hpp index b44379b2aa..54454b4f13 100644 --- a/include/openPMD/ChunkInfo.hpp +++ b/include/openPMD/ChunkInfo.hpp @@ -322,6 +322,27 @@ namespace chunk_assignment virtual std::unique_ptr clone() const override; }; + + /** + * @brief Strategy that purposefully discards leftover chunk from + * the PartialAssignment. + * + * Useful as second phase in FromPartialStrategy when knowing that some + * chunks will go unassigned, but still wanting to communicate only within + * the same node. 
+ * + */ + struct DiscardingStrategy : Strategy + { + explicit DiscardingStrategy(); + + Assignment assign( + PartialAssignment, + RankMeta const &in, + RankMeta const &out) override; + + virtual std::unique_ptr clone() const override; + }; } // namespace chunk_assignment namespace host_info diff --git a/src/ChunkInfo.cpp b/src/ChunkInfo.cpp index 7b6c1e32ca..190bc8012c 100644 --- a/src/ChunkInfo.cpp +++ b/src/ChunkInfo.cpp @@ -552,6 +552,19 @@ namespace chunk_assignment { return std::make_unique(); } + + DiscardingStrategy::DiscardingStrategy() = default; + + Assignment DiscardingStrategy::assign( + PartialAssignment assignment, RankMeta const &, RankMeta const &) + { + return assignment.assigned; + } + + std::unique_ptr DiscardingStrategy::clone() const + { + return std::make_unique(); + } } // namespace chunk_assignment namespace host_info diff --git a/src/binding/python/ChunkInfo.cpp b/src/binding/python/ChunkInfo.cpp index 76deca3a10..4c93233f22 100644 --- a/src/binding/python/ChunkInfo.cpp +++ b/src/binding/python/ChunkInfo.cpp @@ -191,4 +191,7 @@ void init_Chunk(py::module &m) py::class_(m, "FailingStrategy") .def(py::init<>()); + + py::class_(m, "DiscardingStrategy") + .def(py::init<>()); } From 8125286f8480eac35dd579cf1b764250aa838bb9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Wed, 15 Mar 2023 14:56:50 +0100 Subject: [PATCH 06/27] Make Strategy class extensible from Python @todo Why do we need to increase the refcount twice?? 
--- .../openPMD/backend/BaseRecordComponent.hpp | 1 + src/binding/python/ChunkInfo.cpp | 158 +++++++++++++++++- .../python/openpmd_api/pipe/__main__.py | 33 ++++ 3 files changed, 189 insertions(+), 3 deletions(-) diff --git a/include/openPMD/backend/BaseRecordComponent.hpp b/include/openPMD/backend/BaseRecordComponent.hpp index fe4490830d..a871d67bcf 100644 --- a/include/openPMD/backend/BaseRecordComponent.hpp +++ b/include/openPMD/backend/BaseRecordComponent.hpp @@ -20,6 +20,7 @@ */ #pragma once +#include "openPMD/ChunkInfo.hpp" #include "openPMD/Dataset.hpp" #include "openPMD/Error.hpp" #include "openPMD/backend/Attributable.hpp" diff --git a/src/binding/python/ChunkInfo.cpp b/src/binding/python/ChunkInfo.cpp index 4c93233f22..9c94329d0e 100644 --- a/src/binding/python/ChunkInfo.cpp +++ b/src/binding/python/ChunkInfo.cpp @@ -28,8 +28,139 @@ #include #include // std::move +/* + * PyStrategy and PyPartialStrategy are the C++ representations for objects + * created in Python. + * One challenge about these classes is that they cannot be easily copied or + * moved in memory, as the clone will lose the relation to the Python object. + * This class has a clone_impl() method that child classes can use for cloning + * the object and at the same time storing a reference to the original Python + * object. 
+ * The template parameters ChildCpp and ChildPy implement a CRT-like pattern, + * split into a C++ class and a Python trampoline class as documented here: + * https://pybind11.readthedocs.io/en/stable/advanced/classes.html?highlight=trampoline#overriding-virtual-functions-in-python + * + * A typical child instantiation would look like: + * struct ChildPy : ChildCpp, ClonableTrampoline; + */ +template +struct ClonableTrampoline +{ + struct OriginalInstance + { + py::handle pythonObject; + + ~OriginalInstance() + { + pythonObject.dec_ref(); + } + }; + /* + * If the shared pointer is empty, this object is the original object owned + * by Python and the Python handle can be acquired by: + * py::cast(static_cast(this)) + * + * Copied instances will refer to the Python object handle via this member. + * By only storing this member in copied instances, but not in the original + * instance, we avoid a memory cycle and ensure clean destruction. + */ + std::shared_ptr m_originalInstance; + + [[nodiscard]] py::handle get_python_handle() const + { + if (m_originalInstance) + { + // std::cout << "Refcount " + // << m_originalInstance->pythonObject.ref_count() + // << std::endl; + return m_originalInstance->pythonObject; + } + else + { + auto self = static_cast(this); + return py::cast(self); + } + } + + template + Res call_virtual(std::string const &nameOfPythonMethod, Args &&...args) + { + py::gil_scoped_acquire gil; + auto ptr = get_python_handle().template cast(); + auto fun = py::get_override(ptr, nameOfPythonMethod.c_str()); + if (!fun) + { + throw std::runtime_error( + "Virtual method not found. 
Did you define '" + + nameOfPythonMethod + "' as method in Python?"); + } + auto res = fun(std::forward(args)...); + return py::detail::cast_safe(std::move(res)); + } + + [[nodiscard]] std::unique_ptr clone_impl() const + { + auto self = static_cast(this); + if (m_originalInstance) + { + return std::make_unique(*self); + } + else + { + OriginalInstance oi; + oi.pythonObject = py::cast(self); + // no idea why we would need this twice, but we do + oi.pythonObject.inc_ref(); + oi.pythonObject.inc_ref(); + auto res = std::make_unique(*self); + res->m_originalInstance = + std::make_shared(std::move(oi)); + return res; + } + } +}; + +struct PyStrategy + : chunk_assignment::Strategy + , ClonableTrampoline +{ + chunk_assignment::Assignment assign( + chunk_assignment::PartialAssignment assignment, + chunk_assignment::RankMeta const &in, + chunk_assignment::RankMeta const &out) override + { + return call_virtual( + "assign", std::move(assignment), in, out); + } + + [[nodiscard]] std::unique_ptr clone() const override + { + return clone_impl(); + } +}; + +struct PyPartialStrategy + : chunk_assignment::PartialStrategy + , ClonableTrampoline +{ + chunk_assignment::PartialAssignment assign( + chunk_assignment::PartialAssignment assignment, + chunk_assignment::RankMeta const &in, + chunk_assignment::RankMeta const &out) override + { + return call_virtual( + "assign", std::move(assignment), in, out); + } + + [[nodiscard]] std::unique_ptr clone() const override + { + return clone_impl(); + } +}; + void init_Chunk(py::module &m) { + py::class_(m, "ChunkInfo") .def(py::init(), py::arg("offset"), py::arg("extent")) .def( @@ -40,6 +171,8 @@ void init_Chunk(py::module &m) }) .def_readwrite("offset", &ChunkInfo::offset) .def_readwrite("extent", &ChunkInfo::extent); + py::bind_vector(m, "VectorChunkInfo"); + py::implicitly_convertible>(); py::class_(m, "WrittenChunkInfo") .def(py::init(), py::arg("offset"), py::arg("extent")) .def( @@ -105,12 +238,21 @@ void init_Chunk(py::module &m) }) 
.def("available", &host_info::methodAvailable); + py::bind_vector(m, "ChunkTable"); using namespace chunk_assignment; - (void)py::class_(m, "PartialStrategy"); + py::bind_map(m, "Assignment"); + + py::class_(m, "PartialAssignment") + .def(py::init<>()) + .def_readwrite("not_assigned", &PartialAssignment::notAssigned) + .def_readwrite("assigned", &PartialAssignment::assigned); + + py::bind_map(m, "RankMeta"); - py::class_(m, "PartialStrategy") + py::class_(m, "PartialStrategy") + .def(py::init<>()) .def( "assign", py::overload_cast( @@ -128,7 +270,8 @@ void init_Chunk(py::module &m) py::arg("rank_meta_in") = RankMeta(), py::arg("rank_meta_out") = RankMeta()); - py::class_(m, "Strategy") + py::class_(m, "Strategy") + .def(py::init<>()) .def( "assign", py::overload_cast( @@ -194,4 +337,13 @@ void init_Chunk(py::module &m) py::class_(m, "DiscardingStrategy") .def(py::init<>()); + + // implicit conversions + { + py::implicitly_convertible(); + py::implicitly_convertible(); + py::implicitly_convertible(); + py::implicitly_convertible(); + py::implicitly_convertible(); + } } diff --git a/src/binding/python/openpmd_api/pipe/__main__.py b/src/binding/python/openpmd_api/pipe/__main__.py index 96802530b0..40e616e3bd 100644 --- a/src/binding/python/openpmd_api/pipe/__main__.py +++ b/src/binding/python/openpmd_api/pipe/__main__.py @@ -114,6 +114,37 @@ def __init__(self, source, dynamicView, offset, extent): self.extent = extent +# Example how to implement a simple partial strategy in Python +class LoadOne(io.PartialStrategy): + def __init__(self, rank): + super().__init__() + self.rank = rank + + def assign(self, assignment, *_): + element = assignment.not_assigned.pop() + if self.rank not in assignment.assigned: + assignment.assigned[self.rank] = [element] + else: + assignment.assigned[self.rank].append(element) + return assignment + + +# Example how to implement a simple strategy in Python +class LoadAll(io.Strategy): + + def __init__(self, rank): + super().__init__() + 
self.rank = rank + + def assign(self, assignment, *_): + res = assignment.assigned + if self.rank not in res: + res[self.rank] = assignment.not_assigned + else: + res[self.rank].extend(assignment.not_assigned) + return res + + def distribution_strategy(dataset_extent, mpi_rank, mpi_size, @@ -136,6 +167,8 @@ def distribution_strategy(dataset_extent, mpi_size, strategy_identifier=match.group(2)) return io.FromPartialStrategy(io.ByHostname(inside_node), second_phase) + elif strategy_identifier == 'all': + return io.FromPartialStrategy(LoadOne(mpi_rank), LoadAll(mpi_rank)) elif strategy_identifier == 'roundrobin': return io.RoundRobin() elif strategy_identifier == 'binpacking': From a66f6562e8432be7a1dbcae3b366ea1322332f3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Tue, 14 Mar 2023 17:02:20 +0100 Subject: [PATCH 07/27] Make mergeChunks function public --- include/openPMD/ChunkInfo.hpp | 3 + src/ChunkInfo.cpp | 110 ++++++++++++++++++++++++++++++ src/IO/JSON/JSONIOHandlerImpl.cpp | 103 +--------------------------- src/binding/python/ChunkInfo.cpp | 7 +- 4 files changed, 118 insertions(+), 105 deletions(-) diff --git a/include/openPMD/ChunkInfo.hpp b/include/openPMD/ChunkInfo.hpp index 54454b4f13..976fac955e 100644 --- a/include/openPMD/ChunkInfo.hpp +++ b/include/openPMD/ChunkInfo.hpp @@ -91,6 +91,9 @@ namespace chunk_assignment using Assignment = std::map>; + template + void mergeChunks(std::vector &); + struct PartialAssignment { ChunkTable notAssigned; diff --git a/src/ChunkInfo.cpp b/src/ChunkInfo.cpp index 190bc8012c..d0e10a4e11 100644 --- a/src/ChunkInfo.cpp +++ b/src/ChunkInfo.cpp @@ -27,6 +27,7 @@ #include #include #include +#include #include #ifdef _WIN32 @@ -68,6 +69,115 @@ bool WrittenChunkInfo::operator==(WrittenChunkInfo const &other) const namespace chunk_assignment { + namespace + { + /* + * Check whether two chunks can be merged to form a large one + * and optionally return that larger chunk + */ + template + std::optional + 
mergeChunks(Chunk_t const &chunk1, Chunk_t const &chunk2) + { + /* + * Idea: + * If two chunks can be merged into one, they agree on offsets and + * extents in all but exactly one dimension dim. + * At dimension dim, the offset of chunk 2 is equal to the offset + * of chunk 1 plus its extent -- or vice versa. + */ + unsigned dimensionality = chunk1.extent.size(); + for (unsigned dim = 0; dim < dimensionality; ++dim) + { + Chunk_t const *c1(&chunk1), *c2(&chunk2); + // check if one chunk is the extension of the other at + // dimension dim + // first, let's put things in order + if (c1->offset[dim] > c2->offset[dim]) + { + std::swap(c1, c2); + } + // now, c1 begins at the lower of both offsets + // next check, that both chunks border one another exactly + if (c2->offset[dim] != c1->offset[dim] + c1->extent[dim]) + { + continue; + } + // we've got a candidate + // verify that all other dimensions have equal values + auto equalValues = [dimensionality, dim, c1, c2]() { + for (unsigned j = 0; j < dimensionality; ++j) + { + if (j == dim) + { + continue; + } + if (c1->offset[j] != c2->offset[j] || + c1->extent[j] != c2->extent[j]) + { + return false; + } + } + return true; + }; + if (!equalValues()) + { + continue; + } + // we can merge the chunks + Offset offset(c1->offset); + Extent extent(c1->extent); + extent[dim] += c2->extent[dim]; + return std::make_optional(Chunk_t(offset, extent)); + } + return std::optional(); + } + } // namespace + + /* + * Merge chunks in the chunktable until no chunks are left that can be + * merged. + */ + template + void mergeChunks(std::vector &table) + { + bool stillChanging; + do + { + stillChanging = false; + auto innerLoops = [&table]() { + /* + * Iterate over pairs of chunks in the table. + * When a pair that can be merged is found, merge it, + * delete the original two chunks from the table, + * put the new one in and return. 
+ */ + for (auto i = table.begin(); i < table.end(); ++i) + { + for (auto j = i + 1; j < table.end(); ++j) + { + std::optional merged = mergeChunks(*i, *j); + if (merged) + { + // erase order is important due to iterator + // invalidation + table.erase(j); + table.erase(i); + table.emplace_back(std::move(merged.value())); + return true; + } + } + } + return false; + }; + stillChanging = innerLoops(); + } while (stillChanging); + } + + template void mergeChunks(std::vector &); + template void + mergeChunks(std::vector &); + namespace { std::map > diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index 2287522d92..0a80e53321 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -875,107 +875,6 @@ namespace } return res; } - - /* - * Check whether two chunks can be merged to form a large one - * and optionally return that larger chunk - */ - std::optional - mergeChunks(WrittenChunkInfo const &chunk1, WrittenChunkInfo const &chunk2) - { - /* - * Idea: - * If two chunks can be merged into one, they agree on offsets and - * extents in all but exactly one dimension dim. - * At dimension dim, the offset of chunk 2 is equal to the offset - * of chunk 1 plus its extent -- or vice versa. 
- */ - unsigned dimensionality = chunk1.extent.size(); - for (unsigned dim = 0; dim < dimensionality; ++dim) - { - WrittenChunkInfo const *c1(&chunk1), *c2(&chunk2); - // check if one chunk is the extension of the other at - // dimension dim - // first, let's put things in order - if (c1->offset[dim] > c2->offset[dim]) - { - std::swap(c1, c2); - } - // now, c1 begins at the lower of both offsets - // next check, that both chunks border one another exactly - if (c2->offset[dim] != c1->offset[dim] + c1->extent[dim]) - { - continue; - } - // we've got a candidate - // verify that all other dimensions have equal values - auto equalValues = [dimensionality, dim, c1, c2]() { - for (unsigned j = 0; j < dimensionality; ++j) - { - if (j == dim) - { - continue; - } - if (c1->offset[j] != c2->offset[j] || - c1->extent[j] != c2->extent[j]) - { - return false; - } - } - return true; - }; - if (!equalValues()) - { - continue; - } - // we can merge the chunks - Offset offset(c1->offset); - Extent extent(c1->extent); - extent[dim] += c2->extent[dim]; - return std::make_optional(WrittenChunkInfo(offset, extent)); - } - return std::optional(); - } - - /* - * Merge chunks in the chunktable until no chunks are left that can be - * merged. - */ - void mergeChunks(ChunkTable &table) - { - bool stillChanging; - do - { - stillChanging = false; - auto innerLoops = [&table]() { - /* - * Iterate over pairs of chunks in the table. - * When a pair that can be merged is found, merge it, - * delete the original two chunks from the table, - * put the new one in and return. 
- */ - for (auto i = table.begin(); i < table.end(); ++i) - { - for (auto j = i + 1; j < table.end(); ++j) - { - std::optional merged = - mergeChunks(*i, *j); - if (merged) - { - // erase order is important due to iterator - // invalidation - table.erase(j); - table.erase(i); - table.emplace_back(std::move(merged.value())); - return true; - } - } - } - return false; - }; - stillChanging = innerLoops(); - } while (stillChanging); - } } // namespace void JSONIOHandlerImpl::availableChunks( @@ -985,7 +884,7 @@ void JSONIOHandlerImpl::availableChunks( auto filePosition = setAndGetFilePosition(writable); auto &j = obtainJsonContents(writable)["data"]; *parameters.chunks = chunksInJSON(j); - mergeChunks(*parameters.chunks); + chunk_assignment::mergeChunks(*parameters.chunks); } void JSONIOHandlerImpl::openFile( diff --git a/src/binding/python/ChunkInfo.cpp b/src/binding/python/ChunkInfo.cpp index 9c94329d0e..c34b7cea1b 100644 --- a/src/binding/python/ChunkInfo.cpp +++ b/src/binding/python/ChunkInfo.cpp @@ -171,8 +171,8 @@ void init_Chunk(py::module &m) }) .def_readwrite("offset", &ChunkInfo::offset) .def_readwrite("extent", &ChunkInfo::extent); - py::bind_vector(m, "VectorChunkInfo"); - py::implicitly_convertible>(); + py::bind_vector(m, "VectorChunkInfo") + .def("merge_chunks", &chunk_assignment::mergeChunks); py::class_(m, "WrittenChunkInfo") .def(py::init(), py::arg("offset"), py::arg("extent")) .def( @@ -238,7 +238,8 @@ void init_Chunk(py::module &m) }) .def("available", &host_info::methodAvailable); - py::bind_vector(m, "ChunkTable"); + py::bind_vector(m, "ChunkTable") + .def("merge_chunks", &chunk_assignment::mergeChunks); using namespace chunk_assignment; From 8fd934a17d4d8aee2d5791dbcd52dde0138e0b3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Fri, 9 Feb 2024 13:18:33 +0100 Subject: [PATCH 08/27] Add mergeChunksFromSameSourceID --- include/openPMD/ChunkInfo.hpp | 3 +++ src/ChunkInfo.cpp | 15 +++++++++++++++ 
src/binding/python/ChunkInfo.cpp | 5 ++++- 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/include/openPMD/ChunkInfo.hpp b/include/openPMD/ChunkInfo.hpp index 976fac955e..6787b49eb8 100644 --- a/include/openPMD/ChunkInfo.hpp +++ b/include/openPMD/ChunkInfo.hpp @@ -94,6 +94,9 @@ namespace chunk_assignment template void mergeChunks(std::vector &); + auto mergeChunksFromSameSourceID(std::vector const &) + -> std::map>; + struct PartialAssignment { ChunkTable notAssigned; diff --git a/src/ChunkInfo.cpp b/src/ChunkInfo.cpp index d0e10a4e11..0aa6fb4653 100644 --- a/src/ChunkInfo.cpp +++ b/src/ChunkInfo.cpp @@ -174,6 +174,21 @@ namespace chunk_assignment } while (stillChanging); } + auto mergeChunksFromSameSourceID(std::vector const &table) + -> std::map> + { + std::map> sortedBySourceID; + for (auto const &chunk : table) + { + sortedBySourceID[chunk.sourceID].emplace_back(chunk); + } + for (auto &pair : sortedBySourceID) + { + mergeChunks(pair.second); + } + return sortedBySourceID; + } + template void mergeChunks(std::vector &); template void mergeChunks(std::vector &); diff --git a/src/binding/python/ChunkInfo.cpp b/src/binding/python/ChunkInfo.cpp index c34b7cea1b..76c7120a9b 100644 --- a/src/binding/python/ChunkInfo.cpp +++ b/src/binding/python/ChunkInfo.cpp @@ -239,7 +239,10 @@ void init_Chunk(py::module &m) .def("available", &host_info::methodAvailable); py::bind_vector(m, "ChunkTable") - .def("merge_chunks", &chunk_assignment::mergeChunks); + .def("merge_chunks", &chunk_assignment::mergeChunks) + .def( + "merge_chunks_from_same_sourceID", + &chunk_assignment::mergeChunksFromSameSourceID); using namespace chunk_assignment; From b27716901e4fd3d2298bfccea22b4d16b17e00f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Thu, 8 Feb 2024 17:02:58 +0100 Subject: [PATCH 09/27] Add RoundRobinOfSourceRanks strategy --- include/openPMD/ChunkInfo.hpp | 11 ++++++++ src/ChunkInfo.cpp | 47 ++++++++++++++++++++++++++++++-- 
src/binding/python/ChunkInfo.cpp | 2 ++ test/ParallelIOTest.cpp | 23 ++++++++++++++-- 4 files changed, 78 insertions(+), 5 deletions(-) diff --git a/include/openPMD/ChunkInfo.hpp b/include/openPMD/ChunkInfo.hpp index 6787b49eb8..2193a6c691 100644 --- a/include/openPMD/ChunkInfo.hpp +++ b/include/openPMD/ChunkInfo.hpp @@ -24,6 +24,7 @@ #include "openPMD/Dataset.hpp" // Offset, Extent #include "openPMD/benchmark/mpi/BlockSlicer.hpp" +#include #if openPMD_HAVE_MPI #include @@ -224,6 +225,16 @@ namespace chunk_assignment virtual std::unique_ptr clone() const override; }; + struct RoundRobinOfSourceRanks : Strategy + { + Assignment assign( + PartialAssignment, + RankMeta const &in, + RankMeta const &out) override; + + virtual std::unique_ptr clone() const override; + }; + /** * @brief Strategy that assigns chunks to be read by processes within * the same host that produced the chunk. diff --git a/src/ChunkInfo.cpp b/src/ChunkInfo.cpp index 0aa6fb4653..6c5cff166f 100644 --- a/src/ChunkInfo.cpp +++ b/src/ChunkInfo.cpp @@ -24,10 +24,13 @@ #include "openPMD/auxiliary/Mpi.hpp" #include // std::sort +#include #include +#include #include #include #include +#include #include #ifdef _WIN32 @@ -195,10 +198,10 @@ namespace chunk_assignment namespace { - std::map > + std::map> ranksPerHost(RankMeta const &rankMeta) { - std::map > res; + std::map> res; for (auto const &pair : rankMeta) { auto &list = res[pair.second]; @@ -294,6 +297,44 @@ namespace chunk_assignment return std::unique_ptr(new RoundRobin); } + Assignment RoundRobinOfSourceRanks::assign( + PartialAssignment partialAssignment, + RankMeta const &, // ignored parameter + RankMeta const &out) + { + std::map> + sortSourceChunksBySourceRank; + for (auto &chunk : partialAssignment.notAssigned) + { + auto sourceID = chunk.sourceID; + sortSourceChunksBySourceRank[sourceID].push_back(std::move(chunk)); + } + partialAssignment.notAssigned.clear(); + auto source_it = sortSourceChunksBySourceRank.begin(); + auto sink_it = 
out.begin(); + for (; source_it != sortSourceChunksBySourceRank.end(); + ++source_it, ++sink_it) + { + if (sink_it == out.end()) + { + sink_it = out.begin(); + } + auto &chunks_go_here = partialAssignment.assigned[sink_it->first]; + chunks_go_here.reserve( + partialAssignment.assigned.size() + source_it->second.size()); + for (auto &chunk : source_it->second) + { + chunks_go_here.push_back(std::move(chunk)); + } + } + return partialAssignment.assigned; + } + + std::unique_ptr RoundRobinOfSourceRanks::clone() const + { + return std::unique_ptr(new RoundRobinOfSourceRanks); + } + ByHostname::ByHostname(std::unique_ptr withinNode) : m_withinNode(std::move(withinNode)) {} @@ -332,7 +373,7 @@ namespace chunk_assignment // the ranks are the source ranks // which ranks live on host in the sink? - std::map > ranksPerHostSink = + std::map> ranksPerHostSink = ranksPerHost(out); for (auto &chunkGroup : chunkGroups) { diff --git a/src/binding/python/ChunkInfo.cpp b/src/binding/python/ChunkInfo.cpp index 76c7120a9b..6f94cdd8aa 100644 --- a/src/binding/python/ChunkInfo.cpp +++ b/src/binding/python/ChunkInfo.cpp @@ -300,6 +300,8 @@ void init_Chunk(py::module &m) })); py::class_(m, "RoundRobin").def(py::init<>()); + py::class_(m, "RoundRobinOfSourceRanks") + .def(py::init<>()); py::class_(m, "ByHostname") .def( diff --git a/test/ParallelIOTest.cpp b/test/ParallelIOTest.cpp index cb3d2eea92..ee2923b178 100644 --- a/test/ParallelIOTest.cpp +++ b/test/ParallelIOTest.cpp @@ -2351,11 +2351,13 @@ void adios2_chunk_distribution() series.setRankTable(writingRanksHostnames.at(mpi_rank)); auto E_x = series.iterations[0].meshes["E"]["x"]; - openPMD::Dataset ds(openPMD::Datatype::INT, {unsigned(mpi_size), 10}); + openPMD::Dataset ds( + openPMD::Datatype::INT, {unsigned(mpi_size * 2), 10}); E_x.resetDataset(ds); std::vector data(10, 0); std::iota(data.begin(), data.end(), 0); - E_x.storeChunk(data, {unsigned(mpi_rank), 0}, {1, 10}); + E_x.storeChunk(data, {unsigned(mpi_rank * 2), 0}, {1, 10}); 
+ E_x.storeChunk(data, {unsigned(mpi_rank * 2 + 1), 0}, {1, 10}); series.flush(); } @@ -2414,6 +2416,23 @@ void adios2_chunk_distribution() byHostnamePartialAssignment.notAssigned, rankMetaIn); + /* + * Same as above, but use RoundRobinOfSourceRanks this time, a strategy + * which ensures that each source rank's data is uniquely mapped to one + * sink rank. Needed in some domains. + */ + ByHostname byHostname2(std::make_unique()); + auto byHostnamePartialAssignment2 = + byHostname2.assign(chunkTable, rankMetaIn, readingRanksHostnames); + printAssignment( + "HOSTNAME2, ASSIGNED", + byHostnamePartialAssignment2.assigned, + readingRanksHostnames); + printChunktable( + "HOSTNAME2, LEFTOVER", + byHostnamePartialAssignment2.notAssigned, + rankMetaIn); + /* * Assign chunks by hostnames, once more. * This time, apply a secondary distribution strategy to assign From 79cec4160e560a0960d269fd9856fcd6ead2b3d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Fri, 16 Aug 2024 12:05:39 +0200 Subject: [PATCH 10/27] Add Blocks distribution strategy --- include/openPMD/ChunkInfo.hpp | 16 ++++++ .../mpi/OneDimensionalBlockSlicer.hpp | 3 ++ src/ChunkInfo.cpp | 26 ++++++++++ .../mpi/OneDimensionalBlockSlicer.cpp | 52 +++++++++++-------- src/binding/python/ChunkInfo.cpp | 5 ++ test/ParallelIOTest.cpp | 5 ++ 6 files changed, 85 insertions(+), 22 deletions(-) diff --git a/include/openPMD/ChunkInfo.hpp b/include/openPMD/ChunkInfo.hpp index 2193a6c691..868478f8f6 100644 --- a/include/openPMD/ChunkInfo.hpp +++ b/include/openPMD/ChunkInfo.hpp @@ -235,6 +235,22 @@ namespace chunk_assignment virtual std::unique_ptr clone() const override; }; + struct Blocks : Strategy + { + private: + unsigned int mpi_size, mpi_rank; + + public: + Blocks(unsigned int mpi_rank, unsigned int mpi_size); + + Assignment assign( + PartialAssignment, + RankMeta const &in, + RankMeta const &out) override; + + [[nodiscard]] std::unique_ptr clone() const override; + }; + /** * @brief Strategy that 
assigns chunks to be read by processes within * the same host that produced the chunk. diff --git a/include/openPMD/benchmark/mpi/OneDimensionalBlockSlicer.hpp b/include/openPMD/benchmark/mpi/OneDimensionalBlockSlicer.hpp index cb12da9350..f0d943d972 100644 --- a/include/openPMD/benchmark/mpi/OneDimensionalBlockSlicer.hpp +++ b/include/openPMD/benchmark/mpi/OneDimensionalBlockSlicer.hpp @@ -33,6 +33,9 @@ class OneDimensionalBlockSlicer : public BlockSlicer explicit OneDimensionalBlockSlicer(Extent::value_type dim = 0); + static std::pair + n_th_block_inside(size_t length, size_t rank, size_t size); + std::pair sliceBlock(Extent &totalExtent, int size, int rank) override; diff --git a/src/ChunkInfo.cpp b/src/ChunkInfo.cpp index 6c5cff166f..ceb074b887 100644 --- a/src/ChunkInfo.cpp +++ b/src/ChunkInfo.cpp @@ -22,6 +22,7 @@ #include "openPMD/ChunkInfo_internal.hpp" #include "openPMD/auxiliary/Mpi.hpp" +#include "openPMD/benchmark/mpi/OneDimensionalBlockSlicer.hpp" #include // std::sort #include @@ -29,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -335,6 +337,30 @@ namespace chunk_assignment return std::unique_ptr(new RoundRobinOfSourceRanks); } + Blocks::Blocks(unsigned int mpi_rank_in, unsigned int mpi_size_in) + : mpi_size(mpi_size_in), mpi_rank(mpi_rank_in) + {} + + Assignment + Blocks::assign(PartialAssignment pa, RankMeta const &, RankMeta const &) + { + auto [notAssigned, res] = std::move(pa); + auto [myChunksFrom, myChunksTo] = + OneDimensionalBlockSlicer::n_th_block_inside( + notAssigned.size(), mpi_rank, mpi_size); + std::transform( + notAssigned.begin() + myChunksFrom, + notAssigned.begin() + (myChunksFrom + myChunksTo), + std::back_inserter(res[mpi_rank]), + [](WrittenChunkInfo &chunk) { return std::move(chunk); }); + return res; + } + + std::unique_ptr Blocks::clone() const + { + return std::unique_ptr(new Blocks(*this)); + } + ByHostname::ByHostname(std::unique_ptr withinNode) : m_withinNode(std::move(withinNode)) {} diff 
--git a/src/benchmark/mpi/OneDimensionalBlockSlicer.cpp b/src/benchmark/mpi/OneDimensionalBlockSlicer.cpp index 7fbb734faa..bb71cc29db 100644 --- a/src/benchmark/mpi/OneDimensionalBlockSlicer.cpp +++ b/src/benchmark/mpi/OneDimensionalBlockSlicer.cpp @@ -29,29 +29,23 @@ OneDimensionalBlockSlicer::OneDimensionalBlockSlicer(Extent::value_type dim) : m_dim{dim} {} -std::pair -OneDimensionalBlockSlicer::sliceBlock(Extent &totalExtent, int size, int rank) +std::pair OneDimensionalBlockSlicer::n_th_block_inside( + size_t length, size_t rank, size_t size) { - Offset offs(totalExtent.size(), 0); - if (rank >= size) { - Extent extent(totalExtent.size(), 0); - return std::make_pair(std::move(offs), std::move(extent)); + return {length, 0}; } - auto dim = this->m_dim; - // for more equal balancing, we want the start index // at the upper gaussian bracket of (N/n*rank) // where N the size of the dataset in dimension dim // and n the MPI size // for avoiding integer overflow, this is the same as: // (N div n)*rank + round((N%n)/n*rank) - auto f = [&totalExtent, size, dim](int threadRank) { - auto N = totalExtent[dim]; - auto res = (N / size) * threadRank; - auto padDivident = (N % size) * threadRank; + auto f = [length, size](size_t rank_lambda) { + auto res = (length / size) * rank_lambda; + auto padDivident = (length % size) * rank_lambda; auto pad = padDivident / size; if (pad * size < padDivident) { @@ -60,17 +54,31 @@ OneDimensionalBlockSlicer::sliceBlock(Extent &totalExtent, int size, int rank) return res + pad; }; - offs[dim] = f(rank); + size_t offset = f(rank); + size_t extent = [&]() { + if (rank >= size - 1) + { + return length - offset; + } + else + { + return f(rank + 1) - offset; + } + }(); + return {offset, extent}; +} + +std::pair +OneDimensionalBlockSlicer::sliceBlock(Extent &totalExtent, int size, int rank) +{ + Offset localOffset(totalExtent.size(), 0); Extent localExtent{totalExtent}; - if (rank >= size - 1) - { - localExtent[dim] -= offs[dim]; - } - else - { 
- localExtent[dim] = f(rank + 1) - offs[dim]; - } - return std::make_pair(std::move(offs), std::move(localExtent)); + + auto [offset_dim, extent_dim] = + n_th_block_inside(totalExtent.at(this->m_dim), rank, size); + localOffset[m_dim] = offset_dim; + localExtent[m_dim] = extent_dim; + return std::make_pair(std::move(localOffset), std::move(localExtent)); } std::unique_ptr OneDimensionalBlockSlicer::clone() const diff --git a/src/binding/python/ChunkInfo.cpp b/src/binding/python/ChunkInfo.cpp index 6f94cdd8aa..5727e75403 100644 --- a/src/binding/python/ChunkInfo.cpp +++ b/src/binding/python/ChunkInfo.cpp @@ -302,6 +302,11 @@ void init_Chunk(py::module &m) py::class_(m, "RoundRobin").def(py::init<>()); py::class_(m, "RoundRobinOfSourceRanks") .def(py::init<>()); + py::class_(m, "Blocks") + .def( + py::init(), + py::arg("mpi_rank"), + py::arg("mpi_size")); py::class_(m, "ByHostname") .def( diff --git a/test/ParallelIOTest.cpp b/test/ParallelIOTest.cpp index ee2923b178..8204326b94 100644 --- a/test/ParallelIOTest.cpp +++ b/test/ParallelIOTest.cpp @@ -2473,6 +2473,11 @@ void adios2_chunk_distribution() chunkTable, rankMetaIn, readingRanksHostnames); printAssignment( "CUBOID SLICE", cuboidSliceAssignment, readingRanksHostnames); + + Blocks blocksStrategy(mpi_rank, mpi_size); + auto blocksAssignment = blocksStrategy.assign( + chunkTable, rankMetaIn, readingRanksHostnames); + printAssignment("BLOCKS", blocksAssignment, readingRanksHostnames); } } From 46213d7ff777a9ecfa03ead0762d378a0331ca53 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Fri, 16 Aug 2024 14:15:20 +0200 Subject: [PATCH 11/27] Add BlocksOfSourceRanks strategy --- include/openPMD/ChunkInfo.hpp | 16 +++++++++++++ src/ChunkInfo.cpp | 41 ++++++++++++++++++++++++++++++++ src/binding/python/ChunkInfo.cpp | 5 ++++ test/ParallelIOTest.cpp | 8 +++++++ 4 files changed, 70 insertions(+) diff --git a/include/openPMD/ChunkInfo.hpp b/include/openPMD/ChunkInfo.hpp index 868478f8f6..daa4cc2c5c 100644 
--- a/include/openPMD/ChunkInfo.hpp +++ b/include/openPMD/ChunkInfo.hpp @@ -251,6 +251,22 @@ namespace chunk_assignment [[nodiscard]] std::unique_ptr clone() const override; }; + struct BlocksOfSourceRanks : Strategy + { + private: + unsigned int mpi_size, mpi_rank; + + public: + BlocksOfSourceRanks(unsigned int mpi_rank, unsigned int mpi_size); + + Assignment assign( + PartialAssignment, + RankMeta const &in, + RankMeta const &out) override; + + [[nodiscard]] std::unique_ptr clone() const override; + }; + /** * @brief Strategy that assigns chunks to be read by processes within * the same host that produced the chunk. diff --git a/src/ChunkInfo.cpp b/src/ChunkInfo.cpp index ceb074b887..6162a49f3a 100644 --- a/src/ChunkInfo.cpp +++ b/src/ChunkInfo.cpp @@ -361,6 +361,47 @@ namespace chunk_assignment return std::unique_ptr(new Blocks(*this)); } + BlocksOfSourceRanks::BlocksOfSourceRanks( + unsigned int mpi_rank_in, unsigned int mpi_size_in) + : mpi_size(mpi_size_in), mpi_rank(mpi_rank_in) + {} + + Assignment BlocksOfSourceRanks::assign( + PartialAssignment pa, RankMeta const &, RankMeta const &) + { + auto [notAssigned, res] = std::move(pa); + std::map> + sortSourceChunksBySourceRank; + for (auto &chunk : notAssigned) + { + auto sourceID = chunk.sourceID; + sortSourceChunksBySourceRank[sourceID].push_back(std::move(chunk)); + } + notAssigned.clear(); + auto [myChunksFrom, myChunksTo] = + OneDimensionalBlockSlicer::n_th_block_inside( + sortSourceChunksBySourceRank.size(), mpi_rank, mpi_size); + auto it = sortSourceChunksBySourceRank.begin(); + for (size_t i = 0; i < myChunksFrom; ++i) + { + ++it; + } + for (size_t i = 0; i < myChunksTo; ++i, ++it) + { + std::transform( + it->second.begin(), + it->second.end(), + std::back_inserter(res[mpi_rank]), + [](WrittenChunkInfo &chunk) { return std::move(chunk); }); + } + return res; + } + + std::unique_ptr BlocksOfSourceRanks::clone() const + { + return std::unique_ptr(new BlocksOfSourceRanks(*this)); + } + 
ByHostname::ByHostname(std::unique_ptr withinNode) : m_withinNode(std::move(withinNode)) {} diff --git a/src/binding/python/ChunkInfo.cpp b/src/binding/python/ChunkInfo.cpp index 5727e75403..75e200f5ea 100644 --- a/src/binding/python/ChunkInfo.cpp +++ b/src/binding/python/ChunkInfo.cpp @@ -307,6 +307,11 @@ void init_Chunk(py::module &m) py::init(), py::arg("mpi_rank"), py::arg("mpi_size")); + py::class_(m, "BlocksOfSourceRanks") + .def( + py::init(), + py::arg("mpi_rank"), + py::arg("mpi_size")); py::class_(m, "ByHostname") .def( diff --git a/test/ParallelIOTest.cpp b/test/ParallelIOTest.cpp index 8204326b94..3d495a8b95 100644 --- a/test/ParallelIOTest.cpp +++ b/test/ParallelIOTest.cpp @@ -2478,6 +2478,14 @@ void adios2_chunk_distribution() auto blocksAssignment = blocksStrategy.assign( chunkTable, rankMetaIn, readingRanksHostnames); printAssignment("BLOCKS", blocksAssignment, readingRanksHostnames); + + BlocksOfSourceRanks blocksOfSourceRanksStrategy(mpi_rank, mpi_size); + auto blocksOfSourceRanksAssignment = blocksOfSourceRanksStrategy.assign( + chunkTable, rankMetaIn, readingRanksHostnames); + printAssignment( + "BLOCKS OF SOURCE RANKS", + blocksOfSourceRanksAssignment, + readingRanksHostnames); } } From 7024c3f6891966c3382a17a34c80ba1afe11928b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 4 Apr 2025 08:33:00 +0000 Subject: [PATCH 12/27] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/binding/python/ChunkInfo.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/binding/python/ChunkInfo.cpp b/src/binding/python/ChunkInfo.cpp index 75e200f5ea..55d5c824e5 100644 --- a/src/binding/python/ChunkInfo.cpp +++ b/src/binding/python/ChunkInfo.cpp @@ -294,10 +294,12 @@ void init_Chunk(py::module &m) py::arg("rank_meta_out") = RankMeta()); py::class_(m, "FromPartialStrategy") - .def(py::init([](PartialStrategy 
const &firstPass, - Strategy const &secondPass) { - return FromPartialStrategy(firstPass.clone(), secondPass.clone()); - })); + .def( + py::init([](PartialStrategy const &firstPass, + Strategy const &secondPass) { + return FromPartialStrategy( + firstPass.clone(), secondPass.clone()); + })); py::class_(m, "RoundRobin").def(py::init<>()); py::class_(m, "RoundRobinOfSourceRanks") From 3ac9533a0d9b5bc3c884d3f53c2c942afd15c4b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Tue, 13 May 2025 15:58:58 +0200 Subject: [PATCH 13/27] Add rank info to assign() params --- include/openPMD/ChunkInfo.hpp | 78 ++++++---- src/ChunkInfo.cpp | 137 ++++++++++++------ src/binding/python/ChunkInfo.cpp | 77 +++++----- .../python/openpmd_api/pipe/__main__.py | 39 ++--- test/CoreTest.cpp | 6 +- test/ParallelIOTest.cpp | 27 ++-- 6 files changed, 212 insertions(+), 152 deletions(-) diff --git a/include/openPMD/ChunkInfo.hpp b/include/openPMD/ChunkInfo.hpp index daa4cc2c5c..2994a9e4e7 100644 --- a/include/openPMD/ChunkInfo.hpp +++ b/include/openPMD/ChunkInfo.hpp @@ -121,7 +121,9 @@ namespace chunk_assignment Assignment assign( ChunkTable, RankMeta const &rankMetaIn, - RankMeta const &rankMetaOut); + RankMeta const &rankMetaOut, + size_t my_rank, + size_t num_ranks); /** * @brief Assign chunks to be loaded to reading processes. * @@ -136,7 +138,9 @@ namespace chunk_assignment virtual Assignment assign( PartialAssignment partialAssignment, RankMeta const &in, - RankMeta const &out) = 0; + RankMeta const &out, + size_t my_rank, + size_t num_ranks) = 0; virtual std::unique_ptr clone() const = 0; @@ -155,8 +159,12 @@ namespace chunk_assignment */ struct PartialStrategy { - PartialAssignment - assign(ChunkTable table, RankMeta const &in, RankMeta const &out); + PartialAssignment assign( + ChunkTable table, + RankMeta const &in, + RankMeta const &out, + size_t my_rank, + size_t num_ranks); /** * @brief Assign chunks to be loaded to reading processes. 
* @@ -173,7 +181,9 @@ namespace chunk_assignment virtual PartialAssignment assign( PartialAssignment partialAssignment, RankMeta const &in, - RankMeta const &out) = 0; + RankMeta const &out, + size_t my_rank, + size_t num_ranks) = 0; virtual std::unique_ptr clone() const = 0; @@ -201,7 +211,9 @@ namespace chunk_assignment virtual Assignment assign( PartialAssignment, RankMeta const &in, - RankMeta const &out) override; + RankMeta const &out, + size_t my_rank, + size_t num_ranks) override; virtual std::unique_ptr clone() const override; @@ -220,7 +232,9 @@ namespace chunk_assignment Assignment assign( PartialAssignment, RankMeta const &in, - RankMeta const &out) override; + RankMeta const &out, + size_t my_rank, + size_t num_ranks) override; virtual std::unique_ptr clone() const override; }; @@ -230,39 +244,33 @@ namespace chunk_assignment Assignment assign( PartialAssignment, RankMeta const &in, - RankMeta const &out) override; + RankMeta const &out, + size_t my_rank, + size_t num_ranks) override; virtual std::unique_ptr clone() const override; }; struct Blocks : Strategy { - private: - unsigned int mpi_size, mpi_rank; - - public: - Blocks(unsigned int mpi_rank, unsigned int mpi_size); - Assignment assign( PartialAssignment, RankMeta const &in, - RankMeta const &out) override; + RankMeta const &out, + size_t my_rank, + size_t num_ranks) override; [[nodiscard]] std::unique_ptr clone() const override; }; struct BlocksOfSourceRanks : Strategy { - private: - unsigned int mpi_size, mpi_rank; - - public: - BlocksOfSourceRanks(unsigned int mpi_rank, unsigned int mpi_size); - Assignment assign( PartialAssignment, RankMeta const &in, - RankMeta const &out) override; + RankMeta const &out, + size_t my_rank, + size_t num_ranks) override; [[nodiscard]] std::unique_ptr clone() const override; }; @@ -282,7 +290,9 @@ namespace chunk_assignment PartialAssignment assign( PartialAssignment, RankMeta const &in, - RankMeta const &out) override; + RankMeta const &out, + size_t my_rank, 
+ size_t num_ranks) override; virtual std::unique_ptr clone() const override; @@ -303,22 +313,20 @@ namespace chunk_assignment struct ByCuboidSlice : Strategy { ByCuboidSlice( - std::unique_ptr blockSlicer, - Extent totalExtent, - unsigned int mpi_rank, - unsigned int mpi_size); + std::unique_ptr blockSlicer, Extent totalExtent); Assignment assign( PartialAssignment, RankMeta const &in, - RankMeta const &out) override; + RankMeta const &out, + size_t my_rank, + size_t num_ranks) override; virtual std::unique_ptr clone() const override; private: std::unique_ptr blockSlicer; Extent totalExtent; - unsigned int mpi_rank, mpi_size; }; /** @@ -346,7 +354,9 @@ namespace chunk_assignment Assignment assign( PartialAssignment, RankMeta const &in, - RankMeta const &out) override; + RankMeta const &out, + size_t my_rank, + size_t num_ranks) override; virtual std::unique_ptr clone() const override; }; @@ -367,7 +377,9 @@ namespace chunk_assignment Assignment assign( PartialAssignment, RankMeta const &in, - RankMeta const &out) override; + RankMeta const &out, + size_t my_rank, + size_t num_ranks) override; virtual std::unique_ptr clone() const override; }; @@ -388,7 +400,9 @@ namespace chunk_assignment Assignment assign( PartialAssignment, RankMeta const &in, - RankMeta const &out) override; + RankMeta const &out, + size_t my_rank, + size_t num_ranks) override; virtual std::unique_ptr clone() const override; }; diff --git a/src/ChunkInfo.cpp b/src/ChunkInfo.cpp index 6162a49f3a..64b9d7f421 100644 --- a/src/ChunkInfo.cpp +++ b/src/ChunkInfo.cpp @@ -33,6 +33,7 @@ #include #include #include +#include #include #ifdef _WIN32 @@ -214,14 +215,22 @@ namespace chunk_assignment } // namespace Assignment Strategy::assign( - ChunkTable table, RankMeta const &rankIn, RankMeta const &rankOut) + ChunkTable table, + RankMeta const &rankIn, + RankMeta const &rankOut, + size_t my_rank, + size_t num_ranks) { if (rankOut.size() == 0) { throw std::runtime_error("[assignChunks] No output ranks 
defined"); } return this->assign( - PartialAssignment(std::move(table)), rankIn, rankOut); + PartialAssignment(std::move(table)), + rankIn, + rankOut, + my_rank, + num_ranks); } PartialAssignment::PartialAssignment( @@ -235,10 +244,18 @@ namespace chunk_assignment {} PartialAssignment PartialStrategy::assign( - ChunkTable table, RankMeta const &rankIn, RankMeta const &rankOut) + ChunkTable table, + RankMeta const &rankIn, + RankMeta const &rankOut, + size_t my_rank, + size_t num_ranks) { return this->assign( - PartialAssignment(std::move(table)), rankIn, rankOut); + PartialAssignment(std::move(table)), + rankIn, + rankOut, + my_rank, + num_ranks); } FromPartialStrategy::FromPartialStrategy( @@ -250,12 +267,17 @@ namespace chunk_assignment Assignment FromPartialStrategy::assign( PartialAssignment partialAssignment, RankMeta const &in, - RankMeta const &out) + RankMeta const &out, + size_t my_rank, + size_t num_ranks) { return m_secondPass->assign( - m_firstPass->assign(std::move(partialAssignment), in, out), + m_firstPass->assign( + std::move(partialAssignment), in, out, my_rank, num_ranks), in, - out); + out, + my_rank, + num_ranks); } std::unique_ptr FromPartialStrategy::clone() const @@ -267,7 +289,9 @@ namespace chunk_assignment Assignment RoundRobin::assign( PartialAssignment partialAssignment, RankMeta const &, // ignored parameter - RankMeta const &out) + RankMeta const &out, + size_t /* my_rank */, + size_t /* num_ranks */) { if (out.size() == 0) { @@ -288,8 +312,8 @@ namespace chunk_assignment Assignment &sinkChunks = partialAssignment.assigned; for (auto &chunk : sourceChunks) { - chunk.sourceID = nextRank(); - sinkChunks[chunk.sourceID].push_back(std::move(chunk)); + auto rank = nextRank(); + sinkChunks[rank].push_back(std::move(chunk)); } return sinkChunks; } @@ -302,7 +326,9 @@ namespace chunk_assignment Assignment RoundRobinOfSourceRanks::assign( PartialAssignment partialAssignment, RankMeta const &, // ignored parameter - RankMeta const &out) + 
RankMeta const &out, + size_t /* my_rank */, + size_t /* num_ranks */) { std::map> sortSourceChunksBySourceRank; @@ -337,21 +363,21 @@ namespace chunk_assignment return std::unique_ptr(new RoundRobinOfSourceRanks); } - Blocks::Blocks(unsigned int mpi_rank_in, unsigned int mpi_size_in) - : mpi_size(mpi_size_in), mpi_rank(mpi_rank_in) - {} - - Assignment - Blocks::assign(PartialAssignment pa, RankMeta const &, RankMeta const &) + Assignment Blocks::assign( + PartialAssignment pa, + RankMeta const &, + RankMeta const &, + size_t my_rank, + size_t num_ranks) { auto [notAssigned, res] = std::move(pa); auto [myChunksFrom, myChunksTo] = OneDimensionalBlockSlicer::n_th_block_inside( - notAssigned.size(), mpi_rank, mpi_size); + notAssigned.size(), my_rank, num_ranks); std::transform( notAssigned.begin() + myChunksFrom, notAssigned.begin() + (myChunksFrom + myChunksTo), - std::back_inserter(res[mpi_rank]), + std::back_inserter(res[my_rank]), [](WrittenChunkInfo &chunk) { return std::move(chunk); }); return res; } @@ -361,13 +387,12 @@ namespace chunk_assignment return std::unique_ptr(new Blocks(*this)); } - BlocksOfSourceRanks::BlocksOfSourceRanks( - unsigned int mpi_rank_in, unsigned int mpi_size_in) - : mpi_size(mpi_size_in), mpi_rank(mpi_rank_in) - {} - Assignment BlocksOfSourceRanks::assign( - PartialAssignment pa, RankMeta const &, RankMeta const &) + PartialAssignment pa, + RankMeta const &, + RankMeta const &, + size_t my_rank, + size_t num_ranks) { auto [notAssigned, res] = std::move(pa); std::map> @@ -380,7 +405,7 @@ namespace chunk_assignment notAssigned.clear(); auto [myChunksFrom, myChunksTo] = OneDimensionalBlockSlicer::n_th_block_inside( - sortSourceChunksBySourceRank.size(), mpi_rank, mpi_size); + sortSourceChunksBySourceRank.size(), my_rank, num_ranks); auto it = sortSourceChunksBySourceRank.begin(); for (size_t i = 0; i < myChunksFrom; ++i) { @@ -391,7 +416,7 @@ namespace chunk_assignment std::transform( it->second.begin(), it->second.end(), - 
std::back_inserter(res[mpi_rank]), + std::back_inserter(res[my_rank]), [](WrittenChunkInfo &chunk) { return std::move(chunk); }); } return res; @@ -407,7 +432,11 @@ namespace chunk_assignment {} PartialAssignment ByHostname::assign( - PartialAssignment res, RankMeta const &in, RankMeta const &out) + PartialAssignment res, + RankMeta const &in, + RankMeta const &out, + size_t my_rank, + size_t /* num_ranks */) { // collect chunks by hostname std::map chunkGroups; @@ -460,16 +489,25 @@ namespace chunk_assignment else { RankMeta ranksOnTargetNode; - for (unsigned int rank : it->second) + size_t local_rank = 0; + size_t counter = 0; + for (auto rank : it->second) { ranksOnTargetNode[rank] = hostname; + if (rank == my_rank) + { + local_rank = counter; + } + ++counter; } Assignment swapped; swapped.swap(sinkChunks); sinkChunks = m_withinNode->assign( PartialAssignment(chunkGroup.second, std::move(swapped)), in, - ranksOnTargetNode); + ranksOnTargetNode, + local_rank, + it->second.size()); } } return res; @@ -482,14 +520,9 @@ namespace chunk_assignment } ByCuboidSlice::ByCuboidSlice( - std::unique_ptr blockSlicer_in, - Extent totalExtent_in, - unsigned int mpi_rank_in, - unsigned int mpi_size_in) + std::unique_ptr blockSlicer_in, Extent totalExtent_in) : blockSlicer(std::move(blockSlicer_in)) , totalExtent(std::move(totalExtent_in)) - , mpi_rank(mpi_rank_in) - , mpi_size(mpi_size_in) {} namespace @@ -628,14 +661,18 @@ namespace chunk_assignment } // namespace Assignment ByCuboidSlice::assign( - PartialAssignment res, RankMeta const &, RankMeta const &) + PartialAssignment res, + RankMeta const &, + RankMeta const &, + size_t my_rank, + size_t num_ranks) { ChunkTable &sourceSide = res.notAssigned; Assignment &sinkSide = res.assigned; Offset myOffset; Extent myExtent; std::tie(myOffset, myExtent) = - blockSlicer->sliceBlock(totalExtent, mpi_size, mpi_rank); + blockSlicer->sliceBlock(totalExtent, num_ranks, my_rank); for (auto &chunk : sourceSide) { @@ -647,7 +684,7 @@ 
namespace chunk_assignment goto outer_loop; } } - sinkSide[mpi_rank].push_back(std::move(chunk)); + sinkSide[my_rank].push_back(std::move(chunk)); outer_loop:; } @@ -656,8 +693,8 @@ namespace chunk_assignment std::unique_ptr ByCuboidSlice::clone() const { - return std::unique_ptr(new ByCuboidSlice( - blockSlicer->clone(), totalExtent, mpi_rank, mpi_size)); + return std::unique_ptr( + new ByCuboidSlice(blockSlicer->clone(), totalExtent)); } BinPacking::BinPacking(size_t splitAlongDimension_in) @@ -665,7 +702,11 @@ namespace chunk_assignment {} Assignment BinPacking::assign( - PartialAssignment res, RankMeta const &, RankMeta const &sinkRanks) + PartialAssignment res, + RankMeta const &, + RankMeta const &sinkRanks, + size_t /* my_rank */, + size_t /* num_ranks */) { ChunkTable &sourceChunks = res.notAssigned; Assignment &sinkChunks = res.assigned; @@ -768,7 +809,11 @@ namespace chunk_assignment FailingStrategy::FailingStrategy() = default; Assignment FailingStrategy::assign( - PartialAssignment assignment, RankMeta const &, RankMeta const &) + PartialAssignment assignment, + RankMeta const &, + RankMeta const &, + size_t /* my_rank */, + size_t /* num_ranks */) { if (assignment.notAssigned.empty()) { @@ -789,7 +834,11 @@ namespace chunk_assignment DiscardingStrategy::DiscardingStrategy() = default; Assignment DiscardingStrategy::assign( - PartialAssignment assignment, RankMeta const &, RankMeta const &) + PartialAssignment assignment, + RankMeta const &, + RankMeta const &, + size_t /* my_rank */, + size_t /* num_ranks */) { return assignment.assigned; } diff --git a/src/binding/python/ChunkInfo.cpp b/src/binding/python/ChunkInfo.cpp index 55d5c824e5..35cf798e7a 100644 --- a/src/binding/python/ChunkInfo.cpp +++ b/src/binding/python/ChunkInfo.cpp @@ -127,10 +127,12 @@ struct PyStrategy chunk_assignment::Assignment assign( chunk_assignment::PartialAssignment assignment, chunk_assignment::RankMeta const &in, - chunk_assignment::RankMeta const &out) override + 
chunk_assignment::RankMeta const &out, + size_t my_rank, + size_t num_ranks) override { return call_virtual( - "assign", std::move(assignment), in, out); + "assign", std::move(assignment), in, out, my_rank, num_ranks); } [[nodiscard]] std::unique_ptr clone() const override @@ -146,10 +148,12 @@ struct PyPartialStrategy chunk_assignment::PartialAssignment assign( chunk_assignment::PartialAssignment assignment, chunk_assignment::RankMeta const &in, - chunk_assignment::RankMeta const &out) override + chunk_assignment::RankMeta const &out, + size_t my_rank, + size_t num_ranks) override { return call_virtual( - "assign", std::move(assignment), in, out); + "assign", std::move(assignment), in, out, my_rank, num_ranks); } [[nodiscard]] std::unique_ptr clone() const override @@ -259,39 +263,59 @@ void init_Chunk(py::module &m) .def(py::init<>()) .def( "assign", - py::overload_cast( - &PartialStrategy::assign), + py::overload_cast< + ChunkTable, + RankMeta const &, + RankMeta const &, + size_t, + size_t>(&PartialStrategy::assign), py::arg("chunk_table"), py::arg("rank_meta_in") = RankMeta(), - py::arg("rank_meta_out") = RankMeta()) + py::arg("rank_meta_out") = RankMeta(), + py::arg("my_rank") = 0, + py::arg("num_ranks") = 1) .def( "assign", py::overload_cast< PartialAssignment, RankMeta const &, - RankMeta const &>(&PartialStrategy::assign), + RankMeta const &, + size_t, + size_t>(&PartialStrategy::assign), py::arg("partial_assignment"), py::arg("rank_meta_in") = RankMeta(), - py::arg("rank_meta_out") = RankMeta()); + py::arg("rank_meta_out") = RankMeta(), + py::arg("my_rank") = 0, + py::arg("num_ranks") = 1); py::class_(m, "Strategy") .def(py::init<>()) .def( "assign", - py::overload_cast( - &Strategy::assign), + py::overload_cast< + ChunkTable, + RankMeta const &, + RankMeta const &, + size_t, + size_t>(&Strategy::assign), py::arg("chunk_table"), py::arg("rank_meta_in") = RankMeta(), - py::arg("rank_meta_out") = RankMeta()) + py::arg("rank_meta_out") = RankMeta(), + 
py::arg("my_rank") = 0, + py::arg("num_ranks") = 1) .def( "assign", py::overload_cast< PartialAssignment, RankMeta const &, - RankMeta const &>(&Strategy::assign), + RankMeta const &, + size_t, + size_t>(&Strategy::assign), py::arg("partial_assignment"), py::arg("rank_meta_in") = RankMeta(), - py::arg("rank_meta_out") = RankMeta()); + py::arg("rank_meta_out") = RankMeta(), + py::arg("my_rank") = 0, + py::arg("num_ranks") = 1); py::class_(m, "FromPartialStrategy") .def( @@ -304,16 +328,9 @@ void init_Chunk(py::module &m) py::class_(m, "RoundRobin").def(py::init<>()); py::class_(m, "RoundRobinOfSourceRanks") .def(py::init<>()); - py::class_(m, "Blocks") - .def( - py::init(), - py::arg("mpi_rank"), - py::arg("mpi_size")); + py::class_(m, "Blocks").def(py::init<>()); py::class_(m, "BlocksOfSourceRanks") - .def( - py::init(), - py::arg("mpi_rank"), - py::arg("mpi_size")); + .def(py::init<>()); py::class_(m, "ByHostname") .def( @@ -331,20 +348,12 @@ void init_Chunk(py::module &m) py::class_(m, "ByCuboidSlice") .def( - py::init([](BlockSlicer const &blockSlicer, - Extent totalExtent, - unsigned int mpi_rank, - unsigned int mpi_size) { + py::init([](BlockSlicer const &blockSlicer, Extent totalExtent) { return ByCuboidSlice( - blockSlicer.clone(), - std::move(totalExtent), - mpi_rank, - mpi_size); + blockSlicer.clone(), std::move(totalExtent)); }), py::arg("block_slicer"), - py::arg("total_extent"), - py::arg("mpi_rank"), - py::arg("mpi_size")); + py::arg("total_extent")); py::class_(m, "BinPacking") .def(py::init<>()) diff --git a/src/binding/python/openpmd_api/pipe/__main__.py b/src/binding/python/openpmd_api/pipe/__main__.py index 40e616e3bd..ad9c31c48e 100644 --- a/src/binding/python/openpmd_api/pipe/__main__.py +++ b/src/binding/python/openpmd_api/pipe/__main__.py @@ -116,38 +116,34 @@ def __init__(self, source, dynamicView, offset, extent): # Example how to implement a simple partial strategy in Python class LoadOne(io.PartialStrategy): - def __init__(self, rank): + 
def __init__(self): super().__init__() - self.rank = rank - def assign(self, assignment, *_): + def assign(self, assignment, ranks_in, ranks_out, my_rank, num_ranks): element = assignment.not_assigned.pop() - if self.rank not in assignment.assigned: - assignment.assigned[self.rank] = [element] + if my_rank not in assignment.assigned: + assignment.assigned[my_rank] = [element] else: - assignment.assigned[self.rank].append(element) + assignment.assigned[my_rank].append(element) return assignment # Example how to implement a simple strategy in Python class LoadAll(io.Strategy): - def __init__(self, rank): + def __init__(self): super().__init__() - self.rank = rank - def assign(self, assignment, *_): + def assign(self, assignment, ranks_in, ranks_out, my_rank, num_ranks): res = assignment.assigned - if self.rank not in res: - res[self.rank] = assignment.not_assigned + if my_rank not in res: + res[my_rank] = assignment.not_assigned else: - res[self.rank].extend(assignment.not_assigned) + res[my_rank].extend(assignment.not_assigned) return res def distribution_strategy(dataset_extent, - mpi_rank, - mpi_size, strategy_identifier=None): if strategy_identifier is None or not strategy_identifier: if 'OPENPMD_CHUNK_DISTRIBUTION' in os.environ: @@ -158,24 +154,19 @@ def distribution_strategy(dataset_extent, match = re.search('hostname_(.*)_(.*)', strategy_identifier) if match is not None: inside_node = distribution_strategy(dataset_extent, - mpi_rank, - mpi_size, strategy_identifier=match.group(1)) second_phase = distribution_strategy( dataset_extent, - mpi_rank, - mpi_size, strategy_identifier=match.group(2)) return io.FromPartialStrategy(io.ByHostname(inside_node), second_phase) elif strategy_identifier == 'all': - return io.FromPartialStrategy(LoadOne(mpi_rank), LoadAll(mpi_rank)) + return io.FromPartialStrategy(LoadOne(), LoadAll()) elif strategy_identifier == 'roundrobin': return io.RoundRobin() elif strategy_identifier == 'binpacking': return io.BinPacking() elif 
strategy_identifier == 'slicedataset': - return io.ByCuboidSlice(io.OneDimensionalBlockSlicer(), dataset_extent, - mpi_rank, mpi_size) + return io.ByCuboidSlice(io.OneDimensionalBlockSlicer(), dataset_extent) elif strategy_identifier == 'fail': return io.FailingStrategy() else: @@ -319,10 +310,10 @@ def __copy(self, src, dest, current_path="/data/"): dest.make_constant(src.get_attribute("value")) else: chunk_table = src.available_chunks() - strategy = distribution_strategy(shape, self.comm.rank, - self.comm.size) + strategy = distribution_strategy(shape) my_chunks = strategy.assign(chunk_table, self.inranks, - self.outranks) + self.outranks, + self.comm.rank, self.comm.size) for chunk in my_chunks[ self.comm.rank] if self.comm.rank in my_chunks else []: if debug: diff --git a/test/CoreTest.cpp b/test/CoreTest.cpp index b9820d8222..b1e82b7498 100644 --- a/test/CoreTest.cpp +++ b/test/CoreTest.cpp @@ -101,7 +101,7 @@ void print(RankMeta const &meta, Assignment const &table) << std::endl; for (auto const &chunk : chunkList) { - std::cout << "\t[Offset: "; + std::cout << "\t[From " << chunk.sourceID << "\tOffset: "; for (auto offset : chunk.offset) { std::cout << offset << ", "; @@ -127,8 +127,8 @@ TEST_CASE("chunk_assignment", "[core]") FromPartialStrategy fullStrategy( std::make_unique(std::move(byHostname)), std::make_unique()); - Assignment res = - fullStrategy.assign(params.table, params.metaSource, params.metaSink); + Assignment res = fullStrategy.assign( + params.table, params.metaSource, params.metaSink, 0, 2); std::cout << "\nRESULTS:" << std::endl; test_chunk_assignment::print(params.metaSink, res); } diff --git a/test/ParallelIOTest.cpp b/test/ParallelIOTest.cpp index 3d495a8b95..c8f60e14fe 100644 --- a/test/ParallelIOTest.cpp +++ b/test/ParallelIOTest.cpp @@ -2389,7 +2389,7 @@ void adios2_chunk_distribution() */ RoundRobin roundRobinStrategy; auto roundRobinAssignment = roundRobinStrategy.assign( - chunkTable, rankMetaIn, readingRanksHostnames); + 
chunkTable, rankMetaIn, readingRanksHostnames, mpi_rank, mpi_size); printAssignment( "ROUND ROBIN", roundRobinAssignment, readingRanksHostnames); @@ -2405,8 +2405,8 @@ void adios2_chunk_distribution() */ ByHostname byHostname( std::make_unique(/* splitAlongDimension = */ 1)); - auto byHostnamePartialAssignment = - byHostname.assign(chunkTable, rankMetaIn, readingRanksHostnames); + auto byHostnamePartialAssignment = byHostname.assign( + chunkTable, rankMetaIn, readingRanksHostnames, mpi_rank, mpi_size); printAssignment( "HOSTNAME, ASSIGNED", byHostnamePartialAssignment.assigned, @@ -2422,8 +2422,8 @@ void adios2_chunk_distribution() * sink rank. Needed in some domains. */ ByHostname byHostname2(std::make_unique()); - auto byHostnamePartialAssignment2 = - byHostname2.assign(chunkTable, rankMetaIn, readingRanksHostnames); + auto byHostnamePartialAssignment2 = byHostname2.assign( + chunkTable, rankMetaIn, readingRanksHostnames, mpi_rank, mpi_size); printAssignment( "HOSTNAME2, ASSIGNED", byHostnamePartialAssignment2.assigned, @@ -2446,7 +2446,7 @@ void adios2_chunk_distribution() std::make_unique(std::move(byHostname)), std::make_unique(/* splitAlongDimension = */ 1)); auto fromPartialAssignment = fromPartialStrategy.assign( - chunkTable, rankMetaIn, readingRanksHostnames); + chunkTable, rankMetaIn, readingRanksHostnames, mpi_rank, mpi_size); printAssignment( "HOSTNAME WITH SECOND PASS", fromPartialAssignment, @@ -2465,23 +2465,20 @@ void adios2_chunk_distribution() * in others such as this one, it's an unneeded overhead.) 
*/ ByCuboidSlice cuboidSliceStrategy( - std::make_unique(1), - E_x.getExtent(), - mpi_rank, - mpi_size); + std::make_unique(1), E_x.getExtent()); auto cuboidSliceAssignment = cuboidSliceStrategy.assign( - chunkTable, rankMetaIn, readingRanksHostnames); + chunkTable, rankMetaIn, readingRanksHostnames, mpi_rank, mpi_size); printAssignment( "CUBOID SLICE", cuboidSliceAssignment, readingRanksHostnames); - Blocks blocksStrategy(mpi_rank, mpi_size); + Blocks blocksStrategy; auto blocksAssignment = blocksStrategy.assign( - chunkTable, rankMetaIn, readingRanksHostnames); + chunkTable, rankMetaIn, readingRanksHostnames, mpi_rank, mpi_size); printAssignment("BLOCKS", blocksAssignment, readingRanksHostnames); - BlocksOfSourceRanks blocksOfSourceRanksStrategy(mpi_rank, mpi_size); + BlocksOfSourceRanks blocksOfSourceRanksStrategy; auto blocksOfSourceRanksAssignment = blocksOfSourceRanksStrategy.assign( - chunkTable, rankMetaIn, readingRanksHostnames); + chunkTable, rankMetaIn, readingRanksHostnames, mpi_rank, mpi_size); printAssignment( "BLOCKS OF SOURCE RANKS", blocksOfSourceRanksAssignment, From 337d21befcc504190d49951d68f9d2e43b5a0673 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Tue, 15 Jul 2025 12:01:14 +0200 Subject: [PATCH 14/27] Fix the Python trampoline logic --- src/binding/python/ChunkInfo.cpp | 37 ++++++++++---------------------- 1 file changed, 11 insertions(+), 26 deletions(-) diff --git a/src/binding/python/ChunkInfo.cpp b/src/binding/python/ChunkInfo.cpp index 35cf798e7a..ed31202c82 100644 --- a/src/binding/python/ChunkInfo.cpp +++ b/src/binding/python/ChunkInfo.cpp @@ -25,6 +25,7 @@ #include "openPMD/binding/python/Common.hpp" #include +#include #include #include // std::move @@ -46,15 +47,6 @@ template struct ClonableTrampoline { - struct OriginalInstance - { - py::handle pythonObject; - - ~OriginalInstance() - { - pythonObject.dec_ref(); - } - }; /* * If the shared pointer is empty, this object is the original object owned * by Python 
and the Python handle can be acquired by: @@ -64,16 +56,13 @@ struct ClonableTrampoline * By only storing this member in copied instances, but not in the original * instance, we avoid a memory cycle and ensure clean destruction. */ - std::shared_ptr m_originalInstance; + std::shared_ptr m_originalInstance; - [[nodiscard]] py::handle get_python_handle() const + [[nodiscard]] py::object get_python_handle() const { if (m_originalInstance) { - // std::cout << "Refcount " - // << m_originalInstance->pythonObject.ref_count() - // << std::endl; - return m_originalInstance->pythonObject; + return *m_originalInstance; } else { @@ -86,7 +75,7 @@ struct ClonableTrampoline Res call_virtual(std::string const &nameOfPythonMethod, Args &&...args) { py::gil_scoped_acquire gil; - auto ptr = get_python_handle().template cast(); + auto ptr = get_python_handle().template cast(); auto fun = py::get_override(ptr, nameOfPythonMethod.c_str()); if (!fun) { @@ -107,14 +96,9 @@ struct ClonableTrampoline } else { - OriginalInstance oi; - oi.pythonObject = py::cast(self); - // no idea why we would need this twice, but we do - oi.pythonObject.inc_ref(); - oi.pythonObject.inc_ref(); auto res = std::make_unique(*self); res->m_originalInstance = - std::make_shared(std::move(oi)); + std::make_shared(py::cast(self)); return res; } } @@ -259,8 +243,7 @@ void init_Chunk(py::module &m) py::bind_map(m, "RankMeta"); - py::class_(m, "PartialStrategy") - .def(py::init<>()) + py::class_(m, "PartialStrategyCpp") .def( "assign", py::overload_cast< @@ -287,9 +270,10 @@ void init_Chunk(py::module &m) py::arg("rank_meta_out") = RankMeta(), py::arg("my_rank") = 0, py::arg("num_ranks") = 1); + py::class_(m, "PartialStrategy") + .def(py::init<>()); - py::class_(m, "Strategy") - .def(py::init<>()) + py::class_(m, "StrategyCpp") .def( "assign", py::overload_cast< @@ -316,6 +300,7 @@ void init_Chunk(py::module &m) py::arg("rank_meta_out") = RankMeta(), py::arg("my_rank") = 0, py::arg("num_ranks") = 1); + py::class_(m, 
"Strategy").def(py::init<>()); py::class_(m, "FromPartialStrategy") .def( From 14b2559c5bfd3b8e16b180ca03367f4a3d2d9458 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Fri, 14 Jul 2023 14:55:10 +0200 Subject: [PATCH 15/27] Use discard strategy as second run, only consider my own hostname --- src/binding/python/openpmd_api/pipe/__main__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/binding/python/openpmd_api/pipe/__main__.py b/src/binding/python/openpmd_api/pipe/__main__.py index ad9c31c48e..0f3348918b 100644 --- a/src/binding/python/openpmd_api/pipe/__main__.py +++ b/src/binding/python/openpmd_api/pipe/__main__.py @@ -169,6 +169,8 @@ def distribution_strategy(dataset_extent, return io.ByCuboidSlice(io.OneDimensionalBlockSlicer(), dataset_extent) elif strategy_identifier == 'fail': return io.FailingStrategy() + elif strategy_identifier == 'discard': + return io.DiscardingStrategy() else: raise RuntimeError("Unknown distribution strategy: " + strategy_identifier) From 941fe8191f1cd822b12710a1f1b49d33def75fa2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Tue, 7 Mar 2023 16:47:47 +0100 Subject: [PATCH 16/27] IncreaseGranularity strategy supports different granularities at write and read sides --- .../python/openpmd_api/pipe/__main__.py | 125 +++++++++++++++++- 1 file changed, 123 insertions(+), 2 deletions(-) diff --git a/src/binding/python/openpmd_api/pipe/__main__.py b/src/binding/python/openpmd_api/pipe/__main__.py index 0f3348918b..0ec5c91599 100644 --- a/src/binding/python/openpmd_api/pipe/__main__.py +++ b/src/binding/python/openpmd_api/pipe/__main__.py @@ -127,8 +127,121 @@ def assign(self, assignment, ranks_in, ranks_out, my_rank, num_ranks): assignment.assigned[my_rank].append(element) return assignment +class IncreaseGranularity(io.PartialStrategy): + def __init__( + self, + granularity_in, + granularity_out, + inner_distribution, + ): + super().__init__() + self.inner_distribution = 
inner_distribution + self.granularity_in = granularity_in + self.granularity_out = granularity_out + + def assign(self, assignment, in_ranks, out_ranks, my_rank, num_ranks): + if "in_ranks_inner" in dir(self): + return self.inner_distribution.assign( + assignment, self.in_ranks_inner, self.out_ranks_inner + ) + + def hosts_in_order(rank_assignment): + already_seen = set() + res = [] + for (_, hostname) in rank_assignment.items(): + if hostname not in already_seen: + already_seen.add(hostname) + res.append(hostname) + return res + + in_hosts_in_order = hosts_in_order(in_ranks) + out_hosts_in_order = hosts_in_order(out_ranks) + + # Creates names "0", "1", "2", ... for the meta hosts and maps the real + # host names to the meta host names + def hostname_to_hostgroup(ordered_hosts, granularity): + res = {} # real host -> host group + current_meta_host = 0 + granularity_counter = 0 + for host in ordered_hosts: + res[host] = str(current_meta_host) + granularity_counter += 1 + if granularity_counter >= granularity: + granularity_counter = 0 + current_meta_host += 1 + return res + + in_hostname_to_hostgroup = hostname_to_hostgroup( + in_hosts_in_order, self.granularity_in + ) + out_hostname_to_hostgroup = hostname_to_hostgroup( + out_hosts_in_order, self.granularity_out + ) + + # Creates `in_ranks` and `out_ranks` for the inner call, based on the + # meta hosts created above + def inner_rank_assignment(outer_rank_assignment, hostname_to_hostgroup): + res = {} + for (rank, hostname) in outer_rank_assignment.items(): + res[rank] = hostname_to_hostgroup[hostname] + return res + + self.in_ranks_inner = inner_rank_assignment(in_ranks, in_hostname_to_hostgroup) + self.out_ranks_inner = inner_rank_assignment( + out_ranks, out_hostname_to_hostgroup + ) + + # # we only care about the local host (why tho?) 
+ # local_host = self.out_ranks_inner[my_rank] + # # restrict out_ranks_inner to those ranks + # # that run on the current meta host + # self.out_ranks_inner = { + # rank: host + # for rank, host in self.out_ranks_inner.items() + # if host == local_host + # } + + return self.inner_distribution.assign( + assignment, self.in_ranks_inner, self.out_ranks_inner, my_rank, num_ranks + ) + +class MergingStrategy(io.Strategy): + def __init__(self, inner_strategy): + super().__init__() + self.inner_strategy = inner_strategy + + def assign(self, assignment, in_ranks, out_ranks): + res = self.inner_strategy.assign(assignment, in_ranks, out_ranks) + for out_rank, assignment in res.items(): + merged = assignment.merge_chunks_from_same_sourceID() + assignment.clear() + for in_rank, chunks in merged.items(): + for chunk in chunks: + assignment.append( + io.WrittenChunkInfo(chunk.offset, chunk.extent, in_rank) + ) + return res + -# Example how to implement a simple strategy in Python +# strategy = IncreaseGranularity(2, 1) +# assignment = [ +# io.WrittenChunkInfo([0], [1], 0), +# io.WrittenChunkInfo([1], [1], 1), +# io.WrittenChunkInfo([2], [1], 2), +# io.WrittenChunkInfo([3], [1], 3), +# ] +# in_ranks = {0: "host0", 1: "host1", 2: "host3", 3: "host4"} +# out_ranks = {0: "host2", 1: "host5"} +# res = strategy.assign(assignment, in_ranks, out_ranks) +# print(f"NOT ASSIGNED: {len(res.not_assigned)} chunks") +# print("ASSIGNED:") +# for rank, chunks in res.assigned.items(): +# print(f"\tRANK {rank}:", end='') +# for chunk in chunks: +# print(f" [{chunk.offset}-{chunk.extent}]", end='') +# print() + +#Example how to implement a simple strategy in Python class LoadAll(io.Strategy): def __init__(self): @@ -159,8 +272,15 @@ def distribution_strategy(dataset_extent, dataset_extent, strategy_identifier=match.group(2)) return io.FromPartialStrategy(io.ByHostname(inside_node), second_phase) + elif strategy_identifier == 'fan_in': + granularity = os.environ['OPENPMD_FAN_IN'] + granularity = 
int(granularity) + return IncreaseGranularity( + granularity, 1, + io.FromPartialStrategy(io.ByHostname(io.RoundRobin()), + io.DiscardingStrategy())) elif strategy_identifier == 'all': - return io.FromPartialStrategy(LoadOne(), LoadAll()) + return io.FromPartialStrategy(IncreaseGranularity(5), LoadAll()) elif strategy_identifier == 'roundrobin': return io.RoundRobin() elif strategy_identifier == 'binpacking': @@ -312,6 +432,7 @@ def __copy(self, src, dest, current_path="/data/"): dest.make_constant(src.get_attribute("value")) else: chunk_table = src.available_chunks() + # todo buffer the strategy strategy = distribution_strategy(shape) my_chunks = strategy.assign(chunk_table, self.inranks, self.outranks, From 1e6889927a865e7b318611144aeb977adce4a8a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Wed, 7 May 2025 17:00:03 +0200 Subject: [PATCH 17/27] Add blocksofsourcerank to pipe script --- src/binding/python/openpmd_api/pipe/__main__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/binding/python/openpmd_api/pipe/__main__.py b/src/binding/python/openpmd_api/pipe/__main__.py index 0ec5c91599..79e3c730ec 100644 --- a/src/binding/python/openpmd_api/pipe/__main__.py +++ b/src/binding/python/openpmd_api/pipe/__main__.py @@ -291,6 +291,8 @@ def distribution_strategy(dataset_extent, return io.FailingStrategy() elif strategy_identifier == 'discard': return io.DiscardingStrategy() + elif strategy_identifier == 'blocksofsourceranks': + return io.BlocksOfSourceRanks() else: raise RuntimeError("Unknown distribution strategy: " + strategy_identifier) From 2511f6c1ef4aecefaf0474c6918c654c2bdc16ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Tue, 15 Jul 2025 14:33:16 +0200 Subject: [PATCH 18/27] CI fixes --- include/openPMD/ChunkInfo.hpp | 20 ++++++++++++++++++ .../python/openpmd_api/pipe/__main__.py | 21 ++++++++++++------- 2 files changed, 34 insertions(+), 7 deletions(-) diff --git a/include/openPMD/ChunkInfo.hpp 
b/include/openPMD/ChunkInfo.hpp index 2994a9e4e7..96733c85d2 100644 --- a/include/openPMD/ChunkInfo.hpp +++ b/include/openPMD/ChunkInfo.hpp @@ -133,6 +133,16 @@ namespace chunk_assignment * Merge the unassigned chunks into the partially assigned table. * @param in Meta information on writing processes, e.g. hostnames. * @param out Meta information on reading processes, e.g. hostnames. + * @param my_rank Rank identifier for the current process. Will be + * considered by some distribution strategies that may be called + * for only a subselection of the data space (e.g. for + * distributing data within processes on the same compute node + * in a cluster). + * @param num_ranks Number of processes among which chunks are to be + * distributed. Will be considered by some distribution + * strategies that may be called for only a subselection of the + * data space (e.g. for distributing data within processes on the + * same compute node in a cluster). * @return ChunkTable A table that assigns chunks to reading processes. */ virtual Assignment assign( @@ -174,6 +184,16 @@ namespace chunk_assignment * Merge the unassigned chunks into the partially assigned table. * @param in Meta information on writing processes, e.g. hostnames. * @param out Meta information on reading processes, e.g. hostnames. + * @param my_rank Rank identifier for the current process. Will be + * considered by some distribution strategies that may be called + * for only a subselection of the data space (e.g. for + * distributing data within processes on the same compute node + * in a cluster). + * @param num_ranks Number of processes among which chunks are to be + * distributed. Will be considered by some distribution + * strategies that may be called for only a subselection of the + * data space (e.g. for distributing data within processes on the + * same compute node in a cluster). 
* @return PartialAssignment Two chunktables, one of leftover chunks * that were not assigned and one that assigns chunks to * reading processes. diff --git a/src/binding/python/openpmd_api/pipe/__main__.py b/src/binding/python/openpmd_api/pipe/__main__.py index 79e3c730ec..9eae8085d1 100644 --- a/src/binding/python/openpmd_api/pipe/__main__.py +++ b/src/binding/python/openpmd_api/pipe/__main__.py @@ -127,6 +127,7 @@ def assign(self, assignment, ranks_in, ranks_out, my_rank, num_ranks): assignment.assigned[my_rank].append(element) return assignment + class IncreaseGranularity(io.PartialStrategy): def __init__( self, @@ -180,13 +181,15 @@ def hostname_to_hostgroup(ordered_hosts, granularity): # Creates `in_ranks` and `out_ranks` for the inner call, based on the # meta hosts created above - def inner_rank_assignment(outer_rank_assignment, hostname_to_hostgroup): + def inner_rank_assignment( + outer_rank_assignment, hostname_to_hostgroup): res = {} for (rank, hostname) in outer_rank_assignment.items(): res[rank] = hostname_to_hostgroup[hostname] return res - self.in_ranks_inner = inner_rank_assignment(in_ranks, in_hostname_to_hostgroup) + self.in_ranks_inner = \ + inner_rank_assignment(in_ranks, in_hostname_to_hostgroup) self.out_ranks_inner = inner_rank_assignment( out_ranks, out_hostname_to_hostgroup ) @@ -202,9 +205,12 @@ def inner_rank_assignment(outer_rank_assignment, hostname_to_hostgroup): # } return self.inner_distribution.assign( - assignment, self.in_ranks_inner, self.out_ranks_inner, my_rank, num_ranks + assignment, + self.in_ranks_inner, self.out_ranks_inner, + my_rank, num_ranks ) + class MergingStrategy(io.Strategy): def __init__(self, inner_strategy): super().__init__() @@ -218,7 +224,8 @@ def assign(self, assignment, in_ranks, out_ranks): for in_rank, chunks in merged.items(): for chunk in chunks: assignment.append( - io.WrittenChunkInfo(chunk.offset, chunk.extent, in_rank) + io.WrittenChunkInfo( + chunk.offset, chunk.extent, in_rank) ) return res @@ 
-241,7 +248,7 @@ def assign(self, assignment, in_ranks, out_ranks): # print(f" [{chunk.offset}-{chunk.extent}]", end='') # print() -#Example how to implement a simple strategy in Python +# Example how to implement a simple strategy in Python class LoadAll(io.Strategy): def __init__(self): @@ -266,8 +273,8 @@ def distribution_strategy(dataset_extent, strategy_identifier = 'hostname_binpacking_slicedataset' # default match = re.search('hostname_(.*)_(.*)', strategy_identifier) if match is not None: - inside_node = distribution_strategy(dataset_extent, - strategy_identifier=match.group(1)) + inside_node = distribution_strategy( + dataset_extent, strategy_identifier=match.group(1)) second_phase = distribution_strategy( dataset_extent, strategy_identifier=match.group(2)) From 409b2dd74d09579b6ce0a11cc38b33fd1ba89168 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Tue, 15 Jul 2025 16:51:21 +0200 Subject: [PATCH 19/27] Move BlockSlicer somewhere else --- CMakeLists.txt | 2 +- include/openPMD/ChunkInfo.hpp | 9 ++-- include/openPMD/Streaming.hpp | 9 ---- include/openPMD/auxiliary/BlockSlicer.hpp | 53 +++++++++++++++++++ .../auxiliary/OneDimensionalBlockSlicer.hpp | 44 +++++++++++++++ include/openPMD/benchmark/mpi/BlockSlicer.hpp | 34 +++--------- .../mpi/OneDimensionalBlockSlicer.hpp | 24 +++------ src/ChunkInfo.cpp | 9 ++-- .../OneDimensionalBlockSlicer.cpp | 8 ++- src/binding/python/ChunkInfo.cpp | 10 ++-- test/ParallelIOTest.cpp | 5 +- 11 files changed, 131 insertions(+), 76 deletions(-) create mode 100644 include/openPMD/auxiliary/BlockSlicer.hpp create mode 100644 include/openPMD/auxiliary/OneDimensionalBlockSlicer.hpp rename src/{benchmark/mpi => auxiliary}/OneDimensionalBlockSlicer.cpp (95%) diff --git a/CMakeLists.txt b/CMakeLists.txt index b6f01f3d2e..49b62a8bbd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -422,7 +422,7 @@ set(CORE_SOURCE src/backend/PatchRecord.cpp src/backend/PatchRecordComponent.cpp src/backend/Writable.cpp - 
src/benchmark/mpi/OneDimensionalBlockSlicer.cpp + src/auxiliary/OneDimensionalBlockSlicer.cpp src/helper/list_series.cpp src/snapshots/ContainerImpls.cpp src/snapshots/ContainerTraits.cpp diff --git a/include/openPMD/ChunkInfo.hpp b/include/openPMD/ChunkInfo.hpp index 96733c85d2..b0b9e549f3 100644 --- a/include/openPMD/ChunkInfo.hpp +++ b/include/openPMD/ChunkInfo.hpp @@ -23,14 +23,14 @@ #include "openPMD/config.hpp" #include "openPMD/Dataset.hpp" // Offset, Extent -#include "openPMD/benchmark/mpi/BlockSlicer.hpp" -#include +#include "openPMD/auxiliary/BlockSlicer.hpp" #if openPMD_HAVE_MPI #include #endif #include +#include #include #include @@ -333,7 +333,8 @@ namespace chunk_assignment struct ByCuboidSlice : Strategy { ByCuboidSlice( - std::unique_ptr blockSlicer, Extent totalExtent); + std::unique_ptr blockSlicer, + Extent totalExtent); Assignment assign( PartialAssignment, @@ -345,7 +346,7 @@ namespace chunk_assignment virtual std::unique_ptr clone() const override; private: - std::unique_ptr blockSlicer; + std::unique_ptr blockSlicer; Extent totalExtent; }; diff --git a/include/openPMD/Streaming.hpp b/include/openPMD/Streaming.hpp index 8d1a283761..47e7c39681 100644 --- a/include/openPMD/Streaming.hpp +++ b/include/openPMD/Streaming.hpp @@ -1,14 +1,5 @@ #pragma once -#include -#include -#include -#include - -#include "openPMD/Dataset.hpp" -#include "openPMD/benchmark/mpi/BlockSlicer.hpp" -#include - namespace openPMD { /** diff --git a/include/openPMD/auxiliary/BlockSlicer.hpp b/include/openPMD/auxiliary/BlockSlicer.hpp new file mode 100644 index 0000000000..a2569aa002 --- /dev/null +++ b/include/openPMD/auxiliary/BlockSlicer.hpp @@ -0,0 +1,53 @@ +/* Copyright 2018-2021 Franz Poeschel + * + * This file is part of openPMD-api. 
+ * + * openPMD-api is free software: you can redistribute it and/or modify + * it under the terms of of either the GNU General Public License or + * the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * openPMD-api is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License and the GNU Lesser General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * and the GNU Lesser General Public License along with openPMD-api. + * If not, see . + */ + +#pragma once + +#include "openPMD/Dataset.hpp" + +#include + +namespace openPMD::auxiliary +{ +/** + * Abstract class to associate a thread with its local cuboid in the total + * cuboid. + */ +class BlockSlicer +{ +public: + /** + * Associate the current thread with its cuboid. + * @param totalExtent The total extent of the cuboid. + * @param size The number of threads to be used (not greater than MPI size). + * @param rank The MPI rank. + * @return A pair of the cuboid's offset and extent. + */ + virtual std::pair + sliceBlock(Extent &totalExtent, int size, int rank) = 0; + + virtual std::unique_ptr clone() const = 0; + + /** This class will be derived from + */ + virtual ~BlockSlicer() = default; +}; +} // namespace openPMD::auxiliary diff --git a/include/openPMD/auxiliary/OneDimensionalBlockSlicer.hpp b/include/openPMD/auxiliary/OneDimensionalBlockSlicer.hpp new file mode 100644 index 0000000000..79fb68dcdf --- /dev/null +++ b/include/openPMD/auxiliary/OneDimensionalBlockSlicer.hpp @@ -0,0 +1,44 @@ +/* Copyright 2018-2021 Franz Poeschel + * + * This file is part of openPMD-api. 
+ * + * openPMD-api is free software: you can redistribute it and/or modify + * it under the terms of of either the GNU General Public License or + * the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * openPMD-api is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License and the GNU Lesser General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * and the GNU Lesser General Public License along with openPMD-api. + * If not, see . + */ + +#pragma once + +#include "openPMD/Dataset.hpp" +#include "openPMD/auxiliary/BlockSlicer.hpp" + +namespace openPMD::auxiliary +{ +class OneDimensionalBlockSlicer : public BlockSlicer +{ +public: + Extent::value_type m_dim; + + explicit OneDimensionalBlockSlicer(Extent::value_type dim = 0); + + static std::pair + n_th_block_inside(size_t length, size_t rank, size_t size); + + std::pair + sliceBlock(Extent &totalExtent, int size, int rank) override; + + virtual std::unique_ptr clone() const override; +}; +} // namespace openPMD::auxiliary diff --git a/include/openPMD/benchmark/mpi/BlockSlicer.hpp b/include/openPMD/benchmark/mpi/BlockSlicer.hpp index a720793b41..0670bb91a2 100644 --- a/include/openPMD/benchmark/mpi/BlockSlicer.hpp +++ b/include/openPMD/benchmark/mpi/BlockSlicer.hpp @@ -1,4 +1,4 @@ -/* Copyright 2018-2021 Franz Poeschel +/* Copyright 2025 Franz Poeschel * * This file is part of openPMD-api. * @@ -19,35 +19,13 @@ * If not, see . 
*/ -#pragma once +/* Legacy header for backward compatibility */ -#include "openPMD/Dataset.hpp" +#pragma once -#include +#include "openPMD/auxiliary/BlockSlicer.hpp" namespace openPMD { -/** - * Abstract class to associate a thread with its local cuboid in the total - * cuboid. - */ -class BlockSlicer -{ -public: - /** - * Associate the current thread with its cuboid. - * @param totalExtent The total extent of the cuboid. - * @param size The number of threads to be used (not greater than MPI size). - * @param rank The MPI rank. - * @return A pair of the cuboid's offset and extent. - */ - virtual std::pair - sliceBlock(Extent &totalExtent, int size, int rank) = 0; - - virtual std::unique_ptr clone() const = 0; - - /** This class will be derived from - */ - virtual ~BlockSlicer() = default; -}; -} // namespace openPMD +using auxiliary::BlockSlicer; +} diff --git a/include/openPMD/benchmark/mpi/OneDimensionalBlockSlicer.hpp b/include/openPMD/benchmark/mpi/OneDimensionalBlockSlicer.hpp index f0d943d972..510bdc2731 100644 --- a/include/openPMD/benchmark/mpi/OneDimensionalBlockSlicer.hpp +++ b/include/openPMD/benchmark/mpi/OneDimensionalBlockSlicer.hpp @@ -1,4 +1,4 @@ -/* Copyright 2018-2021 Franz Poeschel +/* Copyright 2025 Franz Poeschel * * This file is part of openPMD-api. * @@ -19,26 +19,14 @@ * If not, see . 
*/ +/* Legacy header for backward compatibility */ + #pragma once -#include "openPMD/Dataset.hpp" +#include "openPMD/auxiliary/OneDimensionalBlockSlicer.hpp" #include "openPMD/benchmark/mpi/BlockSlicer.hpp" namespace openPMD { -class OneDimensionalBlockSlicer : public BlockSlicer -{ -public: - Extent::value_type m_dim; - - explicit OneDimensionalBlockSlicer(Extent::value_type dim = 0); - - static std::pair - n_th_block_inside(size_t length, size_t rank, size_t size); - - std::pair - sliceBlock(Extent &totalExtent, int size, int rank) override; - - virtual std::unique_ptr clone() const override; -}; -} // namespace openPMD +using auxiliary::OneDimensionalBlockSlicer; +} diff --git a/src/ChunkInfo.cpp b/src/ChunkInfo.cpp index 64b9d7f421..4461eb0227 100644 --- a/src/ChunkInfo.cpp +++ b/src/ChunkInfo.cpp @@ -22,7 +22,7 @@ #include "openPMD/ChunkInfo_internal.hpp" #include "openPMD/auxiliary/Mpi.hpp" -#include "openPMD/benchmark/mpi/OneDimensionalBlockSlicer.hpp" +#include "openPMD/auxiliary/OneDimensionalBlockSlicer.hpp" #include // std::sort #include @@ -372,7 +372,7 @@ namespace chunk_assignment { auto [notAssigned, res] = std::move(pa); auto [myChunksFrom, myChunksTo] = - OneDimensionalBlockSlicer::n_th_block_inside( + auxiliary::OneDimensionalBlockSlicer::n_th_block_inside( notAssigned.size(), my_rank, num_ranks); std::transform( notAssigned.begin() + myChunksFrom, @@ -404,7 +404,7 @@ namespace chunk_assignment } notAssigned.clear(); auto [myChunksFrom, myChunksTo] = - OneDimensionalBlockSlicer::n_th_block_inside( + auxiliary::OneDimensionalBlockSlicer::n_th_block_inside( sortSourceChunksBySourceRank.size(), my_rank, num_ranks); auto it = sortSourceChunksBySourceRank.begin(); for (size_t i = 0; i < myChunksFrom; ++i) @@ -520,7 +520,8 @@ namespace chunk_assignment } ByCuboidSlice::ByCuboidSlice( - std::unique_ptr blockSlicer_in, Extent totalExtent_in) + std::unique_ptr blockSlicer_in, + Extent totalExtent_in) : blockSlicer(std::move(blockSlicer_in)) , 
totalExtent(std::move(totalExtent_in)) {} diff --git a/src/benchmark/mpi/OneDimensionalBlockSlicer.cpp b/src/auxiliary/OneDimensionalBlockSlicer.cpp similarity index 95% rename from src/benchmark/mpi/OneDimensionalBlockSlicer.cpp rename to src/auxiliary/OneDimensionalBlockSlicer.cpp index bb71cc29db..5520ee9cd3 100644 --- a/src/benchmark/mpi/OneDimensionalBlockSlicer.cpp +++ b/src/auxiliary/OneDimensionalBlockSlicer.cpp @@ -19,11 +19,9 @@ * If not, see . */ -#include "openPMD/benchmark/mpi/OneDimensionalBlockSlicer.hpp" +#include "openPMD/auxiliary/OneDimensionalBlockSlicer.hpp" -#include - -namespace openPMD +namespace openPMD::auxiliary { OneDimensionalBlockSlicer::OneDimensionalBlockSlicer(Extent::value_type dim) : m_dim{dim} @@ -85,4 +83,4 @@ std::unique_ptr OneDimensionalBlockSlicer::clone() const { return std::unique_ptr(new OneDimensionalBlockSlicer(m_dim)); } -} // namespace openPMD +} // namespace openPMD::auxiliary diff --git a/src/binding/python/ChunkInfo.cpp b/src/binding/python/ChunkInfo.cpp index ed31202c82..b95e7caf56 100644 --- a/src/binding/python/ChunkInfo.cpp +++ b/src/binding/python/ChunkInfo.cpp @@ -19,12 +19,11 @@ * If not, see . 
*/ #include "openPMD/ChunkInfo.hpp" -#include "openPMD/benchmark/mpi/OneDimensionalBlockSlicer.hpp" +#include "openPMD/auxiliary/OneDimensionalBlockSlicer.hpp" #include "openPMD/binding/python/Mpi.hpp" #include "openPMD/binding/python/Common.hpp" -#include #include #include #include // std::move @@ -324,16 +323,17 @@ void init_Chunk(py::module &m) }), py::arg("strategy_within_node")); - (void)py::class_(m, "BlockSlicer"); + (void)py::class_(m, "BlockSlicer"); - py::class_( + py::class_( m, "OneDimensionalBlockSlicer") .def(py::init<>()) .def(py::init(), py::arg("dim")); py::class_(m, "ByCuboidSlice") .def( - py::init([](BlockSlicer const &blockSlicer, Extent totalExtent) { + py::init([](auxiliary::BlockSlicer const &blockSlicer, + Extent totalExtent) { return ByCuboidSlice( blockSlicer.clone(), std::move(totalExtent)); }), diff --git a/test/ParallelIOTest.cpp b/test/ParallelIOTest.cpp index c8f60e14fe..79cbfb7a82 100644 --- a/test/ParallelIOTest.cpp +++ b/test/ParallelIOTest.cpp @@ -9,7 +9,7 @@ #include "openPMD/auxiliary/Filesystem.hpp" #include "openPMD/openPMD.hpp" // @todo change includes -#include "openPMD/benchmark/mpi/OneDimensionalBlockSlicer.hpp" +#include "openPMD/auxiliary/OneDimensionalBlockSlicer.hpp" #include #if !openPMD_HAVE_MPI @@ -2465,7 +2465,8 @@ void adios2_chunk_distribution() * in others such as this one, it's an unneeded overhead.) 
*/ ByCuboidSlice cuboidSliceStrategy( - std::make_unique(1), E_x.getExtent()); + std::make_unique(1), + E_x.getExtent()); auto cuboidSliceAssignment = cuboidSliceStrategy.assign( chunkTable, rankMetaIn, readingRanksHostnames, mpi_rank, mpi_size); printAssignment( From 01bcc0f2cdc8e63f65ad96194f3f72b600c3c081 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Tue, 15 Jul 2025 17:36:17 +0200 Subject: [PATCH 20/27] More thorough documentation --- include/openPMD/ChunkInfo.hpp | 147 +++++++++++++++++++++++++++++++--- 1 file changed, 134 insertions(+), 13 deletions(-) diff --git a/include/openPMD/ChunkInfo.hpp b/include/openPMD/ChunkInfo.hpp index b0b9e549f3..06522fade2 100644 --- a/include/openPMD/ChunkInfo.hpp +++ b/include/openPMD/ChunkInfo.hpp @@ -86,18 +86,53 @@ using ChunkTable = std::vector; namespace chunk_assignment { - constexpr char const *HOSTFILE_VARNAME = "MPI_WRITTEN_HOSTFILE"; - + /** @brief Meta information on processes by ID (MPI rank). + * Typically a hostname. + */ using RankMeta = std::map; + /** @brief Result type for chunk assignment strategies. + * + * Chunks sorted by the destination process ID (MPI rank). Distribution + * strategies will in general only need to fill the chunks for the current + * (calling) process ID, but some (such as RoundRobin) will fill the + * information for other processes as well. + * + * The chunks have type WrittenChunkInfo, hence carrying information on the + * sourceID. + */ using Assignment = std::map>; + /** + * @brief Pairwise merge all chunks if they can be merged into a larger. + * + * Note that this function is in no way optimized, but follows a naive + * O(n^2) implementation. Use a more sophisticated method for large numbers + * of chunks. + * + * @param chunks A list of chunks. Merging will occur in-place. 
+ */ template - void mergeChunks(std::vector &); + void mergeChunks(std::vector &chunks); - auto mergeChunksFromSameSourceID(std::vector const &) + /** + * @brief Pairwise merge all chunks from the same source ID if they can be + * merged into a larger. + * + * @param chunks A list of chunks. + * @return Ordered by source ID, lists of merged chunks for each source ID. + */ + auto + mergeChunksFromSameSourceID(std::vector const &chunks) -> std::map>; + /** + * @brief Return type for partial chunk assignment strategies. + * + * A typical partial assignment strategy is ByHostname, which can assign + * chunks only within one compute node and will fail if there is no consumer + * in that same compute node. + */ struct PartialAssignment { ChunkTable notAssigned; @@ -118,14 +153,39 @@ namespace chunk_assignment */ struct Strategy { + /** + * @brief Assign chunks to be loaded to reading processes. + * + * @param chunkTable Chunk table obtained by + * BaseRecordComponent::availableChunks(). + * @param in Meta information on writing processes, e.g. hostnames. + * @param out Meta information on reading processes, e.g. hostnames. + * @param my_rank Rank identifier for the current process. Will be + * considered by some distribution strategies that may be called + * for only a subselection of the data space (e.g. for + * distributing data within processes on the same compute node + * in a cluster). + * @param num_ranks Number of processes among which chunks are to be + * distributed. Will be considered by some distribution + * strategies that may be called for only a subselection of the + * data space (e.g. for distributing data within processes on the + * same compute node in a cluster). + * @return A table that assigns chunks to reading processes. Chunks are + * sorted by the destination process ID (MPI rank). 
Distribution + * strategies will in general only need to fill the chunks for + * the current (calling) process ID, but some (such as + * RoundRobin) will fill the information for other processes + * as well. + */ Assignment assign( - ChunkTable, - RankMeta const &rankMetaIn, - RankMeta const &rankMetaOut, + ChunkTable chunkTable, + RankMeta const &in, + RankMeta const &out, size_t my_rank, size_t num_ranks); /** - * @brief Assign chunks to be loaded to reading processes. + * @brief Assign chunks to be loaded to reading processes. To be defined + * by implementors. * * @param partialAssignment Two chunktables, one of unassigned chunks * and one of chunks that might have already been assigned @@ -143,7 +203,12 @@ namespace chunk_assignment * strategies that may be called for only a subselection of the * data space (e.g. for distributing data within processes on the * same compute node in a cluster). - * @return ChunkTable A table that assigns chunks to reading processes. + * @return A table that assigns chunks to reading processes. Chunks are + * sorted by the destination process ID (MPI rank). Distribution + * strategies will in general only need to fill the chunks for + * the current (calling) process ID, but some (such as + * RoundRobin) will fill the information for other processes + * as well. */ virtual Assignment assign( PartialAssignment partialAssignment, @@ -166,9 +231,38 @@ namespace chunk_assignment * 1. Apply the partial strategy. * 2. Apply the full strategy to assign unassigned leftovers. * + * A typical partial assignment strategy is ByHostname, which can assign + * chunks only within one compute node and will fail if there is no consumer + * in that same compute node. */ struct PartialStrategy { + /** + * @brief Assign chunks to be loaded to reading processes. + * + * @param table Chunk table obtained by + * BaseRecordComponent::availableChunks(). + * Merge the unassigned chunks into the partially assigned table. 
+ * @param in Meta information on writing processes, e.g. hostnames. + * @param out Meta information on reading processes, e.g. hostnames. + * @param my_rank Rank identifier for the current process. Will be + * considered by some distribution strategies that may be called + * for only a subselection of the data space (e.g. for + * distributing data within processes on the same compute node + * in a cluster). + * @param num_ranks Number of processes among which chunks are to be + * distributed. Will be considered by some distribution + * strategies that may be called for only a subselection of the + * data space (e.g. for distributing data within processes on the + * same compute node in a cluster). + * @return Two chunktables, one of leftover chunks that were not + * assigned and one that assigns chunks to reading processes. + * Assigned chunks are sorted by the destination process ID + * (MPI rank). Distribution strategies will in general only need + * to fill the chunks for the current (calling) process ID. + * Chunks assigned to another destination processes may be + * silently dropped. + */ PartialAssignment assign( ChunkTable table, RankMeta const &in, @@ -176,7 +270,8 @@ namespace chunk_assignment size_t my_rank, size_t num_ranks); /** - * @brief Assign chunks to be loaded to reading processes. + * @brief Assign chunks to be loaded to reading processes. To be defined + * by implementors. * * @param partialAssignment Two chunktables, one of unassigned chunks * and one of chunks that might have already been assigned @@ -194,9 +289,13 @@ namespace chunk_assignment * strategies that may be called for only a subselection of the * data space (e.g. for distributing data within processes on the * same compute node in a cluster). - * @return PartialAssignment Two chunktables, one of leftover chunks - * that were not assigned and one that assigns chunks to - * reading processes. 
+ * @return Two chunktables, one of leftover chunks that were not + * assigned and one that assigns chunks to reading processes. + * Assigned chunks are sorted by the destination process ID + * (MPI rank). Distribution strategies will in general only need + * to fill the chunks for the current (calling) process ID. + * Chunks assigned to another destination processes may be + * silently dropped. */ virtual PartialAssignment assign( PartialAssignment partialAssignment, @@ -259,6 +358,12 @@ namespace chunk_assignment virtual std::unique_ptr clone() const override; }; + /** + * @brief Round-Robin at process level. + * + * Assign all chunks from the first source rank to the first reader rank, + * all from the second source rank to the second reader, and so on. + */ struct RoundRobinOfSourceRanks : Strategy { Assignment assign( @@ -271,6 +376,16 @@ namespace chunk_assignment virtual std::unique_ptr clone() const override; }; + /** + * @brief Alternative to RoundRobin, but instead gives every reader a + * sequential range of blocks. + * + * Sequential in here means the order as returned by + * BaseRecordComponent::availableChunks(). + * E.g. 6 blocks distributed to 2 processes will result in: + * The first three blocks go to the first process, the last three blocks to + * the second. + */ struct Blocks : Strategy { Assignment assign( @@ -283,6 +398,12 @@ namespace chunk_assignment [[nodiscard]] std::unique_ptr clone() const override; }; + /** + * @brief Blocks at process level. + * + * Assign writer processes to reader processes, instead of assigning blocks + * to processes. 
+ */ struct BlocksOfSourceRanks : Strategy { Assignment assign( From 4385fc79e2b3baee5e8af5ce5f38df68f53b0ee6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Thu, 17 Jul 2025 11:51:12 +0200 Subject: [PATCH 21/27] Cleanup, documentation, proper use of rank specification --- src/ChunkInfo.cpp | 90 +++++++++++-------- .../python/openpmd_api/pipe/__main__.py | 74 ++++++--------- 2 files changed, 81 insertions(+), 83 deletions(-) diff --git a/src/ChunkInfo.cpp b/src/ChunkInfo.cpp index 4461eb0227..785527867c 100644 --- a/src/ChunkInfo.cpp +++ b/src/ChunkInfo.cpp @@ -134,7 +134,13 @@ namespace chunk_assignment Offset offset(c1->offset); Extent extent(c1->extent); extent[dim] += c2->extent[dim]; - return std::make_optional(Chunk_t(offset, extent)); + // Copy from chunk1 in order to initialize with meta information + // from instantiations of Chunk_t that we cannot generically + // state here (such as the source ID) + Chunk_t res = chunk1; + res.offset = offset; + res.extent = extent; + return std::make_optional(std::move(res)); } return std::optional(); } @@ -289,23 +295,19 @@ namespace chunk_assignment Assignment RoundRobin::assign( PartialAssignment partialAssignment, RankMeta const &, // ignored parameter - RankMeta const &out, + RankMeta const &, size_t /* my_rank */, - size_t /* num_ranks */) + size_t num_ranks) { - if (out.size() == 0) + if (num_ranks == 0) { throw std::runtime_error( "[RoundRobin] Cannot round-robin to zero ranks."); } - auto it = out.begin(); - auto nextRank = [&it, &out]() { - if (it == out.end()) - { - it = out.begin(); - } - auto res = it->first; - it++; + size_t it = 0; + auto nextRank = [&it, num_ranks]() { + auto res = it; + it = (it + 1) % num_ranks; return res; }; ChunkTable &sourceChunks = partialAssignment.notAssigned; @@ -326,9 +328,9 @@ namespace chunk_assignment Assignment RoundRobinOfSourceRanks::assign( PartialAssignment partialAssignment, RankMeta const &, // ignored parameter - RankMeta const &out, + RankMeta 
const &, size_t /* my_rank */, - size_t /* num_ranks */) + size_t num_ranks) { std::map> sortSourceChunksBySourceRank; @@ -339,15 +341,12 @@ namespace chunk_assignment } partialAssignment.notAssigned.clear(); auto source_it = sortSourceChunksBySourceRank.begin(); - auto sink_it = out.begin(); + size_t sink_it = 0; for (; source_it != sortSourceChunksBySourceRank.end(); ++source_it, ++sink_it) { - if (sink_it == out.end()) - { - sink_it = out.begin(); - } - auto &chunks_go_here = partialAssignment.assigned[sink_it->first]; + sink_it %= num_ranks; + auto &chunks_go_here = partialAssignment.assigned[sink_it]; chunks_go_here.reserve( partialAssignment.assigned.size() + source_it->second.size()); for (auto &chunk : source_it->second) @@ -436,8 +435,14 @@ namespace chunk_assignment RankMeta const &in, RankMeta const &out, size_t my_rank, - size_t /* num_ranks */) + size_t num_ranks) { + if (out.size() != num_ranks) + { + throw std::runtime_error( + "[ByHostname] Invalid call: Rank meta information (hostnames) " + "incomplete."); + } // collect chunks by hostname std::map chunkGroups; ChunkTable &sourceChunks = res.notAssigned; @@ -447,6 +452,8 @@ namespace chunk_assignment for (auto &chunk : sourceChunks) { auto it = in.find(chunk.sourceID); + // If the writer rank has no meta information, move its chunk + // back to the leftover if (it == in.end()) { leftover.push_back(std::move(chunk)); @@ -489,24 +496,33 @@ namespace chunk_assignment else { RankMeta ranksOnTargetNode; - size_t local_rank = 0; + std::optional local_rank = 0; size_t counter = 0; for (auto rank : it->second) { - ranksOnTargetNode[rank] = hostname; + ranksOnTargetNode[counter] = hostname; if (rank == my_rank) { local_rank = counter; } ++counter; } + if (!local_rank.has_value()) + { + /* + * We are running on another compute node. This is fine, we + * have ensured above that some other process will take care + * of these chunks, they need not go back to the leftover. 
+ */ + continue; + } Assignment swapped; swapped.swap(sinkChunks); sinkChunks = m_withinNode->assign( PartialAssignment(chunkGroup.second, std::move(swapped)), in, ranksOnTargetNode, - local_rank, + *local_rank, it->second.size()); } } @@ -678,15 +694,13 @@ namespace chunk_assignment for (auto &chunk : sourceSide) { restrictToSelection(chunk.offset, chunk.extent, myOffset, myExtent); - for (auto ext : chunk.extent) + if (std::all_of( + chunk.extent.begin(), chunk.extent.end(), [](auto const e) { + return e > 0; + })) { - if (ext == 0) - { - goto outer_loop; - } + sinkSide[my_rank].push_back(std::move(chunk)); } - sinkSide[my_rank].push_back(std::move(chunk)); - outer_loop:; } return res.assigned; @@ -705,9 +719,9 @@ namespace chunk_assignment Assignment BinPacking::assign( PartialAssignment res, RankMeta const &, - RankMeta const &sinkRanks, + RankMeta const &, size_t /* my_rank */, - size_t /* num_ranks */) + size_t num_ranks) { ChunkTable &sourceChunks = res.notAssigned; Assignment &sinkChunks = res.assigned; @@ -721,7 +735,7 @@ namespace chunk_assignment } totalExtent += chunkExtent; } - size_t const idealSize = totalExtent / sinkRanks.size(); + size_t const idealSize = totalExtent / num_ranks; /* * Split chunks into subchunks of size at most idealSize. * The resulting list of chunks is sorted by chunk size in decreasing @@ -740,8 +754,8 @@ namespace chunk_assignment * data per process. */ auto worker = - [&sinkRanks, &digestibleChunks, &sinkChunks, idealSize]() { - for (auto const &destRank : sinkRanks) + [&num_ranks, &digestibleChunks, &sinkChunks, idealSize]() { + for (size_t destRank = 0; destRank < num_ranks; ++destRank) { /* * Within the second call of the worker lambda, this will @@ -763,7 +777,7 @@ namespace chunk_assignment * process within this call of the worker * lambda, so the loop can be broken out of. 
*/ - sinkChunks[destRank.first].push_back( + sinkChunks[destRank].push_back( std::move(it->chunk)); digestibleChunks.erase(it); break; @@ -771,7 +785,7 @@ namespace chunk_assignment else if (it->dataSize <= leftoverSize) { // assign smaller chunks as long as they fit - sinkChunks[destRank.first].push_back( + sinkChunks[destRank].push_back( std::move(it->chunk)); leftoverSize -= it->dataSize; it = digestibleChunks.erase(it); diff --git a/src/binding/python/openpmd_api/pipe/__main__.py b/src/binding/python/openpmd_api/pipe/__main__.py index 9eae8085d1..2cdffeae9e 100644 --- a/src/binding/python/openpmd_api/pipe/__main__.py +++ b/src/binding/python/openpmd_api/pipe/__main__.py @@ -113,6 +113,11 @@ def __init__(self, source, dynamicView, offset, extent): self.offset = offset self.extent = extent +# Find below a couple of examples on how to define chunk distribution +# strategies in Python by extending classes PartialStrategy or Strategy. +# These strategies may then be used inside composing strategies +# such as ByHostname. They may also call other strategies, as in +# IncreaseGranularity defined below. # Example how to implement a simple partial strategy in Python class LoadOne(io.PartialStrategy): @@ -127,7 +132,26 @@ def assign(self, assignment, ranks_in, ranks_out, my_rank, num_ranks): assignment.assigned[my_rank].append(element) return assignment +# Example how to implement a simple strategy in Python +class LoadAll(io.Strategy): + + def __init__(self): + super().__init__() + + def assign(self, assignment, ranks_in, ranks_out, my_rank, num_ranks): + res = assignment.assigned + if my_rank not in res: + res[my_rank] = assignment.not_assigned + else: + res[my_rank].extend(assignment.not_assigned) + return res +# A more complex distribution strategy. This creates supergroups of hostnames, +# separately for the writer and reader ranks. +# Every `granularity_in` writer hostnames are merged into one new hostname +# each, same for every `granularity_out` reader hostnames. 
+# An example usage is defining granularity_in=32, granularity_out=1 for a +# 32-to-1 fan-in pattern. class IncreaseGranularity(io.PartialStrategy): def __init__( self, @@ -194,16 +218,6 @@ def inner_rank_assignment( out_ranks, out_hostname_to_hostgroup ) - # # we only care about the local host (why tho?) - # local_host = self.out_ranks_inner[my_rank] - # # restrict out_ranks_inner to those ranks - # # that run on the current meta host - # self.out_ranks_inner = { - # rank: host - # for rank, host in self.out_ranks_inner.items() - # if host == local_host - # } - return self.inner_distribution.assign( assignment, self.in_ranks_inner, self.out_ranks_inner, @@ -211,13 +225,16 @@ def inner_rank_assignment( ) +# Merge chunks into larger chunks as much as possible within +# each source process for reducing the number of load requests. class MergingStrategy(io.Strategy): def __init__(self, inner_strategy): super().__init__() self.inner_strategy = inner_strategy - def assign(self, assignment, in_ranks, out_ranks): - res = self.inner_strategy.assign(assignment, in_ranks, out_ranks) + def assign(self, assignment, in_ranks, out_ranks, my_rank, num_ranks): + res = self.inner_strategy.assign( + assignment, in_ranks, out_ranks, my_rank, num_ranks) for out_rank, assignment in res.items(): merged = assignment.merge_chunks_from_same_sourceID() assignment.clear() @@ -230,39 +247,6 @@ def assign(self, assignment, in_ranks, out_ranks): return res -# strategy = IncreaseGranularity(2, 1) -# assignment = [ -# io.WrittenChunkInfo([0], [1], 0), -# io.WrittenChunkInfo([1], [1], 1), -# io.WrittenChunkInfo([2], [1], 2), -# io.WrittenChunkInfo([3], [1], 3), -# ] -# in_ranks = {0: "host0", 1: "host1", 2: "host3", 3: "host4"} -# out_ranks = {0: "host2", 1: "host5"} -# res = strategy.assign(assignment, in_ranks, out_ranks) -# print(f"NOT ASSIGNED: {len(res.not_assigned)} chunks") -# print("ASSIGNED:") -# for rank, chunks in res.assigned.items(): -# print(f"\tRANK {rank}:", end='') -# for chunk in 
chunks: -# print(f" [{chunk.offset}-{chunk.extent}]", end='') -# print() - -# Example how to implement a simple strategy in Python -class LoadAll(io.Strategy): - - def __init__(self): - super().__init__() - - def assign(self, assignment, ranks_in, ranks_out, my_rank, num_ranks): - res = assignment.assigned - if my_rank not in res: - res[my_rank] = assignment.not_assigned - else: - res[my_rank].extend(assignment.not_assigned) - return res - - def distribution_strategy(dataset_extent, strategy_identifier=None): if strategy_identifier is None or not strategy_identifier: From e72e1b425ec43112543b182318b513ef161857d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Thu, 17 Jul 2025 14:17:33 +0200 Subject: [PATCH 22/27] Actual testing --- test/ParallelIOTest.cpp | 190 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 176 insertions(+), 14 deletions(-) diff --git a/test/ParallelIOTest.cpp b/test/ParallelIOTest.cpp index 79cbfb7a82..f8516baf49 100644 --- a/test/ParallelIOTest.cpp +++ b/test/ParallelIOTest.cpp @@ -3,10 +3,12 @@ */ #include "Files_ParallelIO/ParallelIOTests.hpp" +#include "openPMD/ChunkInfo.hpp" #include "openPMD/IO/ADIOS/macros.hpp" #include "openPMD/IO/Access.hpp" #include "openPMD/auxiliary/Environment.hpp" #include "openPMD/auxiliary/Filesystem.hpp" +#include "openPMD/auxiliary/Mpi.hpp" #include "openPMD/openPMD.hpp" // @todo change includes #include "openPMD/auxiliary/OneDimensionalBlockSlicer.hpp" @@ -2224,7 +2226,121 @@ TEST_CASE("iterate_nonstreaming_series", "[serial][adios2]") iterate_nonstreaming_series::iterate_nonstreaming_series(); } -void adios2_chunk_distribution() +namespace adios2_chunk_distribution +{ +static auto add = [](size_t left, size_t right) { return left + right; }; +auto mergeTable(ChunkTable const &chunkTable) -> ChunkTable const & +{ + return chunkTable; +} +auto mergeTable(chunk_assignment::Assignment const &assignment) -> ChunkTable +{ + ChunkTable merged; + merged.reserve( + std::transform_reduce( 
+ assignment.begin(), + assignment.end(), + 0u, + add, + [](chunk_assignment::Assignment::value_type const &pair) { + return pair.second.size(); + })); + for (auto const &pair : assignment) + { + for (auto const &chunk : pair.second) + { + merged.insert(merged.end(), chunk); + } + } + return merged; +} +auto mergeTable(chunk_assignment::PartialAssignment const &assignment) +{ + auto const &[not_assigned, assigned] = assignment; + ChunkTable merged = mergeTable(assigned); + merged.reserve(merged.size() + not_assigned.size()); + for (auto const &chunk : not_assigned) + { + merged.insert(merged.end(), chunk); + } + return merged; +} + +template +auto equalTables(ChunkTable1 &&availableChunks, ChunkTable2 &&assignedChunks) +{ + return chunk_assignment::mergeChunksFromSameSourceID( + mergeTable(availableChunks)) == + chunk_assignment::mergeChunksFromSameSourceID( + mergeTable(assignedChunks)); +} + +auto totalVolume(ChunkInfo const &chunk) -> size_t +{ + return std::reduce( + chunk.extent.begin(), + chunk.extent.end(), + 1, + [](size_t left, size_t right) { return left * right; }); +} + +auto totalVolume(ChunkTable const &chunkTable) -> size_t +{ + return std::transform_reduce( + chunkTable.begin(), + chunkTable.end(), + 0u, + add, + static_cast(&totalVolume)); +} + +auto totalVolume(chunk_assignment::Assignment const &assignment) -> size_t +{ + return std::transform_reduce( + assignment.begin(), + assignment.end(), + 0u, + add, + [](chunk_assignment::Assignment::value_type const &pair) { + return totalVolume(pair.second); + }); +} + +template +auto parallelDisjointVolume(Assignment_t &&assignment, MPI_Comm communicator) + -> size_t +{ + size_t myVolume = totalVolume(assignment); + size_t summedVolume = 0; + MPI_Allreduce( + &myVolume, + &summedVolume, + 1, + auxiliary::openPMD_MPI_type(), + MPI_SUM, + communicator); + return summedVolume; +} + +template +auto equalDisjointByVolume( + ChunkTable const &availableChunks, + Assignment_t &&assignment, + std::optional const 
&leftover, + MPI_Comm communicator) -> bool +{ + size_t targetVolume = totalVolume(availableChunks); + if (leftover.has_value()) + { + targetVolume -= totalVolume(*leftover); + } + size_t summarizedVolume = parallelDisjointVolume(assignment, communicator); + // std::cout << "Left: " << targetVolume << ", right: " << summarizedVolume + // << std::endl; + return targetVolume == summarizedVolume; +} + +void run_test() { /* * This test simulates a multi-node streaming setup in order to test some @@ -2276,12 +2392,13 @@ void adios2_chunk_distribution() } } )END"; + constexpr bool verbose = true; auto printChunktable = [mpi_rank]( std::string const &strategyName, ChunkTable const &table, chunk_assignment::RankMeta const &meta) { - if (mpi_rank != 0) + if (!verbose || mpi_rank != 0) { return; } @@ -2307,7 +2424,7 @@ void adios2_chunk_distribution() std::string const &strategyName, chunk_assignment::Assignment const &table, chunk_assignment::RankMeta const &meta) { - if (mpi_rank != 0) + if (!verbose || mpi_rank != 0) { return; } @@ -2392,6 +2509,7 @@ void adios2_chunk_distribution() chunkTable, rankMetaIn, readingRanksHostnames, mpi_rank, mpi_size); printAssignment( "ROUND ROBIN", roundRobinAssignment, readingRanksHostnames); + REQUIRE(equalTables(chunkTable, roundRobinAssignment)); /* * Assign chunks by hostname. @@ -2415,6 +2533,17 @@ void adios2_chunk_distribution() "HOSTNAME, LEFTOVER", byHostnamePartialAssignment.notAssigned, rankMetaIn); + REQUIRE(equalDisjointByVolume( + chunkTable, + // Must restrict assignment to current rank, since + // ByHostname strategy output *may* also contain chunks from + // other ranks, but only partially. This is due to two + // effects: (1) Other processes are considered only as long + // as they live on the same node. (2) The within-node distribution + // is subject to a secondary distribution strategy. 
+ byHostnamePartialAssignment.assigned[mpi_rank], + byHostnamePartialAssignment.notAssigned, + MPI_COMM_WORLD)); /* * Same as above, but use RoundRobinOfSourceRanks this time, a strategy @@ -2432,6 +2561,17 @@ void adios2_chunk_distribution() "HOSTNAME2, LEFTOVER", byHostnamePartialAssignment2.notAssigned, rankMetaIn); + REQUIRE(equalDisjointByVolume( + chunkTable, + // Must restrict assignment to current rank, since + // ByHostname strategy output *may* also contain chunks from + // other ranks, but only partially. This is due to two + // effects: (1) Other processes are considered only as long + // as they live on the same node. (2) The within-node distribution + // is subject to a secondary distribution strategy. + byHostnamePartialAssignment2.assigned[mpi_rank], + byHostnamePartialAssignment2.notAssigned, + MPI_COMM_WORLD)); /* * Assign chunks by hostnames, once more. @@ -2444,25 +2584,39 @@ void adios2_chunk_distribution() */ FromPartialStrategy fromPartialStrategy( std::make_unique(std::move(byHostname)), - std::make_unique(/* splitAlongDimension = */ 1)); + std::make_unique()); auto fromPartialAssignment = fromPartialStrategy.assign( chunkTable, rankMetaIn, readingRanksHostnames, mpi_rank, mpi_size); printAssignment( "HOSTNAME WITH SECOND PASS", fromPartialAssignment, readingRanksHostnames); + REQUIRE(equalDisjointByVolume( + chunkTable, + // Must restrict assignment to current rank, since + // ByHostname strategy output *may* also contain chunks from + // other ranks, but only partially. This is due to two + // effects: (1) Other processes are considered only as long + // as they live on the same node. (2) The within-node and + // leftover distributions are each subject to a secondary + // distribution strategies. + fromPartialAssignment[mpi_rank], + std::nullopt, + MPI_COMM_WORLD)); /* - * Assign chunks by slicing the n-dimensional physical domain and - * intersecting those slices with the available chunks from the backend. 
- * Notice that this strategy only returns the chunks that the currently - * running rank is supposed to load, whereas the other strategies return - * a chunk table containing all chunks that all ranks will load. - * In principle, a chunk_assignment::Strategy only needs to return the - * chunks that the current rank should load, but is free to emplace the + * Assign chunks by slicing the n-dimensional physical domain + * and intersecting those slices with the available chunks from + * the backend. Notice that this strategy only returns the + * chunks that the currently running rank is supposed to load, + * whereas the other strategies return a chunk table containing + * all chunks that all ranks will load. In principle, a + * chunk_assignment::Strategy only needs to return the chunks + * that the current rank should load, but is free to emplace the * other chunks for other reading ranks as well. - * (Reasoning: In some strategies, calculating everything is necessary, - * in others such as this one, it's an unneeded overhead.) + * (Reasoning: In some strategies, calculating everything is + * necessary, in others such as this one, it's an unneeded + * overhead.) 
*/ ByCuboidSlice cuboidSliceStrategy( std::make_unique(1), @@ -2476,6 +2630,8 @@ void adios2_chunk_distribution() auto blocksAssignment = blocksStrategy.assign( chunkTable, rankMetaIn, readingRanksHostnames, mpi_rank, mpi_size); printAssignment("BLOCKS", blocksAssignment, readingRanksHostnames); + REQUIRE(equalDisjointByVolume( + chunkTable, blocksAssignment, std::nullopt, MPI_COMM_WORLD)); BlocksOfSourceRanks blocksOfSourceRanksStrategy; auto blocksOfSourceRanksAssignment = blocksOfSourceRanksStrategy.assign( @@ -2484,12 +2640,18 @@ void adios2_chunk_distribution() "BLOCKS OF SOURCE RANKS", blocksOfSourceRanksAssignment, readingRanksHostnames); + REQUIRE(equalDisjointByVolume( + chunkTable, + blocksOfSourceRanksAssignment, + std::nullopt, + MPI_COMM_WORLD)); } } +} // namespace adios2_chunk_distribution TEST_CASE("adios2_chunk_distribution", "[parallel][adios2]") { - adios2_chunk_distribution(); + adios2_chunk_distribution::run_test(); } #endif // openPMD_HAVE_ADIOS2 && openPMD_HAVE_MPI From 10cbdd8bc3e79236cc74443f0bcebc15e0f56d45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Thu, 17 Jul 2025 14:54:11 +0200 Subject: [PATCH 23/27] Test and fix ByHostname strategy --- src/ChunkInfo.cpp | 35 +++++++++++++++++++++++++---------- test/ParallelIOTest.cpp | 31 +++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+), 10 deletions(-) diff --git a/src/ChunkInfo.cpp b/src/ChunkInfo.cpp index 785527867c..646aa2a74c 100644 --- a/src/ChunkInfo.cpp +++ b/src/ChunkInfo.cpp @@ -19,8 +19,9 @@ * If not, see . 
*/ #include "openPMD/ChunkInfo.hpp" -#include "openPMD/ChunkInfo_internal.hpp" +#include "openPMD/ChunkInfo_internal.hpp" +#include "openPMD/Error.hpp" #include "openPMD/auxiliary/Mpi.hpp" #include "openPMD/auxiliary/OneDimensionalBlockSlicer.hpp" @@ -32,8 +33,8 @@ #include #include #include -#include #include +#include #include #ifdef _WIN32 @@ -496,18 +497,21 @@ namespace chunk_assignment else { RankMeta ranksOnTargetNode; - std::optional local_rank = 0; + std::vector mapLocalRanksBackToGlobal; + mapLocalRanksBackToGlobal.reserve(it->second.size()); + std::optional my_rank_local = 0; size_t counter = 0; for (auto rank : it->second) { + mapLocalRanksBackToGlobal.emplace_back(rank); ranksOnTargetNode[counter] = hostname; if (rank == my_rank) { - local_rank = counter; + my_rank_local = counter; } ++counter; } - if (!local_rank.has_value()) + if (!my_rank_local.has_value()) { /* * We are running on another compute node. This is fine, we @@ -516,14 +520,25 @@ namespace chunk_assignment */ continue; } - Assignment swapped; - swapped.swap(sinkChunks); - sinkChunks = m_withinNode->assign( - PartialAssignment(chunkGroup.second, std::move(swapped)), + auto newlyAssigned = m_withinNode->assign( + PartialAssignment(chunkGroup.second, {}), in, ranksOnTargetNode, - *local_rank, + *my_rank_local, it->second.size()); + for (auto &[local_rank, chunks] : newlyAssigned) + { + size_t global_rank = mapLocalRanksBackToGlobal[local_rank]; + auto it_sinkChunks = sinkChunks.find(global_rank); + if (it_sinkChunks != sinkChunks.end()) + { + throw error::Internal( + "Target rank " + std::to_string(global_rank) + + " assigned multiple times?"); + } + sinkChunks.emplace_hint( + it_sinkChunks, global_rank, std::move(chunks)); + } } } return res; diff --git a/test/ParallelIOTest.cpp b/test/ParallelIOTest.cpp index f8516baf49..1cffd47a79 100644 --- a/test/ParallelIOTest.cpp +++ b/test/ParallelIOTest.cpp @@ -2340,6 +2340,33 @@ auto equalDisjointByVolume( return targetVolume == summarizedVolume; } 
+void verifyHostnameAssignment( + chunk_assignment::PartialAssignment const &assignment, + chunk_assignment::RankMeta const &in, + chunk_assignment::RankMeta const &out) +{ + REQUIRE(!assignment.assigned.empty()); + for (auto const &[out_rank, chunks] : assignment.assigned) + { + for (auto const &chunk : chunks) + { + REQUIRE(in.at(chunk.sourceID) == out.at(out_rank)); + } + } + for (auto const &chunk : assignment.notAssigned) + { + auto const &hostname = in.at(chunk.sourceID); + REQUIRE( + std::none_of( + out.begin(), + out.end(), + [&hostname]( + chunk_assignment::RankMeta::value_type const &pair) { + return pair.second == hostname; + })); + } +} + void run_test() { /* @@ -2544,6 +2571,8 @@ void run_test() byHostnamePartialAssignment.assigned[mpi_rank], byHostnamePartialAssignment.notAssigned, MPI_COMM_WORLD)); + verifyHostnameAssignment( + byHostnamePartialAssignment, rankMetaIn, readingRanksHostnames); /* * Same as above, but use RoundRobinOfSourceRanks this time, a strategy @@ -2572,6 +2601,8 @@ void run_test() byHostnamePartialAssignment2.assigned[mpi_rank], byHostnamePartialAssignment2.notAssigned, MPI_COMM_WORLD)); + verifyHostnameAssignment( + byHostnamePartialAssignment2, rankMetaIn, readingRanksHostnames); /* * Assign chunks by hostnames, once more. From 83efecbff2ac233e7456ca8efd330864e8f3bc85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Thu, 17 Jul 2025 14:58:04 +0200 Subject: [PATCH 24/27] CI fixes --- src/binding/python/openpmd_api/pipe/__main__.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/binding/python/openpmd_api/pipe/__main__.py b/src/binding/python/openpmd_api/pipe/__main__.py index 2cdffeae9e..f5cdc30996 100644 --- a/src/binding/python/openpmd_api/pipe/__main__.py +++ b/src/binding/python/openpmd_api/pipe/__main__.py @@ -119,6 +119,7 @@ def __init__(self, source, dynamicView, offset, extent): # such as ByHostname. They may also call other strategies, as in # IncreaseGranularity defined below. 
+ # Example how to implement a simple partial strategy in Python class LoadOne(io.PartialStrategy): def __init__(self): @@ -132,6 +133,7 @@ def assign(self, assignment, ranks_in, ranks_out, my_rank, num_ranks): assignment.assigned[my_rank].append(element) return assignment + # Example how to implement a simple strategy in Python class LoadAll(io.Strategy): @@ -146,6 +148,7 @@ def assign(self, assignment, ranks_in, ranks_out, my_rank, num_ranks): res[my_rank].extend(assignment.not_assigned) return res + # A more complex distribution strategy. This creates supergroups of hostnames, # separately for the writer and reader ranks. # Every `granularity_in` writer hostnames are merged into one new hostname From 206341bddf8137adaa5bce3cc82cbfa55d104845 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Fri, 18 Jul 2025 10:58:21 +0200 Subject: [PATCH 25/27] Automate CoreTest --- src/ChunkInfo.cpp | 6 +++ test/CoreTest.cpp | 109 ++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 112 insertions(+), 3 deletions(-) diff --git a/src/ChunkInfo.cpp b/src/ChunkInfo.cpp index 646aa2a74c..0a98b2ddb5 100644 --- a/src/ChunkInfo.cpp +++ b/src/ChunkInfo.cpp @@ -444,6 +444,12 @@ namespace chunk_assignment "[ByHostname] Invalid call: Rank meta information (hostnames) " "incomplete."); } + if (!res.assigned.empty()) + { + throw std::runtime_error( + "[ByHostname] No support for merging into partial " + "assignments."); + } // collect chunks by hostname std::map chunkGroups; ChunkTable &sourceChunks = res.notAssigned; diff --git a/test/CoreTest.cpp b/test/CoreTest.cpp index b1e82b7498..0e1034edf1 100644 --- a/test/CoreTest.cpp +++ b/test/CoreTest.cpp @@ -22,6 +22,7 @@ #include #include #include +#include #include // cstdlib does not have setenv #include // NOLINT(modernize-deprecated-headers) @@ -75,8 +76,14 @@ struct Params } } }; + +static constexpr bool verbose = false; void print(RankMeta const &meta, ChunkTable const &table) { + if (!verbose) + { + return; 
+ } for (auto const &chunk : table) { std::cout << "[HOST: " << meta.at(chunk.sourceID) @@ -95,6 +102,10 @@ void print(RankMeta const &meta, ChunkTable const &table) } void print(RankMeta const &meta, Assignment const &table) { + if (!verbose) + { + return; + } for (auto &[rank, chunkList] : table) { std::cout << "[HOST: " << meta.at(rank) << ",\tRank: " << rank << "]" @@ -115,6 +126,80 @@ void print(RankMeta const &meta, Assignment const &table) } } } + +static auto add = [](size_t left, size_t right) { return left + right; }; +auto mergeTable(ChunkTable const &chunkTable) -> ChunkTable const & +{ + return chunkTable; +} +auto mergeTable(chunk_assignment::Assignment const &assignment) -> ChunkTable +{ + ChunkTable merged; + merged.reserve( + std::transform_reduce( + assignment.begin(), + assignment.end(), + 0u, + add, + [](chunk_assignment::Assignment::value_type const &pair) { + return pair.second.size(); + })); + for (auto const &pair : assignment) + { + for (auto const &chunk : pair.second) + { + merged.insert(merged.end(), chunk); + } + } + return merged; +} +auto mergeTable(chunk_assignment::PartialAssignment const &assignment) +{ + auto const &[not_assigned, assigned] = assignment; + ChunkTable merged = mergeTable(assigned); + merged.reserve(merged.size() + not_assigned.size()); + for (auto const &chunk : not_assigned) + { + merged.insert(merged.end(), chunk); + } + return merged; +} + +template +auto equalTables(ChunkTable1 &&availableChunks, ChunkTable2 &&assignedChunks) +{ + return chunk_assignment::mergeChunksFromSameSourceID( + mergeTable(availableChunks)) == + chunk_assignment::mergeChunksFromSameSourceID( + mergeTable(assignedChunks)); +} + +void verifyHostnameAssignment( + chunk_assignment::PartialAssignment const &assignment, + chunk_assignment::RankMeta const &in, + chunk_assignment::RankMeta const &out) +{ + REQUIRE(!assignment.assigned.empty()); + for (auto const &[out_rank, chunks] : assignment.assigned) + { + for (auto const &chunk : chunks) + 
{ + REQUIRE(in.at(chunk.sourceID) == out.at(out_rank)); + } + } + for (auto const &chunk : assignment.notAssigned) + { + auto const &hostname = in.at(chunk.sourceID); + REQUIRE( + std::none_of( + out.begin(), + out.end(), + [&hostname]( + chunk_assignment::RankMeta::value_type const &pair) { + return pair.second == hostname; + })); + } +} } // namespace test_chunk_assignment TEST_CASE("chunk_assignment", "[core]") @@ -124,13 +209,31 @@ TEST_CASE("chunk_assignment", "[core]") params.init(6, 2, 2, 1); test_chunk_assignment::print(params.metaSource, params.table); ByHostname byHostname(std::make_unique()); + + PartialAssignment partial_res0 = byHostname.assign( + params.table, params.metaSource, params.metaSink, 0, 2); + PartialAssignment partial_res1 = byHostname.assign( + params.table, params.metaSource, params.metaSink, 0, 2); + + REQUIRE(partial_res0.notAssigned == partial_res1.notAssigned); + PartialAssignment partial_res{ + partial_res0.notAssigned, + {{0, partial_res0.assigned[0]}, {1, partial_res1.assigned[1]}}}; + test_chunk_assignment::verifyHostnameAssignment( + partial_res, params.metaSource, params.metaSink); + FromPartialStrategy fullStrategy( std::make_unique(std::move(byHostname)), std::make_unique()); - Assignment res = fullStrategy.assign( + Assignment res0 = fullStrategy.assign( params.table, params.metaSource, params.metaSink, 0, 2); - std::cout << "\nRESULTS:" << std::endl; - test_chunk_assignment::print(params.metaSink, res); + Assignment res1 = fullStrategy.assign( + params.table, params.metaSource, params.metaSink, 1, 2); + Assignment res = {{0, res0[0]}, {1, res1[1]}}; + + REQUIRE(test_chunk_assignment::equalTables(params.table, res)); + + test_chunk_assignment::print(params.metaSink, res1); } TEST_CASE("versions_test", "[core]") From d82045cdb7f0f9f72fbf52d3f125b3cd1c22f2b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Fri, 18 Jul 2025 11:23:37 +0200 Subject: [PATCH 26/27] Guard against unprintableString issue on 
Windows --- test/CoreTest.cpp | 8 +++++--- test/ParallelIOTest.cpp | 35 ++++++++++++++++++++++++++--------- 2 files changed, 31 insertions(+), 12 deletions(-) diff --git a/test/CoreTest.cpp b/test/CoreTest.cpp index 0e1034edf1..6c62e1d82e 100644 --- a/test/CoreTest.cpp +++ b/test/CoreTest.cpp @@ -190,7 +190,7 @@ void verifyHostnameAssignment( for (auto const &chunk : assignment.notAssigned) { auto const &hostname = in.at(chunk.sourceID); - REQUIRE( + OPENPMD_REQUIRE_GUARD_WINDOWS( std::none_of( out.begin(), out.end(), @@ -215,7 +215,8 @@ TEST_CASE("chunk_assignment", "[core]") PartialAssignment partial_res1 = byHostname.assign( params.table, params.metaSource, params.metaSink, 0, 2); - REQUIRE(partial_res0.notAssigned == partial_res1.notAssigned); + OPENPMD_REQUIRE_GUARD_WINDOWS( + partial_res0.notAssigned == partial_res1.notAssigned); PartialAssignment partial_res{ partial_res0.notAssigned, {{0, partial_res0.assigned[0]}, {1, partial_res1.assigned[1]}}}; @@ -231,7 +232,8 @@ TEST_CASE("chunk_assignment", "[core]") params.table, params.metaSource, params.metaSink, 1, 2); Assignment res = {{0, res0[0]}, {1, res1[1]}}; - REQUIRE(test_chunk_assignment::equalTables(params.table, res)); + OPENPMD_REQUIRE_GUARD_WINDOWS( + test_chunk_assignment::equalTables(params.table, res)); test_chunk_assignment::print(params.metaSink, res1); } diff --git a/test/ParallelIOTest.cpp b/test/ParallelIOTest.cpp index 1cffd47a79..0bf5b972c5 100644 --- a/test/ParallelIOTest.cpp +++ b/test/ParallelIOTest.cpp @@ -41,6 +41,21 @@ TEST_CASE("none", "[parallel]") #include #include +// On Windows, REQUIRE() might not be able to print more complex data structures +// upon failure: +// CoreTest.obj : error LNK2001: unresolved external symbol +// "class std::string const Catch::Detail::unprintableString" (...) +#ifdef _WIN32 +#define OPENPMD_REQUIRE_GUARD_WINDOWS(...) 
\ + do \ + { \ + bool guarded_require_boolean = __VA_ARGS__; \ + REQUIRE(guarded_require_boolean); \ + } while (0); +#else +#define OPENPMD_REQUIRE_GUARD_WINDOWS(...) REQUIRE(__VA_ARGS__) +#endif + using namespace openPMD; TEST_CASE("parallel_multi_series_test", "[parallel]") @@ -2350,13 +2365,14 @@ void verifyHostnameAssignment( { for (auto const &chunk : chunks) { - REQUIRE(in.at(chunk.sourceID) == out.at(out_rank)); + OPENPMD_REQUIRE_GUARD_WINDOWS( + in.at(chunk.sourceID) == out.at(out_rank)); } } for (auto const &chunk : assignment.notAssigned) { auto const &hostname = in.at(chunk.sourceID); - REQUIRE( + OPENPMD_REQUIRE_GUARD_WINDOWS( std::none_of( out.begin(), out.end(), @@ -2515,7 +2531,7 @@ void run_test() * are running on the same nodes. */ auto rankMetaIn = series.rankTable(/* collective = */ true); - REQUIRE(rankMetaIn == writingRanksHostnames); + OPENPMD_REQUIRE_GUARD_WINDOWS(rankMetaIn == writingRanksHostnames); auto E_x = series.iterations[0].meshes["E"]["x"]; /* @@ -2536,7 +2552,8 @@ void run_test() chunkTable, rankMetaIn, readingRanksHostnames, mpi_rank, mpi_size); printAssignment( "ROUND ROBIN", roundRobinAssignment, readingRanksHostnames); - REQUIRE(equalTables(chunkTable, roundRobinAssignment)); + OPENPMD_REQUIRE_GUARD_WINDOWS( + equalTables(chunkTable, roundRobinAssignment)); /* * Assign chunks by hostname. 
@@ -2560,7 +2577,7 @@ void run_test() "HOSTNAME, LEFTOVER", byHostnamePartialAssignment.notAssigned, rankMetaIn); - REQUIRE(equalDisjointByVolume( + OPENPMD_REQUIRE_GUARD_WINDOWS(equalDisjointByVolume( chunkTable, // Must restrict assignment to current rank, since // ByHostname strategy output *may* also contain chunks from @@ -2590,7 +2607,7 @@ void run_test() "HOSTNAME2, LEFTOVER", byHostnamePartialAssignment2.notAssigned, rankMetaIn); - REQUIRE(equalDisjointByVolume( + OPENPMD_REQUIRE_GUARD_WINDOWS(equalDisjointByVolume( chunkTable, // Must restrict assignment to current rank, since // ByHostname strategy output *may* also contain chunks from @@ -2622,7 +2639,7 @@ void run_test() "HOSTNAME WITH SECOND PASS", fromPartialAssignment, readingRanksHostnames); - REQUIRE(equalDisjointByVolume( + OPENPMD_REQUIRE_GUARD_WINDOWS(equalDisjointByVolume( chunkTable, // Must restrict assignment to current rank, since // ByHostname strategy output *may* also contain chunks from @@ -2661,7 +2678,7 @@ void run_test() auto blocksAssignment = blocksStrategy.assign( chunkTable, rankMetaIn, readingRanksHostnames, mpi_rank, mpi_size); printAssignment("BLOCKS", blocksAssignment, readingRanksHostnames); - REQUIRE(equalDisjointByVolume( + OPENPMD_REQUIRE_GUARD_WINDOWS(equalDisjointByVolume( chunkTable, blocksAssignment, std::nullopt, MPI_COMM_WORLD)); BlocksOfSourceRanks blocksOfSourceRanksStrategy; @@ -2671,7 +2688,7 @@ void run_test() "BLOCKS OF SOURCE RANKS", blocksOfSourceRanksAssignment, readingRanksHostnames); - REQUIRE(equalDisjointByVolume( + OPENPMD_REQUIRE_GUARD_WINDOWS(equalDisjointByVolume( chunkTable, blocksOfSourceRanksAssignment, std::nullopt, From 62e3bf05cd8ad36c8422e7fefdd137a8653fb622 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Thu, 7 Aug 2025 18:11:54 +0200 Subject: [PATCH 27/27] Use generic flag instead of -n 2 --- CMakeLists.txt | 48 ++++++++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 24 deletions(-) diff 
--git a/CMakeLists.txt b/CMakeLists.txt index 49b62a8bbd..ddd2cfd716 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1346,30 +1346,30 @@ if(openPMD_BUILD_TESTING) ) add_test(NAME CLI.pipe.py COMMAND sh -c - "${MPI_TEST_EXE} -n 2 ${Python_EXECUTABLE} \ - ${openPMD_RUNTIME_OUTPUT_DIRECTORY}/openpmd-pipe \ - --infile ../samples/git-sample/data%T.h5 \ - --outfile ../samples/git-sample/data%T.bp && \ - \ - ${MPI_TEST_EXE} ${Python_EXECUTABLE} \ - ${openPMD_RUNTIME_OUTPUT_DIRECTORY}/openpmd-pipe \ - --infile ../samples/git-sample/data00000100.h5 \ - --outfile \ - ../samples/git-sample/single_iteration_%T.bp && \ - \ - ${MPI_TEST_EXE} -n 2 ${Python_EXECUTABLE} \ - ${openPMD_RUNTIME_OUTPUT_DIRECTORY}/openpmd-pipe \ - --infile ../samples/git-sample/thetaMode/data%T.h5 \ - --outfile \ - ../samples/git-sample/thetaMode/data_%T.bp && \ - \ - ${MPI_TEST_EXE} ${Python_EXECUTABLE} \ - ${openPMD_RUNTIME_OUTPUT_DIRECTORY}/openpmd-pipe \ - --infile ../samples/git-sample/thetaMode/data_%T.bp \ - --outfile ../samples/git-sample/thetaMode/data%T.json \ - --outconfig ' \ - json.attribute.mode = \"short\" \n\ - json.dataset.mode = \"template_no_warn\"' \ + "${MPI_TEST_EXE} ${MPIEXEC_NUMPROC_FLAG} 2 ${Python_EXECUTABLE} \ + ${openPMD_RUNTIME_OUTPUT_DIRECTORY}/openpmd-pipe \ + --infile ../samples/git-sample/data%T.h5 \ + --outfile ../samples/git-sample/data%T.bp && \ + \ + ${MPI_TEST_EXE} ${Python_EXECUTABLE} \ + ${openPMD_RUNTIME_OUTPUT_DIRECTORY}/openpmd-pipe \ + --infile ../samples/git-sample/data00000100.h5 \ + --outfile \ + ../samples/git-sample/single_iteration_%T.bp && \ + \ + ${MPI_TEST_EXE} ${MPIEXEC_NUMPROC_FLAG} 2 ${Python_EXECUTABLE} \ + ${openPMD_RUNTIME_OUTPUT_DIRECTORY}/openpmd-pipe \ + --infile ../samples/git-sample/thetaMode/data%T.h5 \ + --outfile \ + ../samples/git-sample/thetaMode/data_%T.bp && \ + \ + ${MPI_TEST_EXE} ${Python_EXECUTABLE} \ + ${openPMD_RUNTIME_OUTPUT_DIRECTORY}/openpmd-pipe \ + --infile ../samples/git-sample/thetaMode/data_%T.bp \ + --outfile 
../samples/git-sample/thetaMode/data%T.json \ + --outconfig ' \ + json.attribute.mode = \"short\" \n\ + json.dataset.mode = \"template_no_warn\"' \ " WORKING_DIRECTORY ${openPMD_RUNTIME_OUTPUT_DIRECTORY} )