From 50ce5158b03ee6c439bb9458a6c2eb4b012931b1 Mon Sep 17 00:00:00 2001 From: Axel Huebl Date: Wed, 7 Jul 2021 23:02:07 -0700 Subject: [PATCH 01/13] HDF5: Early Chunk Read Add test. --- test/SerialIOTest.cpp | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/test/SerialIOTest.cpp b/test/SerialIOTest.cpp index 075194656d..99770ab481 100644 --- a/test/SerialIOTest.cpp +++ b/test/SerialIOTest.cpp @@ -2402,6 +2402,35 @@ TEST_CASE( "git_hdf5_sample_fileBased_read_test", "[serial][hdf5]" ) } } +TEST_CASE( "git_hdf5_early_chunk_query", "[serial][hdf5]" ) +{ + try + { + Series s = Series( + "../samples/git-sample/data%T.h5", + Access::READ_ONLY + ); + + auto electrons = s.iterations[400].particles["electrons"]; + + for( auto & r : electrons ) + { + std::cout << r.first << ": "; + for( auto & r_c : r.second ) + { + std::cout << r_c.first << "\n"; + if( !r_c.second.constant() ) + auto chunks = r_c.second.availableChunks(); + } + } + + } catch (no_such_file_error& e) + { + std::cerr << "git sample not accessible. (" << e.what() << ")\n"; + return; + } +} + TEST_CASE( "git_hdf5_sample_read_thetaMode", "[serial][hdf5][thetaMode]" ) { try From e2d54bacf34b323f8df24a973d27a22bdd55eb80 Mon Sep 17 00:00:00 2001 From: Axel Huebl Date: Thu, 8 Jul 2021 13:58:51 -0700 Subject: [PATCH 02/13] Fix availableChunks: open Series --- src/backend/BaseRecordComponent.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/backend/BaseRecordComponent.cpp b/src/backend/BaseRecordComponent.cpp index f1730249ee..e4710a742a 100644 --- a/src/backend/BaseRecordComponent.cpp +++ b/src/backend/BaseRecordComponent.cpp @@ -64,10 +64,17 @@ BaseRecordComponent::availableChunks() Offset offset( m_dataset->extent.size(), 0 ); return ChunkTable{ { std::move( offset ), m_dataset->extent } }; } + + // set dirty, so Series::flush will open the file if needed + this->dirty() = true; + this->seriesFlush(); + this->dirty() = false; + Parameter< Operation::AVAILABLE_CHUNKS > param; IOTask task( this, param ); IOHandler()->enqueue( task ); IOHandler()->flush(); + return std::move( *param.chunks ); } } // namespace openPMD From 13ad57273cd25386cdafbf0bc8622d92332e2560 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Thu, 15 Jul 2021 09:57:43 +0200 Subject: [PATCH 03/13] Revert "Fix availableChunks: open Series" This reverts commit e2d54bacf34b323f8df24a973d27a22bdd55eb80. --- src/backend/BaseRecordComponent.cpp | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/backend/BaseRecordComponent.cpp b/src/backend/BaseRecordComponent.cpp index e4710a742a..f1730249ee 100644 --- a/src/backend/BaseRecordComponent.cpp +++ b/src/backend/BaseRecordComponent.cpp @@ -64,17 +64,10 @@ BaseRecordComponent::availableChunks() Offset offset( m_dataset->extent.size(), 0 ); return ChunkTable{ { std::move( offset ), m_dataset->extent } }; } - - // set dirty, so Series::flush will open the file if needed - this->dirty() = true; - this->seriesFlush(); - this->dirty() = false; - Parameter< Operation::AVAILABLE_CHUNKS > param; IOTask task( this, param ); IOHandler()->enqueue( task ); IOHandler()->flush(); - return std::move( *param.chunks ); } } // namespace openPMD From ad1735aa759e18666784752873921476c334e216 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Thu, 15 Jul 2021 11:10:11 +0200 Subject: [PATCH 04/13] Refactor procedures for opening an iteration --- include/openPMD/Series.hpp | 7 + src/Series.cpp | 317 +++++++++++++++++++------------------ 2 files changed, 171 insertions(+), 153 deletions(-) diff --git a/include/openPMD/Series.hpp b/include/openPMD/Series.hpp index 9bd0da6f84..f2576ac5d4 100644 --- a/include/openPMD/Series.hpp +++ b/include/openPMD/Series.hpp @@ -385,6 +385,13 @@ class SeriesImpl : public AttributableImpl void readGorVBased( bool init = true ); void readBase(); std::string iterationFilename( uint64_t i ); + + enum class IterationOpened : bool + { + HasBeenOpened, + RemainsClosed + }; + IterationOpened openIterationIfDirty( uint64_t index, Iteration iteration ); void openIteration( uint64_t index, Iteration iteration ); /** diff --git a/src/Series.cpp b/src/Series.cpp index 15c2478982..577874bf99 100644 --- a/src/Series.cpp +++ b/src/Series.cpp @@ -547,42 +547,20 @@ SeriesImpl::flushFileBased( iterations_iterator begin, iterations_iterator end ) if( IOHandler()->m_frontendAccess == Access::READ_ONLY ) for( auto it = begin; it != end; ++it ) { - if( *it->second.m_closed - == Iteration::CloseStatus::ParseAccessDeferred ) + switch( openIterationIfDirty( it->first, it->second ) ) { + using IO = IterationOpened; + case IO::RemainsClosed: continue; - } - bool const dirtyRecursive = it->second.dirtyRecursive(); - if( *it->second.m_closed - == Iteration::CloseStatus::ClosedInBackend ) - { - // file corresponding with the iteration has previously been - // closed and fully flushed - // verify that there have been no further accesses - if( dirtyRecursive ) - { - throw std::runtime_error( - "[Series] Detected illegal access to iteration that " - "has been closed previously." ); - } - continue; - } - /* - * Opening a file is expensive, so let's do it only if necessary. - * Necessary if: - * 1. The iteration itself has been changed somewhere. - * 2. Or the Series has been changed globally in a manner that - * requires adapting all iterations. - */ - if( dirtyRecursive || this->dirty() ) - { - // openIteration() will update the close status - openIteration( it->first, it->second ); - it->second.flush(); + case IO::HasBeenOpened: + // continue below + break; } - if( *it->second.m_closed - == Iteration::CloseStatus::ClosedInFrontend ) + it->second.flush(); + + if( *it->second.m_closed == + Iteration::CloseStatus::ClosedInFrontend ) { Parameter< Operation::CLOSE_FILE > fClose; IOHandler()->enqueue( @@ -596,68 +574,41 @@ SeriesImpl::flushFileBased( iterations_iterator begin, iterations_iterator end ) bool allDirty = dirty(); for( auto it = begin; it != end; ++it ) { - if( *it->second.m_closed - == Iteration::CloseStatus::ParseAccessDeferred ) + switch( openIterationIfDirty( it->first, it->second ) ) { + using IO = IterationOpened; + case IO::RemainsClosed: continue; - } - bool const dirtyRecursive = it->second.dirtyRecursive(); - if( *it->second.m_closed - == Iteration::CloseStatus::ClosedInBackend ) - { - // file corresponding with the iteration has previously been - // closed and fully flushed - // verify that there have been no further accesses - if (!it->second.written()) - { - throw std::runtime_error( - "[Series] Closed iteration has not been written. This " - "is an internal error." ); - } - if( dirtyRecursive ) - { - throw std::runtime_error( - "[Series] Detected illegal access to iteration that " - "has been closed previously." ); - } - continue; + case IO::HasBeenOpened: + // continue below + break; } - /* - * Opening a file is expensive, so let's do it only if necessary. - * Necessary if: - * 1. The iteration itself has been changed somewhere. - * 2. Or the Series has been changed globally in a manner that - * requires adapting all iterations. + /* as there is only one series, + * emulate the file belonging to each iteration as not yet written */ - if( dirtyRecursive || this->dirty() ) - { - /* as there is only one series, - * emulate the file belonging to each iteration as not yet written - */ - written() = false; - series.iterations.written() = false; + written() = false; + series.iterations.written() = false; - dirty() |= it->second.dirty(); - std::string filename = iterationFilename( it->first ); - it->second.flushFileBased(filename, it->first); + dirty() |= it->second.dirty(); + std::string filename = iterationFilename( it->first ); + it->second.flushFileBased( filename, it->first ); - series.iterations.flush( - auxiliary::replace_first(basePath(), "%T/", "")); + series.iterations.flush( + auxiliary::replace_first( basePath(), "%T/", "" ) ); - flushAttributes(); + flushAttributes(); - switch( *it->second.m_closed ) - { - using CL = Iteration::CloseStatus; - case CL::Open: - case CL::ClosedTemporarily: - *it->second.m_closed = CL::Open; - break; - default: - // keep it - break; - } + switch( *it->second.m_closed ) + { + using CL = Iteration::CloseStatus; + case CL::Open: + case CL::ClosedTemporarily: + *it->second.m_closed = CL::Open; + break; + default: + // keep it + break; } if( *it->second.m_closed == @@ -686,28 +637,19 @@ SeriesImpl::flushGorVBased( iterations_iterator begin, iterations_iterator end ) if( IOHandler()->m_frontendAccess == Access::READ_ONLY ) for( auto it = begin; it != end; ++it ) { - if( *it->second.m_closed - == Iteration::CloseStatus::ParseAccessDeferred ) + switch( openIterationIfDirty( it->first, it->second ) ) { + using IO = IterationOpened; + case IO::RemainsClosed: continue; + case IO::HasBeenOpened: + // continue below + break; } - if( *it->second.m_closed == - Iteration::CloseStatus::ClosedInBackend ) - { - // file corresponding with the iteration has previously been - // closed and fully flushed - // verify that there have been no further accesses - if( it->second.dirtyRecursive() ) - { - throw std::runtime_error( - "[Series] Illegal access to iteration " + - std::to_string( it->first ) + - " that has been closed previously." ); - } - continue; - } + it->second.flush(); - if( *it->second.m_closed == Iteration::CloseStatus::ClosedInFrontend ) + if( *it->second.m_closed == + Iteration::CloseStatus::ClosedInFrontend ) { // the iteration has no dedicated file in group-based mode *it->second.m_closed = Iteration::CloseStatus::ClosedInBackend; @@ -728,31 +670,14 @@ SeriesImpl::flushGorVBased( iterations_iterator begin, iterations_iterator end ) for( auto it = begin; it != end; ++it ) { - if( *it->second.m_closed - == Iteration::CloseStatus::ParseAccessDeferred ) + switch( openIterationIfDirty( it->first, it->second ) ) { + using IO = IterationOpened; + case IO::RemainsClosed: continue; - } - if( *it->second.m_closed == - Iteration::CloseStatus::ClosedInBackend ) - { - // file corresponding with the iteration has previously been - // closed and fully flushed - // verify that there have been no further accesses - if (!it->second.written()) - { - throw std::runtime_error( - "[Series] Closed iteration has not been written. This " - "is an internal error." ); - } - if( it->second.dirtyRecursive() ) - { - throw std::runtime_error( - "[Series] Illegal access to iteration " + - std::to_string( it->first ) + - " that has been closed previously." ); - } - continue; + case IO::HasBeenOpened: + // continue below + break; } if( !it->second.written() ) { @@ -1329,41 +1254,127 @@ SeriesImpl::advance( return *param.status; } -void -SeriesImpl::openIteration( uint64_t index, Iteration iteration ) +auto SeriesImpl::openIterationIfDirty( uint64_t index, Iteration iteration ) + -> IterationOpened { - auto & series = get(); - // open the iteration's file again - Parameter< Operation::OPEN_FILE > fOpen; - fOpen.encoding = iterationEncoding(); - fOpen.name = iterationFilename( index ); - IOHandler()->enqueue( IOTask( this, fOpen ) ); - - /* open base path */ - Parameter< Operation::OPEN_PATH > pOpen; - pOpen.path = auxiliary::replace_first( basePath(), "%T/", "" ); - IOHandler()->enqueue( IOTask( &series.iterations, pOpen ) ); - /* open iteration path */ - pOpen.path = iterationEncoding() == IterationEncoding::variableBased - ? "" - : std::to_string( index ); - IOHandler()->enqueue( IOTask( &iteration, pOpen ) ); - switch( *iteration.m_closed ) + if( *iteration.m_closed == Iteration::CloseStatus::ParseAccessDeferred ) { - using CL = Iteration::CloseStatus; - case CL::ClosedInBackend: + return IterationOpened::RemainsClosed; + } + bool const dirtyRecursive = iteration.dirtyRecursive(); + if( *iteration.m_closed == Iteration::CloseStatus::ClosedInBackend ) + { + // file corresponding with the iteration has previously been + // closed and fully flushed + // verify that there have been no further accesses + if( !iteration.written() ) + { + throw std::runtime_error( + "[Series] Closed iteration has not been written. This " + "is an internal error." ); + } + if( dirtyRecursive ) + { throw std::runtime_error( "[Series] Detected illegal access to iteration that " "has been closed previously." ); - case CL::ParseAccessDeferred: - case CL::Open: - case CL::ClosedTemporarily: - *iteration.m_closed = CL::Open; + } + return IterationOpened::RemainsClosed; + } + + switch( iterationEncoding() ) + { + using IE = IterationEncoding; + case IE::fileBased: + /* + * Opening a file is expensive, so let's do it only if necessary. + * Necessary if: + * 1. The iteration itself has been changed somewhere. + * 2. Or the Series has been changed globally in a manner that + * requires adapting all iterations. + */ + if( dirtyRecursive || this->dirty() ) + { + // openIteration() will update the close status + openIteration( index, iteration ); + return IterationOpened::HasBeenOpened; + } + break; + case IE::groupBased: + case IE::variableBased: + // open unconditionally + // openIteration() will update the close status + openIteration( index, iteration ); + return IterationOpened::HasBeenOpened; + } + return IterationOpened::RemainsClosed; +} + +void SeriesImpl::openIteration( uint64_t index, Iteration iteration ) +{ + switch( *iteration.m_closed ) + { + using CL = Iteration::CloseStatus; + case CL::ClosedInBackend: + throw std::runtime_error( + "[Series] Detected illegal access to iteration that " + "has been closed previously." ); + case CL::ParseAccessDeferred: + case CL::Open: + case CL::ClosedTemporarily: + *iteration.m_closed = CL::Open; + break; + case CL::ClosedInFrontend: + // just keep it like it is + break; + } + + switch( IOHandler()->m_frontendAccess ) + { + case Access::READ_ONLY: + switch( iterationEncoding() ) + { + using IE = IterationEncoding; + case IE::fileBased: { + auto & series = get(); + // open the iteration's file again + Parameter< Operation::OPEN_FILE > fOpen; + fOpen.encoding = iterationEncoding(); + fOpen.name = iterationFilename( index ); + IOHandler()->enqueue( IOTask( this, fOpen ) ); + + /* open base path */ + Parameter< Operation::OPEN_PATH > pOpen; + pOpen.path = auxiliary::replace_first( basePath(), "%T/", "" ); + IOHandler()->enqueue( IOTask( &series.iterations, pOpen ) ); + /* open iteration path */ + pOpen.path = iterationEncoding() == IterationEncoding::variableBased + ? "" + : std::to_string( index ); + IOHandler()->enqueue( IOTask( &iteration, pOpen ) ); break; - case CL::ClosedInFrontend: - // just keep it like it is + } + case IE::groupBased: + case IE::variableBased: + // nothing to do, no opening necessary in those modes break; } + break; + case Access::CREATE: + case Access::READ_WRITE: + switch( iterationEncoding() ) + { + using IE = IterationEncoding; + case IE::fileBased: + // nothing to do, file will be opened by writing routines + break; + case IE::groupBased: + case IE::variableBased: + // nothing to do, no opening necessary in those modes + break; + } + break; + } } namespace From b594c04720b8c1272d41d299c1528ed07e38f076 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Thu, 15 Jul 2021 12:12:18 +0200 Subject: [PATCH 05/13] Use last commit in Iteration::open() --- src/Iteration.cpp | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/Iteration.cpp b/src/Iteration.cpp index de6f7c8e63..1ce7537706 100644 --- a/src/Iteration.cpp +++ b/src/Iteration.cpp @@ -156,13 +156,8 @@ Iteration::open() internal::SeriesInternal * s = &retrieveSeries(); // figure out my iteration number auto begin = s->indexOf( *this ); - auto end = begin; - ++end; - // set dirty, so Series::flush will open the file - this->dirty() = true; - s->flush_impl( begin, end, FlushLevel::UserFlush ); - this->dirty() = false; - + s->openIteration( begin->first, *this ); + IOHandler()->flush(); return *this; } From fe312ace9563f4333fa627041a496040d4e76398 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Thu, 15 Jul 2021 14:04:56 +0200 Subject: [PATCH 06/13] Use improved Iteration::open() in availableChunks --- include/openPMD/backend/Attributable.hpp | 6 ++- src/backend/Attributable.cpp | 47 ++++++++++++++++++++++++ src/backend/BaseRecordComponent.cpp | 3 +- 3 files changed, 54 insertions(+), 2 deletions(-) diff --git a/include/openPMD/backend/Attributable.hpp b/include/openPMD/backend/Attributable.hpp index bb17b3e2d4..b0308d0a51 100644 --- a/include/openPMD/backend/Attributable.hpp +++ b/include/openPMD/backend/Attributable.hpp @@ -47,9 +47,10 @@ namespace traits } // traits class AbstractFilePosition; class AttributableImpl; +class Iteration; namespace internal { -class SeriesInternal; + class SeriesInternal; } class no_such_attribute_error : public std::runtime_error @@ -235,6 +236,9 @@ class AttributableImpl internal::SeriesInternal const & retrieveSeries() const; internal::SeriesInternal & retrieveSeries(); + Iteration const & containingIteration() const; + Iteration & containingIteration(); + void seriesFlush( FlushLevel ); void flushAttributes(); diff --git a/src/backend/Attributable.cpp b/src/backend/Attributable.cpp index de568f7117..c50d666576 100644 --- a/src/backend/Attributable.cpp +++ b/src/backend/Attributable.cpp @@ -19,6 +19,7 @@ * If not, see . */ #include "openPMD/backend/Attributable.hpp" +#include "openPMD/Iteration.hpp" #include "openPMD/Series.hpp" #include "openPMD/auxiliary/DerefDynamicCast.hpp" #include "openPMD/auxiliary/StringManip.hpp" @@ -134,6 +135,52 @@ internal::SeriesInternal & AttributableImpl::retrieveSeries() static_cast< AttributableImpl const * >( this )->retrieveSeries() ); } +Iteration const & AttributableImpl::containingIteration() const +{ + std::vector< Writable const * > searchQueue; + searchQueue.reserve( 7 ); + Writable const * findSeries = &writable(); + while( findSeries ) + { + searchQueue.push_back( findSeries ); + // we don't need to push the last Writable since it's the Series anyway + findSeries = findSeries->parent; + } + // End of the queue: + // Iteration -> Series.iterations -> Series + if( searchQueue.size() < 3 ) + { + throw std::runtime_error( + "containingIteration(): Must be called for an object contained in " + "an iteration." ); + } + auto end = searchQueue.rbegin(); + internal::AttributableData const * attr = ( *( end + 2 ) )->attributable; + /* + * We now know the unique instance of Attributable that corresponds with + * the iteration. + * Since the class Iteration itself still follows the old class design, + * we will have to take a detour via Series. + */ + auto & series = auxiliary::deref_dynamic_cast< internal::SeriesInternal >( + ( *searchQueue.rbegin() )->attributable ); + for( auto const & pair : series.iterations ) + { + if( &pair.second.get() == attr ) + { + return pair.second; + } + } + throw std::runtime_error( + "Containing iteration not found in containing Series." ); +} + +Iteration & AttributableImpl::containingIteration() +{ + return const_cast< Iteration & >( + static_cast< Iteration const * >( this )->containingIteration() ); +} + std::string Attributable::MyPath::filePath() const { return directory + seriesName + seriesExtension; diff --git a/src/backend/BaseRecordComponent.cpp b/src/backend/BaseRecordComponent.cpp index f1730249ee..e099e21b06 100644 --- a/src/backend/BaseRecordComponent.cpp +++ b/src/backend/BaseRecordComponent.cpp @@ -19,7 +19,7 @@ * If not, see . */ #include "openPMD/backend/BaseRecordComponent.hpp" - +#include "openPMD/Iteration.hpp" namespace openPMD { @@ -64,6 +64,7 @@ BaseRecordComponent::availableChunks() Offset offset( m_dataset->extent.size(), 0 ); return ChunkTable{ { std::move( offset ), m_dataset->extent } }; } + containingIteration().open(); Parameter< Operation::AVAILABLE_CHUNKS > param; IOTask task( this, param ); IOHandler()->enqueue( task ); From 2d6bf77ca41675d2f13507e19cd85ad7d771b962 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Thu, 15 Jul 2021 16:32:32 +0200 Subject: [PATCH 07/13] Remove redundant status writing --- src/Series.cpp | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/src/Series.cpp b/src/Series.cpp index 577874bf99..8cd379c558 100644 --- a/src/Series.cpp +++ b/src/Series.cpp @@ -599,18 +599,6 @@ SeriesImpl::flushFileBased( iterations_iterator begin, iterations_iterator end ) flushAttributes(); - switch( *it->second.m_closed ) - { - using CL = Iteration::CloseStatus; - case CL::Open: - case CL::ClosedTemporarily: - *it->second.m_closed = CL::Open; - break; - default: - // keep it - break; - } - if( *it->second.m_closed == Iteration::CloseStatus::ClosedInFrontend ) { From 5589a07c81deb4773055ece30785948cae1377c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Thu, 15 Jul 2021 16:41:31 +0200 Subject: [PATCH 08/13] Some inline commenting --- include/openPMD/Series.hpp | 12 ++++++++++++ src/Series.cpp | 13 +++++++++++++ 2 files changed, 25 insertions(+) diff --git a/include/openPMD/Series.hpp b/include/openPMD/Series.hpp index f2576ac5d4..3293605178 100644 --- a/include/openPMD/Series.hpp +++ b/include/openPMD/Series.hpp @@ -391,7 +391,19 @@ class SeriesImpl : public AttributableImpl HasBeenOpened, RemainsClosed }; + /* + * For use by flushFileBased, flushGorVBased + * Open an iteration, but only if necessary. + * Only open if the iteration is dirty and if it is not in deferred + * parse state. + */ IterationOpened openIterationIfDirty( uint64_t index, Iteration iteration ); + /* + * Open an iteration. Ensures that the iteration's m_closed status + * is set properly and that any files pertaining to the iteration + * is opened. + * Does not create files when called in CREATE mode. + */ void openIteration( uint64_t index, Iteration iteration ); /** diff --git a/src/Series.cpp b/src/Series.cpp index 8cd379c558..2e01e00a26 100644 --- a/src/Series.cpp +++ b/src/Series.cpp @@ -1245,6 +1245,10 @@ SeriesImpl::advance( auto SeriesImpl::openIterationIfDirty( uint64_t index, Iteration iteration ) -> IterationOpened { + /* + * Check side conditions on accessing iterations, and if they are fulfilled, + * forward function params to openIteration(). + */ if( *iteration.m_closed == Iteration::CloseStatus::ParseAccessDeferred ) { return IterationOpened::RemainsClosed; @@ -1291,6 +1295,8 @@ auto SeriesImpl::openIterationIfDirty( uint64_t index, Iteration iteration ) case IE::groupBased: case IE::variableBased: // open unconditionally + // this makes groupBased encoding safer for parallel usage + // (variable-based encoding runs in lockstep anyway) // openIteration() will update the close status openIteration( index, iteration ); return IterationOpened::HasBeenOpened; @@ -1317,6 +1323,13 @@ void SeriesImpl::openIteration( uint64_t index, Iteration iteration ) break; } + /* + * There's only something to do in filebased encoding in READ_ONLY mode. + * @todo What about READ_WRITE and CREATE mode in filebased encoding? + * Currently handled by flushFileBased(), should we put that here too? + * Use two nested switches anyway to ensure compiler warnings upon adding + * values to the enums. + */ switch( IOHandler()->m_frontendAccess ) { case Access::READ_ONLY: From 5b72c3cb2dddfa4c0acc1393bee55f14abeaa39a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Fri, 16 Jul 2021 11:34:33 +0200 Subject: [PATCH 09/13] Make this whole thing work in READ_WRITE mode too --- src/Series.cpp | 51 ++++++++++++++++++++------------------ test/SerialIOTest.cpp | 57 ++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 81 insertions(+), 27 deletions(-) diff --git a/src/Series.cpp b/src/Series.cpp index 2e01e00a26..961ffebbb5 100644 --- a/src/Series.cpp +++ b/src/Series.cpp @@ -1306,6 +1306,7 @@ auto SeriesImpl::openIterationIfDirty( uint64_t index, Iteration iteration ) void SeriesImpl::openIteration( uint64_t index, Iteration iteration ) { + auto oldStatus = *iteration.m_closed; switch( *iteration.m_closed ) { using CL = Iteration::CloseStatus; @@ -1324,19 +1325,32 @@ void SeriesImpl::openIteration( uint64_t index, Iteration iteration ) } /* - * There's only something to do in filebased encoding in READ_ONLY mode. - * @todo What about READ_WRITE and CREATE mode in filebased encoding? - * Currently handled by flushFileBased(), should we put that here too? + * There's only something to do in filebased encoding in READ_ONLY and + * READ_WRITE modes. * Use two nested switches anyway to ensure compiler warnings upon adding * values to the enums. */ - switch( IOHandler()->m_frontendAccess ) + switch( iterationEncoding() ) { - case Access::READ_ONLY: - switch( iterationEncoding() ) + using IE = IterationEncoding; + case IE::fileBased: { + switch( IOHandler()->m_frontendAccess ) { - using IE = IterationEncoding; - case IE::fileBased: { + case Access::READ_ONLY: + case Access::READ_WRITE: { + /* + * The iteration is marked written() as soon as its file has been + * either created or opened. + * If the iteration has not been created yet, it cannot be opened. + * In that case, it is not written() and its old close status was + * not ParseAccessDeferred. + */ + if( !iteration.written() && + oldStatus != Iteration::CloseStatus::ParseAccessDeferred ) + { + // nothing to do, file will be opened by writing routines + break; + } auto & series = get(); // open the iteration's file again Parameter< Operation::OPEN_FILE > fOpen; @@ -1355,25 +1369,14 @@ void SeriesImpl::openIteration( uint64_t index, Iteration iteration ) IOHandler()->enqueue( IOTask( &iteration, pOpen ) ); break; } - case IE::groupBased: - case IE::variableBased: - // nothing to do, no opening necessary in those modes - break; - } - break; - case Access::CREATE: - case Access::READ_WRITE: - switch( iterationEncoding() ) - { - using IE = IterationEncoding; - case IE::fileBased: + case Access::CREATE: // nothing to do, file will be opened by writing routines break; - case IE::groupBased: - case IE::variableBased: - // nothing to do, no opening necessary in those modes - break; } + } + case IE::groupBased: + case IE::variableBased: + // nothing to do, no opening necessary in those modes break; } } diff --git a/test/SerialIOTest.cpp b/test/SerialIOTest.cpp index 99770ab481..be01f60d20 100644 --- a/test/SerialIOTest.cpp +++ b/test/SerialIOTest.cpp @@ -4279,10 +4279,12 @@ TEST_CASE( "extend_dataset", "[serial]" ) void deferred_parsing( std::string const & extension ) { + if( auxiliary::directory_exists( "../samples/lazy_parsing" ) ) + auxiliary::remove_directory( "../samples/lazy_parsing" ); std::string const basename = "../samples/lazy_parsing/lazy_parsing_"; // create a single iteration { - Series series( basename + "%T." + extension, Access::CREATE ); + Series series( basename + "%06T." + extension, Access::CREATE ); std::vector< float > buffer( 20 ); std::iota( buffer.begin(), buffer.end(), 0.f ); auto dataset = series.iterations[ 1000 ].meshes[ "E" ][ "x" ]; @@ -4295,14 +4297,21 @@ void deferred_parsing( std::string const & extension ) { for( size_t i = 0; i < 1000; i += 100 ) { + std::string infix = std::to_string( i ); + std::string padding; + for( size_t j = 0; j < 6 - infix.size(); ++j ) + { + padding += "0"; + } + infix = padding + infix; std::ofstream file; - file.open( basename + std::to_string( i ) + "." + extension ); + file.open( basename + infix + "." + extension ); file.close(); } } { Series series( - basename + "%T." + extension, + basename + "%06T." + extension, Access::READ_ONLY, "{\"defer_iteration_parsing\": true}" ); auto dataset = series.iterations[ 1000 ] @@ -4317,6 +4326,48 @@ void deferred_parsing( std::string const & extension ) std::numeric_limits< float >::epsilon() ); } } + { + Series series( + basename + "%06T." + extension, + Access::READ_WRITE, + "{\"defer_iteration_parsing\": true}" ); + auto dataset = series.iterations[ 1000 ] + .open() + .meshes[ "E" ][ "x" ] + .loadChunk< float >( { 0 }, { 20 } ); + series.flush(); + for( size_t i = 0; i < 20; ++i ) + { + REQUIRE( + std::abs( dataset.get()[ i ] - float( i ) ) <= + std::numeric_limits< float >::epsilon() ); + } + + // create a new iteration + std::vector< float > buffer( 20 ); + std::iota( buffer.begin(), buffer.end(), 0.f ); + auto writeDataset = series.iterations[ 1001 ].meshes[ "E" ][ "x" ]; + writeDataset.resetDataset( { Datatype::FLOAT, { 20 } } ); + writeDataset.storeChunk( buffer, { 0 }, { 20 } ); + series.flush(); + } + { + Series series( + basename + "%06T." + extension, + Access::READ_ONLY, + "{\"defer_iteration_parsing\": true}" ); + auto dataset = series.iterations[ 1001 ] + .open() + .meshes[ "E" ][ "x" ] + .loadChunk< float >( { 0 }, { 20 } ); + series.flush(); + for( size_t i = 0; i < 20; ++i ) + { + REQUIRE( + std::abs( dataset.get()[ i ] - float( i ) ) <= + std::numeric_limits< float >::epsilon() ); + } + } } TEST_CASE( "deferred_parsing", "[serial]" ) From 22dddf3adf6c755338bc118c2fd0215097a7460f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Fri, 16 Jul 2021 13:28:10 +0200 Subject: [PATCH 10/13] Use specific commit for samples download --- share/openPMD/download_samples.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/share/openPMD/download_samples.sh b/share/openPMD/download_samples.sh index bdf17b7d9e..4055513e33 100755 --- a/share/openPMD/download_samples.sh +++ b/share/openPMD/download_samples.sh @@ -3,9 +3,9 @@ mkdir -p samples/git-sample/thetaMode mkdir -p samples/git-sample/3d-bp4 -curl -sOL https://github.com/openPMD/openPMD-example-datasets/raw/draft/example-3d.tar.gz -curl -sOL https://github.com/openPMD/openPMD-example-datasets/raw/draft/example-thetaMode.tar.gz -curl -sOL https://github.com/openPMD/openPMD-example-datasets/raw/draft/example-3d-bp4.tar.gz +curl -sOL https://github.com/openPMD/openPMD-example-datasets/raw/72545c4d6bcca2c258bffd2eabe38679b2507c80/example-3d.tar.gz +curl -sOL https://github.com/openPMD/openPMD-example-datasets/raw/72545c4d6bcca2c258bffd2eabe38679b2507c80/example-thetaMode.tar.gz +curl -sOL https://github.com/openPMD/openPMD-example-datasets/raw/72545c4d6bcca2c258bffd2eabe38679b2507c80/example-3d-bp4.tar.gz tar -xzf example-3d.tar.gz tar -xzf example-thetaMode.tar.gz tar -xzf example-3d-bp4.tar.gz From 1bb9ca22a554a8032d710c176eb3b9784d923e28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Fri, 16 Jul 2021 13:57:34 +0200 Subject: [PATCH 11/13] Fix bad static_cast --- src/backend/Attributable.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/backend/Attributable.cpp b/src/backend/Attributable.cpp index c50d666576..7a69d6eeab 100644 --- a/src/backend/Attributable.cpp +++ b/src/backend/Attributable.cpp @@ -178,7 +178,8 @@ Iteration const & AttributableImpl::containingIteration() const Iteration & AttributableImpl::containingIteration() { return const_cast< Iteration & >( - static_cast< Iteration const * >( this )->containingIteration() ); + static_cast< AttributableImpl const * >( this ) + ->containingIteration() ); } std::string Attributable::MyPath::filePath() const From c315ff3720728a95238565c2ce5b0af84a7a1027 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Fri, 16 Jul 2021 15:09:08 +0200 Subject: [PATCH 12/13] More complete documentation on Iteration::open() --- docs/source/details/mpi.rst | 30 ++++++++++++++++-------------- docs/source/usage/streaming.rst | 2 ++ include/openPMD/Iteration.hpp | 3 +++ 3 files changed, 21 insertions(+), 14 deletions(-) diff --git a/docs/source/details/mpi.rst b/docs/source/details/mpi.rst index 918b413648..7b75df4b08 100644 --- a/docs/source/details/mpi.rst +++ b/docs/source/details/mpi.rst @@ -13,20 +13,21 @@ A **collective** operation needs to be executed by *all* MPI ranks of the MPI co Contrarily, **independent** operations can also be called by a subset of these MPI ranks. For more information, please see the `MPI standard documents `_, for example MPI-3.1 in `"Section 2.4 - Semantic Terms" `_. -======================== ================== =========================== -Functionality Behavior Description -======================== ================== =========================== -``Series`` **collective** open and close -``::flush()`` **collective** read and write -``Iteration`` [1]_ independent declare and open -``::open()`` [3]_ **collective** explicit open -``Mesh`` [1]_ independent declare, open, write -``ParticleSpecies`` [1]_ independent declare, open, write -``::setAttribute`` [2]_ *backend-specific* declare, write -``::getAttribute`` independent open, reading -``::storeChunk`` [1]_ independent write -``::loadChunk`` independent read -======================== ================== =========================== +========================== ================== =========================== +Functionality Behavior Description +========================== ================== =========================== +``Series`` **collective** open and close +``::flush()`` **collective** read and write +``Iteration`` [1]_ independent declare and open +``::open()`` [3]_ **collective** explicit open +``Mesh`` [1]_ independent declare, open, write +``ParticleSpecies`` [1]_ independent declare, open, write +``::setAttribute`` [2]_ *backend-specific* declare, write +``::getAttribute`` independent open, reading +``::storeChunk`` [1]_ independent write +``::loadChunk`` independent read +``::availableChunks`` [3]_ collective read, immediate result +========================== ================== =========================== .. [1] Individual backends, e.g. :ref:`HDF5 `, will only support independent operations if the default, non-collective behavior is kept. (Otherwise these operations are collective.) @@ -35,6 +36,7 @@ Functionality Behavior Description If you want to support all backends equally, treat as a collective operation. .. [3] We usually open iterations delayed on first access. This first access is usually the ``flush()`` call after a ``storeChunk``/``loadChunk`` operation. If the first access is non-collective, an explicit, collective ``Iteration::open()`` can be used to have the files already open. +Alternatively, iterations might be accessed for the first time by immediate operations such as ``::availableChunks()``. .. tip:: diff --git a/docs/source/usage/streaming.rst b/docs/source/usage/streaming.rst index 4be444a8e4..adf2d4f4d7 100644 --- a/docs/source/usage/streaming.rst +++ b/docs/source/usage/streaming.rst @@ -25,6 +25,7 @@ The reading end of the streaming API is activated through use of ``Series::readI The returned object of type ``ReadIterations`` can be used in a C++11 range-based for loop to iterate over objects of type ``IndexedIteration``. This class extends the ``Iteration`` class with a field ``IndexedIteration::iterationIndex``, denoting this iteration's index. +Iterations are implicitly opened by the Streaming API and ``Iteration::open()`` needs not be called explicitly. Users are encouraged to explicitly ``.close()`` the iteration after reading from it. Closing the iteration will flush all pending operations on that iteration. If an iteration is not closed until the beginning of the next iteration, it will be closed automatically. @@ -42,6 +43,7 @@ The reading end of the streaming API is activated through use of ``Series.read_i The returned object of type ``ReadIterations`` can be used in a Python range-based for loop to iterate over objects of type ``IndexedIteration``. This class extends the ``Iteration`` class with a field ``IndexedIteration.iteration_index``, denoting this iteration's index. +Iterations are implicitly opened by the Streaming API and ``Iteration.open()`` needs not be called explicitly. Users are encouraged to explicitly ``.close()`` the iteration after reading from it. Closing the iteration will flush all pending operations on that iteration. If an iteration is not closed until the beginning of the next iteration, it will be closed automatically. diff --git a/include/openPMD/Iteration.hpp b/include/openPMD/Iteration.hpp index e950ecd770..794ee34055 100644 --- a/include/openPMD/Iteration.hpp +++ b/include/openPMD/Iteration.hpp @@ -118,6 +118,9 @@ class Iteration : public LegacyAttributable * operation is flush-ed. In parallel contexts where it is know that such a * first access needs to be run non-collectively, one can explicitly open * an iteration through this collective call. + * Also necessary when using defer_iteration_parsing. + * The Streaming API (i.e. Series::readIterations()) will call this method + * implicitly and users need not call it. * * @return Reference to iteration. */ From 782c78e6e673202effb17777f3ced8c7896f9e2e Mon Sep 17 00:00:00 2001 From: Axel Huebl Date: Tue, 20 Jul 2021 10:42:52 -0700 Subject: [PATCH 13/13] Add VC code review --- include/openPMD/backend/Attributable.hpp | 8 ++++++++ src/backend/Attributable.cpp | 2 ++ 2 files changed, 10 insertions(+) diff --git a/include/openPMD/backend/Attributable.hpp b/include/openPMD/backend/Attributable.hpp index b0308d0a51..99c49984cb 100644 --- a/include/openPMD/backend/Attributable.hpp +++ b/include/openPMD/backend/Attributable.hpp @@ -236,8 +236,16 @@ class AttributableImpl internal::SeriesInternal const & retrieveSeries() const; internal::SeriesInternal & retrieveSeries(); + /** Returns the corresponding Iteration + * + * Return the openPMD::iteration that this Attributable is contained in. + * This walks up the linked parents until it finds the Iteration object. + * Throws an error otherwise, e.g., for Series objects. + * @{ + */ Iteration const & containingIteration() const; Iteration & containingIteration(); + /** @} */ void seriesFlush( FlushLevel ); diff --git a/src/backend/Attributable.cpp b/src/backend/Attributable.cpp index 7a69d6eeab..d906be0a8a 100644 --- a/src/backend/Attributable.cpp +++ b/src/backend/Attributable.cpp @@ -156,6 +156,8 @@ Iteration const & AttributableImpl::containingIteration() const } auto end = searchQueue.rbegin(); internal::AttributableData const * attr = ( *( end + 2 ) )->attributable; + if( attr == nullptr ) + throw std::runtime_error( "containingIteration(): attributable must not be a nullptr." ); /* * We now know the unique instance of Attributable that corresponds with * the iteration.