From 21eb08a7d5cff4ad201c093948fbacb9d2577543 Mon Sep 17 00:00:00 2001 From: Fernando Pereira Date: Fri, 31 Jan 2020 18:16:09 +0100 Subject: [PATCH] [wip] auto checkpointing --- coreneuron/apps/main1.cpp | 23 +++++++++++++++++- coreneuron/io/file_utils.cpp | 10 ++++++++ coreneuron/io/file_utils.hpp | 11 +++++++++ coreneuron/io/nrn_checkpoint.cpp | 8 +++++++ coreneuron/io/nrn_checkpoint.hpp | 6 +++++ coreneuron/io/nrn_setup.cpp | 10 ++++---- coreneuron/nrniv/nrniv_decl.h | 3 +++ coreneuron/sim/fadvance_core.cpp | 40 ++++++++++++++++++++++++++++++++ 8 files changed, 105 insertions(+), 6 deletions(-) diff --git a/coreneuron/apps/main1.cpp b/coreneuron/apps/main1.cpp index b9367fa80..b23201178 100644 --- a/coreneuron/apps/main1.cpp +++ b/coreneuron/apps/main1.cpp @@ -217,6 +217,17 @@ void nrn_init_and_load_data(int argc, std::string restore_path = nrnopt_get_str("--restore"); t = restore_time(restore_path.c_str()); + auto auto_restore = get_checkpoint_path("auto"); + if (fs_exists(auto_restore.c_str())) { + double auto_t = restore_time(auto_restore.c_str()); + if (auto_t > t) { + if (t > 0) + std::cout << "Warning: Using AUTO checkpoint instead of user provided" + << std::endl; + t = auto_t; + } + } + if (nrnopt_get_dbl("--dt") != -1000.) { // command line arg highest precedence dt = nrnopt_get_dbl("--dt"); } else if (dt == -1000.) { // not on command line and no dt in globals.dat @@ -275,7 +286,17 @@ void nrn_init_and_load_data(int argc, use_phase2_ = (nrnopt_get_int("--ms-phases") == 2) ? 
1 : 0; // reading *.dat files and setting up the data structures, setting mindelay - nrn_setup(filesdat.c_str(), is_mapping_needed, nrn_need_byteswap, run_setup_cleanup); + double mindelay = nrnopt_get_dbl("--mindelay"); + nrn_setup(filesdat.c_str(), + is_mapping_needed, + nrn_need_byteswap, + mindelay, // value (by-ref) gets updated + nrnopt_get_str("--datpath"), + restore_path, + run_setup_cleanup); + + // store to options + nrnopt_modify_dbl("--mindelay", mindelay); // Allgather spike compression and bin queuing. nrn_use_bin_queue_ = nrnopt_get_flag("--binqueue"); diff --git a/coreneuron/io/file_utils.cpp b/coreneuron/io/file_utils.cpp index 7674bff49..3814a226b 100644 --- a/coreneuron/io/file_utils.cpp +++ b/coreneuron/io/file_utils.cpp @@ -72,3 +72,13 @@ int mkdir_p(const char* path) { delete[] dirpath; return 0; } + +bool fs_exists(const char* path) { + struct stat buffer; + return (stat (path, &buffer) == 0); +} + +bool fs_isdir(const char* path) { + struct stat buffer; + return (stat (path, &buffer) == 0 && S_ISDIR(buffer.st_mode)); +} diff --git a/coreneuron/io/file_utils.hpp b/coreneuron/io/file_utils.hpp index 0d410d219..f0c955da2 100644 --- a/coreneuron/io/file_utils.hpp +++ b/coreneuron/io/file_utils.hpp @@ -41,4 +41,15 @@ THE POSSIBILITY OF SUCH DAMAGE. */ int mkdir_p(const char* path); +/** + * @brief Checks whether a path exists + */ +bool fs_exists(const char* path); + +/** + * @brief Checks whether a path is a directory + */ +bool fs_isdir(const char* path) ; + + #endif /* ifndef NRN_FILE_UTILS */ diff --git a/coreneuron/io/nrn_checkpoint.cpp b/coreneuron/io/nrn_checkpoint.cpp index 19017b870..c2bf51ad1 100644 --- a/coreneuron/io/nrn_checkpoint.cpp +++ b/coreneuron/io/nrn_checkpoint.cpp @@ -896,4 +896,12 @@ bool checkpoint_initialize() { return checkpoint_restored_; } + + +std::string get_checkpoint_path(const std::string& suffix) { + static std::string base_loc = nrnopt_get_str("--outpath"); + return base_loc + "/checkpoint" + (suffix.empty()? 
"" : ("_" + suffix)); +} + + } // namespace coreneuron diff --git a/coreneuron/io/nrn_checkpoint.hpp b/coreneuron/io/nrn_checkpoint.hpp index 1b62da337..df56ef2b1 100644 --- a/coreneuron/io/nrn_checkpoint.hpp +++ b/coreneuron/io/nrn_checkpoint.hpp @@ -54,6 +54,12 @@ bool checkpoint_initialize(); */ double restore_time(const char* restore_path); +/** + * @return The checkpoint path, optionally with a suffix + */ +std::string get_checkpoint_path(const std::string& suffix=""); + + extern int patstimtype; #ifndef CHKPNTDEBUG diff --git a/coreneuron/io/nrn_setup.cpp b/coreneuron/io/nrn_setup.cpp index db8f2f904..06678e383 100644 --- a/coreneuron/io/nrn_setup.cpp +++ b/coreneuron/io/nrn_setup.cpp @@ -677,6 +677,9 @@ void nrn_setup_cleanup() { void nrn_setup(const char* filesdat, bool is_mapping_needed, int byte_swap, + double & min_delay, + const std::string& datapath, + std::string restore_path, bool run_setup_cleanup) { /// Number of local cell groups int ngroup = 0; @@ -738,9 +741,6 @@ void nrn_setup(const char* filesdat, FileHandler* file_reader = new FileHandler[ngroup]; - std::string datapath = nrnopt_get_str("--datpath"); - std::string restore_path = nrnopt_get_str("--restore"); - // if not restoring then phase2 files will be read from dataset directory if (!restore_path.length()) { restore_path = datapath; @@ -788,8 +788,8 @@ void nrn_setup(const char* filesdat, if (is_mapping_needed) coreneuron::phase_wrapper<(coreneuron::phase)3>(); - double mindelay = set_mindelay(nrnopt_get_dbl("--mindelay")); - nrnopt_modify_dbl("--mindelay", mindelay); + // Set and adjust min_delay + min_delay = set_mindelay(min_delay); if (run_setup_cleanup) // if run_setup_cleanup==false, user must call nrn_setup_cleanup() later nrn_setup_cleanup(); diff --git a/coreneuron/nrniv/nrniv_decl.h b/coreneuron/nrniv/nrniv_decl.h index bcce293f2..c21b0059c 100644 --- a/coreneuron/nrniv/nrniv_decl.h +++ b/coreneuron/nrniv/nrniv_decl.h @@ -60,6 +60,9 @@ extern void nrn_p_construct(void); extern 
void nrn_setup(const char* filesdat, bool is_mapping_needed, int byte_swap, + double & mindelay, + const std::string& datapath, + std::string restore_path = "", bool run_setup_cleanup = true); extern double* stdindex2ptr(int mtype, int index, NrnThread&); extern void delete_trajectory_requests(NrnThread&); diff --git a/coreneuron/sim/fadvance_core.cpp b/coreneuron/sim/fadvance_core.cpp index 5c6560c15..210b4a605 100644 --- a/coreneuron/sim/fadvance_core.cpp +++ b/coreneuron/sim/fadvance_core.cpp @@ -26,6 +26,8 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#include +#include #include "coreneuron/coreneuron.hpp" #include "coreneuron/nrnconf.h" #include "coreneuron/sim/multicore.hpp" @@ -39,12 +41,20 @@ THE POSSIBILITY OF SUCH DAMAGE. #include "coreneuron/utils/progressbar/progressbar.h" #include "coreneuron/utils/profile/profiler_interface.h" #include "coreneuron/io/nrn2core_direct.h" +#include "coreneuron/io/nrn_checkpoint.hpp" namespace coreneuron { static void* nrn_fixed_step_thread(NrnThread*); static void* nrn_fixed_step_group_thread(NrnThread*); +// constants + +extern int nrn_need_byteswap; + +static bool nrn_auto_checkpoint(); + + void dt2thread(double adt) { /* copied from nrnoc/fadvance.c */ if (adt != nrn_threads[0]._dt) { int i; @@ -127,6 +137,10 @@ void nrn_fixed_step_group_minimal(int n) { #ifdef ENABLE_REPORTING nrn_flush_reports(nrn_threads[0]._t); #endif + if( /*auto_checkpointing == */ true) { + nrn_auto_checkpoint(); + } + if (stoprun) { break; } @@ -357,4 +371,30 @@ void* nrn_fixed_step_lastpart(NrnThread* nth) { return (void*)0; } + + +/// +/// \brief Does a checkpoint of the simulation if enough time has passed +/// \return True if a checkpoint was performed.
False otherwise (not enough elapsed time)
+static bool nrn_auto_checkpoint() {
+    static time_t previous_time = time(NULL);  // initialised on the first call, then updated after each checkpoint
+
+    time_t cur_time = time(NULL);
+    double elapsed_secs = difftime(cur_time, previous_time);  // difftime(end, start) == end - start
+    if (elapsed_secs < /* options->checkpoint_interval*/ 3600) {
+        return false;
+    }
+    Instrumentor::phase p("AUTO Checkpointing...");
+    std::string auto_final = get_checkpoint_path("auto");
+    std::string auto_tmp = get_checkpoint_path("_auto_dirty");
+    write_checkpoint(nrn_threads, nrn_nthread, auto_tmp.c_str(), nrn_need_byteswap);
+    // TODO: Not as Quick & Dirty (shells out to drop the old checkpoint and move the new one into place)
+    system(("/bin/rm -rf '" + auto_final + "';"
+            "/bin/mv '" + auto_tmp + "' '" + auto_final + "'").c_str());
+
+    previous_time = cur_time;
+    return true;
+}
+
+
 } // namespace coreneuron