diff --git a/CMake/ClangFormatHelper.cmake b/CMake/ClangFormatHelper.cmake index 69494c339..173757f53 100644 --- a/CMake/ClangFormatHelper.cmake +++ b/CMake/ClangFormatHelper.cmake @@ -23,12 +23,6 @@ if(CLANG_FORMAT_FOUND) ${CORENEURON_PROJECT_SOURCE_DIR}/coreneuron/*.h* ${CORENEURON_PROJECT_SOURCE_DIR}/coreneuron/*.ipp*) - # exclude ezoption headers - list(REMOVE_ITEM SRC_FILES_FOR_CLANG_FORMAT - ${CORENEURON_PROJECT_SOURCE_DIR}/coreneuron/utils/ezoption/ezOptionParser.hpp - ${CORENEURON_PROJECT_SOURCE_DIR}/coreneuron/utils/endianness.h - ${CORENEURON_PROJECT_SOURCE_DIR}/coreneuron/utils/swap_endian.h) - # exclude random123 file(GLOB_RECURSE RANDOM123_FILES ${CORENEURON_PROJECT_SOURCE_DIR}/coreneuron/utils/randoms/Random123/*.cpp diff --git a/coreneuron/apps/main1.cpp b/coreneuron/apps/main1.cpp index f76e12d46..0f6c3bea0 100644 --- a/coreneuron/apps/main1.cpp +++ b/coreneuron/apps/main1.cpp @@ -603,7 +603,7 @@ extern "C" int run_solve_core(int argc, char** argv) { { Instrumentor::phase p("checkpoint"); - write_checkpoint(nrn_threads, nrn_nthread, corenrn_param.checkpointpath.c_str(), nrn_need_byteswap); + write_checkpoint(nrn_threads, nrn_nthread, corenrn_param.checkpointpath.c_str()); } // must be done after checkpoint (to avoid deleting events) diff --git a/coreneuron/gpu/nrn_acc_manager.cpp b/coreneuron/gpu/nrn_acc_manager.cpp index 52c41d071..a9237e542 100644 --- a/coreneuron/gpu/nrn_acc_manager.cpp +++ b/coreneuron/gpu/nrn_acc_manager.cpp @@ -22,7 +22,7 @@ #endif namespace coreneuron { extern InterleaveInfo* interleave_info; -void copy_ivoc_vect_to_device(IvocVect*& iv, IvocVect*& div); +void copy_ivoc_vect_to_device(const IvocVect& iv, IvocVect& div); void nrn_ion_global_map_copyto_device(); void nrn_VecPlay_copyto_device(NrnThread* nt, void** d_vecplay); @@ -389,21 +389,19 @@ void setup_nrnthreads_on_device(NrnThread* threads, int nthreads) { #endif } -void copy_ivoc_vect_to_device(IvocVect*& iv, IvocVect*& div) { +void copy_ivoc_vect_to_device(const IvocVect& from, IvocVect& to) { #ifdef _OPENACC - if (iv) { - IvocVect* d_iv = (IvocVect*)acc_copyin(iv, sizeof(IvocVect)); - acc_memcpy_to_device(&div, &d_iv, sizeof(IvocVect*)); - - size_t n = iv->size(); - if (n) { - double* d_data = (double*)acc_copyin(iv->data(), sizeof(double) * n); - acc_memcpy_to_device(&(d_iv->data_), &d_data, sizeof(double*)); - } + IvocVect* d_iv = (IvocVect*)acc_copyin((void*)&from, sizeof(IvocVect)); + acc_memcpy_to_device(&to, d_iv, sizeof(IvocVect)); + + size_t n = from.size(); + if (n) { + double* d_data = (double*)acc_copyin((void*)from.data(), sizeof(double) * n); + acc_memcpy_to_device(&(d_iv->data_), &d_data, sizeof(double*)); } #else - (void)iv; - (void)div; + (void)from; + (void)to; #endif } @@ -1004,8 +1002,10 @@ void nrn_VecPlay_copyto_device(NrnThread* nt, void** d_vecplay) { /** copy y_, t_ and discon_indices_ */ copy_ivoc_vect_to_device(vecplay_instance->y_, d_vecplay_instance->y_); copy_ivoc_vect_to_device(vecplay_instance->t_, d_vecplay_instance->t_); - copy_ivoc_vect_to_device(vecplay_instance->discon_indices_, - d_vecplay_instance->discon_indices_); + if (vecplay_instance->discon_indices_) { + copy_ivoc_vect_to_device(*(vecplay_instance->discon_indices_), + *(d_vecplay_instance->discon_indices_)); + } /** copy PlayRecordEvent : todo: verify this */ PlayRecordEvent* d_e_ = diff --git a/coreneuron/io/mem_layout_util.cpp b/coreneuron/io/mem_layout_util.cpp new file mode 100644 index 000000000..dfc7ee0f0 --- /dev/null +++ b/coreneuron/io/mem_layout_util.cpp @@ -0,0 +1,56 @@ +#include 
"mem_layout_util.hpp" + +namespace coreneuron { + +/// calculate size after padding for specific memory layout +// Warning: this function is declared extern in nrniv_decl.h +int nrn_soa_padded_size(int cnt, int layout) { + return soa_padded_size(cnt, layout); +} + +/// return the new offset considering the byte aligment settings +size_t nrn_soa_byte_align(size_t size) { + if (LAYOUT == Layout::SoA) { + size_t dbl_align = NRN_SOA_BYTE_ALIGN / sizeof(double); + size_t remainder = size % dbl_align; + if (remainder) { + size += dbl_align - remainder; + } + nrn_assert((size * sizeof(double)) % NRN_SOA_BYTE_ALIGN == 0); + } + return size; +} + +int nrn_i_layout(int icnt, int cnt, int isz, int sz, int layout) { + switch(layout) { + case Layout::AoS: + return icnt * sz + isz; + case Layout::SoA: + int padded_cnt = nrn_soa_padded_size(cnt, layout); // may want to factor out to save time + return icnt + isz * padded_cnt; + } + + nrn_assert(false); + return 0; +} + +// file data is AoS. ie. +// organized as cnt array instances of mtype each of size sz. +// So input index i refers to i_instance*sz + i_item offset +// Return the corresponding SoA index -- taking into account the +// alignment requirements. Ie. i_instance + i_item*align_cnt. + +int nrn_param_layout(int i, int mtype, Memb_list* ml) { + int layout = corenrn.get_mech_data_layout()[mtype]; + switch(layout) { + case Layout::AoS: + return i; + case Layout::SoA: + nrn_assert(layout == Layout::SoA); + int sz = corenrn.get_prop_param_size()[mtype]; + return nrn_i_layout(i / sz, ml->nodecount, i % sz, sz, layout); + } + nrn_assert(false); + return 0; +} +} // namespace coreneuron diff --git a/coreneuron/io/mem_layout_util.hpp b/coreneuron/io/mem_layout_util.hpp new file mode 100644 index 000000000..b47d7373d --- /dev/null +++ b/coreneuron/io/mem_layout_util.hpp @@ -0,0 +1,45 @@ +#pragma once + +#include "coreneuron/coreneuron.hpp" +#include "coreneuron/nrniv/nrniv_decl.h" + +namespace coreneuron { + +#if !defined(NRN_SOA_PAD) +// for layout 0, every range variable array must have a size which +// is a multiple of NRN_SOA_PAD doubles +#define NRN_SOA_PAD 8 +#endif + +// If MATRIX_LAYOUT is 1 then a,b,d,rhs,v,area is not padded using NRN_SOA_PAD +// When MATRIX_LAYOUT is 0 then mechanism pdata index values into _actual_v +// and _actual_area data need to be updated. +enum Layout { + SoA = 0, + AoS = 1 +}; + +#if !defined(LAYOUT) +#define LAYOUT Layout::AoS +#define MATRIX_LAYOUT Layout::AoS +#else +#define MATRIX_LAYOUT LAYOUT +#endif + +/// return the new offset considering the byte aligment settings +size_t nrn_soa_byte_align(size_t i); + +/// This function return the index in a flat array of a matrix coordinate (icnt, isz). +/// The matrix size is (cnt, sz) +/// Depending of the layout some padding can be calculated +int nrn_i_layout(int icnt, int cnt, int isz, int sz, int layout); + +// file data is AoS. ie. +// organized as cnt array instances of mtype each of size sz. +// So input index i refers to i_instance*sz + i_item offset +// Return the corresponding SoA index -- taking into account the +// alignment requirements. Ie. i_instance + i_item*align_cnt. + +int nrn_param_layout(int i, int mtype, Memb_list* ml); +} // namespace coreneuron + diff --git a/coreneuron/io/nrn_checkpoint.cpp b/coreneuron/io/nrn_checkpoint.cpp index ca1029e62..fc995030a 100644 --- a/coreneuron/io/nrn_checkpoint.cpp +++ b/coreneuron/io/nrn_checkpoint.cpp @@ -28,6 +28,7 @@ THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/coreneuron/io/nrn_checkpoint.cpp b/coreneuron/io/nrn_checkpoint.cpp
index ca1029e62..fc995030a 100644
--- a/coreneuron/io/nrn_checkpoint.cpp
+++ b/coreneuron/io/nrn_checkpoint.cpp
@@ -28,6 +28,7 @@ THE POSSIBILITY OF SUCH DAMAGE.
 #include
 #include
 #include
+#include
 #include "coreneuron/sim/multicore.hpp"
 #include "coreneuron/nrniv/nrniv_decl.h"
@@ -60,11 +61,11 @@ class FileHandlerWrap {
     std::fstream G;
     FileHandlerWrap(){};
 
-    void open(const char* filename, bool reorder, std::ios::openmode mode = std::ios::in) {
-        F.open(filename, reorder, mode);
+    void open(const std::string& filename, std::ios::openmode mode = std::ios::in) {
+        F.open(filename, mode);
         std::ostringstream fname;
         fname << filename << ".txt";
-        G.open(fname.str().c_str(), mode);
+        G.open(fname.str(), mode);
     }
 
     void close() {
@@ -143,13 +144,12 @@ int patstimtype;
 
 // output directory for checkpoint
 static const char* output_dir;
-static bool swap_bytes;
 
 static void write_phase2(NrnThread& nt, FileHandlerWrap& file_handle);
 static void write_tqueue(NrnThread& nt, FileHandlerWrap& file_handle);
 static void write_time(const char* dir);
 
-void write_checkpoint(NrnThread* nt, int nb_threads, const char* dir, bool swap_bytes_order) {
+void write_checkpoint(NrnThread* nt, int nb_threads, const char* dir) {
     // empty directory means the option is not enabled
     if (!strlen(dir)) {
         return;
     }
@@ -162,7 +162,6 @@
 #if NRNMPI
     nrnmpi_barrier();
 #endif
-    swap_bytes = swap_bytes_order;
 
     /**
      * if openmp threading needed:
@@ -189,7 +188,7 @@ static void write_phase2(NrnThread& nt, FileHandlerWrap& fh) {
     NrnThreadChkpnt& ntc = nrnthread_chkpnt[nt.id];
     filename << output_dir << "/" << ntc.file_id << "_2.dat";
 
-    fh.open(filename.str().c_str(), swap_bytes, std::ios::out);
+    fh.open(filename.str(), std::ios::out);
     fh.checkpoint(2);
 
     int n_outputgid = 0;  // calculate PreSyn with gid >= 0
@@ -580,10 +579,10 @@ static void write_phase2(NrnThread& nt, FileHandlerWrap& fh) {
 #endif
         if (vtype == VecPlayContinuousType) {
             VecPlayContinuous* vpc = (VecPlayContinuous*)pr;
-            int sz = vector_capacity(vpc->y_);
+            int sz = vpc->y_.size();
             fh << sz << "\n";
-            fh.write_array(vector_vec(vpc->y_), sz);
-            fh.write_array(vector_vec(vpc->t_), sz);
+            fh.write_array(vpc->y_.data(), sz);
+            fh.write_array(vpc->t_.data(), sz);
         } else {
             std::cerr << "Error checkpointing vecplay type" << std::endl;
             assert(0);
         }
@@ -605,7 +604,7 @@ static void write_time(const char* output_dir) {
     std::ostringstream filename;
     FileHandler f;
     filename << output_dir << "/time.dat";
-    f.open(filename.str().c_str(), swap_bytes, std::ios::out);
+    f.open(filename.str(), std::ios::out);
     f.write_array(&t, 1);
     f.close();
 }
@@ -617,7 +616,7 @@ double restore_time(const char* restore_dir) {
     std::ostringstream filename;
     FileHandler f;
     filename << restore_dir << "/time.dat";
-    f.open(filename.str().c_str(), swap_bytes, std::ios::in);
+    f.open(filename.str(), std::ios::in);
     f.read_array(&rtime, 1);
     f.close();
 }
@@ -698,48 +697,40 @@ static void write_tqueue(TQItem* q, NrnThread& nt, FileHandlerWrap& fh) {
 static int patstim_index;
 static double patstim_te;
 
-static void checkpoint_restore_tqitem(int type, NrnThread& nt, FileHandler& fh) {
-    double te;
-    fh.read_array(&te, 1);
-    // printf("restore tqitem type=%d te=%.20g\n", type, te);
+static void checkpoint_restore_tqitem(int type,
+                                      std::shared_ptr<Phase2::EventTypeBase> event,
+                                      NrnThread& nt) {
+    // printf("restore tqitem type=%d time=%.20g\n", type, time);
     switch (type) {
         case NetConType: {
-            int ncindex = fh.read_int();
-            // printf("  NetCon %d\n", ncindex);
-            NetCon* nc = nt.netcons + ncindex;
-            nc->send(te, net_cvode_instance, &nt);
+            auto e = static_cast<Phase2::NetConType_*>(event.get());
+            // printf("  NetCon %d\n", netcon_index);
+            NetCon* nc = nt.netcons + e->netcon_index;
+            nc->send(e->time, net_cvode_instance, &nt);
             break;
         }
         case SelfEventType: {
-            int target_type = fh.read_int();  // not really needed (except for assert below)
-            int pinstance = fh.read_int();
-            int target_instance = fh.read_int();
-            double flag;
-            fh.read_array(&flag, 1);
-            int movable = fh.read_int();
-            int weight_index = fh.read_int();
-            if (target_type == patstimtype) {
+            auto e = static_cast<Phase2::SelfEventType_*>(event.get());
+            if (e->target_type == patstimtype) {
                 if (nt.id == 0) {
-                    patstim_te = te;
+                    patstim_te = e->time;
                 }
                 break;
             }
-            Point_process* pnt = nt.pntprocs + pinstance;
-            // printf("  SelfEvent %d %d %d %g %d %d\n", target_type, pinstance, target_instance,
+            Point_process* pnt = nt.pntprocs + e->point_proc_instance;
+            // printf("  SelfEvent %d %d %d %g %d %d\n", target_type, point_proc_instance, target_instance,
             //        flag, movable, weight_index);
-            assert(target_instance == pnt->_i_instance);
-            assert(target_type == pnt->_type);
-            net_send(nt._vdata + movable, weight_index, pnt, te, flag);
+            nrn_assert(e->target_instance == pnt->_i_instance);
+            nrn_assert(e->target_type == pnt->_type);
+            net_send(nt._vdata + e->movable, e->weight_index, pnt, e->time, e->flag);
             break;
         }
         case PreSynType: {
-            int psindex = fh.read_int();
-            // printf("  PreSyn %d\n", psindex);
-            PreSyn* ps = nt.presyns + psindex;
+            auto e = static_cast<Phase2::PreSynType_*>(event.get());
+            // printf("  PreSyn %d\n", presyn_index);
+            PreSyn* ps = nt.presyns + e->presyn_index;
             int gid = ps->output_index_;
             ps->output_index_ = -1;
-            ps->send(te, net_cvode_instance, &nt);
+            ps->send(e->time, net_cvode_instance, &nt);
             ps->output_index_ = gid;
             break;
         }
@@ -749,13 +740,9 @@
             break;
         }
         case PlayRecordEventType: {
-            int prtype = fh.read_int();
-            if (prtype == VecPlayContinuousType) {
-                VecPlayContinuous* vpc = (VecPlayContinuous*)(nt._vecplay[fh.read_int()]);
-                vpc->e_->send(te, net_cvode_instance, &nt);
-            } else {
-                assert(0);
-            }
+            auto e = static_cast<Phase2::PlayRecordEventType_*>(event.get());
+            VecPlayContinuous* vpc = (VecPlayContinuous*)(nt._vecplay[e->vecplay_index]);
+            vpc->e_->send(e->time, net_cvode_instance, &nt);
             break;
         }
         default: {
@@ -817,38 +804,50 @@ static void write_tqueue(NrnThread& nt, FileHandlerWrap& fh) {
 
 static bool checkpoint_restored_ = false;
 
-void checkpoint_restore_tqueue(NrnThread& nt, FileHandler& fh) {
+// Read a tqueue/checkpoint
+// int: should be equal to the previous n_vecplay
+// n_vecplay:
+// int: last_index
+// int: discon_index
+// int: ubound_index
+// int: patstim_index
+// int: should be -1
+// n_presyn:
+// int: flags of presyn_helper
+// int: should be -1
+// null terminated:
+// int: type
+// array of size 1:
+// double: time
+// ... depends on the type
+// int: should be -1
+// null terminated:
+// int: TO BE DEFINED
+// ... depends on the type
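// For reference: the event payloads consumed above are assumed to be small
// structs provided by Phase2 (a sketch only, inferred from the field accesses
// here; the actual definitions live in coreneuron/io/phase2.hpp and may differ):
//
//     struct EventTypeBase { double time; };
//     struct NetConType_: EventTypeBase { int netcon_index; };
//     struct SelfEventType_: EventTypeBase {
//         int target_type;
//         int point_proc_instance;
//         int target_instance;
//         double flag;
//         int movable;
//         int weight_index;
//     };
//     struct PreSynType_: EventTypeBase { int presyn_index; };
//     struct PlayRecordEventType_: EventTypeBase { int vecplay_index; };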
+void checkpoint_restore_tqueue(NrnThread& nt, const Phase2& p2) {
-    int type;
     checkpoint_restored_ = true;
 
-    // VecPlayContinuous
-    assert(fh.read_int() == nt.n_vecplay);
     // VecPlayContinuous state
     for (int i = 0; i < nt.n_vecplay; ++i) {
         VecPlayContinuous* vpc = (VecPlayContinuous*)nt._vecplay[i];
-        vpc->last_index_ = fh.read_int();
-        vpc->discon_index_ = fh.read_int();
-        vpc->ubound_index_ = fh.read_int();
+        auto& vec = p2.vec_play_continuous[i];
+        vpc->last_index_ = vec.last_index;
+        vpc->discon_index_ = vec.discon_index;
+        vpc->ubound_index_ = vec.ubound_index;
     }
 
     // PatternStim
-    patstim_index = fh.read_int();  // PatternStim
+    patstim_index = p2.patstim_index;  // PatternStim
     if (nt.id == 0) {
         patstim_te = -1.0;  // changed if relevant SelfEvent;
     }
 
-    assert(fh.read_int() == -1);  // -1 PreSyn ConditionEvent flags
     for (int i = 0; i < nt.n_presyn; ++i) {
-        nt.presyns_helper[i].flag_ = fh.read_int();
-    }
-
-    assert(fh.read_int() == -1);  // -1 TQItems from atomic_dq
-    while ((type = fh.read_int()) != 0) {
-        checkpoint_restore_tqitem(type, nt, fh);
+        nt.presyns_helper[i].flag_ = p2.preSynConditionEventFlags[i];
     }
 
-    assert(fh.read_int() == -1);  // -1 TQItems from binq_
-    while ((type - fh.read_int()) != 0) {
-        checkpoint_restore_tqitem(type, nt, fh);
+    for (const auto& event: p2.events) {
+        checkpoint_restore_tqitem(event.first, event.second, nt);
     }
 }
diff --git a/coreneuron/io/nrn_checkpoint.hpp b/coreneuron/io/nrn_checkpoint.hpp
index 1b62da337..2371de1e8 100644
--- a/coreneuron/io/nrn_checkpoint.hpp
+++ b/coreneuron/io/nrn_checkpoint.hpp
@@ -28,6 +28,9 @@ THE POSSIBILITY OF SUCH DAMAGE.
 #ifndef _H_NRNCHECKPOINT_
 #define _H_NRNCHECKPOINT_
+
+#include "coreneuron/io/phase2.hpp"
+
 namespace coreneuron {
 class NrnThread;
 class FileHandler;
@@ -36,10 +39,9 @@ extern bool nrn_checkpoint_arg_exists;
 
 void write_checkpoint(NrnThread* nt,
                       int nb_threads,
-                      const char* dir,
-                      bool swap_bytes_order = false);
+                      const char* dir);
 
-void checkpoint_restore_tqueue(NrnThread&, FileHandler&);
+void checkpoint_restore_tqueue(NrnThread&, const Phase2& p2);
 
 int* inverse_permute(int* p, int n);
 void nrn_inverse_i_layout(int i, int& icnt, int cnt, int& isz, int sz, int layout);
diff --git a/coreneuron/io/nrn_filehandler.cpp b/coreneuron/io/nrn_filehandler.cpp
index 4631a41a4..8045823a3 100644
--- a/coreneuron/io/nrn_filehandler.cpp
+++ b/coreneuron/io/nrn_filehandler.cpp
@@ -31,24 +31,23 @@ THE POSSIBILITY OF SUCH DAMAGE.
#include "coreneuron/nrnconf.h" namespace coreneuron { -FileHandler::FileHandler(const char* filename, bool reorder) { - this->open(filename, reorder); - checkpoint(0); - stored_chkpnt = 0; +FileHandler::FileHandler(const std::string& filename) + : chkpnt(0), stored_chkpnt(0) { + this->open(filename); } -bool FileHandler::file_exist(const char* filename) const { +bool FileHandler::file_exist(const std::string& filename) const { struct stat buffer; - return (stat(filename, &buffer) == 0); + return (stat(filename.c_str(), &buffer) == 0); } -void FileHandler::open(const char* filename, bool reorder, std::ios::openmode mode) { +void FileHandler::open(const std::string& filename, std::ios::openmode mode) { nrn_assert((mode & (std::ios::in | std::ios::out))); - reorder_bytes = reorder; close(); F.open(filename, mode | std::ios::binary); - if (!F.is_open()) - fprintf(stderr, "cannot open file %s\n", filename); + if (!F.is_open()) { + std::cerr << "cannot open file '" << filename << "'" << std::endl; + } nrn_assert(F.is_open()); current_mode = mode; char version[256]; diff --git a/coreneuron/io/nrn_filehandler.hpp b/coreneuron/io/nrn_filehandler.hpp index b2336ce3c..3242244cd 100644 --- a/coreneuron/io/nrn_filehandler.hpp +++ b/coreneuron/io/nrn_filehandler.hpp @@ -34,8 +34,6 @@ THE POSSIBILITY OF SUCH DAMAGE. #include #include -#include "coreneuron/utils/endianness.hpp" -#include "coreneuron/utils/swap_endian.h" #include "coreneuron/utils/nrn_assert.h" namespace coreneuron { @@ -43,11 +41,7 @@ namespace coreneuron { * * Error handling is simple: abort()! * - * Reader will abort() if native integer size is not 4 bytes, - * or if need a convoluted byte reordering between native - * and file endianness. Complicated endianness configurations and - * the possibility of non-IEEE floating point representations - * are happily ignored. + * Reader will abort() if native integer size is not 4 bytes. * * All automatic allocations performed by read_int_array() * and read_dbl_array() methods use new []. @@ -58,7 +52,6 @@ const int max_line_length = 1024; class FileHandler { std::fstream F; //!< File stream associated with reader. - bool reorder_bytes; //!< True if we need to reorder for native endiannes. std::ios_base::openmode current_mode; //!< File open mode (not stored in fstream) int chkpnt; //!< Current checkpoint number state. int stored_chkpnt; //!< last "remembered" checkpoint number state. @@ -75,20 +68,20 @@ class FileHandler { FileHandler& operator=(const FileHandler&); public: - FileHandler() : reorder_bytes(false), chkpnt(0), stored_chkpnt(0) { + FileHandler() : chkpnt(0), stored_chkpnt(0) { } - explicit FileHandler(const char* filename, bool reorder = false); + explicit FileHandler(const std::string& filename); /** Preserving chkpnt state, move to a new file. */ - void open(const char* filename, bool reorder, std::ios::openmode mode = std::ios::in); + void open(const std::string& filename, std::ios::openmode mode = std::ios::in); /** Is the file not open */ bool fail() const { return F.fail(); } - bool file_exist(const char* filename) const; + bool file_exist(const std::string& filename) const; /** nothing more to read */ bool eof(); @@ -194,8 +187,6 @@ class FileHandler { break; case read: F.read((char*)p, count * sizeof(T)); - if (reorder_bytes) - endian::swap_endian_range(p, p + count); break; } @@ -205,7 +196,7 @@ class FileHandler { // convenience interfaces: - /** Read and optionally allocate an integer array of fixed length. */ + /** Read an integer array of fixed length. 
*/
     template <typename T>
     inline T* read_array(T* p, size_t count) {
         return parse_array(p, count, read);
     }
@@ -217,6 +208,13 @@ class FileHandler {
         return parse_array(new T[count], count, read);
     }
 
+    template <typename T>
+    inline std::vector<T> read_vector(size_t count) {
+        std::vector<T> vec(count);
+        parse_array(vec.data(), count, read);
+        return vec;
+    }
+
     /** Close currently open file. */
     void close();
@@ -226,13 +224,7 @@
         nrn_assert(F.is_open());
         nrn_assert(current_mode & std::ios::out);
         write_checkpoint();
-        if (reorder_bytes) {
-            endian::swap_endian_range(p, p + nb_elements);
-            F.write((const char*)p, nb_elements * (sizeof(T)));
-            endian::swap_endian_range(p, p + nb_elements);
-        } else {
-            F.write((const char*)p, nb_elements * (sizeof(T)));
-        }
+        F.write((const char*)p, nb_elements * (sizeof(T)));
         nrn_assert(!F.fail());
     }
@@ -258,9 +250,6 @@
         } else {
             memcpy(temp_cpy, p, nb_elements * sizeof(T) * nb_lines);
         }
-        if (reorder_bytes) {
-            endian::swap_endian_range(temp_cpy, temp_cpy + nb_elements * nb_lines);
-        }
         // AoS never uses padding, SoA is translated above, so one write
         // operation is enough in both cases
         F.write((const char*)temp_cpy, nb_elements * sizeof(T) * nb_lines);
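// Example (illustrative): the new read_vector mirrors read_array but returns an
// owning std::vector, so callers no longer need to pair read_array with
// delete[]. A hypothetical caller (file name and count made up for the example):
//
//     FileHandler fh("1_2.dat");
//     auto gids = fh.read_vector<int>(n_presyn);  // instead of int* g = fh.read_array<int>(n_presyn)
//     // use gids.data() / gids.size() as before; storage is freed automatically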
#include "coreneuron/permute/cellorder.hpp" #include "coreneuron/io/nrnsection_mapping.hpp" #include "coreneuron/utils/nrnoc_aux.hpp" +#include "coreneuron/io/phase1.hpp" +#include "coreneuron/io/phase2.hpp" // callbacks into nrn/src/nrniv/nrnbbcore_write.cpp #include "coreneuron/sim/fast_imem.hpp" -#include "coreneuron/io/nrn2core_direct.h" #include "coreneuron/coreneuron.hpp" @@ -71,67 +72,8 @@ void (*nrn2core_get_partrans_setup_info_)(int tid, int*& sid_src, int*& v_indices); -int (*nrn2core_get_dat1_)(int tid, - int& n_presyn, - int& n_netcon, - int*& output_gid, - int*& netcon_srcgid); - -int (*nrn2core_get_dat2_1_)(int tid, - int& ngid, - int& n_real_gid, - int& nnode, - int& ndiam, - int& nmech, - int*& tml_index, - int*& ml_nodecount, - int& nidata, - int& nvdata, - int& nweight); - -int (*nrn2core_get_dat2_2_)(int tid, - int*& v_parent_index, - double*& a, - double*& b, - double*& area, - double*& v, - double*& diamvec); - -int (*nrn2core_get_dat2_mech_)(int tid, - size_t i, - int dsz_inst, - int*& nodeindices, - double*& data, - int*& pdata); - -int (*nrn2core_get_dat2_3_)(int tid, - int nweight, - int*& output_vindex, - double*& output_threshold, - int*& netcon_pnttype, - int*& netcon_pntindex, - double*& weights, - double*& delays); - -int (*nrn2core_get_dat2_corepointer_)(int tid, int& n); - -int (*nrn2core_get_dat2_corepointer_mech_)(int tid, - int type, - int& icnt, - int& dcnt, - int*& iarray, - double*& darray); - -int (*nrn2core_get_dat2_vecplay_)(int tid, int& n); - -int (*nrn2core_get_dat2_vecplay_inst_)(int tid, - int i, - int& vptype, - int& mtype, - int& ix, - int& sz, - double*& yvec, - double*& tvec); + + void (*nrn2core_get_trajectory_requests_)(int tid, int& bsize, @@ -221,9 +163,6 @@ namespace coreneuron { extern corenrn_parameters corenrn_param; int nrn_setup_multiple = 1; /* default */ int nrn_setup_extracon = 0; /* default */ -static int maxgid; -// no gid in any file can be greater than maxgid. maxgid will be set so that -// maxgid * nrn_setup_multiple < 0x7fffffff // nrn_setup_extracon extra connections per NrnThread. // i.e. nrn_setup_extracon * nrn_setup_multiple * nrn_nthread @@ -250,31 +189,14 @@ static int maxgid; // This is done between phase1 and phase2 during the call to // determine_inputpresyn(). -// If MATRIX_LAYOUT is 1 then a,b,d,rhs,v,area is not padded using NRN_SOA_PAD -// When MATRIX_LAYOUT is 0 then mechanism pdata index values into _actual_v -// and _actual_area data need to be updated. -#if !defined(LAYOUT) -#define LAYOUT 1 -#define MATRIX_LAYOUT 1 -#else -#define MATRIX_LAYOUT LAYOUT -#endif - -#if !defined(NRN_SOA_PAD) -// for layout 0, every range variable array must have a size which -// is a multiple of NRN_SOA_PAD doubles -#define NRN_SOA_PAD 8 -#endif - #ifdef _OPENMP -static MUTDEC +static OMP_Mutex mut; #endif - static size_t - model_size(void); +static size_t model_size(void); /// Vector of maps for negative presyns -std::vector > neg_gid2out; +std::vector> neg_gid2out; /// Maps for ouput and input presyns std::map gid2out; std::map gid2in; @@ -285,26 +207,6 @@ std::vector netcon_in_presyn_order_; /// Only for setup vector of netcon source gids std::vector netcon_srcgid; - -// Wrap read_phase1 and read_phase2 calls to allow using nrn_multithread_job. -// Args marshaled by store_phase_args are used by phase1_wrapper -// and phase2_wrapper. 
-static void store_phase_args(int ngroup, - int* gidgroups, - int* imult, - FileHandler* file_reader, - const char* path, - const char* restore_path, - bool byte_swap) { - ngroup_w = ngroup; - gidgroups_w = gidgroups; - imult_w = imult; - file_reader_w = file_reader; - path_w = path; - restore_path_w = restore_path; - byte_swap_w = byte_swap; -} - /* read files.dat file and distribute cellgroups to all mpi ranks */ void nrn_read_filesdat(int& ngrp, int*& grp, int multiple, int*& imult, const char* filesdat) { patstimtype = nrn_get_mechtype("PatternStim"); @@ -373,145 +275,6 @@ void nrn_read_filesdat(int& ngrp, int*& grp, int multiple, int*& imult, const ch fclose(fp); } -static void read_phase1(int* output_gid, int imult, NrnThread& nt); - -static void* direct_phase1(NrnThread* n) { - NrnThread& nt = *n; - int* output_gid; - int valid = - (*nrn2core_get_dat1_)(nt.id, nt.n_presyn, nt.n_netcon, output_gid, netcon_srcgid[nt.id]); - if (valid) { - read_phase1(output_gid, 0, nt); - } - return nullptr; -} - -void read_phase1(FileHandler& F, int imult, NrnThread& nt) { - assert(!F.fail()); - nt.n_presyn = F.read_int(); /// Number of PreSyn-s in NrnThread nt - nt.n_netcon = F.read_int(); /// Number of NetCon-s in NrnThread nt - - int* output_gid = F.read_array(nt.n_presyn); - // the extra netcon_srcgid will be filled in later - netcon_srcgid[nt.id] = new int[nt.n_netcon + nrn_setup_extracon]; - F.read_array(netcon_srcgid[nt.id], nt.n_netcon); - F.close(); - - read_phase1(output_gid, imult, nt); -} - -static void read_phase1(int* output_gid, int imult, NrnThread& nt) { - int zz = imult * maxgid; // offset for each gid - // offset the (non-negative) gids according to multiple - // make sure everything fits into gid space. - for (int i = 0; i < nt.n_presyn; ++i) { - if (output_gid[i] >= 0) { - nrn_assert(output_gid[i] < maxgid); - output_gid[i] += zz; - } - } - - nt.presyns = new PreSyn[nt.n_presyn]; - nt.netcons = new NetCon[nt.n_netcon + nrn_setup_extracon]; - nt.presyns_helper = (PreSynHelper*)ecalloc_align(nt.n_presyn, sizeof(PreSynHelper)); - - int* nc_srcgid = netcon_srcgid[nt.id]; - for (int i = 0; i < nt.n_netcon; ++i) { - if (nc_srcgid[i] >= 0) { - nrn_assert(nc_srcgid[i] < maxgid); - nc_srcgid[i] += zz; - } - } - - for (int i = 0; i < nt.n_presyn; ++i) { - int gid = output_gid[i]; - if (gid == -1) - continue; - - // Note that the negative (type, index) - // coded information goes into the neg_gid2out[tid] hash table. - // See netpar.cpp for the netpar_tid_... function implementations. 
- // Both that table and the process wide gid2out table can be deleted - // before the end of setup - - MUTLOCK - /// Put gid into the gid2out hash table with correspondent output PreSyn - /// Or to the negative PreSyn map - PreSyn* ps = nt.presyns + i; - if (gid >= 0) { - char m[200]; - if (gid2in.find(gid) != gid2in.end()) { - sprintf(m, "gid=%d already exists as an input port", gid); - hoc_execerror( - m, - "Setup all the output ports on this process before using them as input ports."); - } - if (gid2out.find(gid) != gid2out.end()) { - sprintf(m, "gid=%d already exists on this process as an output port", gid); - hoc_execerror(m, 0); - } - gid2out[gid] = ps; - ps->gid_ = gid; - ps->output_index_ = gid; - } else { - nrn_assert(neg_gid2out[nt.id].find(gid) == neg_gid2out[nt.id].end()); - neg_gid2out[nt.id][gid] = ps; - } - MUTUNLOCK - - if (gid < 0) { - nt.presyns[i].output_index_ = -1; - } - } - - delete[] output_gid; - - if (nrn_setup_extracon > 0) { - // very simplistic - // Use this threads positive source gids - zz in nt.netcon order as the - // source gids for extracon. - // The edge cases are: - // The 0th duplicate uses uses source gids for the last duplicate. - // If there are fewer positive source gids than extracon, then keep - // rotating through the nt.netcon . - // If there are no positive source gids, use a source gid of -1. - // Would not be difficult to modify so that random positive source was - // used, and/or random connect to another duplicate. - // Note that we increment the nt.n_netcon at the end of this function. - int sidoffset; // how much to increment the corresponding positive gid - // like ring connectivity - if (imult > 0) { - sidoffset = -maxgid; - } else if (nrn_setup_multiple > 1) { - sidoffset = (nrn_setup_multiple - 1) * maxgid; - } else { - sidoffset = 0; - } - // set up the extracon srcgid_ - int* nc_srcgid = netcon_srcgid[nt.id]; - int j = 0; // rotate through the n_netcon netcon_srcgid - for (int i = 0; i < nrn_setup_extracon; ++i) { - int sid = -1; - for (int k = 0; k < nt.n_netcon; ++k) { - // potentially rotate j through the entire n_netcon but no further - sid = nc_srcgid[j]; - j = (j + 1) % nt.n_netcon; - if (sid >= 0) { - break; - } - } - if (sid < 0) { // only connect to real cells. - sid = -1; - } else { - sid += sidoffset; - } - nc_srcgid[i + nt.n_netcon] = sid; - } - // finally increment the n_netcon - nt.n_netcon += nrn_setup_extracon; - } -} - void netpar_tid_gid2ps(int tid, int gid, PreSyn** ps, InputPreSyn** psi) { /// for gid < 0 returns the PreSyn* in the thread (tid) specific map. *ps = nullptr; @@ -676,35 +439,30 @@ void nrn_setup_cleanup() { void nrn_setup(const char* filesdat, bool is_mapping_needed, - bool byte_swap, + bool /* byte_swap */, bool run_setup_cleanup, const char* datpath, const char* restore_path, double* mindelay) { - /// Number of local cell groups - int ngroup = 0; - - /// Array of cell group numbers (indices) - int* gidgroups = nullptr; - - /// Array of duplicate indices. Normally, with nrn_setup_multiple=1, - // they are ngroup values of 0. - int* imult = nullptr; - double time = nrn_wtime(); - maxgid = 0x7fffffff / nrn_setup_multiple; + int ngroup; + int* gidgroups; + int* imult; nrn_read_filesdat(ngroup, gidgroups, nrn_setup_multiple, imult, filesdat); + UserParams userParams(ngroup, + gidgroups, + imult, + datpath, + strlen(restore_path) == 0 ? datpath : restore_path); + - if (!MUTCONSTRUCTED) { - MUTCONSTRUCT(1) - } // temporary bug work around. 
If any process has multiple threads, no
    // process can have a single thread. So, for now, if one thread, make two.
    // Fortunately, empty threads work fine.
    // Allocate NrnThread* nrn_threads of size ngroup (minimum 2)
    // Note that rank with 0 dataset/cellgroup works fine
-    nrn_threads_create(ngroup <= 1 ? 2 : ngroup);
+    nrn_threads_create(userParams.ngroup <= 1 ? 2 : userParams.ngroup);
 
     nrnthread_chkpnt = new NrnThreadChkpnt[nrn_nthread];
 
@@ -721,7 +479,7 @@ void nrn_setup(const char* filesdat,
 
     /// Reserve vector of maps of size ngroup for negative gid-s
     /// std::vector< std::map<int, PreSyn*> > neg_gid2out;
-    neg_gid2out.resize(ngroup);
+    neg_gid2out.resize(userParams.ngroup);
 
     // bug fix. gid2out is cumulative over all threads and so do not
     // know how many there are til after phase1
@@ -737,35 +495,39 @@
     for (int i = 0; i < nrn_nthread; ++i)
         netcon_srcgid[i] = nullptr;
 
-    FileHandler* file_reader = new FileHandler[ngroup];
-
-    /* nrn_multithread_job supports serial, pthread, and openmp. */
-    store_phase_args(ngroup, gidgroups, imult, file_reader, datpath,
-                     strlen(restore_path) != 0 ? restore_path : datpath, byte_swap);
-
     // gap junctions
     if (nrn_have_gaps) {
         assert(nrn_setup_multiple == 1);
         nrn_partrans::transfer_thread_data_ = new nrn_partrans::TransferThreadData[nrn_nthread];
-        nrn_partrans::setup_info_ = new nrn_partrans::SetupInfo[ngroup];
+        nrn_partrans::setup_info_ = new nrn_partrans::SetupInfo[userParams.ngroup];
         if (!corenrn_embedded) {
-            coreneuron::phase_wrapper<coreneuron::gap>();
+            coreneuron::phase_wrapper<coreneuron::gap>(userParams);
         } else {
             nrn_assert(sizeof(nrn_partrans::sgid_t) == sizeof(int));
-            for (int i = 0; i < ngroup; ++i) {
+            for (int i = 0; i < userParams.ngroup; ++i) {
                 nrn_partrans::SetupInfo& si = nrn_partrans::setup_info_[i];
                 (*nrn2core_get_partrans_setup_info_)(i, si.ntar, si.nsrc, si.type, si.ix_vpre,
                                                      si.sid_target, si.sid_src, si.v_indices);
             }
         }
-        nrn_partrans::gap_mpi_setup(ngroup);
+        nrn_partrans::gap_mpi_setup(userParams.ngroup);
     }
 
     if (!corenrn_embedded) {
-        coreneuron::phase_wrapper<(coreneuron::phase)1>();  /// If not the xlc compiler, it should
-                                                            /// be coreneuron::phase::one
+        coreneuron::phase_wrapper<(coreneuron::phase)1>(userParams);
     } else {
-        nrn_multithread_job(direct_phase1);
+        nrn_multithread_job([](NrnThread* n) {
+            Phase1 p1;
+            p1.read_direct(n->id);
+            NrnThread& nt = *n;
+            {
+#ifdef _OPENMP
+                p1.populate(nt, 0, mut);
+#else
+                p1.populate(nt, 0);
+#endif
+            }
+        });
     }
 
     // from the gid2out map and the netcon_srcgid array,
@@ -776,10 +538,10 @@
     // read the rest of the gidgroup's data and complete the setup for each
     // thread.
     /* nrn_multithread_job supports serial, pthread, and openmp. */
-    coreneuron::phase_wrapper<(coreneuron::phase)2>(corenrn_embedded);
+    coreneuron::phase_wrapper<(coreneuron::phase)2>(userParams, corenrn_embedded);
 
     if (is_mapping_needed)
-        coreneuron::phase_wrapper<(coreneuron::phase)3>();
+        coreneuron::phase_wrapper<(coreneuron::phase)3>(userParams);
 
     *mindelay = set_mindelay(*mindelay);
 
@@ -801,11 +563,9 @@
     /// which is only executed by StochKV.c.
nrn_mk_table_check(); // was done in nrn_thread_memblist_setup in multicore.c - delete[] file_reader; - model_size(); - delete[] gidgroups; - delete[] imult; + delete[] userParams.gidgroups; + delete[] userParams.imult; if (nrnmpi_myid == 0 && !corenrn_param.is_quiet()) { printf(" Setup Done : %.2lf seconds \n", nrn_wtime() - time); @@ -819,8 +579,12 @@ void setup_ThreadData(NrnThread& nt) { if (mf.thread_size_) { ml->_thread = (ThreadDatum*)ecalloc_align(mf.thread_size_, sizeof(ThreadDatum)); if (mf.thread_mem_init_) { - MUTLOCK (*mf.thread_mem_init_)(ml->_thread); - MUTUNLOCK + { +#ifdef _OPENMP + const std::lock_guard lock(mut); +#endif + (*mf.thread_mem_init_)(ml->_thread); + } } } else { ml->_thread = nullptr; @@ -828,12 +592,13 @@ void setup_ThreadData(NrnThread& nt) { } } -void read_phasegap(FileHandler& F, int imult, NrnThread& nt) { - nrn_assert(imult == 0); +void read_phasegap(NrnThread& nt, UserParams& userParams) { + nrn_assert(userParams.imult[nt.id] == 0); nrn_partrans::SetupInfo& si = nrn_partrans::setup_info_[nt.id]; si.ntar = 0; si.nsrc = 0; + auto& F = userParams.file_reader[nt.id]; if (F.fail()) { return; } @@ -864,49 +629,6 @@ void read_phasegap(FileHandler& F, int imult, NrnThread& nt) { #endif } -int nrn_soa_padded_size(int cnt, int layout) { - return soa_padded_size(cnt, layout); -} - -static size_t nrn_soa_byte_align(size_t i) { - if (LAYOUT == 0) { - size_t dbl_align = NRN_SOA_BYTE_ALIGN / sizeof(double); - size_t rem = i % dbl_align; - if (rem) { - i += dbl_align - rem; - } - assert((i * sizeof(double)) % NRN_SOA_BYTE_ALIGN == 0); - } - return i; -} - -// file data is AoS. ie. -// organized as cnt array instances of mtype each of size sz. -// So input index i refers to i_instance*sz + i_item offset -// Return the corresponding SoA index -- taking into account the -// alignment requirements. Ie. i_instance + i_item*align_cnt. - -int nrn_param_layout(int i, int mtype, Memb_list* ml) { - int layout = corenrn.get_mech_data_layout()[mtype]; - if (layout == 1) { - return i; - } - assert(layout == 0); - int sz = corenrn.get_prop_param_size()[mtype]; - return nrn_i_layout(i / sz, ml->nodecount, i % sz, sz, layout); -} - -int nrn_i_layout(int icnt, int cnt, int isz, int sz, int layout) { - if (layout == 1) { - return icnt * sz + isz; - } else if (layout == 0) { - int padded_cnt = nrn_soa_padded_size(cnt, layout); // may want to factor out to save time - return icnt + isz * padded_cnt; - } - assert(0); - return 0; -} - // This function is related to nrn_dblpntr2nrncore in Neuron to determine which values should // be transferred from CoreNeuron. Types correspond to the value to be transferred based on // mech_type enum or non-artificial cell mechanisms. 
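// Example (illustrative): the std::lock_guard in setup_ThreadData above and the
// OMP_Mutex passed to Phase1::populate assume that OMP_Mutex (declared in
// coreneuron/utils/nrnmutdec.h) satisfies BasicLockable. A minimal sketch of
// such a wrapper; the real definition may differ:
//
//     #include <omp.h>
//     class OMP_Mutex {
//       public:
//         OMP_Mutex() { omp_init_lock(&lock_); }
//         ~OMP_Mutex() { omp_destroy_lock(&lock_); }
//         void lock() { omp_set_lock(&lock_); }      // used by std::lock_guard
//         void unlock() { omp_unset_lock(&lock_); }
//       private:
//         omp_lock_t lock_;
//     };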
@@ -960,35 +682,6 @@ void nrn_inverse_i_layout(int i, int& icnt, int cnt, int& isz, int sz, int layou } } -template -inline void mech_layout(FileHandler& F, T* data, int cnt, int sz, int layout) { - if (layout == 1) { /* AoS */ - if (corenrn_embedded) { - return; - } - F.read_array(data, cnt * sz); - } else if (layout == 0) { /* SoA */ - int align_cnt = nrn_soa_padded_size(cnt, layout); - T* d; - if (corenrn_embedded) { - d = new T[cnt * sz]; - for (int i = 0; i < cnt; ++i) { - for (int j = 0; j < sz; ++j) { - d[i * sz + j] = data[i * sz + j]; - } - } - } else { - d = F.read_array(cnt * sz); - } - for (int i = 0; i < cnt; ++i) { - for (int j = 0; j < sz; ++j) { - data[i + j * align_cnt] = d[i * sz + j]; - } - } - delete[] d; - } -} - /** * Cleanup global ion map created during mechanism registration * @@ -1204,959 +897,33 @@ void delete_trajectory_requests(NrnThread& nt) { } } -void read_phase2(FileHandler& F, int imult, NrnThread& nt) { - bool direct = corenrn_embedded; - if (!direct) { - assert(!F.fail()); // actually should assert that it is open - } - nrn_assert(imult >= 0); // avoid imult unused warning - NrnThreadChkpnt& ntc = nrnthread_chkpnt[nt.id]; - ntc.file_id = gidgroups_w[nt.id]; - - int n_outputgid, ndiam, nmech, *tml_index, *ml_nodecount; - if (direct) { - int nidata, nvdata; - (*nrn2core_get_dat2_1_)(nt.id, n_outputgid, nt.ncell, nt.end, ndiam, nmech, tml_index, - ml_nodecount, nidata, nvdata, nt.n_weight); - nt._nidata = nidata; - nt._nvdata = nvdata; - } else { - n_outputgid = F.read_int(); - nt.ncell = F.read_int(); - nt.end = F.read_int(); - ndiam = F.read_int(); // 0 if not needed, else nt.end - nmech = F.read_int(); - tml_index = new int[nmech]; - ml_nodecount = new int[nmech]; - int diff_mech_count = 0; - for (int i = 0; i < nmech; ++i) { - tml_index[i] = F.read_int(); - ml_nodecount[i] = F.read_int(); - if (std::any_of(corenrn.get_different_mechanism_type().begin(), - corenrn.get_different_mechanism_type().end(), - [&](int e) { return e == tml_index[i]; })) { - if (nrnmpi_myid == 0) { - printf("Error: %s is a different MOD file than used by NEURON!\n", - nrn_get_mechname(tml_index[i])); - } - diff_mech_count++; - } - } - - if (diff_mech_count > 0) { - if (nrnmpi_myid == 0) { - printf( - "Error : NEURON and CoreNEURON must use same mod files for compatibility, %d different mod file(s) found. Re-compile special and special-core!\n", - diff_mech_count); - nrn_abort(1); - } - } - - nt._nidata = F.read_int(); - nt._nvdata = F.read_int(); - nt.n_weight = F.read_int(); - } - -#if CHKPNTDEBUG - ntc.n_outputgids = n_outputgid; - ntc.nmech = nmech; -#endif - if (!direct) { - nrn_assert(n_outputgid > 0); // avoid n_outputgid unused warning - } - - /// Checkpoint in coreneuron is defined for both phase 1 and phase 2 since they are written - /// together - // printf("ncell=%d end=%d nmech=%d\n", nt.ncell, nt.end, nmech); - // printf("nart=%d\n", nart); - nt._ml_list = (Memb_list**)ecalloc_align(corenrn.get_memb_funcs().size(), sizeof(Memb_list*)); - -#if CHKPNTDEBUG - ntc.mlmap = new Memb_list_chkpnt*[memb_func.size()]; - for (int i = 0; i < _memb_func.size(); ++i) { - ntc.mlmap[i] = nullptr; - } -#endif - - int shadow_rhs_cnt = 0; - nt.shadow_rhs_cnt = 0; - - nt.stream_id = 0; - nt.compute_gpu = 0; - auto& nrn_prop_param_size_ = corenrn.get_prop_param_size(); - auto& nrn_prop_dparam_size_ = corenrn.get_prop_dparam_size(); - -/* read_phase2 is being called from openmp region - * and hence we can set the stream equal to current thread id. 
- * In fact we could set gid as stream_id when we will have nrn threads - * greater than number of omp threads. - */ -#if defined(_OPENMP) - nt.stream_id = omp_get_thread_num(); -#endif - auto& memb_func = corenrn.get_memb_funcs(); - NrnThreadMembList* tml_last = nullptr; - for (int i = 0; i < nmech; ++i) { - auto tml = (NrnThreadMembList*)emalloc_align(sizeof(NrnThreadMembList)); - tml->next = nullptr; - tml->index = tml_index[i]; - - tml->ml = (Memb_list*)ecalloc_align(1, sizeof(Memb_list)); - tml->ml->_net_receive_buffer = nullptr; - tml->ml->_net_send_buffer = nullptr; - tml->ml->_permute = nullptr; - if (memb_func[tml->index].alloc == nullptr) { - hoc_execerror(memb_func[tml->index].sym, "mechanism does not exist"); - } - tml->ml->nodecount = ml_nodecount[i]; - if (!memb_func[tml->index].sym) { - printf("%s (type %d) is not available\n", nrn_get_mechname(tml->index), tml->index); - exit(1); - } - tml->ml->_nodecount_padded = - nrn_soa_padded_size(tml->ml->nodecount, corenrn.get_mech_data_layout()[tml->index]); - if (memb_func[tml->index].is_point && corenrn.get_is_artificial()[tml->index] == 0) { - // Avoid race for multiple PointProcess instances in same compartment. - if (tml->ml->nodecount > shadow_rhs_cnt) { - shadow_rhs_cnt = tml->ml->nodecount; - } - } - - nt._ml_list[tml->index] = tml->ml; -#if CHKPNTDEBUG - Memb_list_chkpnt* mlc = new Memb_list_chkpnt; - ntc.mlmap[tml->index] = mlc; -#endif - // printf("index=%d nodecount=%d membfunc=%s\n", tml->index, tml->ml->nodecount, - // memb_func[tml->index].sym?memb_func[tml->index].sym:"None"); - if (nt.tml) { - tml_last->next = tml; - } else { - nt.tml = tml; - } - tml_last = tml; - } - delete[] tml_index; - delete[] ml_nodecount; - - if (shadow_rhs_cnt) { - nt._shadow_rhs = - (double*)ecalloc_align(nrn_soa_padded_size(shadow_rhs_cnt, 0), sizeof(double)); - nt._shadow_d = - (double*)ecalloc_align(nrn_soa_padded_size(shadow_rhs_cnt, 0), sizeof(double)); - nt.shadow_rhs_cnt = shadow_rhs_cnt; - } - - nt._data = nullptr; // allocated below after padding - nt.mapping = nullptr; // section segment mapping - - if (nt._nidata) - nt._idata = (int*)ecalloc(nt._nidata, sizeof(int)); - else - nt._idata = nullptr; - // see patternstim.cpp - int extra_nv = (&nt == nrn_threads) ? nrn_extra_thread0_vdata : 0; - if (nt._nvdata + extra_nv) - nt._vdata = (void**)ecalloc_align(nt._nvdata + extra_nv, sizeof(void*)); - else - nt._vdata = nullptr; - // printf("_nidata=%d _nvdata=%d\n", nt._nidata, nt._nvdata); - - // The data format begins with the matrix data - int ne = nrn_soa_padded_size(nt.end, MATRIX_LAYOUT); - size_t offset = 6 * ne; - - if (ndiam) { - // in the rare case that a mechanism has dparam with diam semantics - // then actual_diam array added after matrix in nt._data - // Generally wasteful since only a few diam are pointed to. - // Probably better to move the diam semantics to the p array of the mechanism - offset += ne; - } - - // Memb_list.data points into the nt.data array. 
- // Also count the number of Point_process - int npnt = 0; - for (auto tml = nt.tml; tml; tml = tml->next) { - Memb_list* ml = tml->ml; - int type = tml->index; - int layout = corenrn.get_mech_data_layout()[type]; - int n = ml->nodecount; - int sz = nrn_prop_param_size_[type]; - offset = nrn_soa_byte_align(offset); - ml->data = (double*)0 + offset; // adjust below since nt._data not allocated - offset += nrn_soa_padded_size(n, layout) * sz; - if (corenrn.get_pnt_map()[type] > 0) { - npnt += n; - } - } - nt.pntprocs = (Point_process*)ecalloc_align( - npnt, sizeof(Point_process)); // includes acell with and without gid - nt.n_pntproc = npnt; - // printf("offset=%ld\n", offset); - nt._ndata = offset; - - // now that we know the effect of padding, we can allocate data space, - // fill matrix, and adjust Memb_list data pointers - nt._data = (double*)ecalloc_align(nt._ndata, sizeof(double)); - nt._actual_rhs = nt._data + 0 * ne; - nt._actual_d = nt._data + 1 * ne; - nt._actual_a = nt._data + 2 * ne; - nt._actual_b = nt._data + 3 * ne; - nt._actual_v = nt._data + 4 * ne; - nt._actual_area = nt._data + 5 * ne; - nt._actual_diam = ndiam ? nt._data + 6 * ne : nullptr; - for (auto tml = nt.tml; tml; tml = tml->next) { - Memb_list* ml = tml->ml; - ml->data = nt._data + (ml->data - (double*)0); - } - - // matrix info - nt._v_parent_index = (int*)ecalloc_align(nt.end, sizeof(int)); - if (direct) { - (*nrn2core_get_dat2_2_)(nt.id, nt._v_parent_index, nt._actual_a, nt._actual_b, - nt._actual_area, nt._actual_v, nt._actual_diam); - } else { - F.read_array(nt._v_parent_index, nt.end); - F.read_array(nt._actual_a, nt.end); - F.read_array(nt._actual_b, nt.end); - F.read_array(nt._actual_area, nt.end); - F.read_array(nt._actual_v, nt.end); - if (ndiam) { - F.read_array(nt._actual_diam, nt.end); - } - } -#if CHKPNTDEBUG - ntc.parent = new int[nt.end]; - memcpy(ntc.parent, nt._v_parent_index, nt.end * sizeof(int)); - ntc.area = new double[nt.end]; - memcpy(ntc.area, nt._actual_area, nt.end * sizeof(double)); -#endif - - int synoffset = 0; - std::vector pnt_offset(memb_func.size()); - - // All the mechanism data and pdata. - // Also fill in the pnt_offset - // Complete spec of Point_process except for the acell presyn_ field. - int itml = 0; - int dsz_inst = 0; - for (auto tml = nt.tml; tml; tml = tml->next, ++itml) { - int type = tml->index; - Memb_list* ml = tml->ml; - int is_art = corenrn.get_is_artificial()[type]; - int n = ml->nodecount; - int szp = nrn_prop_param_size_[type]; - int szdp = nrn_prop_dparam_size_[type]; - int layout = corenrn.get_mech_data_layout()[type]; - - if (!is_art && !direct) { - ml->nodeindices = (int*)ecalloc_align(ml->nodecount, sizeof(int)); - } else { - ml->nodeindices = nullptr; - } - if (szdp) { - ml->pdata = (int*)ecalloc_align(nrn_soa_padded_size(n, layout) * szdp, sizeof(int)); - } - - if (direct) { - (*nrn2core_get_dat2_mech_)(nt.id, itml, dsz_inst, ml->nodeindices, ml->data, ml->pdata); - } else { - if (!is_art) { - F.read_array(ml->nodeindices, ml->nodecount); - } - } - if (szdp) { - ++dsz_inst; - } - - mech_layout(F, ml->data, n, szp, layout); - - if (szdp) { - mech_layout(F, ml->pdata, n, szdp, layout); -#if CHKPNTDEBUG // Not substantive. Only for debugging. 
- Memb_list_ckpnt* mlc = ntc.mlmap[type]; - mlc->pdata_not_permuted = (int*)coreneuron::ecalloc_align(n * szdp, sizeof(int)); - if (layout == 1) { // AoS just copy - for (int i = 0; i < n; ++i) { - for (int j = 0; j < szdp; ++j) { - mlc->pdata_not_permuted[i * szdp + j] = ml->pdata[i * szdp + j]; - } - } - } else if (layout == 0) { // SoA transpose and unpad - int align_cnt = nrn_soa_padded_size(n, layout); - for (int i = 0; i < n; ++i) { - for (int j = 0; j < szdp; ++j) { - mlc->pdata_not_permuted[i * szdp + j] = ml->pdata[i + j * align_cnt]; - } - } - } +void read_phase1(NrnThread& nt, UserParams& userParams) { + Phase1 p1; + p1.read_file(userParams.file_reader[nt.id]); + + { // Protect gid2in, gid2out and neg_gid2out +#ifdef _OPENMP + p1.populate(nt, userParams.imult[nt.id], mut); +#else + p1.populate(nt, userParams.imult[nt.id]); #endif - } else { - ml->pdata = nullptr; - } - if (corenrn.get_pnt_map()[type] > 0) { // POINT_PROCESS mechanism including acell - int cnt = ml->nodecount; - Point_process* pnt = nullptr; - pnt = nt.pntprocs + synoffset; - pnt_offset[type] = synoffset; - synoffset += cnt; - for (int i = 0; i < cnt; ++i) { - Point_process* pp = pnt + i; - pp->_type = type; - pp->_i_instance = i; - nt._vdata[ml->pdata[nrn_i_layout(i, cnt, 1, szdp, layout)]] = pp; - pp->_tid = nt.id; - } - } } - - if (nrn_have_gaps) { - nrn_partrans::gap_thread_setup(nt); - } - - // Some pdata may index into data which has been reordered from AoS to - // SoA. The four possibilities are if semantics is -1 (area), -5 (pointer), - // -9 (diam), - // or 0-999 (ion variables). Note that pdata has a layout and the - // type block in nt.data into which it indexes, has a layout. - for (auto tml = nt.tml; tml; tml = tml->next) { - int type = tml->index; - int layout = corenrn.get_mech_data_layout()[type]; - int* pdata = tml->ml->pdata; - int cnt = tml->ml->nodecount; - int szdp = nrn_prop_dparam_size_[type]; - int* semantics = memb_func[type].dparam_semantics; - - // ignore ARTIFICIAL_CELL (has useless area pointer with semantics=-1) - if (corenrn.get_is_artificial()[type]) { - continue; - } - - if (szdp) { - if (!semantics) - continue; // temporary for HDFReport, Binreport which will be skipped in - // bbcore_write of HBPNeuron - nrn_assert(semantics); - } - - for (int i = 0; i < szdp; ++i) { - int s = semantics[i]; - if (s == -1) { // area - int area0 = nt._actual_area - nt._data; - for (int iml = 0; iml < cnt; ++iml) { - int* pd = pdata + nrn_i_layout(iml, cnt, i, szdp, layout); - int ix = *pd; // relative to beginning of _actual_area - nrn_assert((ix >= 0) && (ix < nt.end)); - *pd = area0 + ix; // relative to nt._data - } - } else if (s == -9) { // diam - int diam0 = nt._actual_diam - nt._data; - for (int iml = 0; iml < cnt; ++iml) { - int* pd = pdata + nrn_i_layout(iml, cnt, i, szdp, layout); - int ix = *pd; // relative to beginning of _actual_diam - nrn_assert((ix >= 0) && (ix < nt.end)); - *pd = diam0 + ix; // relative to nt._data - } - } else if (s == -5) { // pointer assumes a pointer to membrane voltage - int v0 = nt._actual_v - nt._data; - for (int iml = 0; iml < cnt; ++iml) { - int* pd = pdata + nrn_i_layout(iml, cnt, i, szdp, layout); - int ix = *pd; // relative to _actual_v - nrn_assert((ix >= 0) && (ix < nt.end)); - *pd = v0 + ix; // relative to nt._data - } - } else if (s >= 0 && s < 1000) { // ion - int etype = s; - /* if ion is SoA, must recalculate pdata values */ - /* if ion is AoS, have to deal with offset */ - Memb_list* eml = nt._ml_list[etype]; - int edata0 = eml->data - nt._data; 
- int ecnt = eml->nodecount; - int esz = nrn_prop_param_size_[etype]; - for (int iml = 0; iml < cnt; ++iml) { - int* pd = pdata + nrn_i_layout(iml, cnt, i, szdp, layout); - int ix = *pd; // relative to the ion data - nrn_assert((ix >= 0) && (ix < ecnt * esz)); - /* Original pd order assumed ecnt groups of esz */ - *pd = edata0 + nrn_param_layout(ix, etype, eml); - } - } - } - } - - /* if desired, apply the node permutation. This involves permuting - at least the node parameter arrays for a, b, and area (and diam) and all - integer vector values that index into nodes. This could have been done - when originally filling the arrays with AoS ordered data, but can also - be done now, after the SoA transformation. The latter has the advantage - that the present order is consistent with all the layout values. Note - that after this portion of the permutation, a number of other node index - vectors will be read and will need to be permuted as well in subsequent - sections of this function. - */ - if (interleave_permute_type) { - nt._permute = interleave_order(nt.id, nt.ncell, nt.end, nt._v_parent_index); - } - if (nt._permute) { - int* p = nt._permute; - permute_data(nt._actual_a, nt.end, p); - permute_data(nt._actual_b, nt.end, p); - permute_data(nt._actual_area, nt.end, p); - permute_data(nt._actual_v, nt.end, - p); // need if restore or finitialize does not initialize voltage - if (nt._actual_diam) { - permute_data(nt._actual_diam, nt.end, p); - } - // index values change as well as ordering - permute_ptr(nt._v_parent_index, nt.end, p); - node_permute(nt._v_parent_index, nt.end, p); - -#if DEBUG -for (int i=0; i < nt.end; ++i) { - printf("parent[%d] = %d\n", i, nt._v_parent_index[i]); } -#endif - - // specify the ml->_permute and sort the nodeindices - for (auto tml = nt.tml; tml; tml = tml->next) { - if (tml->ml->nodeindices) { // not artificial - permute_nodeindices(tml->ml, p); - } - } - - // permute mechanism data, pdata (and values) - for (auto tml = nt.tml; tml; tml = tml->next) { - if (tml->ml->nodeindices) { // not artificial - permute_ml(tml->ml, tml->index, nt); - } - } - - // permute the Point_process._i_instance - for (int i = 0; i < nt.n_pntproc; ++i) { - Point_process& pp = nt.pntprocs[i]; - Memb_list* ml = nt._ml_list[pp._type]; - if (ml->_permute) { - pp._i_instance = ml->_permute[pp._i_instance]; - } - } - } - - if (nrn_have_gaps && interleave_permute_type) { - nrn_partrans::gap_indices_permute(nt); - } - - /* here we setup the mechanism dependencies. if there is a mechanism dependency - * then we allocate an array for tml->dependencies otherwise set it to nullptr. - * In order to find out the "real" dependencies i.e. dependent mechanism - * exist at the same compartment, we compare the nodeindices of mechanisms - * returned by nrn_mech_depend. 
- */ - - /* temporary array for dependencies */ - int* mech_deps = (int*)ecalloc(memb_func.size(), sizeof(int)); - - for (auto tml = nt.tml; tml; tml = tml->next) { - /* initialize to null */ - tml->dependencies = nullptr; - tml->ndependencies = 0; - - /* get dependencies from the models */ - int deps_cnt = nrn_mech_depend(tml->index, mech_deps); - - /* if dependencies, setup dependency array */ - if (deps_cnt) { - /* store "real" dependencies in the vector */ - std::vector actual_mech_deps; - - Memb_list* ml = tml->ml; - int* nodeindices = ml->nodeindices; - - /* iterate over dependencies */ - for (int j = 0; j < deps_cnt; j++) { - /* memb_list of dependency mechanism */ - Memb_list* dml = nt._ml_list[mech_deps[j]]; - - /* dependency mechanism may not exist in the model */ - if (!dml) - continue; - - /* take nodeindices for comparison */ - int* dnodeindices = dml->nodeindices; - - /* set_intersection function needs temp vector to push the common values */ - std::vector node_intersection; - - /* make sure they have non-zero nodes and find their intersection */ - if ((ml->nodecount > 0) && (dml->nodecount > 0)) { - std::set_intersection(nodeindices, nodeindices + ml->nodecount, dnodeindices, - dnodeindices + dml->nodecount, - std::back_inserter(node_intersection)); - } - - /* if they intersect in the nodeindices, it's real dependency */ - if (!node_intersection.empty()) { - actual_mech_deps.push_back(mech_deps[j]); - } - } - - /* copy actual_mech_deps to dependencies */ - if (!actual_mech_deps.empty()) { - tml->ndependencies = actual_mech_deps.size(); - tml->dependencies = (int*)ecalloc(actual_mech_deps.size(), sizeof(int)); - memcpy(tml->dependencies, &actual_mech_deps[0], - sizeof(int) * actual_mech_deps.size()); - } - } - } - - /* free temp dependency array */ - free(mech_deps); - - /// Fill the BA lists - std::vector bamap(memb_func.size()); - for (int i = 0; i < BEFORE_AFTER_SIZE; ++i) { - for (size_t ii = 0; ii < memb_func.size(); ++ii) { - bamap[ii] = nullptr; - } - for (auto bam = corenrn.get_bamech()[i]; bam; bam = bam->next) { - bamap[bam->type] = bam; - } - /* unnecessary but keep in order anyway */ - NrnThreadBAList **ptbl = nt.tbl + i; - for (auto tml = nt.tml; tml; tml = tml->next) { - if (bamap[tml->index]) { - Memb_list* ml = tml->ml; - auto tbl = (NrnThreadBAList*)emalloc(sizeof(NrnThreadBAList)); - tbl->next = nullptr; - tbl->bam = bamap[tml->index]; - tbl->ml = ml; - *ptbl = tbl; - ptbl = &(tbl->next); - } - } - } - - // for fast watch statement checking - // setup a list of types that have WATCH statement - { - int sz = 0; // count the types with WATCH - for (auto tml = nt.tml; tml; tml = tml->next) { - if (corenrn.get_watch_check()[tml->index]) { - ++sz; - } - } - if (sz) { - nt._watch_types = (int*)ecalloc(sz + 1, sizeof(int)); // nullptr terminated - sz = 0; - for (auto tml = nt.tml; tml; tml = tml->next) { - if (corenrn.get_watch_check()[tml->index]) { - nt._watch_types[sz++] = tml->index; - } - } - } - } - auto& pnttype2presyn = corenrn.get_pnttype2presyn(); - auto& nrn_has_net_event_ = corenrn.get_has_net_event(); - // from nrn_has_net_event create pnttype2presyn. - if (pnttype2presyn.empty()) { - pnttype2presyn.resize(memb_func.size(), -1); - } - for (size_t i = 0; i < nrn_has_net_event_.size(); ++i) { - pnttype2presyn[nrn_has_net_event_[i]] = i; - } - // create the nt.pnt2presyn_ix array of arrays. 
- nt.pnt2presyn_ix = (int**)ecalloc(nrn_has_net_event_.size(), sizeof(int*)); - for (size_t i = 0; i < nrn_has_net_event_.size(); ++i) { - Memb_list* ml = nt._ml_list[nrn_has_net_event_[i]]; - if (ml && ml->nodecount > 0) { - nt.pnt2presyn_ix[i] = (int*)ecalloc(ml->nodecount, sizeof(int)); - } - } - - // Real cells are at the beginning of the nt.presyns followed by - // acells (with and without gids mixed together) - // Here we associate the real cells with voltage pointers and - // acell PreSyn with the Point_process. - // nt.presyns order same as output_vindex order - int *output_vindex, *pnttype, *pntindex; - double *output_threshold, *delay; - if (direct) { - (*nrn2core_get_dat2_3_)(nt.id, nt.n_weight, output_vindex, output_threshold, pnttype, - pntindex, nt.weights, delay); - } - if (!direct) { - output_vindex = F.read_array(nt.n_presyn); - } -#if CHKPNTDEBUG - ntc.output_vindex = new int[nt.n_presyn]; - memcpy(ntc.output_vindex, output_vindex, nt.n_presyn * sizeof(int)); -#endif - if (nt._permute) { - // only indices >= 0 (i.e. _actual_v indices) will be changed. - node_permute(output_vindex, nt.n_presyn, nt._permute); - } - if (!direct) { - output_threshold = F.read_array(nt.ncell); - } -#if CHKPNTDEBUG - ntc.output_threshold = new double[nt.ncell]; - memcpy(ntc.output_threshold, output_threshold, nt.ncell * sizeof(double)); -#endif - for (int i = 0; i < nt.n_presyn; ++i) { // real cells - PreSyn* ps = nt.presyns + i; - - int ix = output_vindex[i]; - if (ix == -1 && i < nt.ncell) { // real cell without a presyn - continue; - } - if (ix < 0) { - ix = -ix; - int index = ix / 1000; - int type = ix - index * 1000; - Point_process* pnt = nt.pntprocs + (pnt_offset[type] + index); - ps->pntsrc_ = pnt; - // pnt->_presyn = ps; - int ip2ps = pnttype2presyn[pnt->_type]; - if (ip2ps >= 0) { - nt.pnt2presyn_ix[ip2ps][pnt->_i_instance] = i; - } - if (ps->gid_ < 0) { - ps->gid_ = -1; - } - } else { - assert(ps->gid_ > -1); - ps->thvar_index_ = ix; // index into _actual_v - assert(ix < nt.end); - ps->threshold_ = output_threshold[i]; - } - } - delete[] output_vindex; - delete[] output_threshold; - - // initial net_send_buffer size about 1% of number of presyns - // nt._net_send_buffer_size = nt.ncell/100 + 1; - // but, to avoid reallocation complexity on GPU ... - nt._net_send_buffer_size = nt.ncell; - nt._net_send_buffer = (int*)ecalloc_align(nt._net_send_buffer_size, sizeof(int)); - - // do extracon later as the target and weight info - // is not directly in the file - int nnetcon = nt.n_netcon - nrn_setup_extracon; - - int nweight = nt.n_weight; - // printf("nnetcon=%d nweight=%d\n", nnetcon, nweight); - // it may happen that Point_process structures will be made unnecessary - // by factoring into NetCon. - - // Make NetCon.target_ point to proper Point_process. Only the NetCon - // with pnttype[i] > 0 have a target. 
- if (!direct) { - pnttype = F.read_array(nnetcon); - } - if (!direct) { - pntindex = F.read_array(nnetcon); - } -#if CHKPNTDEBUG - ntc.pnttype = new int[nnetcon]; - ntc.pntindex = new int[nnetcon]; - memcpy(ntc.pnttype, pnttype, nnetcon * sizeof(int)); - memcpy(ntc.pntindex, pntindex, nnetcon * sizeof(int)); -#endif - for (int i = 0; i < nnetcon; ++i) { - int type = pnttype[i]; - if (type > 0) { - int index = pnt_offset[type] + pntindex[i]; /// Potentially uninitialized pnt_offset[], - /// check for previous assignments - NetCon& nc = nt.netcons[i]; - nc.target_ = nt.pntprocs + index; - nc.active_ = true; - } - } - - int extracon_target_nweight = 0; - if (nrn_setup_extracon > 0) { - // Fill in the extracon target_ and active_. - // Simplistic. - // Rotate through the pntindex and use only pnttype for ProbAMPANMDA_EMS - // (which happens to have a weight vector length of 5.) - // Edge case: if there is no such synapse, let the target_ be nullptr - // and the netcon be inactive. - // Same pattern as algorithm for extracon netcon_srcgid above in phase1. - int extracon_target_type = nrn_get_mechtype("ProbAMPANMDA_EMS"); - assert(extracon_target_type > 0); - extracon_target_nweight = corenrn.get_pnt_receive_size()[extracon_target_type]; - int j = 0; - for (int i = 0; i < nrn_setup_extracon; ++i) { - int active = 0; - for (int k = 0; k < nnetcon; ++k) { - if (pnttype[j] == extracon_target_type) { - active = 1; - break; - } - j = (j + 1) % nnetcon; - } - NetCon& nc = nt.netcons[i + nnetcon]; - nc.active_ = active; - if (active) { - nc.target_ = nt.pntprocs + (pnt_offset[extracon_target_type] + pntindex[j]); - } else { - nc.target_ = nullptr; - } - } - } - - delete[] pntindex; - - // weights in netcons order in groups defined by Point_process target type. - nt.n_weight += nrn_setup_extracon * extracon_target_nweight; - if (!direct) { - nt.weights = (double*)ecalloc_align(nt.n_weight, sizeof(double)); - F.read_array(nt.weights, nweight); - } - - int iw = 0; - for (int i = 0; i < nnetcon; ++i) { - NetCon& nc = nt.netcons[i]; - nc.u.weight_index_ = iw; - if (pnttype[i] != 0) { - iw += corenrn.get_pnt_receive_size()[pnttype[i]]; - } else { - iw += 1; - } - } - assert(iw == nweight); - delete[] pnttype; - - // delays in netcons order - if (!direct) { - delay = F.read_array(nnetcon); - } -#if CHKPNTDEBUG - ntc.delay = new double[nnetcon]; - memcpy(ntc.delay, delay, nnetcon * sizeof(double)); -#endif - for (int i = 0; i < nnetcon; ++i) { - NetCon& nc = nt.netcons[i]; - nc.delay_ = delay[i]; - } - delete[] delay; - - if (nrn_setup_extracon > 0) { - // simplistic. 
delay is 1 and weight is 0.001 - for (int i = 0; i < nrn_setup_extracon; ++i) { - NetCon& nc = nt.netcons[nnetcon + i]; - nc.delay_ = 1.0; - nc.u.weight_index_ = nweight + i * extracon_target_nweight; - nt.weights[nc.u.weight_index_] = 2.0; // this value 2.0 is extracted from .dat files - } - } - - // BBCOREPOINTER information - if (direct) { - (*nrn2core_get_dat2_corepointer_)(nt.id, npnt); - } else { - npnt = F.read_int(); - } -#if CHKPNTDEBUG - ntc.nbcp = npnt; - ntc.bcpicnt = new int[npnt]; - ntc.bcpdcnt = new int[npnt]; - ntc.bcptype = new int[npnt]; -#endif - for (auto tml = nt.tml; tml; tml = tml->next) { - int type = tml->index; - if (!corenrn.get_bbcore_read()[type]) { - continue; - } - int* iArray = nullptr; - double* dArray = nullptr; - int icnt, dcnt; - if (direct) { - (*nrn2core_get_dat2_corepointer_mech_)(nt.id, type, icnt, dcnt, iArray, dArray); - } else { - type = F.read_int(); - icnt = F.read_int(); - dcnt = F.read_int(); - if (icnt) { - iArray = F.read_array(icnt); - } - if (dcnt) { - dArray = F.read_array(dcnt); - } - } - if (!corenrn.get_bbcore_write()[type] && nrn_checkpoint_arg_exists) { - fprintf( - stderr, - "Checkpoint is requested involving BBCOREPOINTER but there is no bbcore_write function for %s\n", - memb_func[type].sym); - assert(corenrn.get_bbcore_write()[type]); - } -#if CHKPNTDEBUG - ntc.bcptype[i] = type; - ntc.bcpicnt[i] = icnt; - ntc.bcpdcnt[i] = dcnt; -#endif - int ik = 0; - int dk = 0; - Memb_list* ml = nt._ml_list[type]; - int dsz = nrn_prop_param_size_[type]; - int pdsz = nrn_prop_dparam_size_[type]; - int cntml = ml->nodecount; - int layout = corenrn.get_mech_data_layout()[type]; - for (int j = 0; j < cntml; ++j) { - int jp = j; - if (ml->_permute) { - jp = ml->_permute[j]; - } - double* d = ml->data; - Datum* pd = ml->pdata; - d += nrn_i_layout(jp, cntml, 0, dsz, layout); - pd += nrn_i_layout(jp, cntml, 0, pdsz, layout); - int aln_cntml = nrn_soa_padded_size(cntml, layout); - (*corenrn.get_bbcore_read()[type])(dArray, iArray, &dk, &ik, 0, aln_cntml, d, pd, ml->_thread, - &nt, 0.0); - } - assert(dk == dcnt); - assert(ik == icnt); - if (ik) { - delete[] iArray; - } - if (dk) { - delete[] dArray; - } - } - // VecPlayContinuous instances - // No attempt at memory efficiency - int n; - if (direct) { - (*nrn2core_get_dat2_vecplay_)(nt.id, n); - } else { - n = F.read_int(); - } - nt.n_vecplay = n; - if (n) { - nt._vecplay = new void*[n]; +void read_phase2(NrnThread& nt, UserParams& userParams) { + Phase2 p2; + if (corenrn_embedded) { + p2.read_direct(nt.id, nt); } else { - nt._vecplay = nullptr; - } -#if CHKPNTDEBUG - ntc.vecplay_ix = new int[n]; - ntc.vtype = new int[n]; - ntc.mtype = new int[n]; -#endif - for (int i = 0; i < n; ++i) { - int vtype, mtype, ix, sz; - double *yvec1, *tvec1; - if (direct) { - (*nrn2core_get_dat2_vecplay_inst_)(nt.id, i, vtype, mtype, ix, sz, yvec1, tvec1); - } else { - vtype = F.read_int(); - mtype = F.read_int(); - ix = F.read_int(); - sz = F.read_int(); - } - nrn_assert(vtype == VecPlayContinuousType); -#if CHKPNTDEBUG - ntc.vtype[i] = vtype; -#endif -#if CHKPNTDEBUG - ntc.mtype[i] = mtype; -#endif - Memb_list* ml = nt._ml_list[mtype]; -#if CHKPNTDEBUG - ntc.vecplay_ix[i] = ix; -#endif - IvocVect* yvec = vector_new(sz); - IvocVect* tvec = vector_new(sz); - if (direct) { - double* py = vector_vec(yvec); - double* pt = vector_vec(tvec); - for (int j = 0; j < sz; ++j) { - py[j] = yvec1[j]; - pt[j] = tvec1[j]; - } - // yvec1 and tvec1 are not deleted as that space is within - // NEURON Vector - } else { - 
F.read_array(vector_vec(yvec), sz); - F.read_array(vector_vec(tvec), sz); - } - ix = nrn_param_layout(ix, mtype, ml); - if (ml->_permute) { - ix = nrn_index_permute(ix, mtype, ml); - } - nt._vecplay[i] = new VecPlayContinuous(ml->data + ix, yvec, tvec, nullptr, nt.id); - } - if (!direct) { - // store current checkpoint state to continue reading mapping - F.record_checkpoint(); - - // If not at end of file, then this must be a checkpoint and restore tqueue. - if (!F.eof()) { - checkpoint_restore_tqueue(nt, F); - } - } - - // NetReceiveBuffering - for (auto& net_buf_receive : corenrn.get_net_buf_receive()) { - int type = net_buf_receive.second; - // Does this thread have this type. - Memb_list* ml = nt._ml_list[type]; - if (ml) { // needs a NetReceiveBuffer - NetReceiveBuffer_t* nrb = - (NetReceiveBuffer_t*)ecalloc_align(1, sizeof(NetReceiveBuffer_t)); - ml->_net_receive_buffer = nrb; - nrb->_pnt_offset = pnt_offset[type]; - - // begin with a size of 5% of the number of instances - nrb->_size = ml->nodecount; - // or at least 8 - nrb->_size = std::max(8, nrb->_size); - // but not more than nodecount - nrb->_size = std::min(ml->nodecount, nrb->_size); - - nrb->_pnt_index = (int*)ecalloc_align(nrb->_size, sizeof(int)); - nrb->_displ = (int*)ecalloc_align(nrb->_size + 1, sizeof(int)); - nrb->_nrb_index = (int*)ecalloc_align(nrb->_size, sizeof(int)); - nrb->_weight_index = (int*)ecalloc_align(nrb->_size, sizeof(int)); - nrb->_nrb_t = (double*)ecalloc_align(nrb->_size, sizeof(double)); - nrb->_nrb_flag = (double*)ecalloc_align(nrb->_size, sizeof(double)); - } - } - - // NetSendBuffering - for (int type : corenrn.get_net_buf_send_type()) { - // Does this thread have this type. - Memb_list* ml = nt._ml_list[type]; - if (ml) { // needs a NetSendBuffer - NetSendBuffer_t* nsb = (NetSendBuffer_t*)ecalloc_align(1, sizeof(NetSendBuffer_t)); - ml->_net_send_buffer = nsb; - - // begin with a size equal to twice number of instances - // at present there is no provision for dynamically increasing this. - nsb->_size = ml->nodecount * 2; - nsb->_cnt = 0; - - nsb->_sendtype = (int*)ecalloc_align(nsb->_size, sizeof(int)); - nsb->_vdata_index = (int*)ecalloc_align(nsb->_size, sizeof(int)); - nsb->_pnt_index = (int*)ecalloc_align(nsb->_size, sizeof(int)); - nsb->_weight_index = (int*)ecalloc_align(nsb->_size, sizeof(int)); - // when == 1, NetReceiveBuffer_t is newly allocated (i.e. we need to free previous copy - // and recopy new data - nsb->reallocated = 1; - nsb->_nsb_t = (double*)ecalloc_align(nsb->_size, sizeof(double)); - nsb->_nsb_flag = (double*)ecalloc_align(nsb->_size, sizeof(double)); - } + p2.read_file(userParams.file_reader[nt.id], nt); } + p2.populate(nt, userParams); } /** read mapping information for neurons */ -void read_phase3(FileHandler& F, int imult, NrnThread& nt) { - (void)imult; - +void read_phase3(NrnThread& nt, UserParams& userParams) { /** restore checkpoint state (before restoring queue items */ + auto& F = userParams.file_reader[nt.id]; F.restore_checkpoint(); /** mapping information for all neurons in single NrnThread */ diff --git a/coreneuron/io/nrn_setup.hpp b/coreneuron/io/nrn_setup.hpp index c1c57ad13..212381d75 100644 --- a/coreneuron/io/nrn_setup.hpp +++ b/coreneuron/io/nrn_setup.hpp @@ -32,21 +32,15 @@ THE POSSIBILITY OF SUCH DAMAGE. 
#include #include "coreneuron/sim/multicore.hpp" #include "coreneuron/io/nrn_filehandler.hpp" +#include "coreneuron/io/nrn2core_direct.h" +#include "coreneuron/io/user_params.hpp" +#include "coreneuron/io/mem_layout_util.hpp" namespace coreneuron { -static bool do_not_open; -static int ngroup_w; -static int* gidgroups_w; -static int* imult_w; -static const char* path_w; -static const char* restore_path_w; -static FileHandler* file_reader_w; -static bool byte_swap_w; - -static void read_phase1(FileHandler& F, int imult, NrnThread& nt); -static void read_phase2(FileHandler& F, int imult, NrnThread& nt); -static void read_phase3(FileHandler& F, int imult, NrnThread& nt); -static void read_phasegap(FileHandler& F, int imult, NrnThread& nt); +static void read_phase1(NrnThread& nt, UserParams& userParams); +static void read_phase2(NrnThread& nt, UserParams& userParams); +static void read_phase3(NrnThread& nt, UserParams& userParams); +static void read_phasegap(NrnThread& nt, UserParams& userParams); static void setup_ThreadData(NrnThread& nt); // Functions to load and clean data; @@ -89,58 +83,56 @@ inline std::string getPhaseName() { /// Reading phase selector. template -inline void read_phase_aux(FileHandler& F, int imult, NrnThread& nt); +inline void read_phase_aux(NrnThread& nt, UserParams&); template <> -inline void read_phase_aux(FileHandler& F, int imult, NrnThread& nt) { - read_phase1(F, imult, nt); +inline void read_phase_aux(NrnThread& nt, UserParams& userParams) { + read_phase1(nt, userParams); } template <> -inline void read_phase_aux(FileHandler& F, int imult, NrnThread& nt) { - read_phase2(F, imult, nt); +inline void read_phase_aux(NrnThread& nt, UserParams& userParams) { + read_phase2(nt, userParams); } template <> -inline void read_phase_aux(FileHandler& F, int imult, NrnThread& nt) { - read_phase3(F, imult, nt); +inline void read_phase_aux(NrnThread& nt, UserParams& userParams) { + read_phase3(nt, userParams); } template <> -inline void read_phase_aux(FileHandler& F, int imult, NrnThread& nt) { - read_phasegap(F, imult, nt); +inline void read_phase_aux(NrnThread& nt, UserParams& userParams) { + read_phasegap(nt, userParams); } /// Reading phase wrapper for each neuron group. template -inline void* phase_wrapper_w(NrnThread* nt) { - bool no_open = do_not_open; +inline void* phase_wrapper_w(NrnThread* nt, UserParams& userParams, bool in_memory_transfer) { int i = nt->id; - char fnamebuf[1000]; - if (i < ngroup_w) { - if (!no_open) { - const char* data_dir = path_w; + if (i < userParams.ngroup) { + if (!in_memory_transfer) { + const char* data_dir = userParams.path; // directory to read could be different for phase 2 if we are restoring // all other phases still read from dataset directory because the data // is constant if (P == 2) { - data_dir = restore_path_w; + data_dir = userParams.restore_path; } - std::string fname = std::string(data_dir) + "/" + std::to_string(gidgroups_w[i]) + "_" + getPhaseName
<P>
() + ".dat"; + std::string fname = std::string(data_dir) + "/" + std::to_string(userParams.gidgroups[i]) + "_" + getPhaseName
<P>
() + ".dat"; // Avoid trying to open the gid_gap.dat file if it doesn't exist when there are no // gap junctions in this gid - if (P == gap && !file_reader_w[i].file_exist(fname.c_str())) { - file_reader_w[i].close(); + if (P == gap && !userParams.file_reader[i].file_exist(fname)) { + userParams.file_reader[i].close(); } else { // otherwise, open (or reopen) the file for this phase - file_reader_w[i].open(fname.c_str(), byte_swap_w); + userParams.file_reader[i].open(fname); } } - read_phase_aux
<P>
(file_reader_w[i], imult_w[i], *nt); - if (!no_open) { - file_reader_w[i].close(); + read_phase_aux
<P>
(*nt, userParams); + if (!in_memory_transfer) { + userParams.file_reader[i].close(); } if (P == 2) { setup_ThreadData(*nt); @@ -151,14 +143,8 @@ inline void* phase_wrapper_w(NrnThread* nt) { /// Specific phase reading executed by threads. template -inline static void phase_wrapper(int direct = 0) { - if (direct) { - do_not_open = true; - } - nrn_multithread_job(phase_wrapper_w
<P>
); - if (direct) { - do_not_open = false; - } +inline static void phase_wrapper(UserParams& userParams, int direct = 0) { + nrn_multithread_job(phase_wrapper_w
<P>
, userParams, direct != 0); } } // namespace coreneuron } // namespace coreneuron diff --git a/coreneuron/io/phase1.cpp b/coreneuron/io/phase1.cpp new file mode 100644 index 000000000..422ceb3d1 --- /dev/null +++ b/coreneuron/io/phase1.cpp @@ -0,0 +1,184 @@ +#include +#include + +#include "coreneuron/nrniv/nrniv_decl.h" +#include "coreneuron/sim/multicore.hpp" +#include "coreneuron/io/phase1.hpp" +#include "coreneuron/utils/nrnoc_aux.hpp" + +int (*nrn2core_get_dat1_)(int tid, + int& n_presyn, + int& n_netcon, + int*& output_gid, + int*& netcon_srcgid); + +namespace coreneuron { +void Phase1::read_file(FileHandler& F) { + assert(!F.fail()); + int n_presyn = F.read_int(); /// Number of PreSyn-s in NrnThread nt + int n_netcon = F.read_int(); /// Number of NetCon-s in NrnThread nt + + this->output_gids = F.read_vector(n_presyn); + this->netcon_srcgids = F.read_vector(n_netcon); + + F.close(); +} + +void Phase1::read_direct(int thread_id) { + int* output_gids; + int* netcon_srcgid; + int n_presyn; + int n_netcon; + + // TODO : check error codes for NEURON - CoreNEURON communication + int valid = + (*nrn2core_get_dat1_)(thread_id, n_presyn, n_netcon, output_gids, netcon_srcgid); + if (!valid) { + return; + } + + this->output_gids = std::vector(output_gids, output_gids + n_presyn); + delete[] output_gids; + this->netcon_srcgids = std::vector(netcon_srcgid, netcon_srcgid + n_netcon); + delete[] netcon_srcgid; +} + +void Phase1::shift_gids(int imult) { + // maxgid is the maxgid to have the different multiple in the same gid space. + int maxgid = 0x7fffffff / nrn_setup_multiple; + // this value is the beginning of the new cluster of gids. + // the correct cluster is choose with imult. + int offset_gids = imult * maxgid; // offset for each gid + + // offset the (non-negative) gids according to multiple + // make sure everything fits into gid space. + for (auto& gid: this->output_gids) { + if (gid >= 0) { + nrn_assert(gid < maxgid); + gid += offset_gids; + } + } + + for (auto& srcgid: this->netcon_srcgids) { + if (srcgid >= 0) { + nrn_assert(srcgid < maxgid); + srcgid += offset_gids; + } + } +} + +void Phase1::add_extracon(NrnThread& nt, int imult) { + int maxgid = 0x7fffffff / nrn_setup_multiple; + if (nrn_setup_extracon <= 0) { + return; + } + + // very simplistic + // Use this threads positive source gids - zz in nt.netcon order as the + // source gids for extracon. + // The edge cases are: + // The 0th duplicate uses uses source gids for the last duplicate. + // If there are fewer positive source gids than extracon, then keep + // rotating through the nt.netcon . + // If there are no positive source gids, use a source gid of -1. + // Would not be difficult to modify so that random positive source was + // used, and/or random connect to another duplicate. + // Note that we increment the nt.n_netcon at the end of this function. + int sidoffset = 0; // how much to increment the corresponding positive gid + // like ring connectivity + if (imult > 0) { + sidoffset = -maxgid; + } else if (nrn_setup_multiple > 1) { + sidoffset = (nrn_setup_multiple - 1) * maxgid; + } + // set up the extracon srcgid_ + int* nc_srcgid = netcon_srcgid[nt.id]; + int j = 0; // rotate through the n_netcon netcon_srcgid + for (int i = 0; i < nrn_setup_extracon; ++i) { + int sid = -1; + for (int k = 0; k < nt.n_netcon; ++k) { + // potentially rotate j through the entire n_netcon but no further + sid = nc_srcgid[j]; + j = (j + 1) % nt.n_netcon; + if (sid >= 0) { + break; + } + } + if (sid < 0) { // only connect to real cells. 
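+            // (reached when no positive source gid exists anywhere in this thread's netcon list)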
+ sid = -1; + } else { + sid += sidoffset; + } + nc_srcgid[nt.n_netcon + i] = sid; + } + // finally increment the n_netcon + nt.n_netcon += nrn_setup_extracon; +} + +#ifdef _OPENMP +void Phase1::populate(NrnThread& nt, int imult, OMP_Mutex& mut) { +#else +void Phase1::populate(NrnThread& nt, int imult) { +#endif + nt.n_presyn = this->output_gids.size(); + nt.n_netcon = this->netcon_srcgids.size(); + + shift_gids(imult); + + netcon_srcgid[nt.id] = new int[nt.n_netcon + nrn_setup_extracon]; + std::copy(this->netcon_srcgids.begin(), this->netcon_srcgids.end(), + netcon_srcgid[nt.id]); + + nt.netcons = new NetCon[nt.n_netcon + nrn_setup_extracon]; + nt.presyns_helper = (PreSynHelper*)ecalloc_align(nt.n_presyn, sizeof(PreSynHelper)); + + nt.presyns = new PreSyn[nt.n_presyn]; + PreSyn* ps = nt.presyns; + /// go through all presyns + for (auto& gid: this->output_gids) { + if (gid == -1) { + ++ps; + continue; + } + + { +#ifdef _OPENMP + const std::lock_guard lock(mut); +#endif + // Note that the negative (type, index) + // coded information goes into the neg_gid2out[tid] hash table. + // See netpar.cpp for the netpar_tid_... function implementations. + // Both that table and the process wide gid2out table can be deleted + // before the end of setup + + /// Put gid into the gid2out hash table with correspondent output PreSyn + /// Or to the negative PreSyn map + if (gid >= 0) { + char m[200]; + if (gid2in.find(gid) != gid2in.end()) { + sprintf(m, "gid=%d already exists as an input port", gid); + hoc_execerror( + m, + "Setup all the output ports on this process before using them as input ports."); + } + if (gid2out.find(gid) != gid2out.end()) { + sprintf(m, "gid=%d already exists on this process as an output port", gid); + hoc_execerror(m, 0); + } + ps->gid_ = gid; + ps->output_index_ = gid; + gid2out[gid] = ps; + } else { + nrn_assert(neg_gid2out[nt.id].find(gid) == neg_gid2out[nt.id].end()); + ps->output_index_ = -1; + neg_gid2out[nt.id][gid] = ps; + } + } // end of the mutex + + ++ps; + } + + add_extracon(nt, imult); +} + +} // namespace coreneuron diff --git a/coreneuron/io/phase1.hpp b/coreneuron/io/phase1.hpp new file mode 100644 index 000000000..65a9034ba --- /dev/null +++ b/coreneuron/io/phase1.hpp @@ -0,0 +1,30 @@ +#pragma once + +#include + +#include "coreneuron/io/nrn_filehandler.hpp" +#include "coreneuron/utils/nrnmutdec.h" + +namespace coreneuron { + +struct NrnThread; + +class Phase1 { + public: + void read_file(FileHandler& F); + void read_direct(int thread_id); +#ifdef _OPENMP + void populate(NrnThread& nt, int imult, OMP_Mutex& mut); +#else + void populate(NrnThread& nt, int imult); +#endif + + private: + void shift_gids(int imult); + void add_extracon(NrnThread& nt, int imult); + + std::vector output_gids; + std::vector netcon_srcgids; +}; + +} // namespace coreneuron diff --git a/coreneuron/io/phase2.cpp b/coreneuron/io/phase2.cpp new file mode 100644 index 000000000..445d612a5 --- /dev/null +++ b/coreneuron/io/phase2.cpp @@ -0,0 +1,1243 @@ +#include "coreneuron/io/phase2.hpp" +#include "coreneuron/coreneuron.hpp" +#include "coreneuron/sim/multicore.hpp" +#include "coreneuron/io/nrn_checkpoint.hpp" +#include "coreneuron/utils/nrnoc_aux.hpp" +#include "coreneuron/network/partrans.hpp" +#include "coreneuron/permute/cellorder.hpp" +#include "coreneuron/permute/node_permute.h" +#include "coreneuron/utils/vrecitem.h" +#include "coreneuron/io/mem_layout_util.hpp" + +int (*nrn2core_get_dat2_1_)(int tid, + int& ngid, + int& n_real_gid, + int& nnode, + int& ndiam, + int& nmech, + int*& 
tml_index, + int*& ml_nodecount, + int& nidata, + int& nvdata, + int& nweight); + +int (*nrn2core_get_dat2_2_)(int tid, + int*& v_parent_index, + double*& a, + double*& b, + double*& area, + double*& v, + double*& diamvec); + +int (*nrn2core_get_dat2_mech_)(int tid, + size_t i, + int dsz_inst, + int*& nodeindices, + double*& data, + int*& pdata); + +int (*nrn2core_get_dat2_3_)(int tid, + int nweight, + int*& output_vindex, + double*& output_threshold, + int*& netcon_pnttype, + int*& netcon_pntindex, + double*& weights, + double*& delays); + +int (*nrn2core_get_dat2_corepointer_)(int tid, int& n); + +int (*nrn2core_get_dat2_corepointer_mech_)(int tid, + int type, + int& icnt, + int& dcnt, + int*& iarray, + double*& darray); + +int (*nrn2core_get_dat2_vecplay_)(int tid, int& n); + +int (*nrn2core_get_dat2_vecplay_inst_)(int tid, + int i, + int& vptype, + int& mtype, + int& ix, + int& sz, + double*& yvec, + double*& tvec); + +namespace coreneuron { +template +inline void mech_data_layout_transform(T* data, int cnt, int sz, int layout) { + if (layout == Layout::AoS) { + return; + } + // layout is equal to Layout::SoA + int align_cnt = nrn_soa_padded_size(cnt, layout); + std::vector d(cnt * sz); + // copy matrix + for (int i = 0; i < cnt; ++i) { + for (int j = 0; j < sz; ++j) { + d[i * sz + j] = data[i * sz + j]; + } + } + // transform memory layout + for (int i = 0; i < cnt; ++i) { + for (int j = 0; j < sz; ++j) { + data[i + j * align_cnt] = d[i * sz + j]; + } + } +} + +void Phase2::read_file(FileHandler& F, const NrnThread& nt) { + n_output = F.read_int(); + n_real_output = F.read_int(); + n_node = F.read_int(); + n_diam = F.read_int(); + n_mech = F.read_int(); + mech_types = std::vector(n_mech, 0); + nodecounts = std::vector(n_mech, 0); + for (int i = 0; i < n_mech; ++i) { + mech_types[i] = F.read_int(); + nodecounts[i] = F.read_int(); + } + + // check mechanism compatibility before reading data + check_mechanism(); + + n_idata = F.read_int(); + n_vdata = F.read_int(); + int n_weight = F.read_int(); + v_parent_index = (int*)ecalloc_align(n_node, sizeof(int)); + F.read_array(v_parent_index, n_node); + + int n_data_padded = nrn_soa_padded_size(n_node, MATRIX_LAYOUT); + { + { // Compute size of _data and allocate + int n_data = 6 * n_data_padded; + if (n_diam > 0) { + n_data += n_data_padded; + } + for (size_t i = 0; i < n_mech; ++i) { + int layout = corenrn.get_mech_data_layout()[mech_types[i]]; + int n = nodecounts[i]; + int sz = corenrn.get_prop_param_size()[mech_types[i]]; + n_data = nrn_soa_byte_align(n_data); + n_data += nrn_soa_padded_size(n, layout) * sz; + } + _data = (double*)ecalloc_align(n_data, sizeof(double)); + } + F.read_array(_data + 2 * n_data_padded, n_node); + F.read_array(_data + 3 * n_data_padded, n_node); + F.read_array(_data + 5 * n_data_padded, n_node); + F.read_array(_data + 4 * n_data_padded, n_node); + if (n_diam > 0) { + F.read_array(_data + 6 * n_data_padded, n_node); + } + } + + size_t offset = 6 * n_data_padded; + if (n_diam > 0) { + offset += n_data_padded; + } + for (size_t i = 0; i < n_mech; ++i) { + int layout = corenrn.get_mech_data_layout()[mech_types[i]]; + int n = nodecounts[i]; + int sz = corenrn.get_prop_param_size()[mech_types[i]]; + int dsz = corenrn.get_prop_dparam_size()[mech_types[i]]; + offset = nrn_soa_byte_align(offset); + std::vector nodeindices; + if (!corenrn.get_is_artificial()[mech_types[i]]) { + nodeindices = F.read_vector(n); + } + F.read_array(_data + offset, sz * n); + offset += nrn_soa_padded_size(n, layout) * sz; + std::vector pdata; 
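+        // mechanisms without dparam entries (dsz == 0) have no pdata to read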
+ if (dsz > 0) { + pdata = F.read_vector(dsz * n); + } + tmls.emplace_back(TML{nodeindices, pdata, 0, {}, {}}); + } + output_vindex = F.read_vector(nt.n_presyn); + output_threshold = F.read_vector(n_real_output); + pnttype = F.read_vector(nt.n_netcon - nrn_setup_extracon); + pntindex = F.read_vector(nt.n_netcon - nrn_setup_extracon); + weights = F.read_vector(n_weight); + delay = F.read_vector(nt.n_netcon); + num_point_process = F.read_int(); + + for (size_t i = 0; i < n_mech; ++i) { + if (!corenrn.get_bbcore_read()[mech_types[i]]) { + continue; + } + tmls[i].type = F.read_int(); + int icnt = F.read_int(); + int dcnt = F.read_int(); + if (icnt > 0) { + tmls[i].iArray = F.read_vector(icnt); + } + if (dcnt > 0) { + tmls[i].dArray = F.read_vector(dcnt); + } + } + + int n_vec_play_continuous = F.read_int(); + vec_play_continuous.reserve(n_vec_play_continuous); + for (size_t i = 0; i < n_vec_play_continuous; ++i) { + VecPlayContinuous_ item; + item.vtype = F.read_int(); + item.mtype = F.read_int(); + item.ix = F.read_int(); + int sz = F.read_int(); + item.yvec = IvocVect(sz); + item.tvec = IvocVect(sz); + F.read_array(item.yvec.data(), sz); + F.read_array(item.tvec.data(), sz); + vec_play_continuous.push_back(std::move(item)); + } + + // store current checkpoint state to continue reading mapping + // The checkpoint numbering in phase 3 is a continuing of phase 2, and so will be restored + F.record_checkpoint(); + + if (F.eof()) + return; + + nrn_assert(F.read_int() == n_vec_play_continuous); + + for (int i = 0; i < n_vec_play_continuous; ++i) { + auto &vecPlay = vec_play_continuous[i]; + vecPlay.last_index = F.read_int(); + vecPlay.discon_index = F.read_int(); + vecPlay.ubound_index = F.read_int(); + } + + patstim_index = F.read_int(); + + nrn_assert(F.read_int() == -1); + + for (int i = 0; i < nt.n_presyn; ++i) { + preSynConditionEventFlags.push_back(F.read_int()); + } + + assert(F.read_int() == -1); + restore_events(F); + + assert(F.read_int() == -1); + restore_events(F); +} + +void Phase2::read_direct(int thread_id, const NrnThread& nt) { + int* types_ = nullptr; + int* nodecounts_ = nullptr; + int n_weight; + (*nrn2core_get_dat2_1_)(thread_id, n_output, n_real_output, n_node, n_diam, n_mech, types_, + nodecounts_, n_idata, n_vdata, n_weight); + mech_types = std::vector(types_, types_ + n_mech); + delete[] types_; + + nodecounts = std::vector(nodecounts_, nodecounts_ + n_mech); + delete[] nodecounts_; + + + // TODO: fix it in the future + int n_data_padded = nrn_soa_padded_size(n_node, MATRIX_LAYOUT); + int n_data = 6 * n_data_padded; + if (n_diam > 0) { + n_data += n_data_padded; + } + for (int i = 0; i < n_mech; ++i) { + int layout = corenrn.get_mech_data_layout()[mech_types[i]]; + int n = nodecounts[i]; + int sz = corenrn.get_prop_param_size()[mech_types[i]]; + n_data = nrn_soa_byte_align(n_data); + n_data += nrn_soa_padded_size(n, layout) * sz; + } + _data = (double*)ecalloc_align(n_data, sizeof(double)); + + v_parent_index = (int*)ecalloc_align(n_node, sizeof(int)); + double* actual_a = _data + 2 * n_data_padded; + double* actual_b = _data + 3 * n_data_padded; + double* actual_v = _data + 4 * n_data_padded; + double* actual_area = _data + 5 * n_data_padded; + double* actual_diam = n_diam > 0 ? 
_data + 6 * n_data_padded : nullptr; + (*nrn2core_get_dat2_2_)(thread_id, v_parent_index, actual_a, actual_b, actual_area, actual_v, actual_diam); + + tmls.resize(n_mech); + + auto& param_sizes = corenrn.get_prop_param_size(); + auto& dparam_sizes = corenrn.get_prop_dparam_size(); + int dsz_inst = 0; + size_t offset = 6 * n_data_padded; + if (n_diam > 0) offset += n_data_padded; + for (size_t i = 0; i < n_mech; ++i) { + auto& tml = tmls[i]; + int type = mech_types[i]; + int layout = corenrn.get_mech_data_layout()[type]; + offset = nrn_soa_byte_align(offset); + + tml.type = type; + tml.nodeindices.resize(nodecounts[i]); + tml.pdata.resize(nodecounts[i] * dparam_sizes[type]); + + int* nodeindices_ = nullptr; + double* data_ = _data + offset; + int* pdata_ = const_cast(tml.pdata.data()); + (*nrn2core_get_dat2_mech_)(thread_id, i, dparam_sizes[type] > 0 ? dsz_inst : 0, nodeindices_, data_, pdata_); + if (dparam_sizes[type] > 0) + dsz_inst++; + offset += nrn_soa_padded_size(nodecounts[i], layout) * param_sizes[type]; + if (nodeindices_) { + std::copy(nodeindices_, nodeindices_ + nodecounts[i], tml.nodeindices.data()); + free_memory(nodeindices_); + } + if (corenrn.get_is_artificial()[type]) { + assert(nodeindices_ == nullptr); + } + } + + int* output_vindex_ = nullptr; + double* output_threshold_ = nullptr; + int* pnttype_ = nullptr; + int* pntindex_ = nullptr; + double* weight_ = nullptr; + double* delay_ = nullptr; + (*nrn2core_get_dat2_3_)(thread_id, n_weight, output_vindex_, output_threshold_, pnttype_, + pntindex_, weight_, delay_); + + output_vindex = std::vector(output_vindex_, output_vindex_ + nt.n_presyn); + delete[] output_vindex_; + + output_threshold = std::vector(output_threshold_, output_threshold_ + n_real_output); + delete[] output_threshold_; + + int n_netcon = nt.n_netcon - nrn_setup_extracon; + pnttype = std::vector(pnttype_, pnttype_ + n_netcon); + delete[] pnttype_; + + pntindex = std::vector(pntindex_, pntindex_ + n_netcon); + delete[] pntindex_; + + weights = std::vector(weight_, weight_ + n_weight); + delete[] weight_; + + delay = std::vector(delay_, delay_ + n_netcon); + delete[] delay_; + + (*nrn2core_get_dat2_corepointer_)(nt.id, num_point_process); + + for (size_t i = 0; i < n_mech; ++i) { + // not all mod files have BBCOREPOINTER data to read + if (!corenrn.get_bbcore_read()[mech_types[i]]) { + continue; + } + int icnt; + int* iArray_ = nullptr; + int dcnt; + double* dArray_ = nullptr; + (*nrn2core_get_dat2_corepointer_mech_)(nt.id, tmls[i].type, icnt, dcnt, iArray_, dArray_); + tmls[i].iArray.resize(icnt); + std::copy(iArray_, iArray_ + icnt, tmls[i].iArray.begin()); + delete[] iArray_; + + tmls[i].dArray.resize(dcnt); + std::copy(dArray_, dArray_ + dcnt, tmls[i].dArray.begin()); + delete[] dArray_; + } + + int n_vec_play_continuous; + (*nrn2core_get_dat2_vecplay_)(thread_id, n_vec_play_continuous); + + for (size_t i = 0; i < n_vec_play_continuous; ++i) { + VecPlayContinuous_ item; + // yvec_ and tvec_ are not deleted as that space is within + // NEURON Vector + double *yvec_, *tvec_; + int sz; + (*nrn2core_get_dat2_vecplay_inst_)(thread_id, i, item.vtype, item.mtype, item.ix, sz, yvec_, tvec_); + item.yvec = IvocVect(sz); + item.tvec = IvocVect(sz); + std::copy(yvec_, yvec_ + sz, item.yvec.data()); + std::copy(tvec_, tvec_ + sz, item.tvec.data()); + vec_play_continuous.push_back(std::move(item)); + } +} + +/// Check if MOD file used between NEURON and CoreNEURON is same +void Phase2::check_mechanism() { + int diff_mech_count = 0; + for (int i = 0; i < n_mech; ++i) 
{ + if (std::any_of(corenrn.get_different_mechanism_type().begin(), + corenrn.get_different_mechanism_type().end(), + [&](int e) { return e == mech_types[i]; })) { + if (nrnmpi_myid == 0) { + printf("Error: %s is a different MOD file than used by NEURON!\n", + nrn_get_mechname(mech_types[i])); + } + diff_mech_count++; + } + } + + if (diff_mech_count > 0) { + if (nrnmpi_myid == 0) { + printf( + "Error : NEURON and CoreNEURON must use same mod files for compatibility, %d different mod file(s) found. Re-compile special and special-core!\n", + diff_mech_count); + nrn_abort(1); + } + } + +} + +/// Perform in memory transformation between AoS<>SoA for integer data +void Phase2::transform_int_data(int elem0, int nodecount, int* pdata, int i, int dparam_size, int layout, int n_node_) { + for (int iml = 0; iml < nodecount; ++iml) { + int* pd = pdata + nrn_i_layout(iml, nodecount, i, dparam_size, layout); + int ix = *pd; // relative to beginning of _actual_* + nrn_assert((ix >= 0) && (ix < n_node_)); + *pd = elem0 + ix; // relative to nt._data + } +} + +NrnThreadMembList* Phase2::create_tml(int mech_id, Memb_func& memb_func, int& shadow_rhs_cnt) { + auto tml = (NrnThreadMembList*)emalloc_align(sizeof(NrnThreadMembList)); + tml->next = nullptr; + tml->index = mech_types[mech_id]; + + tml->ml = (Memb_list*)ecalloc_align(1, sizeof(Memb_list)); + tml->ml->_net_receive_buffer = nullptr; + tml->ml->_net_send_buffer = nullptr; + tml->ml->_permute = nullptr; + if (memb_func.alloc == nullptr) { + hoc_execerror(memb_func.sym, "mechanism does not exist"); + } + tml->ml->nodecount = nodecounts[mech_id]; + if (!memb_func.sym) { + printf("%s (type %d) is not available\n", nrn_get_mechname(tml->index), tml->index); + exit(1); + } + tml->ml->_nodecount_padded = + nrn_soa_padded_size(tml->ml->nodecount, corenrn.get_mech_data_layout()[tml->index]); + if (memb_func.is_point && corenrn.get_is_artificial()[tml->index] == 0) { + // Avoid race for multiple PointProcess instances in same compartment. + if (tml->ml->nodecount > shadow_rhs_cnt) { + shadow_rhs_cnt = tml->ml->nodecount; + } + } + + return tml; +} + +void Phase2::set_net_send_buffer(Memb_list** ml_list, const std::vector& pnt_offset) { + // NetReceiveBuffering + for (auto& net_buf_receive : corenrn.get_net_buf_receive()) { + int type = net_buf_receive.second; + // Does this thread have this type. + Memb_list* ml = ml_list[type]; + if (ml) { // needs a NetReceiveBuffer + NetReceiveBuffer_t* nrb = + (NetReceiveBuffer_t*)ecalloc_align(1, sizeof(NetReceiveBuffer_t)); + ml->_net_receive_buffer = nrb; + nrb->_pnt_offset = pnt_offset[type]; + + // begin with a size of 5% of the number of instances + nrb->_size = ml->nodecount; + // or at least 8 + nrb->_size = std::max(8, nrb->_size); + // but not more than nodecount + nrb->_size = std::min(ml->nodecount, nrb->_size); + + nrb->_pnt_index = (int*)ecalloc_align(nrb->_size, sizeof(int)); + nrb->_displ = (int*)ecalloc_align(nrb->_size + 1, sizeof(int)); + nrb->_nrb_index = (int*)ecalloc_align(nrb->_size, sizeof(int)); + nrb->_weight_index = (int*)ecalloc_align(nrb->_size, sizeof(int)); + nrb->_nrb_t = (double*)ecalloc_align(nrb->_size, sizeof(double)); + nrb->_nrb_flag = (double*)ecalloc_align(nrb->_size, sizeof(double)); + } + } + + // NetSendBuffering + for (int type: corenrn.get_net_buf_send_type()) { + // Does this thread have this type. 
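+        // ml_list is indexed by mechanism type; a nullptr entry means this thread has no instances of it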
+ Memb_list* ml = ml_list[type]; + if (ml) { // needs a NetSendBuffer + NetSendBuffer_t* nsb = (NetSendBuffer_t*)ecalloc_align(1, sizeof(NetSendBuffer_t)); + ml->_net_send_buffer = nsb; + + // begin with a size equal to twice number of instances + // at present there is no provision for dynamically increasing this. + nsb->_size = ml->nodecount * 2; + nsb->_cnt = 0; + + nsb->_sendtype = (int*)ecalloc_align(nsb->_size, sizeof(int)); + nsb->_vdata_index = (int*)ecalloc_align(nsb->_size, sizeof(int)); + nsb->_pnt_index = (int*)ecalloc_align(nsb->_size, sizeof(int)); + nsb->_weight_index = (int*)ecalloc_align(nsb->_size, sizeof(int)); + // when == 1, NetReceiveBuffer_t is newly allocated (i.e. we need to free previous copy + // and recopy new data + nsb->reallocated = 1; + nsb->_nsb_t = (double*)ecalloc_align(nsb->_size, sizeof(double)); + nsb->_nsb_flag = (double*)ecalloc_align(nsb->_size, sizeof(double)); + } + } +} + +void Phase2::restore_events(FileHandler& F) { + int type; + while ((type = F.read_int()) != 0) { + double time; + F.read_array(&time, 1); + switch (type) { + case NetConType: { + auto event = std::make_shared(); + event->time = time; + event->netcon_index = F.read_int(); + events.emplace_back(type, event); + break; + } + case SelfEventType: { + auto event = std::make_shared(); + event->time = time; + event->target_type = F.read_int(); + event->point_proc_instance = F.read_int(); + event->target_instance = F.read_int(); + F.read_array(&event->flag, 1); + event->movable = F.read_int(); + event->weight_index = F.read_int(); + events.emplace_back(type, event); + break; + } + case PreSynType: { + auto event = std::make_shared(); + event->time = time; + event->presyn_index = F.read_int(); + events.emplace_back(type, event); + break; + } + case NetParEventType: { + auto event = std::make_shared(); + event->time = time; + events.emplace_back(type, event); + break; + } + case PlayRecordEventType: { + auto event = std::make_shared(); + event->time = time; + event->play_record_type = F.read_int(); + if (event->play_record_type == VecPlayContinuousType) { + event->vecplay_index = F.read_int(); + events.emplace_back(type, event); + } else { + nrn_assert(0); + } + break; + } + default: { + nrn_assert(0); + break; + } + } + } +} + +void Phase2::fill_before_after_lists(NrnThread& nt, const std::vector& memb_func) { + /// Fill the BA lists + std::vector before_after_map(memb_func.size()); + for (int i = 0; i < BEFORE_AFTER_SIZE; ++i) { + for (size_t ii = 0; ii < memb_func.size(); ++ii) { + before_after_map[ii] = nullptr; + } + for (auto bam = corenrn.get_bamech()[i]; bam; bam = bam->next) { + before_after_map[bam->type] = bam; + } + /* unnecessary but keep in order anyway */ + NrnThreadBAList **ptbl = nt.tbl + i; + for (auto tml = nt.tml; tml; tml = tml->next) { + if (before_after_map[tml->index]) { + auto tbl = (NrnThreadBAList*)emalloc(sizeof(NrnThreadBAList)); + tbl->next = nullptr; + tbl->bam = before_after_map[tml->index]; + tbl->ml = tml->ml; + *ptbl = tbl; + ptbl = &(tbl->next); + } + } + } +} + +void Phase2::pdata_relocation(const NrnThread& nt, const std::vector& memb_func) { + // Some pdata may index into data which has been reordered from AoS to + // SoA. The four possibilities are if semantics is -1 (area), -5 (pointer), + // -9 (diam), // or 0-999 (ion variables). + // Note that pdata has a layout and the // type block in nt.data into which + // it indexes, has a layout. 
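+    // For example, an entry with semantics -1 stores an index ix relative to
+    // the beginning of the area array; transform_int_data() rewrites it as
+    // (nt._actual_area - nt._data) + ix so it indexes nt._data directly.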
+ for (auto tml = nt.tml; tml; tml = tml->next) { + int type = tml->index; + int layout = corenrn.get_mech_data_layout()[type]; + int* pdata = tml->ml->pdata; + int cnt = tml->ml->nodecount; + int szdp = corenrn.get_prop_dparam_size()[type]; + int* semantics = memb_func[type].dparam_semantics; + + // compute only for ARTIFICIAL_CELL (has useful area pointer with semantics=-1) + if (!corenrn.get_is_artificial()[type]) { + if (szdp) { + if (!semantics) + continue; // temporary for HDFReport, Binreport which will be skipped in + // bbcore_write of HBPNeuron + nrn_assert(semantics); + } + + for (int i = 0; i < szdp; ++i) { + int s = semantics[i]; + switch(s) { + case -1: // area + transform_int_data(nt._actual_area - nt._data, cnt, pdata, i, szdp, layout, nt.end); + break; + case -9: // diam + transform_int_data(nt._actual_diam - nt._data, cnt, pdata, i, szdp, layout, nt.end); + break; + case -5: // pointer assumes a pointer to membrane voltage + transform_int_data(nt._actual_v - nt._data, cnt, pdata, i, szdp, layout, nt.end); + break; + default: + if (s >= 0 && s < 1000) { // ion + int etype = s; + /* if ion is SoA, must recalculate pdata values */ + /* if ion is AoS, have to deal with offset */ + Memb_list* eml = nt._ml_list[etype]; + int edata0 = eml->data - nt._data; + int ecnt = eml->nodecount; + int esz = corenrn.get_prop_param_size()[etype]; + for (int iml = 0; iml < cnt; ++iml) { + int* pd = pdata + nrn_i_layout(iml, cnt, i, szdp, layout); + int ix = *pd; // relative to the ion data + nrn_assert((ix >= 0) && (ix < ecnt * esz)); + /* Original pd order assumed ecnt groups of esz */ + *pd = edata0 + nrn_param_layout(ix, etype, eml); + } + } + } + } + } + } +} + +void Phase2::set_dependencies(const NrnThread& nt, const std::vector& memb_func) { + /* here we setup the mechanism dependencies. if there is a mechanism dependency + * then we allocate an array for tml->dependencies otherwise set it to nullptr. + * In order to find out the "real" dependencies i.e. dependent mechanism + * exist at the same compartment, we compare the nodeindices of mechanisms + * returned by nrn_mech_depend. 
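+ * For example, a dependency reported by nrn_mech_depend is kept only when
+ * the two mechanisms share at least one node index, i.e. the
+ * std::set_intersection of their sorted nodeindices is non-empty.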
+ */ + + /* temporary array for dependencies */ + int* mech_deps = (int*)ecalloc(memb_func.size(), sizeof(int)); + + for (auto tml = nt.tml; tml; tml = tml->next) { + /* initialize to null */ + tml->dependencies = nullptr; + tml->ndependencies = 0; + + /* get dependencies from the models */ + int deps_cnt = nrn_mech_depend(tml->index, mech_deps); + + /* if dependencies, setup dependency array */ + if (deps_cnt) { + /* store "real" dependencies in the vector */ + std::vector actual_mech_deps; + + Memb_list* ml = tml->ml; + int* nodeindices = ml->nodeindices; + + /* iterate over dependencies */ + for (int j = 0; j < deps_cnt; j++) { + /* memb_list of dependency mechanism */ + Memb_list* dml = nt._ml_list[mech_deps[j]]; + + /* dependency mechanism may not exist in the model */ + if (!dml) + continue; + + /* take nodeindices for comparison */ + int* dnodeindices = dml->nodeindices; + + /* set_intersection function needs temp vector to push the common values */ + std::vector node_intersection; + + /* make sure they have non-zero nodes and find their intersection */ + if ((ml->nodecount > 0) && (dml->nodecount > 0)) { + std::set_intersection(nodeindices, nodeindices + ml->nodecount, dnodeindices, + dnodeindices + dml->nodecount, + std::back_inserter(node_intersection)); + } + + /* if they intersect in the nodeindices, it's real dependency */ + if (!node_intersection.empty()) { + actual_mech_deps.push_back(mech_deps[j]); + } + } + + /* copy actual_mech_deps to dependencies */ + if (!actual_mech_deps.empty()) { + tml->ndependencies = actual_mech_deps.size(); + tml->dependencies = (int*)ecalloc(actual_mech_deps.size(), sizeof(int)); + std::copy(actual_mech_deps.begin(), actual_mech_deps.end(), tml->dependencies); + } + } + } + + /* free temp dependency array */ + free(mech_deps); +} + +void Phase2::handle_extracon(NrnThread& nt, int n_netcon, const std::vector& pnt_offset) { + int extracon_target_nweight = 0; + if (nrn_setup_extracon > 0) { + // Fill in the extracon target_ and active_. + // Simplistic. + // Rotate through the pntindex and use only pnttype for ProbAMPANMDA_EMS + // (which happens to have a weight vector length of 5.) + // Edge case: if there is no such synapse, let the target_ be nullptr + // and the netcon be inactive. + // Same pattern as algorithm for extracon netcon_srcgid above in phase1. + int extracon_target_type = nrn_get_mechtype("ProbAMPANMDA_EMS"); + assert(extracon_target_type > 0); + extracon_target_nweight = corenrn.get_pnt_receive_size()[extracon_target_type]; + int j = 0; + for (int i = 0; i < nrn_setup_extracon; ++i) { + bool extracon_find = false; + for (int k = 0; k < n_netcon; ++k) { + if (pnttype[j] == extracon_target_type) { + extracon_find = true; + break; + } + j = (j + 1) % n_netcon; + } + NetCon& nc = nt.netcons[i + n_netcon]; + nc.active_ = extracon_find ? 1 : 0; + if (extracon_find) { + nc.target_ = nt.pntprocs + (pnt_offset[extracon_target_type] + pntindex[j]); + } else { + nc.target_ = nullptr; + } + } + } + + { + nt.n_weight = weights.size(); + int nweight = nt.n_weight; + // weights in netcons order in groups defined by Point_process target type. 
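+        // For example, a NetCon whose target type has pnt_receive_size 5
+        // occupies 5 consecutive weights, while a NetCon with pnttype 0
+        // (no target) still reserves a single weight slot.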
+ nt.n_weight += nrn_setup_extracon * extracon_target_nweight; + nt.weights = (double*)ecalloc_align(nt.n_weight, sizeof(double)); + std::copy(weights.begin(), weights.end(), nt.weights); + + int iw = 0; + for (int i = 0; i < n_netcon; ++i) { + NetCon& nc = nt.netcons[i]; + nc.u.weight_index_ = iw; + if (pnttype[i] != 0) { + iw += corenrn.get_pnt_receive_size()[pnttype[i]]; + } else { + iw += 1; + } + } + assert(iw == nweight); + +#if CHKPNTDEBUG + ntc.delay = new double[n_netcon]; + memcpy(ntc.delay, delay, n_netcon * sizeof(double)); +#endif + for (int i = 0; i < n_netcon; ++i) { + NetCon& nc = nt.netcons[i]; + nc.delay_ = delay[i]; + } + + if (nrn_setup_extracon > 0) { + // simplistic. delay is 1 and weight is 0.001 + for (int i = 0; i < nrn_setup_extracon; ++i) { + NetCon& nc = nt.netcons[n_netcon + i]; + nc.delay_ = 1.0; + nc.u.weight_index_ = nweight + i * extracon_target_nweight; + nt.weights[nc.u.weight_index_] = 2.0; // this value 2.0 is extracted from .dat files + } + } + } +} + +void Phase2::get_info_from_bbcore(NrnThread& nt, const std::vector& memb_func) { + // BBCOREPOINTER information +#if CHKPNTDEBUG + ntc.nbcp = num_point_process; + ntc.bcpicnt = new int[num_point_process]; + ntc.bcpdcnt = new int[num_point_process]; + ntc.bcptype = new int[num_point_process]; +#endif + for (size_t i = 0; i < n_mech; ++i) { + int type = mech_types[i]; + if (!corenrn.get_bbcore_read()[type]) { + continue; + } + type = tmls[i].type; // This is not an error, but it has to be fixed I think + if (!corenrn.get_bbcore_write()[type] && nrn_checkpoint_arg_exists) { + fprintf( + stderr, + "Checkpoint is requested involving BBCOREPOINTER but there is no bbcore_write function for %s\n", + memb_func[type].sym); + assert(corenrn.get_bbcore_write()[type]); + } +#if CHKPNTDEBUG + ntc.bcptype[i] = type; + ntc.bcpicnt[i] = icnt; + ntc.bcpdcnt[i] = dcnt; +#endif + int ik = 0; + int dk = 0; + Memb_list* ml = nt._ml_list[type]; + int dsz = corenrn.get_prop_param_size()[type]; + int pdsz = corenrn.get_prop_dparam_size()[type]; + int cntml = ml->nodecount; + int layout = corenrn.get_mech_data_layout()[type]; + for (int j = 0; j < cntml; ++j) { + int jp = j; + if (ml->_permute) { + jp = ml->_permute[j]; + } + double* d = ml->data; + Datum* pd = ml->pdata; + d += nrn_i_layout(jp, cntml, 0, dsz, layout); + pd += nrn_i_layout(jp, cntml, 0, pdsz, layout); + int aln_cntml = nrn_soa_padded_size(cntml, layout); + (*corenrn.get_bbcore_read()[type])(tmls[i].dArray.data(), tmls[i].iArray.data(), &dk, &ik, 0, aln_cntml, d, pd, ml->_thread, + &nt, 0.0); + } + assert(dk == tmls[i].dArray.size()); + assert(ik == tmls[i].iArray.size()); + } +} + +void Phase2::set_vec_play(NrnThread& nt) { + // VecPlayContinuous instances + // No attempt at memory efficiency + nt.n_vecplay = vec_play_continuous.size(); + if (nt.n_vecplay) { + nt._vecplay = new void*[nt.n_vecplay]; + } else { + nt._vecplay = nullptr; + } +#if CHKPNTDEBUG + ntc.vecplay_ix = new int[nt.n_vecplay]; + ntc.vtype = new int[nt.n_vecplay]; + ntc.mtype = new int[nt.n_vecplay]; +#endif + for (int i = 0; i < nt.n_vecplay; ++i) { + auto& vecPlay = vec_play_continuous[i]; + nrn_assert(vecPlay.vtype == VecPlayContinuousType); +#if CHKPNTDEBUG + ntc.vtype[i] = vecPlay.vtype; +#endif +#if CHKPNTDEBUG + ntc.mtype[i] = vecPlay.mtype; +#endif + Memb_list* ml = nt._ml_list[vecPlay.mtype]; +#if CHKPNTDEBUG + ntc.vecplay_ix[i] = vecPlay.ix; +#endif + + vecPlay.ix = nrn_param_layout(vecPlay.ix, vecPlay.mtype, ml); + if (ml->_permute) { + vecPlay.ix = nrn_index_permute(vecPlay.ix, 
vecPlay.mtype, ml); + } + nt._vecplay[i] = new VecPlayContinuous(ml->data + vecPlay.ix, std::move(vecPlay.yvec), std::move(vecPlay.tvec), nullptr, nt.id); + } +} + +void Phase2::populate(NrnThread& nt, const UserParams& userParams) { + nrn_assert(userParams.imult[nt.id] >= 0); // avoid imult unused warning + NrnThreadChkpnt& ntc = nrnthread_chkpnt[nt.id]; + ntc.file_id = userParams.gidgroups[nt.id]; + + nt.ncell = n_real_output; + nt.end = n_node; + +#if CHKPNTDEBUG + ntc.n_outputgids = n_output; + ntc.nmech = n_mech; +#endif + + /// Checkpoint in coreneuron is defined for both phase 1 and phase 2 since they are written + /// together + nt._ml_list = (Memb_list**)ecalloc_align(corenrn.get_memb_funcs().size(), sizeof(Memb_list*)); + + auto& memb_func = corenrn.get_memb_funcs(); +#if CHKPNTDEBUG + ntc.mlmap = new Memb_list_chkpnt*[memb_func.size()]; + for (int i = 0; i < _memb_func.size(); ++i) { + ntc.mlmap[i] = nullptr; + } +#endif + + nt.stream_id = 0; + nt.compute_gpu = 0; + auto& nrn_prop_param_size_ = corenrn.get_prop_param_size(); + auto& nrn_prop_dparam_size_ = corenrn.get_prop_dparam_size(); + +/* read_phase2 is being called from openmp region + * and hence we can set the stream equal to current thread id. + * In fact we could set gid as stream_id when we will have nrn threads + * greater than number of omp threads. + */ +#if defined(_OPENMP) + nt.stream_id = omp_get_thread_num(); +#endif + + int shadow_rhs_cnt = 0; + nt.shadow_rhs_cnt = 0; + + NrnThreadMembList* tml_last = nullptr; + for (int i = 0; i < n_mech; ++i) { + auto tml = create_tml(i, memb_func[mech_types[i]], shadow_rhs_cnt); + + nt._ml_list[tml->index] = tml->ml; + +#if CHKPNTDEBUG + Memb_list_chkpnt* mlc = new Memb_list_chkpnt; + ntc.mlmap[tml->index] = mlc; +#endif + + if (nt.tml) { + tml_last->next = tml; + } else { + nt.tml = tml; + } + tml_last = tml; + } + + if (shadow_rhs_cnt) { + nt._shadow_rhs = + (double*)ecalloc_align(nrn_soa_padded_size(shadow_rhs_cnt, 0), sizeof(double)); + nt._shadow_d = + (double*)ecalloc_align(nrn_soa_padded_size(shadow_rhs_cnt, 0), sizeof(double)); + nt.shadow_rhs_cnt = shadow_rhs_cnt; + } + + nt.mapping = nullptr; // section segment mapping + + nt._nidata = n_idata; + if (nt._nidata) + nt._idata = (int*)ecalloc(nt._nidata, sizeof(int)); + else + nt._idata = nullptr; + // see patternstim.cpp + int extra_nv = (&nt == nrn_threads) ? nrn_extra_thread0_vdata : 0; + nt._nvdata = n_vdata; + if (nt._nvdata + extra_nv) + nt._vdata = (void**)ecalloc_align(nt._nvdata + extra_nv, sizeof(void*)); + else + nt._vdata = nullptr; + + // The data format begins with the matrix data + int n_data_padded = nrn_soa_padded_size(nt.end, MATRIX_LAYOUT); + nt._data = _data; + nt._actual_rhs = nt._data + 0 * n_data_padded; + nt._actual_d = nt._data + 1 * n_data_padded; + nt._actual_a = nt._data + 2 * n_data_padded; + nt._actual_b = nt._data + 3 * n_data_padded; + nt._actual_v = nt._data + 4 * n_data_padded; + nt._actual_area = nt._data + 5 * n_data_padded; + nt._actual_diam = n_diam ? nt._data + 6 * n_data_padded : nullptr; + + size_t offset = 6 * n_data_padded; + if (n_diam) { + // in the rare case that a mechanism has dparam with diam semantics + // then actual_diam array added after matrix in nt._data + // Generally wasteful since only a few diam are pointed to. + // Probably better to move the diam semantics to the p array of the mechanism + offset += n_data_padded; + } + + // Memb_list.data points into the nt._data array. 
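+    // Layout: the 6 (or 7, with diam) matrix arrays come first, then each
+    // mechanism block at a byte-aligned offset, occupying
+    // nrn_soa_padded_size(nodecount, layout) * param_size doubles.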
+ // Also count the number of Point_process + int num_point_process = 0; + for (auto tml = nt.tml; tml; tml = tml->next) { + Memb_list* ml = tml->ml; + int type = tml->index; + int layout = corenrn.get_mech_data_layout()[type]; + int n = ml->nodecount; + int sz = nrn_prop_param_size_[type]; + offset = nrn_soa_byte_align(offset); + ml->data = nt._data + offset; + offset += nrn_soa_padded_size(n, layout) * sz; + if (corenrn.get_pnt_map()[type] > 0) { + num_point_process += n; + } + } + nt.pntprocs = (Point_process*)ecalloc_align( + num_point_process, sizeof(Point_process)); // includes acell with and without gid + nt.n_pntproc = num_point_process; + nt._ndata = offset; + + + // matrix info + nt._v_parent_index = v_parent_index; + +#if CHKPNTDEBUG + ntc.parent = new int[nt.end]; + memcpy(ntc.parent, nt._v_parent_index, nt.end * sizeof(int)); + ntc.area = new double[nt.end]; + memcpy(ntc.area, nt._actual_area, nt.end * sizeof(double)); +#endif + + int synoffset = 0; + std::vector pnt_offset(memb_func.size()); + + // All the mechanism data and pdata. + // Also fill in the pnt_offset + // Complete spec of Point_process except for the acell presyn_ field. + int itml = 0; + for (auto tml = nt.tml; tml; tml = tml->next, ++itml) { + int type = tml->index; + Memb_list* ml = tml->ml; + int n = ml->nodecount; + int szp = nrn_prop_param_size_[type]; + int szdp = nrn_prop_dparam_size_[type]; + int layout = corenrn.get_mech_data_layout()[type]; + + ml->nodeindices = (int*)ecalloc_align(ml->nodecount, sizeof(int)); + std::copy(tmls[itml].nodeindices.begin(), tmls[itml].nodeindices.end(), ml->nodeindices); + + mech_data_layout_transform(ml->data, n, szp, layout); + + if (szdp) { + ml->pdata = (int*)ecalloc_align(nrn_soa_padded_size(n, layout) * szdp, sizeof(int)); + std::copy(tmls[itml].pdata.begin(), tmls[itml].pdata.end(), ml->pdata); + mech_data_layout_transform(ml->pdata, n, szdp, layout); + +#if CHKPNTDEBUG // Not substantive. Only for debugging. + Memb_list_ckpnt* mlc = ntc.mlmap[type]; + mlc->pdata_not_permuted = (int*)coreneuron::ecalloc_align(n * szdp, sizeof(int)); + if (layout == Layout::AoS) { // only copy + for (int i = 0; i < n; ++i) { + for (int j = 0; j < szdp; ++j) { + mlc->pdata_not_permuted[i * szdp + j] = ml->pdata[i * szdp + j]; + } + } + } else if (layout == Layout::SoA) { // transpose and unpad + int align_cnt = nrn_soa_padded_size(n, layout); + for (int i = 0; i < n; ++i) { + for (int j = 0; j < szdp; ++j) { + mlc->pdata_not_permuted[i * szdp + j] = ml->pdata[i + j * align_cnt]; + } + } + } +#endif + } else { + ml->pdata = nullptr; + } + if (corenrn.get_pnt_map()[type] > 0) { // POINT_PROCESS mechanism including acell + int cnt = ml->nodecount; + Point_process* pnt = nullptr; + pnt = nt.pntprocs + synoffset; + pnt_offset[type] = synoffset; + synoffset += cnt; + for (int i = 0; i < cnt; ++i) { + Point_process* pp = pnt + i; + pp->_type = type; + pp->_i_instance = i; + nt._vdata[ml->pdata[nrn_i_layout(i, cnt, 1, szdp, layout)]] = pp; + pp->_tid = nt.id; + } + } + } + + if (nrn_have_gaps) { + nrn_partrans::gap_thread_setup(nt); + } + + pdata_relocation(nt, memb_func); + + /* if desired, apply the node permutation. This involves permuting + at least the node parameter arrays for a, b, and area (and diam) and all + integer vector values that index into nodes. This could have been done + when originally filling the arrays with AoS ordered data, but can also + be done now, after the SoA transformation. 
The latter has the advantage + that the present order is consistent with all the layout values. Note + that after this portion of the permutation, a number of other node index + vectors will be read and will need to be permuted as well in subsequent + sections of this function. + */ + if (interleave_permute_type) { + nt._permute = interleave_order(nt.id, nt.ncell, nt.end, nt._v_parent_index); + } + if (nt._permute) { + int* p = nt._permute; + permute_data(nt._actual_a, nt.end, p); + permute_data(nt._actual_b, nt.end, p); + permute_data(nt._actual_area, nt.end, p); + permute_data(nt._actual_v, nt.end, + p); // need if restore or finitialize does not initialize voltage + if (nt._actual_diam) { + permute_data(nt._actual_diam, nt.end, p); + } + // index values change as well as ordering + permute_ptr(nt._v_parent_index, nt.end, p); + node_permute(nt._v_parent_index, nt.end, p); + +#if DEBUG + for (int i=0; i < nt.end; ++i) { + printf("parent[%d] = %d\n", i, nt._v_parent_index[i]); + } +#endif + + // specify the ml->_permute and sort the nodeindices + for (auto tml = nt.tml; tml; tml = tml->next) { + if (tml->ml->nodeindices) { // not artificial + permute_nodeindices(tml->ml, p); + permute_ml(tml->ml, tml->index, nt); + } + } + + // permute the Point_process._i_instance + for (int i = 0; i < nt.n_pntproc; ++i) { + Point_process& pp = nt.pntprocs[i]; + Memb_list* ml = nt._ml_list[pp._type]; + if (ml->_permute) { + pp._i_instance = ml->_permute[pp._i_instance]; + } + } + } + + if (nrn_have_gaps && interleave_permute_type) { + nrn_partrans::gap_indices_permute(nt); + } + + set_dependencies(nt, memb_func); + + fill_before_after_lists(nt, memb_func); + + // for fast watch statement checking + // setup a list of types that have WATCH statement + { + int sz = 0; // count the types with WATCH + for (auto tml = nt.tml; tml; tml = tml->next) { + if (corenrn.get_watch_check()[tml->index]) { + ++sz; + } + } + if (sz) { + nt._watch_types = (int*)ecalloc(sz + 1, sizeof(int)); // nullptr terminated + sz = 0; + for (auto tml = nt.tml; tml; tml = tml->next) { + if (corenrn.get_watch_check()[tml->index]) { + nt._watch_types[sz++] = tml->index; + } + } + } + } + auto& pnttype2presyn = corenrn.get_pnttype2presyn(); + auto& nrn_has_net_event_ = corenrn.get_has_net_event(); + // from nrn_has_net_event create pnttype2presyn. + if (pnttype2presyn.empty()) { + pnttype2presyn.resize(memb_func.size(), -1); + } + for (size_t i = 0; i < nrn_has_net_event_.size(); ++i) { + pnttype2presyn[nrn_has_net_event_[i]] = i; + } + // create the nt.pnt2presyn_ix array of arrays. + nt.pnt2presyn_ix = (int**)ecalloc(nrn_has_net_event_.size(), sizeof(int*)); + for (size_t i = 0; i < nrn_has_net_event_.size(); ++i) { + Memb_list* ml = nt._ml_list[nrn_has_net_event_[i]]; + if (ml && ml->nodecount > 0) { + nt.pnt2presyn_ix[i] = (int*)ecalloc(ml->nodecount, sizeof(int)); + } + } + + // Real cells are at the beginning of the nt.presyns followed by + // acells (with and without gids mixed together) + // Here we associate the real cells with voltage pointers and + // acell PreSyn with the Point_process. + // nt.presyns order same as output_vindex order +#if CHKPNTDEBUG + ntc.output_vindex = new int[nt.n_presyn]; + memcpy(ntc.output_vindex, output_vindex, nt.n_presyn * sizeof(int)); +#endif + if (nt._permute) { + // only indices >= 0 (i.e. _actual_v indices) will be changed. 
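+        // Negative entries encode acell sources as -(index*1000 + type)
+        // and are left untouched by node_permute.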
+        node_permute(output_vindex.data(), nt.n_presyn, nt._permute);
+    }
+#if CHKPNTDEBUG
+    ntc.output_threshold = new double[nt.ncell];
+    memcpy(ntc.output_threshold, output_threshold, nt.ncell * sizeof(double));
+#endif
+    for (int i = 0; i < nt.n_presyn; ++i) {  // real cells
+        PreSyn* ps = nt.presyns + i;
+
+        int ix = output_vindex[i];
+        if (ix == -1 && i < nt.ncell) {  // real cell without a presyn
+            continue;
+        }
+        if (ix < 0) {
+            ix = -ix;
+            int index = ix / 1000;
+            int type = ix % 1000;
+            Point_process* pnt = nt.pntprocs + (pnt_offset[type] + index);
+            ps->pntsrc_ = pnt;
+            // pnt->_presyn = ps;
+            int ip2ps = pnttype2presyn[pnt->_type];
+            if (ip2ps >= 0) {
+                nt.pnt2presyn_ix[ip2ps][pnt->_i_instance] = i;
+            }
+            if (ps->gid_ < 0) {
+                ps->gid_ = -1;
+            }
+        } else {
+            assert(ps->gid_ > -1);
+            ps->thvar_index_ = ix;  // index into _actual_v
+            assert(ix < nt.end);
+            ps->threshold_ = output_threshold[i];
+        }
+    }
+
+    // initial net_send_buffer size about 1% of number of presyns
+    // nt._net_send_buffer_size = nt.ncell/100 + 1;
+    // but, to avoid reallocation complexity on GPU ...
+    nt._net_send_buffer_size = nt.ncell;
+    nt._net_send_buffer = (int*)ecalloc_align(nt._net_send_buffer_size, sizeof(int));
+
+    // do extracon later as the target and weight info
+    // is not directly in the file
+    int nnetcon = nt.n_netcon - nrn_setup_extracon;
+
+    // it may happen that Point_process structures will be made unnecessary
+    // by factoring into NetCon.
+
+#if CHKPNTDEBUG
+    ntc.pnttype = new int[nnetcon];
+    ntc.pntindex = new int[nnetcon];
+    memcpy(ntc.pnttype, pnttype, nnetcon * sizeof(int));
+    memcpy(ntc.pntindex, pntindex, nnetcon * sizeof(int));
+#endif
+    for (int i = 0; i < nnetcon; ++i) {
+        int type = pnttype[i];
+        if (type > 0) {
+            // Note: pnt_offset[type] is only initialized for POINT_PROCESS
+            // types handled in the mechanism loop above.
+            int index = pnt_offset[type] + pntindex[i];
+            NetCon& nc = nt.netcons[i];
+            nc.target_ = nt.pntprocs + index;
+            nc.active_ = true;
+        }
+    }
+
+    handle_extracon(nt, nnetcon, pnt_offset);
+
+    get_info_from_bbcore(nt, memb_func);
+
+    set_vec_play(nt);
+
+    if (!events.empty()) {
+        checkpoint_restore_tqueue(nt, *this);
+    }
+
+    set_net_send_buffer(nt._ml_list, pnt_offset);
+}
+}  // namespace coreneuron
+
diff --git a/coreneuron/io/phase2.hpp b/coreneuron/io/phase2.hpp
new file mode 100644
index 000000000..e299d4995
--- /dev/null
+++ b/coreneuron/io/phase2.hpp
@@ -0,0 +1,111 @@
+#pragma once
+
+#include "coreneuron/io/nrn_filehandler.hpp"
+#include "coreneuron/io/user_params.hpp"
+#include "coreneuron/utils/ivocvect.hpp"
+
+#include <memory>
+#include <utility>
+#include <vector>
+
+namespace coreneuron {
+struct NrnThread;
+struct NrnThreadMembList;
+struct Memb_func;
+struct Memb_list;
+
+class Phase2 {
+  public:
+    void read_file(FileHandler& F, const NrnThread& nt);
+    void read_direct(int thread_id, const NrnThread& nt);
+    void populate(NrnThread& nt, const UserParams& userParams);
+
+    std::vector<int> preSynConditionEventFlags;
+
+    // All of this is public for nrn_checkpoint
+    struct EventTypeBase {
+        double time;
+    };
+    struct NetConType_: public EventTypeBase {
+        int netcon_index;
+    };
+    struct SelfEventType_: public EventTypeBase {
+        int target_type;
+        int point_proc_instance;
+        int target_instance;
+        double flag;
+        int movable;
+        int weight_index;
+    };
+    struct PreSynType_: public EventTypeBase {
+        int presyn_index;
+    };
+    struct NetParEvent_: public EventTypeBase {
+    };
+    struct PlayRecordEventType_: public EventTypeBase {
+        int play_record_type;
+        int vecplay_index;
+    };
+
+    struct VecPlayContinuous_ {
+        int vtype;
+        int mtype;
+        int ix;
+        IvocVect yvec;
+        IvocVect tvec;
+
+        int last_index;
+        int discon_index;
+        int ubound_index;
+    };
+    std::vector<VecPlayContinuous_> vec_play_continuous;
+    int patstim_index;
+
+    std::vector<std::pair<int, std::shared_ptr<EventTypeBase>>> events;
+
+  private:
+    void check_mechanism();
+    NrnThreadMembList* create_tml(int mech_id, Memb_func& memb_func, int& shadow_rhs_cnt);
+    void transform_int_data(int elem0, int nodecount, int* pdata, int i, int dparam_size, int layout, int n_node_);
+    void set_net_send_buffer(Memb_list** ml_list, const std::vector<int>& pnt_offset);
+    void restore_events(FileHandler& F);
+    void fill_before_after_lists(NrnThread& nt, const std::vector<Memb_func>& memb_func);
+    void pdata_relocation(const NrnThread& nt, const std::vector<Memb_func>& memb_func);
+    void set_dependencies(const NrnThread& nt, const std::vector<Memb_func>& memb_func);
+    void handle_extracon(NrnThread& nt, int n_netcon, const std::vector<int>& pnt_offset);
+    void get_info_from_bbcore(NrnThread& nt, const std::vector<Memb_func>& memb_func);
+    void set_vec_play(NrnThread& nt);
+
+    int n_output;
+    int n_real_output;
+    int n_node;
+    int n_diam;  // 0 if not needed, else n_node
+    int n_mech;
+    std::vector<int> mech_types;
+    std::vector<int> nodecounts;
+    int n_idata;
+    int n_vdata;
+    int* v_parent_index;
+    /* TO DO: when this is fixed use it like that
+    std::vector<double> actual_a;
+    std::vector<double> actual_b;
+    std::vector<double> actual_area;
+    std::vector<double> actual_v;
+    std::vector<double> actual_diam;
+    */
+    double* _data;
+    struct TML {
+        std::vector<int> nodeindices;
+        std::vector<int> pdata;
+        int type;
+        std::vector<int> iArray;
+        std::vector<double> dArray;
+    };
+    std::vector<TML> tmls;
+    std::vector<int> output_vindex;
+    std::vector<double> output_threshold;
+    std::vector<int> pnttype;
+    std::vector<int> pntindex;
+    std::vector<double> weights;
+    std::vector<double> delay;
+    int num_point_process;
+};
+}  // namespace coreneuron
diff --git a/coreneuron/io/user_params.hpp b/coreneuron/io/user_params.hpp
new file mode 100644
index 000000000..f8d938fb4
--- /dev/null
+++ b/coreneuron/io/user_params.hpp
@@ -0,0 +1,33 @@
+#pragma once
+
+#include "coreneuron/io/nrn_filehandler.hpp"
+
+#include <vector>
+
+namespace coreneuron {
+
+/// This structure holds data needed in several parts of nrn_setup (phase1 and
+/// phase2). Previously these were global variables; they are grouped here so
+/// they can be passed around as a single argument, although for the most part
+/// they are unrelated to each other.
+struct UserParams {
+    UserParams(int ngroup_, int* gidgroups_, int* imult_,
+               const char* path_, const char* restore_path_)
+        : ngroup(ngroup_)
+        , gidgroups(gidgroups_)
+        , imult(imult_)
+        , path(path_)
+        , restore_path(restore_path_)
+        , file_reader(ngroup_)
+    {}
+
+    /// direct memory mode with neuron, do not open files
+    /// Number of local cell groups
+    const int ngroup;
+    /// Array of cell group numbers (indices)
+    const int* const gidgroups;
+    /// Array of duplicate indices. Normally, with nrn_setup_multiple=1,
+    /// they are ngroup values of 0.
+    const int* const imult;
+    /// path to dataset file
+    const char* const path;
+    /// Dataset path from where simulation is being restored
+    const char* const restore_path;
+    std::vector<FileHandler> file_reader;
+};
+}  // namespace coreneuron
diff --git a/coreneuron/nrniv/nrniv_decl.h b/coreneuron/nrniv/nrniv_decl.h
index a825c5b1a..3f49d03ee 100644
--- a/coreneuron/nrniv/nrniv_decl.h
+++ b/coreneuron/nrniv/nrniv_decl.h
@@ -32,7 +32,6 @@ THE POSSIBILITY OF SUCH DAMAGE.
#include #include #include "coreneuron/network/netcon.hpp" -#include "coreneuron/utils/endianness.hpp" namespace coreneuron { /// Mechanism type to be used from stdindex2ptr and nrn_dblpntr2nrncore (in Neuron) @@ -58,7 +57,7 @@ extern void mk_netcvode(void); extern void nrn_p_construct(void); extern void nrn_setup(const char* filesdat, bool is_mapping_needed, - bool byte_swap, + bool byte_swap, // We keep it for backward API compatibility but it is not used bool run_setup_cleanup = true, const char* datapath = "", const char* restore_path = "", diff --git a/coreneuron/utils/endianness.hpp b/coreneuron/utils/endianness.hpp deleted file mode 100644 index 34c094ce1..000000000 --- a/coreneuron/utils/endianness.hpp +++ /dev/null @@ -1,57 +0,0 @@ -/* -Copyright (c) 2016, Blue Brain Project -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: -1. Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. -2. Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. -3. Neither the name of the copyright holder nor the names of its contributors - may be used to endorse or promote products derived from this software - without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef endianness_h -#define endianness_h - -#include - -namespace endian { - - enum endianness { little_endian, big_endian, mixed_endian }; - - static const union { - unsigned char bytes[4]; - uint32_t value; - } endian_check = {{0, 1, 2, 3}}; - - static const enum endianness native_endian = - endian_check.value == 0x03020100ul - ? little_endian - : endian_check.value == 0x00010203ul ? big_endian : mixed_endian; - - static inline bool is_little_endian() { - return native_endian == little_endian; - } - static inline bool is_big_endian() { - return native_endian == big_endian; - } - -} // namespace endian - -#endif // ifndef endianness_h diff --git a/coreneuron/utils/ivocvect.hpp b/coreneuron/utils/ivocvect.hpp index 146f44929..1b39ecc74 100644 --- a/coreneuron/utils/ivocvect.hpp +++ b/coreneuron/utils/ivocvect.hpp @@ -30,19 +30,37 @@ THE POSSIBILITY OF SUCH DAMAGE. 
#define ivoc_vector_h
 
 #include
+#include <utility>
 #include "coreneuron/utils/nrnmutdec.h"
 
 namespace coreneuron {
 
 template <typename T>
 class fixed_vector {
     size_t n_;
 
-    MUTDEC
   public:
     T* data_; /*making public for openacc copying */
+
+    fixed_vector() : n_(0), data_(nullptr) {}
+    fixed_vector(size_t n) : n_(n) { data_ = new T[n_]; }
+
+    fixed_vector(const fixed_vector& vec) = delete;
+    fixed_vector& operator=(const fixed_vector& vec) = delete;
+    fixed_vector(fixed_vector&& vec)
+        : n_(vec.n_)  // members initialize in declaration order: n_, then data_
+        , data_(nullptr) {
+        std::swap(data_, vec.data_);
+    }
+    fixed_vector& operator=(fixed_vector&& vec) {
+        delete[] data_;  // release the current buffer before taking ownership
+        data_ = nullptr;
+        std::swap(data_, vec.data_);
+        n_ = vec.n_;
+        return *this;
+    }
+
     ~fixed_vector() {
         delete[] data_;
     }
@@ -64,22 +82,6 @@ class fixed_vector {
     size_t size() const {
         return n_;
     }
-
-#if defined(_OPENMP)
-    void mutconstruct(int mkmut) {
-        if (!mut_)
-            MUTCONSTRUCT(mkmut)
-    }
-#else
-    void mutconstruct(int) {
-    }
-#endif
-    void lock() {
-        MUTLOCK
-    }
-    void unlock() {
-        MUTUNLOCK
-    }
 };
 
 typedef fixed_vector<double> IvocVect;
diff --git a/coreneuron/utils/nrnmutdec.h b/coreneuron/utils/nrnmutdec.h
index 98749d0f1..a6da91175 100644
--- a/coreneuron/utils/nrnmutdec.h
+++ b/coreneuron/utils/nrnmutdec.h
@@ -37,6 +37,46 @@ THE POSSIBILITY OF SUCH DAMAGE.
 #define MUTDEC omp_lock_t* mut_;
 #define MUTCONSTRUCTED (mut_ != (omp_lock_t*)0)
 #if defined(__cplusplus)
+
+// This class satisfies the C++ *Mutex* requirements
+class OMP_Mutex {
+  public:
+    // Default constructible
+    OMP_Mutex() {
+        omp_init_lock(&mut_);
+    }
+
+    // Destructible
+    ~OMP_Mutex() {
+        omp_destroy_lock(&mut_);
+    }
+
+    // Not copyable
+    OMP_Mutex(const OMP_Mutex&) = delete;
+    OMP_Mutex& operator=(const OMP_Mutex&) = delete;
+
+    // Not movable
+    OMP_Mutex(OMP_Mutex&&) = delete;
+    OMP_Mutex& operator=(OMP_Mutex&&) = delete;
+
+    // BasicLockable
+    void lock() {
+        omp_set_lock(&mut_);
+    }
+
+    void unlock() {
+        omp_unset_lock(&mut_);
+    }
+
+    // Lockable
+    bool try_lock() {
+        return omp_test_lock(&mut_) != 0;
+    }
+
+  private:
+    omp_lock_t mut_;
+};
+
 #define MUTCONSTRUCT(mkmut) \
     { \
         if (mkmut) { \
diff --git a/coreneuron/utils/swap_endian.h b/coreneuron/utils/swap_endian.h
deleted file mode 100644
index 49339e9ea..000000000
--- a/coreneuron/utils/swap_endian.h
+++ /dev/null
@@ -1,461 +0,0 @@
-/*
-Copyright (c) 2016, Blue Brain Project
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without modification,
-are permitted provided that the following conditions are met:
-1. Redistributions of source code must retain the above copyright notice,
-   this list of conditions and the following disclaimer.
-2. Redistributions in binary form must reproduce the above copyright notice,
-   this list of conditions and the following disclaimer in the documentation
-   and/or other materials provided with the distribution.
-3. Neither the name of the copyright holder nor the names of its contributors
-   may be used to endorse or promote products derived from this software
-   without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef swap_endian_h -#define swap_endian_h - -#include -#include -#include -#include -#include -#include - -#ifdef SWAP_ENDIAN_ASSERT -#include -#endif - -#if !defined(SWAP_ENDIAN_DISABLE_ASM) && (defined(__AVX2__) || defined(__SSSE3__)) -#include -#endif - -/* required for correct choice of PPC assembly */ -#include "coreneuron/utils/endianness.hpp" - -#if !defined(SWAP_ENDIAN_MAX_UNROLL) -#define SWAP_ENDIAN_MAX_UNROLL 8 -#endif - -#if defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 -#define SWAP_ENDIAN_BROKEN_MEMCPY -#endif - -#ifdef SWAP_ENDIAN_BROKEN_MEMCPY -#define memcpy(d, s, n) ::endian::impl::safe_memcpy((d), (s), (n)) -namespace endian { - namespace impl { - static inline void* safe_memcpy(void* d, void* s, size_t n) { - char* d_ = (char*)d; - char* s_ = (char*)s; - while (n-- > 0) - *d_++ = *s_++; - return d; - } - } // namespace impl -} // namespace endian -#endif - -namespace endian { - - namespace impl { - template - struct is_pointer { - enum { value = false }; - }; - - template - struct is_pointer { - enum { value = true }; - }; - - struct not_implemented { - static void eval(...) { - abort(); - } - }; - - /** Check to ensure a class is not derived from not_implemented */ - template - struct is_implemented { - typedef char no; - typedef double yes; - static no check(not_implemented*); - static yes check(...); - enum { value = sizeof(check((C*)0)) == sizeof(yes) }; - }; - - template - struct swap_endian_basic : not_implemented {}; - - template - struct swap_endian_basic { - static void eval(unsigned char* d) { - std::reverse(d, d + K); - } - }; - - template - struct swap_endian_fast : not_implemented {}; - - /** Reverse bytes within values of fixed size in buffer - * - * Basic overload does one item at a time, using std::reverse. - * Specialisations provide faster implementations for particular - * item sizes and unrolls. - * - * If a _fast version is implemented (e.g., architecture specific) - * use that in preference. - * - * If Align is true, we can assume that d is aligned to a multiple - * of K*Unroll bytes. - */ - template - struct swap_endian { - static void eval(unsigned char* d) { -#ifdef SWAP_ENDIAN_ASSERT - assert(!Aligned || (uintptr_t)d % (K * Unroll) == 0); -#endif - if (is_implemented >::value) - swap_endian_fast::eval(d); - else if (is_implemented >::value) - swap_endian_basic::eval(d); - else if (Unroll % 2 == 0 || !Aligned) { - swap_endian::eval(d); - swap_endian::eval(d + K * (Unroll / 2)); - if (Unroll % 2) - swap_endian::eval(d + K * (Unroll - 1)); - } else { - // Unroll is odd: can't make guarantees that we're aligned wrt - // (Unroll/2). - swap_endian::eval(d); - swap_endian::eval(d + K * (Unroll / 2)); - swap_endian::eval(d + K * (Unroll - 1)); - } - } - }; - - // This specialization is required ONLY to convince gcc 4.4.7 - // that it will never divide by zero statically. 
- template - struct swap_endian { - static void eval(unsigned char*) { - } - }; - - template - struct swap_endian<1, Unroll, Aligned> { - static void eval(unsigned char*) { - } - }; - - // specialise swap_endian_basic for integer data sizes - template - struct swap_endian_basic<2, 1, Aligned> { - static void eval(unsigned char* d) { - uint16_t v; - memcpy(&v, d, 2); - v = (uint16_t)((v >> 8u) | (v << 8u)); - memcpy(d, &v, 2); - } - }; - - template - struct swap_endian_basic<4, 1, Aligned> { - static void eval(unsigned char* d) { - uint32_t v; - memcpy(&v, d, 4); - v = (v >> 24) | ((v >> 8) & 0x0000ff00ul) | ((v << 8) & 0x00ff0000ul) | (v << 24); - memcpy(d, &v, 4); - } - }; - - template - struct swap_endian_basic<8, 1, Aligned> { - static void eval(unsigned char* d) { - uint64_t v; - memcpy(&v, d, 8); - v = (v >> 56) | ((v << 40) & 0x00FF000000000000ull) | - ((v << 24) & 0x0000FF0000000000ull) | ((v << 8) & 0x000000FF00000000ull) | - ((v >> 8) & 0x00000000FF000000ull) | ((v >> 24) & 0x0000000000FF0000ull) | - ((v >> 40) & 0x000000000000FF00ull) | (v << 56); - memcpy(d, &v, 8); - } - }; - -#if !defined(SWAP_ENDIAN_DISABLE_ASM) && defined(__PPC64__) - /* generic methods very slow on bgq */ - - template - struct swap_endian_fast<4, 1, Aligned> { - static void eval(unsigned char* d) { - struct chunk_t { - unsigned char x[4]; - }& u = *(chunk_t*)(void*)d; - - uint32_t v; - asm("lwz %[v],%[ldata] \n\t" - "stwbrx %[v],0,%[sdata] \n\t" - : [v] "=&r"(v), "+o"(u) - : [ldata] "o"(u), [sdata] "r"(d) - :); - } - }; - - template <> - struct swap_endian_fast<4, 2, true> { - static void eval(unsigned char* d) { - struct chunk_t { - unsigned char x[8]; - }& u = *(chunk_t*)(void*)d; - - uint64_t v; - if (endian::is_big_endian()) { - asm("ld %[v],%[ldata] \n\t" - "stwbrx %[v],%[word],%[sdata] \n\t" - "srd %[v],%[v],%[shift] \n\t" - "stwbrx %[v],0,%[sdata] \n\t" - : [v] "=&r"(v), "+o"(u) - : [ldata] "o"(u), [sdata] "r"(d), [word] "b"(4), [shift] "b"(32) - :); - } else { - asm("ld %[v],%[ldata] \n\t" - "stwbrx %[v],0,%[sdata] \n\t" - "srd %[v],%[v],%[shift] \n\t" - "stwbrx %[v],%[word],%[sdata] \n\t" - : [v] "=&r"(v), "+o"(u) - : [ldata] "o"(u), [sdata] "r"(d), [word] "b"(4), [shift] "b"(32) - :); - } - } - }; - -#if !defined(__POWER8_VECTOR__) - // 2.06 ISA does not have stdbrx - template <> - struct swap_endian_fast<8, 1, true> { - static void eval(unsigned char* d) { - struct chunk_t { - unsigned char x[8]; - }& u = *(chunk_t*)(void*)d; - - uint64_t v; - if (endian::is_big_endian()) { - asm("ld %[v],%[ldata] \n\t" - "stwbrx %[v],0,%[sdata] \n\t" - "srd %[v],%[v],%[shift] \n\t" - "stwbrx %[v],%[word],%[sdata] \n\t" - : [v] "=&r"(v), "+o"(u) - : [ldata] "o"(u), [sdata] "r"(d), [word] "b"(4), [shift] "b"(32) - :); - } else { - asm("ld %[v],%[ldata] \n\t" - "stwbrx %[v],%[word],%[sdata] \n\t" - "srd %[v],%[v],%[shift] \n\t" - "stwbrx %[v],0,%[sdata] \n\t" - : [v] "=&r"(v), "+o"(u) - : [ldata] "o"(u), [sdata] "r"(d), [word] "b"(4), [shift] "b"(32) - :); - } - } - }; -#else - template <> - struct swap_endian_fast<8, 1, true> { - static void eval(unsigned char* d) { - struct chunk_t { - unsigned char x[8]; - }& u = *(chunk_t*)(void*)d; - - uint64_t v; - asm("ld %[v],%[ldata] \n\t" - "stdbrx %[v],0,%[sdata] \n\t" - : [v] "=&r"(v), "+o"(u) - : [ldata] "o"(u), [sdata] "r"(d) - :); - } - }; - -#endif -#endif - -#if !defined(SWAP_ENDIAN_DISABLE_ASM) && defined(__SSSE3__) - template <> - struct swap_endian_fast<2, 8, true> { - static void eval(unsigned char* d) { - __m128i permute = - _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 
6, 9, 8, 11, 10, 13, 12, 15, 14); - __m128i x = _mm_load_si128((__m128i*)d); - x = _mm_shuffle_epi8(x, permute); - _mm_store_si128((__m128i*)d, x); - } - }; - - template <> - struct swap_endian_fast<4, 4, true> { - static void eval(unsigned char* d) { - __m128i permute = - _mm_setr_epi8(3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12); - __m128i x = _mm_load_si128((__m128i*)d); - x = _mm_shuffle_epi8(x, permute); - _mm_store_si128((__m128i*)d, x); - } - }; - - template <> - struct swap_endian_fast<8, 2, true> { - static void eval(unsigned char* d) { - __m128i permute = - _mm_setr_epi8(7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); - __m128i x = _mm_load_si128((__m128i*)d); - x = _mm_shuffle_epi8(x, permute); - _mm_store_si128((__m128i*)d, x); - } - }; -#endif - -#if !defined(SWAP_ENDIAN_DISABLE_ASM) && defined(__AVX2__) - // Modern implementations suffer little or no penalty from unaligned load. - template - struct swap_endian_fast<4, 8, Aligned> { - static void eval(unsigned char* d) { - __m256i permute = - _mm256_setr_epi8(3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12, 19, 18, - 17, 16, 23, 22, 21, 20, 27, 26, 25, 24, 31, 30, 29, 28); - __m256i x = _mm256_loadu_si256((__m256i*)d); - x = _mm256_shuffle_epi8(x, permute); - _mm256_storeu_si256((__m256i*)d, x); - } - }; - - template - struct swap_endian_fast<8, 4, Aligned> { - static void eval(unsigned char* d) { - __m256i permute = - _mm256_setr_epi8(7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8, 23, 22, - 21, 20, 19, 18, 17, 16, 31, 30, 29, 28, 27, 26, 25, 24); - __m256i x = _mm256_loadu_si256((__m256i*)d); - x = _mm256_shuffle_epi8(x, permute); - _mm256_storeu_si256((__m256i*)d, x); - } - }; - -#endif - - template - void swap_endian_unroll(V* b, V* e) { - static const size_t n_unroll = SWAP_ENDIAN_MAX_UNROLL; - if (e <= b) - return; - - size_t n = e - b; - bool aligned_vsize = ((uintptr_t)b % sizeof(V) == 0); - - if (n >= n_unroll) { - if (!aligned_vsize) { - /* No guarantees on alignment for swap_endian: elements - are not aligned to multiple of the element size. - This can happen with doubles on some 32-bit ABIs. */ - - while (n >= n_unroll) { - swap_endian::eval((unsigned char*)b); - b += n_unroll; - n -= n_unroll; - } - } else { - /* process elements singly until we get to a n_unroll*sizeof(V) - boundary, and then do an alignment-guaranteed swap_endian. */ - - size_t off_align_count = - (size_t)((uintptr_t)b % (sizeof(V) * n_unroll)) / sizeof(V); - if (off_align_count > 0) { - while (off_align_count++ < n_unroll) { - swap_endian::eval((unsigned char*)b++); - --n; - } - } - -/* b should now be n_unroll*sizeof(V) aligned. */ -#ifdef SWAP_ENDIAN_ASSERT - assert((uintptr_t)b % (sizeof(V) * n_unroll) == 0); -#endif - while (n >= n_unroll) { - swap_endian::eval((unsigned char*)b); - b += n_unroll; - n -= n_unroll; - } - } - } - - if (aligned_vsize) { - while (n-- > 0) - swap_endian::eval((unsigned char*)b++); - } else { - while (n-- > 0) - swap_endian::eval((unsigned char*)b++); - } - } - } // namespace impl - - /** Reverse the endianness of a value in-place. 
- * - * /tparam T value type - * /param v value to byte-reorder - */ - template - T& swap_endian(T& v) { - impl::swap_endian::eval((unsigned char*)&v); - return v; - } - - namespace impl { - template - struct swap_endian_range_dispatch { - static void eval(I b, I e) { - while (b != e) - ::endian::swap_endian(*b++); - } - }; - - template - struct swap_endian_range_dispatch { - static void eval(I b, I e) { - swap_endian_unroll(b, e); - } - }; - } // namespace impl - - /** Reverse the endianness of the values in the given range. - * - * /tparam I iterator type - * /param b iterator representing the beginning of the range. - * /param e iterator representing the end of the range. - * - * All values in the iterator range [b,e) are byte-reversed. - */ - template - void swap_endian_range(I b, I e) { - impl::swap_endian_range_dispatch::value>::eval(b, e); - } - -} // namespace endian - -#ifdef SWAP_ENDIAN_BROKEN_MEMCPY -#undef memcpy -#endif - -#endif // ifndef swap_endian_h diff --git a/coreneuron/utils/vrecitem.h b/coreneuron/utils/vrecitem.h index 2f0f3b905..752b3e7ef 100644 --- a/coreneuron/utils/vrecitem.h +++ b/coreneuron/utils/vrecitem.h @@ -78,9 +78,8 @@ class PlayRecord { class VecPlayContinuous : public PlayRecord { public: - VecPlayContinuous(double*, IvocVect* yvec, IvocVect* tvec, IvocVect* discon, int ith); + VecPlayContinuous(double*, IvocVect&& yvec, IvocVect&& tvec, IvocVect* discon, int ith); virtual ~VecPlayContinuous(); - void init(IvocVect* yvec, IvocVect* tvec, IvocVect* tdiscon); virtual void play_init(); virtual void deliver(double tt, NetCvode*); virtual PlayRecordEvent* event() { @@ -99,8 +98,8 @@ class VecPlayContinuous : public PlayRecord { return VecPlayContinuousType; } - IvocVect* y_; - IvocVect* t_; + IvocVect y_; + IvocVect t_; IvocVect* discon_indices_; size_t last_index_; size_t discon_index_; diff --git a/coreneuron/utils/vrecord.cpp b/coreneuron/utils/vrecord.cpp index 7c172933b..86a8e470e 100644 --- a/coreneuron/utils/vrecord.cpp +++ b/coreneuron/utils/vrecord.cpp @@ -69,28 +69,18 @@ void PlayRecord::pr() { } VecPlayContinuous::VecPlayContinuous(double* pd, - IvocVect* yvec, - IvocVect* tvec, + IvocVect&& yvec, + IvocVect&& tvec, IvocVect* discon, int ith) - : PlayRecord(pd, ith) { - // printf("VecPlayContinuous\n"); - init(yvec, tvec, discon); -} - -void VecPlayContinuous::init(IvocVect* yvec, IvocVect* tvec, IvocVect* discon) { - y_ = yvec; - t_ = tvec; - discon_indices_ = discon; - ubound_index_ = 0; - last_index_ = 0; - discon_index_ = 0; - e_ = new PlayRecordEvent(); + : PlayRecord(pd, ith) + , y_(std::move(yvec)), t_(std::move(tvec)), discon_indices_(discon) + , last_index_(0), discon_index_(0), ubound_index_(0) + , e_(new PlayRecordEvent{}) { e_->plr_ = this; } VecPlayContinuous::~VecPlayContinuous() { - // printf("~VecPlayContinuous\n"); delete e_; } @@ -102,13 +92,13 @@ void VecPlayContinuous::play_init() { if (discon_indices_->size() > 0) { ubound_index_ = (int)(*discon_indices_)[discon_index_++]; // printf("play_init %d %g\n", ubound_index_, t_->elem(ubound_index_)); - e_->send((*t_)[ubound_index_], net_cvode_instance, nt); + e_->send(t_[ubound_index_], net_cvode_instance, nt); } else { - ubound_index_ = t_->size() - 1; + ubound_index_ = t_.size() - 1; } } else { ubound_index_ = 0; - e_->send((*t_)[ubound_index_], net_cvode_instance, nt); + e_->send(t_[ubound_index_], net_cvode_instance, nt); } } @@ -123,14 +113,14 @@ void VecPlayContinuous::deliver(double tt, NetCvode* ns) { if (discon_index_ < discon_indices_->size()) { ubound_index_ = 
(int)(*discon_indices_)[discon_index_++]; // printf("after deliver:send %d %g\n", ubound_index_, t_->elem(ubound_index_)); - e_->send((*t_)[ubound_index_], ns, nt); + e_->send(t_[ubound_index_], ns, nt); } else { - ubound_index_ = t_->size() - 1; + ubound_index_ = t_.size() - 1; } } else { - if (ubound_index_ < t_->size() - 1) { + if (ubound_index_ < t_.size() - 1) { ubound_index_++; - e_->send((*t_)[ubound_index_], ns, nt); + e_->send(t_[ubound_index_], ns, nt); } } // clang-format off @@ -150,24 +140,24 @@ void VecPlayContinuous::continuous(double tt) { } double VecPlayContinuous::interpolate(double tt) { - if (tt >= (*t_)[ubound_index_]) { + if (tt >= t_[ubound_index_]) { last_index_ = ubound_index_; if (last_index_ == 0) { // printf("return last tt=%g ubound=%g y=%g\n", tt, t_->elem(ubound_index_), // y_->elem(last_index_)); - return (*y_)[last_index_]; + return y_[last_index_]; } - } else if (tt <= (*t_)[0]) { + } else if (tt <= t_[0]) { last_index_ = 0; // printf("return elem(0) tt=%g t0=%g y=%g\n", tt, t_->elem(0), y_->elem(0)); - return (*y_)[0]; + return y_[0]; } else { search(tt); } - double x0 = (*y_)[last_index_ - 1]; - double x1 = (*y_)[last_index_]; - double t0 = (*t_)[last_index_ - 1]; - double t1 = (*t_)[last_index_]; + double x0 = y_[last_index_ - 1]; + double x1 = y_[last_index_]; + double t0 = t_[last_index_ - 1]; + double t1 = t_[last_index_]; // printf("IvocVectRecorder::continuous tt=%g t0=%g t1=%g theta=%g x0=%g x1=%g\n", tt, t0, t1, // (tt - t0)/(t1 - t0), x0, x1); if (t0 == t1) { @@ -178,10 +168,10 @@ double VecPlayContinuous::interpolate(double tt) { void VecPlayContinuous::search(double tt) { // assert (tt > t_->elem(0) && tt < t_->elem(t_->size() - 1)) - while (tt < (*t_)[last_index_]) { + while (tt < t_[last_index_]) { --last_index_; } - while (tt >= (*t_)[last_index_]) { + while (tt >= t_[last_index_]) { ++last_index_; } } diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 6f439c1b2..c647ac1c8 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -22,7 +22,6 @@ find_package(Boost 1.41.0 QUIET if(Boost_FOUND) if(CORENRN_ENABLE_UNIT_TESTS) include_directories(${Boost_INCLUDE_DIRS}) - add_subdirectory(unit/endian) add_subdirectory(unit/cmdline_interface) add_subdirectory(unit/interleave_info) add_subdirectory(unit/alignment) diff --git a/tests/integration/reportinglib/reporting_test.sh.in b/tests/integration/reportinglib/reporting_test.sh.in index 28319366b..6148536f9 100644 --- a/tests/integration/reportinglib/reporting_test.sh.in +++ b/tests/integration/reportinglib/reporting_test.sh.in @@ -1,5 +1,5 @@ #! /bin/sh export OMP_NUM_THREADS=1 -@SRUN_PREFIX@ @CMAKE_BINARY_DIR@/bin/@CMAKE_SYSTEM_PROCESSOR@/special-core --mpi --read-config @TEST_NAME@.conf -chmod +x ./@TEST_NAME@.check -exit `./@TEST_NAME@.check` +@SRUN_PREFIX@ @CMAKE_BINARY_DIR@/bin/@CMAKE_SYSTEM_PROCESSOR@/special-core --mpi --read-config @CMAKE_CURRENT_BINARY_DIR@/@SIM_NAME@/@TEST_NAME@.conf +chmod +x @CMAKE_CURRENT_BINARY_DIR@/@SIM_NAME@/@TEST_NAME@.check +exit `@CMAKE_CURRENT_BINARY_DIR@/@SIM_NAME@/@TEST_NAME@.check` diff --git a/tests/unit/endian/CMakeLists.txt b/tests/unit/endian/CMakeLists.txt deleted file mode 100644 index 24f82cf72..000000000 --- a/tests/unit/endian/CMakeLists.txt +++ /dev/null @@ -1,21 +0,0 @@ -# ============================================================================= -# Copyright (C) 2016-2019 Blue Brain Project -# -# See top-level LICENSE file for details. 
-# ============================================================================= - -# Need to test the endian code against aggressive compiler optimization -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_${COMPILER_LANGUAGE}_OPT_AGGRESSIVE}") - -file(GLOB endian_test_src "*.cpp") - -foreach(test_src_file ${endian_test_src}) - get_filename_component(test_name ${test_src_file} NAME) - - add_executable(${test_name}_test_bin ${test_src_file}) - target_link_libraries(${test_name}_test_bin ${Boost_UNIT_TEST_FRAMEWORK_LIBRARY}) - - add_test(NAME ${test_name}_test - COMMAND ${TEST_EXEC_PREFIX} ${CMAKE_CURRENT_BINARY_DIR}/${test_name}_test_bin) - -endforeach() diff --git a/tests/unit/endian/endianness_test.cpp b/tests/unit/endian/endianness_test.cpp deleted file mode 100644 index 2bf9bec60..000000000 --- a/tests/unit/endian/endianness_test.cpp +++ /dev/null @@ -1,107 +0,0 @@ -/* -Copyright (c) 2016, Blue Brain Project -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: -1. Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. -2. Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. -3. Neither the name of the copyright holder nor the names of its contributors - may be used to endorse or promote products derived from this software - without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - - -/* confirm that reported native endianness is big- or little-endian, and that - * this corresponds with our expectations across 16-, 32- and 64-bit integers, - * and floats and doubles (assumes IEEE 4- and 8- byte representation) */ - -#define BOOST_TEST_MODULE NativeEndianCheck -#define BOOST_TEST_MAIN - -#include -#include - -#include -#include -#include - -#include "coreneuron/utils/endianness.hpp" - -BOOST_AUTO_TEST_CASE(confirm_big_or_little) { - BOOST_CHECK(endian::little_endian!=endian::big_endian); - BOOST_CHECK(endian::little_endian!=endian::mixed_endian); - BOOST_CHECK(endian::big_endian!=endian::mixed_endian); - - BOOST_CHECK(endian::native_endian==endian::big_endian || - endian::native_endian==endian::little_endian); -} - -template -struct check_data { - static const unsigned char data[sizeof(T)]; - static const T le,be; - - static void confirm_endian(enum endian::endianness E) { - union { - unsigned char bytes[sizeof(T)]; - T value; - } join; - - memcpy(join.bytes,data,sizeof(T)); - switch (E) { - case endian::little_endian: - BOOST_CHECK_EQUAL(join.value,le); - break; - case endian::big_endian: - BOOST_CHECK_EQUAL(join.value,be); - break; - default: - BOOST_ERROR("Endian neither little nor big"); - } - } -}; - -typedef boost::mpl::list test_value_types; - -#define CHECK_DATA(T,le_,be_)\ -template <> const T check_data::le=le_;\ -template <> const T check_data::be=be_;\ -template <> const unsigned char check_data::data[] - -CHECK_DATA(char,'x','x')={'x'}; - -CHECK_DATA(float,(float)1.1,(float)-428967904.0)= - {0xcd,0xcc,0x8c,0x3f}; - -CHECK_DATA(double,1.1,-1.5423487136706484e-180)= - {0x9a,0x99,0x99,0x99,0x99,0x99,0xf1,0x3f}; - -CHECK_DATA(uint16_t,0xf12e,0x2ef1)= - {0x2e,0xf1}; - -CHECK_DATA(uint32_t,0x192a3b4c,0x4c3b2a19)= - {0x4c,0x3b,0x2a,0x19}; - -CHECK_DATA(uint64_t,0xab13cd24ef350146,0x460135ef24cd13ab)= - {0x46,0x01,0x35,0xef,0x24,0xcd,0x13,0xab}; - -BOOST_AUTO_TEST_CASE_TEMPLATE(correct_endianness,T,test_value_types) { - check_data::confirm_endian(endian::native_endian); -} - diff --git a/tests/unit/endian/swap_endian_common.ipp b/tests/unit/endian/swap_endian_common.ipp deleted file mode 100644 index bb8a017c6..000000000 --- a/tests/unit/endian/swap_endian_common.ipp +++ /dev/null @@ -1,305 +0,0 @@ -/* -Copyright (c) 2016, Blue Brain Project -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: -1. Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. -2. Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. -3. Neither the name of the copyright holder nor the names of its contributors - may be used to endorse or promote products derived from this software - without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -THE POSSIBILITY OF SUCH DAMAGE. -*/ - - -/* Wish to run unit tests for swap_endian code under multiple configurations: - * 1. default: SWAP_ENDIAN_DISABLE_ASM and SWAP_ENDIAN_MAX_UNROLL are undef. - * 2. using fallback methods, with SWAP_ENDIAN_DISABLE_ASM defined. - * 3. using a strange unroll factor, with SWAP_ENDIAN_DISABLE_ASM set to 7. - * - * This file presents the common code for testing, to be included in - * the specific test code for each of the above scenarios. - */ - -#define SWAP_ENDIAN_ASSERT -#include "coreneuron/utils/swap_endian.h" - -#ifndef SWAP_ENDIAN_CONFIG -#define SWAP_ENDIAN_CONFIG _ -#endif -#define CONCAT(a,b) a##b -#define CONCAT2(a,b) CONCAT(a,b) - -#define BOOST_TEST_MODULE CONCAT2(SwapEndian,SWAP_ENDIAN_CONFIG) -#define BOOST_TEST_MAIN -#include -#include -#include - -#include -#include -#include - -#include - -#ifdef SWAP_ENDIAN_BROKEN_MEMCPY -#define memcpy(d,s,n) ::endian::impl::safe_memcpy((d),(s),(n)) -#endif - -/* Some of the tests require an unaligned fill: can't ask std::fill to - * do this and not expect issues with alignment */ -template -void fill(T *b,T *e,T value) { - while (b -void run_test_array(T *buf,size_t N,T (*fill_value)(unsigned),T (*check_value)(unsigned)) { - for (size_t i=0;i -void run_test_list(size_t N,T (*fill_value)(unsigned),T (*check_value)(unsigned)) { - std::list data(N); - unsigned j=0; - for (typename std::list::iterator i=data.begin();i!=data.end();++i) *i=fill_value(j++); - - endian::swap_endian_range(data.begin(),data.end()); - - j=0; - for (typename std::list::const_iterator i=data.begin();i!=data.end();++i) { - BOOST_REQUIRE_EQUAL(*i,check_value(j++)); - } -} - -template -void check_small_containers(T (*fill_value)(unsigned),T (*check_value)(unsigned)) { - std::vector data(MAXN); - - for (size_t i=0;i test_value_types; - -template V sample_fill_value(unsigned i=0); -template V sample_check_value(unsigned i=0); - -#define NELEM(arr) (sizeof(arr)/sizeof(arr[0])) - -template <> double sample_fill_value(unsigned i) { - double table[]={ - 1.1, - 12.345e36, - -0.987e-39 - }; - return table[i%NELEM(table)]; -} -template <> double sample_check_value(unsigned i) { - double table[]={ - -1.5423487136706484e-180, - 130023111.64417373, - 2.703780148780151e-145 - }; - return table[i%NELEM(table)]; -} - -template <> uint64_t sample_fill_value(unsigned i) { - uint64_t table[]={ - 0x123456789abcdef0ull, - 0x5342312f1e0dfcebull, - 0xb56cd13e90023347ull, - 0x45b6e72aa9c692b5ull, - 0x2b45634ec623974full - }; - return table[i%NELEM(table)]; -} - -template <> uint64_t sample_check_value(unsigned i) { - uint64_t table[]={ - 0xf0debc9a78563412ull, - 0xebfc0d1e2f314253ull, - 0x473302903ed16cb5ull, - 0xb592c6a92ae7b645ull, - 0x4f9723c64e63452bull - }; - return table[i%NELEM(table)]; -} - - -template <> uint32_t sample_fill_value(unsigned i) { - uint32_t table[]={ - 0x1234567ful, - 0x89abcde0ul, - 0x11223344ul - }; - return table[i%NELEM(table)]; -} - -template <> uint32_t sample_check_value(unsigned i) { - uint32_t table[]={ - 0x7f563412ul, - 0xe0cdab89ul, - 
0x44332211ul - }; - return table[i%NELEM(table)]; -} - -template <> float sample_fill_value(unsigned i) { - float table[]={ - (float)1.1, - (float)12.345e12, - (float)-0.987e-13 - }; - return table[i%NELEM(table)]; -} - -template <> float sample_check_value(unsigned i) { - float table[]={ - (float)-428967904.0, - (float)-1.2233891766300076e-06, - (float)-1.0963376907702216e-11 - }; - return table[i%NELEM(table)]; -} - - -template <> uint16_t sample_fill_value(unsigned i) { - uint16_t table[]={ - 0xb2c6u, - 0xa391u, - 0x3456u - }; - return table[i%NELEM(table)]; -} - -template <> uint16_t sample_check_value(unsigned i) { - uint16_t table[]={ - 0xc6b2u, - 0x91a3u, - 0x5634u - }; - return table[i%NELEM(table)]; -} - - -template <> char sample_fill_value(unsigned i) { - char table[]={ - 'z','a','/' - }; - return table[i%NELEM(table)]; -} - -template <> char sample_check_value(unsigned i) { - char table[]={ - 'z','a','/' - }; - return table[i%NELEM(table)]; -} - - -BOOST_AUTO_TEST_CASE_TEMPLATE(small_collection,T,test_value_types) { - check_small_containers(sample_fill_value,sample_check_value); -} - -/* run test over longer (unrollable) array; using N_guard items at front and back of the - * buffer to check against overwrites */ - -template -void run_longer_test_array(T *buf,size_t N,size_t N_guard, - T (*fill_value)(unsigned),T (*check_value)(unsigned),const T &guard_value) -{ - if (N_guard*2>=N) return; - - T *start=buf+N_guard; - size_t count=N-2*N_guard; - - fill(buf,buf+N_guard,guard_value); - for (size_t i=0;i -void check_longer_unroll(T (*fill_value)(unsigned),T (*check_value)(unsigned),const T &guard_value) { - std::vector data((1+unroll_multiple)*SWAP_ENDIAN_MAX_UNROLL); - - size_t n_guard=8; - for (size_t n_over=0;n_over -void check_unroll_badalign(T (*fill_value)(unsigned),T (*check_value)(unsigned),const T &guard_value) { - static const size_t n_item=unroll_multiple*SWAP_ENDIAN_MAX_UNROLL; - std::vector bytes(sizeof(T)*(n_item+1)); - - size_t n_guard=3; - for (size_t offset=0;offset -void fill_with_guard_byte(T *value) { - unsigned char bytes[sizeof(T)]; - for (size_t i=0;i(sample_fill_value,sample_check_value,guard_value); -} - -BOOST_AUTO_TEST_CASE_TEMPLATE(unaligned_array,T,test_value_types) { - T guard_value; - fill_with_guard_byte(&guard_value); - check_unroll_badalign(sample_fill_value,sample_check_value,guard_value); -} - - - diff --git a/tests/unit/endian/swap_endian_default.cpp b/tests/unit/endian/swap_endian_default.cpp deleted file mode 100644 index ddb53fd86..000000000 --- a/tests/unit/endian/swap_endian_default.cpp +++ /dev/null @@ -1,34 +0,0 @@ -/* -Copyright (c) 2016, Blue Brain Project -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: -1. Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. -2. Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. -3. Neither the name of the copyright holder nor the names of its contributors - may be used to endorse or promote products derived from this software - without specific prior written permission. 
- -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -THE POSSIBILITY OF SUCH DAMAGE. -*/ - - -/* Run endian tests with default configuration of swap_endian.h */ - -#define SWAP_ENDIAN_CONFIG Default -#include "swap_endian_common.ipp" - diff --git a/tests/unit/endian/swap_endian_noasm.cpp b/tests/unit/endian/swap_endian_noasm.cpp deleted file mode 100644 index a337eaf8d..000000000 --- a/tests/unit/endian/swap_endian_noasm.cpp +++ /dev/null @@ -1,40 +0,0 @@ -/* -Copyright (c) 2016, Blue Brain Project -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: -1. Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. -2. Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. -3. Neither the name of the copyright holder nor the names of its contributors - may be used to endorse or promote products derived from this software - without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -THE POSSIBILITY OF SUCH DAMAGE. -*/ - - -/* Run endian tests with assembly and intrinsics disabled. - */ - -#define SWAP_ENDIAN_CONFIG DisableAsm - -#ifndef SWAP_ENDIAN_DISABLE_ASM -#define SWAP_ENDIAN_DISABLE_ASM -#endif - -#include "swap_endian_common.ipp" - diff --git a/tests/unit/endian/swap_endian_nounroll.cpp b/tests/unit/endian/swap_endian_nounroll.cpp deleted file mode 100644 index 3b35c6d1e..000000000 --- a/tests/unit/endian/swap_endian_nounroll.cpp +++ /dev/null @@ -1,37 +0,0 @@ -/* -Copyright (c) 2016, Blue Brain Project -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: -1. Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. -2. 
Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. -3. Neither the name of the copyright holder nor the names of its contributors - may be used to endorse or promote products derived from this software - without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -THE POSSIBILITY OF SUCH DAMAGE. -*/ - - -/* Run endian tests with unroll factor forced to be one. - */ - -#define SWAP_ENDIAN_CONFIG NoUnroll -#define SWAP_ENDIAN_MAX_UNROLL 1 - -#include "swap_endian_common.ipp" - diff --git a/tests/unit/endian/swap_endian_oddunroll.cpp b/tests/unit/endian/swap_endian_oddunroll.cpp deleted file mode 100644 index 91fc49e15..000000000 --- a/tests/unit/endian/swap_endian_oddunroll.cpp +++ /dev/null @@ -1,41 +0,0 @@ -/* -Copyright (c) 2016, Blue Brain Project -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: -1. Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. -2. Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. -3. Neither the name of the copyright holder nor the names of its contributors - may be used to endorse or promote products derived from this software - without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -THE POSSIBILITY OF SUCH DAMAGE. -*/ - - -/* Run endian tests with an unusual, odd unroll factor. - * - * In practice, unroll should be left at the default value or set - * to a power of two. This test ensures correctness even if this - * is not the case. - */ - -#define SWAP_ENDIAN_CONFIG OddUnroll - -#define SWAP_ENDIAN_MAX_UNROLL 5 -#include "swap_endian_common.ipp" -
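
A note on the new UserParams aggregate introduced above: since it bundles what
used to be global state, a call site now constructs it once and threads it
through the setup phases. A minimal sketch under assumed usage (the group ids,
paths, and function name setup_example are made up for illustration):

#include "coreneuron/io/phase2.hpp"  // also pulls in user_params.hpp

using namespace coreneuron;

void setup_example(Phase2& phase2, NrnThread& nt) {
    static int gidgroups[] = {0, 1, 2};  // three cell groups (hypothetical)
    static int imult[] = {0, 0, 0};      // no duplicates (nrn_setup_multiple == 1)
    UserParams params(3, gidgroups, imult,
                      "/path/to/dataset",    // hypothetical dataset path
                      "/path/to/restore");   // hypothetical restore path
    // the UserParams constructor created one FileHandler per group:
    phase2.read_file(params.file_reader[0], nt);
    phase2.populate(nt, params);
}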
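Similarly, with fixed_vector now move-only, ownership of the y/t vectors passes
into VecPlayContinuous instead of being shared through pointers. A sketch of
the intended usage; make_ramp is a hypothetical helper, and data_ is used
directly because the diff keeps it public for OpenACC:

#include <cstddef>
#include <utility>  // std::move

#include "coreneuron/utils/ivocvect.hpp"
#include "coreneuron/utils/vrecitem.h"

using coreneuron::IvocVect;

// hypothetical helper: build n samples 0.0, 0.1, 0.2, ...
IvocVect make_ramp(std::size_t n) {
    IvocVect v(n);
    for (std::size_t i = 0; i < n; ++i)
        v.data_[i] = 0.1 * i;
    return v;  // moved (or elided) out; the copy constructor is deleted
}

void example(double* pd, int tid) {
    IvocVect y = make_ramp(100);
    IvocVect t = make_ramp(100);
    // the constructor takes rvalue references, so the caller must std::move:
    coreneuron::VecPlayContinuous vp(pd, std::move(y), std::move(t),
                                     /*discon*/ nullptr, tid);
    // y and t are now empty shells; vp owns the buffers.
}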
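Because the new OMP_Mutex satisfies BasicLockable (lock()/unlock()), it also
composes with the standard RAII guards. A small sketch, assuming an OpenMP
build where the class is defined; the shared state being protected is
illustrative:

#include <mutex>  // std::lock_guard

#include "coreneuron/utils/nrnmutdec.h"  // OMP_Mutex (OpenMP builds only)

static OMP_Mutex event_mutex;  // hypothetical lock for some shared queue

void push_event_threadsafe() {
    std::lock_guard<OMP_Mutex> guard(event_mutex);  // omp_set/unset_lock via RAII
    // ... mutate the shared state ...
}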
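Finally, the VecPlayContinuous changes above only alter how y_ and t_ are
stored; interpolate()/search() still reduce to plain linear interpolation
between the two time samples bracketing tt. A simplified standalone sketch of
that idea (plain std::vector stand-ins and an illustrative function name, not
the exact CoreNEURON control flow):

#include <cassert>
#include <cstddef>
#include <vector>

// Linearly interpolate y at time tt, given nondecreasing sample times t.
double interpolate_at(const std::vector<double>& t,
                      const std::vector<double>& y,
                      double tt) {
    assert(!t.empty() && t.size() == y.size());
    if (tt <= t.front())
        return y.front();          // clamp below the first sample
    if (tt >= t.back())
        return y.back();           // clamp above the last sample
    std::size_t i = 1;
    while (t[i] <= tt)             // find the first sample strictly past tt
        ++i;
    double t0 = t[i - 1], t1 = t[i];
    if (t0 == t1)                  // repeated time sample: avoid dividing by zero
        return y[i - 1];
    double theta = (tt - t0) / (t1 - t0);
    return y[i - 1] + theta * (y[i] - y[i - 1]);
}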