diff --git a/CMake/ClangFormatHelper.cmake b/CMake/ClangFormatHelper.cmake index 69494c339..173757f53 100644 --- a/CMake/ClangFormatHelper.cmake +++ b/CMake/ClangFormatHelper.cmake @@ -23,12 +23,6 @@ if(CLANG_FORMAT_FOUND) ${CORENEURON_PROJECT_SOURCE_DIR}/coreneuron/*.h* ${CORENEURON_PROJECT_SOURCE_DIR}/coreneuron/*.ipp*) - # exclude ezoption headers - list(REMOVE_ITEM SRC_FILES_FOR_CLANG_FORMAT - ${CORENEURON_PROJECT_SOURCE_DIR}/coreneuron/utils/ezoption/ezOptionParser.hpp - ${CORENEURON_PROJECT_SOURCE_DIR}/coreneuron/utils/endianness.h - ${CORENEURON_PROJECT_SOURCE_DIR}/coreneuron/utils/swap_endian.h) - # exclude random123 file(GLOB_RECURSE RANDOM123_FILES ${CORENEURON_PROJECT_SOURCE_DIR}/coreneuron/utils/randoms/Random123/*.cpp diff --git a/coreneuron/apps/main1.cpp b/coreneuron/apps/main1.cpp index f76e12d46..0f6c3bea0 100644 --- a/coreneuron/apps/main1.cpp +++ b/coreneuron/apps/main1.cpp @@ -603,7 +603,7 @@ extern "C" int run_solve_core(int argc, char** argv) { { Instrumentor::phase p("checkpoint"); - write_checkpoint(nrn_threads, nrn_nthread, corenrn_param.checkpointpath.c_str(), nrn_need_byteswap); + write_checkpoint(nrn_threads, nrn_nthread, corenrn_param.checkpointpath.c_str()); } // must be done after checkpoint (to avoid deleting events) diff --git a/coreneuron/gpu/nrn_acc_manager.cpp b/coreneuron/gpu/nrn_acc_manager.cpp index 52c41d071..a9237e542 100644 --- a/coreneuron/gpu/nrn_acc_manager.cpp +++ b/coreneuron/gpu/nrn_acc_manager.cpp @@ -22,7 +22,7 @@ #endif namespace coreneuron { extern InterleaveInfo* interleave_info; -void copy_ivoc_vect_to_device(IvocVect*& iv, IvocVect*& div); +void copy_ivoc_vect_to_device(const IvocVect& iv, IvocVect& div); void nrn_ion_global_map_copyto_device(); void nrn_VecPlay_copyto_device(NrnThread* nt, void** d_vecplay); @@ -389,21 +389,19 @@ void setup_nrnthreads_on_device(NrnThread* threads, int nthreads) { #endif } -void copy_ivoc_vect_to_device(IvocVect*& iv, IvocVect*& div) { +void copy_ivoc_vect_to_device(const IvocVect& from, IvocVect& to) { #ifdef _OPENACC - if (iv) { - IvocVect* d_iv = (IvocVect*)acc_copyin(iv, sizeof(IvocVect)); - acc_memcpy_to_device(&div, &d_iv, sizeof(IvocVect*)); - - size_t n = iv->size(); - if (n) { - double* d_data = (double*)acc_copyin(iv->data(), sizeof(double) * n); - acc_memcpy_to_device(&(d_iv->data_), &d_data, sizeof(double*)); - } + IvocVect* d_iv = (IvocVect*)acc_copyin((void*)&from, sizeof(IvocVect)); + acc_memcpy_to_device(&to, d_iv, sizeof(IvocVect)); + + size_t n = from.size(); + if (n) { + double* d_data = (double*)acc_copyin((void*)from.data(), sizeof(double) * n); + acc_memcpy_to_device(&(d_iv->data_), &d_data, sizeof(double*)); } #else - (void)iv; - (void)div; + (void)from; + (void)to; #endif } @@ -1004,8 +1002,10 @@ void nrn_VecPlay_copyto_device(NrnThread* nt, void** d_vecplay) { /** copy y_, t_ and discon_indices_ */ copy_ivoc_vect_to_device(vecplay_instance->y_, d_vecplay_instance->y_); copy_ivoc_vect_to_device(vecplay_instance->t_, d_vecplay_instance->t_); - copy_ivoc_vect_to_device(vecplay_instance->discon_indices_, - d_vecplay_instance->discon_indices_); + if (vecplay_instance->discon_indices_) { + copy_ivoc_vect_to_device(*(vecplay_instance->discon_indices_), + *(d_vecplay_instance->discon_indices_)); + } /** copy PlayRecordEvent : todo: verify this */ PlayRecordEvent* d_e_ = diff --git a/coreneuron/io/mem_layout_util.cpp b/coreneuron/io/mem_layout_util.cpp new file mode 100644 index 000000000..dfc7ee0f0 --- /dev/null +++ b/coreneuron/io/mem_layout_util.cpp @@ -0,0 +1,56 @@ +#include 
"mem_layout_util.hpp" + +namespace coreneuron { + +/// calculate size after padding for specific memory layout +// Warning: this function is declared extern in nrniv_decl.h +int nrn_soa_padded_size(int cnt, int layout) { + return soa_padded_size(cnt, layout); +} + +/// return the new offset considering the byte aligment settings +size_t nrn_soa_byte_align(size_t size) { + if (LAYOUT == Layout::SoA) { + size_t dbl_align = NRN_SOA_BYTE_ALIGN / sizeof(double); + size_t remainder = size % dbl_align; + if (remainder) { + size += dbl_align - remainder; + } + nrn_assert((size * sizeof(double)) % NRN_SOA_BYTE_ALIGN == 0); + } + return size; +} + +int nrn_i_layout(int icnt, int cnt, int isz, int sz, int layout) { + switch(layout) { + case Layout::AoS: + return icnt * sz + isz; + case Layout::SoA: + int padded_cnt = nrn_soa_padded_size(cnt, layout); // may want to factor out to save time + return icnt + isz * padded_cnt; + } + + nrn_assert(false); + return 0; +} + +// file data is AoS. ie. +// organized as cnt array instances of mtype each of size sz. +// So input index i refers to i_instance*sz + i_item offset +// Return the corresponding SoA index -- taking into account the +// alignment requirements. Ie. i_instance + i_item*align_cnt. + +int nrn_param_layout(int i, int mtype, Memb_list* ml) { + int layout = corenrn.get_mech_data_layout()[mtype]; + switch(layout) { + case Layout::AoS: + return i; + case Layout::SoA: + nrn_assert(layout == Layout::SoA); + int sz = corenrn.get_prop_param_size()[mtype]; + return nrn_i_layout(i / sz, ml->nodecount, i % sz, sz, layout); + } + nrn_assert(false); + return 0; +} +} // namespace coreneuron diff --git a/coreneuron/io/mem_layout_util.hpp b/coreneuron/io/mem_layout_util.hpp new file mode 100644 index 000000000..b47d7373d --- /dev/null +++ b/coreneuron/io/mem_layout_util.hpp @@ -0,0 +1,45 @@ +#pragma once + +#include "coreneuron/coreneuron.hpp" +#include "coreneuron/nrniv/nrniv_decl.h" + +namespace coreneuron { + +#if !defined(NRN_SOA_PAD) +// for layout 0, every range variable array must have a size which +// is a multiple of NRN_SOA_PAD doubles +#define NRN_SOA_PAD 8 +#endif + +// If MATRIX_LAYOUT is 1 then a,b,d,rhs,v,area is not padded using NRN_SOA_PAD +// When MATRIX_LAYOUT is 0 then mechanism pdata index values into _actual_v +// and _actual_area data need to be updated. +enum Layout { + SoA = 0, + AoS = 1 +}; + +#if !defined(LAYOUT) +#define LAYOUT Layout::AoS +#define MATRIX_LAYOUT Layout::AoS +#else +#define MATRIX_LAYOUT LAYOUT +#endif + +/// return the new offset considering the byte aligment settings +size_t nrn_soa_byte_align(size_t i); + +/// This function return the index in a flat array of a matrix coordinate (icnt, isz). +/// The matrix size is (cnt, sz) +/// Depending of the layout some padding can be calculated +int nrn_i_layout(int icnt, int cnt, int isz, int sz, int layout); + +// file data is AoS. ie. +// organized as cnt array instances of mtype each of size sz. +// So input index i refers to i_instance*sz + i_item offset +// Return the corresponding SoA index -- taking into account the +// alignment requirements. Ie. i_instance + i_item*align_cnt. + +int nrn_param_layout(int i, int mtype, Memb_list* ml); +} // namespace coreneuron + diff --git a/coreneuron/io/nrn_checkpoint.cpp b/coreneuron/io/nrn_checkpoint.cpp index ca1029e62..fc995030a 100644 --- a/coreneuron/io/nrn_checkpoint.cpp +++ b/coreneuron/io/nrn_checkpoint.cpp @@ -28,6 +28,7 @@ THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/coreneuron/io/nrn_checkpoint.cpp b/coreneuron/io/nrn_checkpoint.cpp
index ca1029e62..fc995030a 100644
--- a/coreneuron/io/nrn_checkpoint.cpp
+++ b/coreneuron/io/nrn_checkpoint.cpp
@@ -28,6 +28,7 @@ THE POSSIBILITY OF SUCH DAMAGE.
 #include
 #include
 #include
+#include
 #include "coreneuron/sim/multicore.hpp"
 #include "coreneuron/nrniv/nrniv_decl.h"
@@ -60,11 +61,11 @@ class FileHandlerWrap {
     std::fstream G;
     FileHandlerWrap(){};
 
-    void open(const char* filename, bool reorder, std::ios::openmode mode = std::ios::in) {
-        F.open(filename, reorder, mode);
+    void open(const std::string& filename, std::ios::openmode mode = std::ios::in) {
+        F.open(filename, mode);
         std::ostringstream fname;
         fname << filename << ".txt";
-        G.open(fname.str().c_str(), mode);
+        G.open(fname.str(), mode);
     }
 
     void close() {
@@ -143,13 +144,12 @@ int patstimtype;
 
 // output directory for checkpoint
 static const char* output_dir;
-static bool swap_bytes;
 
 static void write_phase2(NrnThread& nt, FileHandlerWrap& file_handle);
 static void write_tqueue(NrnThread& nt, FileHandlerWrap& file_handle);
 static void write_time(const char* dir);
 
-void write_checkpoint(NrnThread* nt, int nb_threads, const char* dir, bool swap_bytes_order) {
+void write_checkpoint(NrnThread* nt, int nb_threads, const char* dir) {
     // empty directory means the option is not enabled
     if (!strlen(dir)) {
         return;
     }
@@ -162,7 +162,6 @@
 #if NRNMPI
     nrnmpi_barrier();
 #endif
-    swap_bytes = swap_bytes_order;
 
     /**
      * if openmp threading needed:
@@ -189,7 +188,7 @@ static void write_phase2(NrnThread& nt, FileHandlerWrap& fh) {
     NrnThreadChkpnt& ntc = nrnthread_chkpnt[nt.id];
     filename << output_dir << "/" << ntc.file_id << "_2.dat";
 
-    fh.open(filename.str().c_str(), swap_bytes, std::ios::out);
+    fh.open(filename.str(), std::ios::out);
     fh.checkpoint(2);
 
     int n_outputgid = 0;  // calculate PreSyn with gid >= 0
@@ -580,10 +579,10 @@ static void write_phase2(NrnThread& nt, FileHandlerWrap& fh) {
 #endif
         if (vtype == VecPlayContinuousType) {
             VecPlayContinuous* vpc = (VecPlayContinuous*)pr;
-            int sz = vector_capacity(vpc->y_);
+            int sz = vpc->y_.size();
             fh << sz << "\n";
-            fh.write_array(vector_vec(vpc->y_), sz);
-            fh.write_array(vector_vec(vpc->t_), sz);
+            fh.write_array(vpc->y_.data(), sz);
+            fh.write_array(vpc->t_.data(), sz);
         } else {
             std::cerr << "Error checkpointing vecplay type" << std::endl;
             assert(0);
         }
@@ -605,7 +604,7 @@ static void write_time(const char* output_dir) {
     std::ostringstream filename;
     FileHandler f;
     filename << output_dir << "/time.dat";
-    f.open(filename.str().c_str(), swap_bytes, std::ios::out);
+    f.open(filename.str(), std::ios::out);
     f.write_array(&t, 1);
     f.close();
 }
@@ -617,7 +616,7 @@ double restore_time(const char* restore_dir) {
     std::ostringstream filename;
     FileHandler f;
     filename << restore_dir << "/time.dat";
-    f.open(filename.str().c_str(), swap_bytes, std::ios::in);
+    f.open(filename.str(), std::ios::in);
     f.read_array(&rtime, 1);
     f.close();
 }
@@ -698,48 +697,40 @@ static void write_tqueue(TQItem* q, NrnThread& nt, FileHandlerWrap& fh) {
 static int patstim_index;
 static double patstim_te;
 
-static void checkpoint_restore_tqitem(int type, NrnThread& nt, FileHandler& fh) {
-    double te;
-    fh.read_array(&te, 1);
-    // printf("restore tqitem type=%d te=%.20g\n", type, te);
+static void checkpoint_restore_tqitem(int type,
+                                      std::shared_ptr<Phase2::EventTypeBase> event,
+                                      NrnThread& nt) {
+    // printf("restore tqitem type=%d time=%.20g\n", type, time);
     switch (type) {
         case NetConType: {
-            int ncindex = fh.read_int();
-            // printf("  NetCon %d\n", ncindex);
-            NetCon* nc = nt.netcons + ncindex;
-            nc->send(te, net_cvode_instance, &nt);
+            auto e = static_cast<Phase2::NetConType_*>(event.get());
+            // printf("  NetCon %d\n", netcon_index);
+            NetCon* nc = nt.netcons + e->netcon_index;
+            nc->send(e->time, net_cvode_instance, &nt);
             break;
         }
         case SelfEventType: {
-            int target_type = fh.read_int();  // not really needed (except for assert below)
-            int pinstance = fh.read_int();
-            int target_instance = fh.read_int();
-            double flag;
-            fh.read_array(&flag, 1);
-            int movable = fh.read_int();
-            int weight_index = fh.read_int();
-            if (target_type == patstimtype) {
+            auto e = static_cast<Phase2::SelfEventType_*>(event.get());
+            if (e->target_type == patstimtype) {
                 if (nt.id == 0) {
-                    patstim_te = te;
+                    patstim_te = e->time;
                 }
                 break;
             }
-            Point_process* pnt = nt.pntprocs + pinstance;
-            // printf("  SelfEvent %d %d %d %g %d %d\n", target_type, pinstance, target_instance,
+            Point_process* pnt = nt.pntprocs + e->point_proc_instance;
+            // printf("  SelfEvent %d %d %d %g %d %d\n", target_type, point_proc_instance, target_instance,
             //        flag, movable, weight_index);
-            assert(target_instance == pnt->_i_instance);
-            assert(target_type == pnt->_type);
-            net_send(nt._vdata + movable, weight_index, pnt, te, flag);
+            nrn_assert(e->target_instance == pnt->_i_instance);
+            nrn_assert(e->target_type == pnt->_type);
+            net_send(nt._vdata + e->movable, e->weight_index, pnt, e->time, e->flag);
             break;
         }
         case PreSynType: {
-            int psindex = fh.read_int();
-            // printf("  PreSyn %d\n", psindex);
-            PreSyn* ps = nt.presyns + psindex;
+            auto e = static_cast<Phase2::PreSynType_*>(event.get());
+            // printf("  PreSyn %d\n", presyn_index);
+            PreSyn* ps = nt.presyns + e->presyn_index;
             int gid = ps->output_index_;
             ps->output_index_ = -1;
-            ps->send(te, net_cvode_instance, &nt);
+            ps->send(e->time, net_cvode_instance, &nt);
             ps->output_index_ = gid;
             break;
         }
@@ -749,13 +740,9 @@
             break;
         }
         case PlayRecordEventType: {
-            int prtype = fh.read_int();
-            if (prtype == VecPlayContinuousType) {
-                VecPlayContinuous* vpc = (VecPlayContinuous*)(nt._vecplay[fh.read_int()]);
-                vpc->e_->send(te, net_cvode_instance, &nt);
-            } else {
-                assert(0);
-            }
+            auto e = static_cast<Phase2::PlayRecordEventType_*>(event.get());
+            VecPlayContinuous* vpc = (VecPlayContinuous*)(nt._vecplay[e->vecplay_index]);
+            vpc->e_->send(e->time, net_cvode_instance, &nt);
             break;
         }
         default: {
@@ -817,38 +804,50 @@ static void write_tqueue(NrnThread& nt, FileHandlerWrap& fh) {
 
 static bool checkpoint_restored_ = false;
 
-void checkpoint_restore_tqueue(NrnThread& nt, FileHandler& fh) {
+// Read a tqueue/checkpoint
+// int: should be equal to the previous n_vecplay
+// n_vecplay:
+// int: last_index
+// int: discon_index
+// int: ubound_index
+// int: patstim_index
+// int: should be -1
+// n_presyn:
+// int: flags of presyn_helper
+// int: should be -1
+// null terminated:
+// int: type
+// array of size 1:
+// double: time
+// ... depends on the type
+// int: should be -1
+// null terminated:
+// int: TO BE DEFINED
+// ... depends on the type
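// For reference: the event payloads consumed above are assumed to be small
// structs provided by Phase2 (a sketch only, inferred from the field accesses
// here; the actual definitions live in coreneuron/io/phase2.hpp and may differ):
//
//     struct EventTypeBase { double time; };
//     struct NetConType_: EventTypeBase { int netcon_index; };
//     struct SelfEventType_: EventTypeBase {
//         int target_type;
//         int point_proc_instance;
//         int target_instance;
//         double flag;
//         int movable;
//         int weight_index;
//     };
//     struct PreSynType_: EventTypeBase { int presyn_index; };
//     struct PlayRecordEventType_: EventTypeBase { int vecplay_index; };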
+void checkpoint_restore_tqueue(NrnThread& nt, const Phase2& p2) {
-    int type;
     checkpoint_restored_ = true;
 
-    // VecPlayContinuous
-    assert(fh.read_int() == nt.n_vecplay);
     // VecPlayContinuous state
     for (int i = 0; i < nt.n_vecplay; ++i) {
         VecPlayContinuous* vpc = (VecPlayContinuous*)nt._vecplay[i];
-        vpc->last_index_ = fh.read_int();
-        vpc->discon_index_ = fh.read_int();
-        vpc->ubound_index_ = fh.read_int();
+        auto& vec = p2.vec_play_continuous[i];
+        vpc->last_index_ = vec.last_index;
+        vpc->discon_index_ = vec.discon_index;
+        vpc->ubound_index_ = vec.ubound_index;
     }
 
     // PatternStim
-    patstim_index = fh.read_int();  // PatternStim
+    patstim_index = p2.patstim_index;  // PatternStim
     if (nt.id == 0) {
         patstim_te = -1.0;  // changed if relevant SelfEvent;
     }
 
-    assert(fh.read_int() == -1);  // -1 PreSyn ConditionEvent flags
     for (int i = 0; i < nt.n_presyn; ++i) {
-        nt.presyns_helper[i].flag_ = fh.read_int();
-    }
-
-    assert(fh.read_int() == -1);  // -1 TQItems from atomic_dq
-    while ((type = fh.read_int()) != 0) {
-        checkpoint_restore_tqitem(type, nt, fh);
+        nt.presyns_helper[i].flag_ = p2.preSynConditionEventFlags[i];
     }
 
-    assert(fh.read_int() == -1);  // -1 TQItems from binq_
-    while ((type - fh.read_int()) != 0) {
-        checkpoint_restore_tqitem(type, nt, fh);
+    for (const auto& event: p2.events) {
+        checkpoint_restore_tqitem(event.first, event.second, nt);
     }
 }
diff --git a/coreneuron/io/nrn_checkpoint.hpp b/coreneuron/io/nrn_checkpoint.hpp
index 1b62da337..2371de1e8 100644
--- a/coreneuron/io/nrn_checkpoint.hpp
+++ b/coreneuron/io/nrn_checkpoint.hpp
@@ -28,6 +28,9 @@ THE POSSIBILITY OF SUCH DAMAGE.
 #ifndef _H_NRNCHECKPOINT_
 #define _H_NRNCHECKPOINT_
+
+#include "coreneuron/io/phase2.hpp"
+
 namespace coreneuron {
 class NrnThread;
 class FileHandler;
@@ -36,10 +39,9 @@ extern bool nrn_checkpoint_arg_exists;
 
 void write_checkpoint(NrnThread* nt,
                       int nb_threads,
-                      const char* dir,
-                      bool swap_bytes_order = false);
+                      const char* dir);
 
-void checkpoint_restore_tqueue(NrnThread&, FileHandler&);
+void checkpoint_restore_tqueue(NrnThread&, const Phase2& p2);
 
 int* inverse_permute(int* p, int n);
 void nrn_inverse_i_layout(int i, int& icnt, int cnt, int& isz, int sz, int layout);
diff --git a/coreneuron/io/nrn_filehandler.cpp b/coreneuron/io/nrn_filehandler.cpp
index 4631a41a4..8045823a3 100644
--- a/coreneuron/io/nrn_filehandler.cpp
+++ b/coreneuron/io/nrn_filehandler.cpp
@@ -31,24 +31,23 @@ THE POSSIBILITY OF SUCH DAMAGE.
#include "coreneuron/nrnconf.h" namespace coreneuron { -FileHandler::FileHandler(const char* filename, bool reorder) { - this->open(filename, reorder); - checkpoint(0); - stored_chkpnt = 0; +FileHandler::FileHandler(const std::string& filename) + : chkpnt(0), stored_chkpnt(0) { + this->open(filename); } -bool FileHandler::file_exist(const char* filename) const { +bool FileHandler::file_exist(const std::string& filename) const { struct stat buffer; - return (stat(filename, &buffer) == 0); + return (stat(filename.c_str(), &buffer) == 0); } -void FileHandler::open(const char* filename, bool reorder, std::ios::openmode mode) { +void FileHandler::open(const std::string& filename, std::ios::openmode mode) { nrn_assert((mode & (std::ios::in | std::ios::out))); - reorder_bytes = reorder; close(); F.open(filename, mode | std::ios::binary); - if (!F.is_open()) - fprintf(stderr, "cannot open file %s\n", filename); + if (!F.is_open()) { + std::cerr << "cannot open file '" << filename << "'" << std::endl; + } nrn_assert(F.is_open()); current_mode = mode; char version[256]; diff --git a/coreneuron/io/nrn_filehandler.hpp b/coreneuron/io/nrn_filehandler.hpp index b2336ce3c..3242244cd 100644 --- a/coreneuron/io/nrn_filehandler.hpp +++ b/coreneuron/io/nrn_filehandler.hpp @@ -34,8 +34,6 @@ THE POSSIBILITY OF SUCH DAMAGE. #include #include -#include "coreneuron/utils/endianness.hpp" -#include "coreneuron/utils/swap_endian.h" #include "coreneuron/utils/nrn_assert.h" namespace coreneuron { @@ -43,11 +41,7 @@ namespace coreneuron { * * Error handling is simple: abort()! * - * Reader will abort() if native integer size is not 4 bytes, - * or if need a convoluted byte reordering between native - * and file endianness. Complicated endianness configurations and - * the possibility of non-IEEE floating point representations - * are happily ignored. + * Reader will abort() if native integer size is not 4 bytes. * * All automatic allocations performed by read_int_array() * and read_dbl_array() methods use new []. @@ -58,7 +52,6 @@ const int max_line_length = 1024; class FileHandler { std::fstream F; //!< File stream associated with reader. - bool reorder_bytes; //!< True if we need to reorder for native endiannes. std::ios_base::openmode current_mode; //!< File open mode (not stored in fstream) int chkpnt; //!< Current checkpoint number state. int stored_chkpnt; //!< last "remembered" checkpoint number state. @@ -75,20 +68,20 @@ class FileHandler { FileHandler& operator=(const FileHandler&); public: - FileHandler() : reorder_bytes(false), chkpnt(0), stored_chkpnt(0) { + FileHandler() : chkpnt(0), stored_chkpnt(0) { } - explicit FileHandler(const char* filename, bool reorder = false); + explicit FileHandler(const std::string& filename); /** Preserving chkpnt state, move to a new file. */ - void open(const char* filename, bool reorder, std::ios::openmode mode = std::ios::in); + void open(const std::string& filename, std::ios::openmode mode = std::ios::in); /** Is the file not open */ bool fail() const { return F.fail(); } - bool file_exist(const char* filename) const; + bool file_exist(const std::string& filename) const; /** nothing more to read */ bool eof(); @@ -194,8 +187,6 @@ class FileHandler { break; case read: F.read((char*)p, count * sizeof(T)); - if (reorder_bytes) - endian::swap_endian_range(p, p + count); break; } @@ -205,7 +196,7 @@ class FileHandler { // convenience interfaces: - /** Read and optionally allocate an integer array of fixed length. */ + /** Read an integer array of fixed length. 
*/
     template <typename T>
     inline T* read_array(T* p, size_t count) {
         return parse_array(p, count, read);
     }
@@ -217,6 +208,13 @@ class FileHandler {
         return parse_array(new T[count], count, read);
     }
 
+    template <typename T>
+    inline std::vector<T> read_vector(size_t count) {
+        std::vector<T> vec(count);
+        parse_array(vec.data(), count, read);
+        return vec;
+    }
+
     /** Close currently open file. */
     void close();
@@ -226,13 +224,7 @@
         nrn_assert(F.is_open());
         nrn_assert(current_mode & std::ios::out);
         write_checkpoint();
-        if (reorder_bytes) {
-            endian::swap_endian_range(p, p + nb_elements);
-            F.write((const char*)p, nb_elements * (sizeof(T)));
-            endian::swap_endian_range(p, p + nb_elements);
-        } else {
-            F.write((const char*)p, nb_elements * (sizeof(T)));
-        }
+        F.write((const char*)p, nb_elements * (sizeof(T)));
         nrn_assert(!F.fail());
     }
@@ -258,9 +250,6 @@
         } else {
             memcpy(temp_cpy, p, nb_elements * sizeof(T) * nb_lines);
         }
-        if (reorder_bytes) {
-            endian::swap_endian_range(temp_cpy, temp_cpy + nb_elements * nb_lines);
-        }
         // AoS never uses padding, SoA is translated above, so one write
         // operation is enough in both cases
         F.write((const char*)temp_cpy, nb_elements * sizeof(T) * nb_lines);
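// Example (illustrative): the new read_vector mirrors read_array but returns an
// owning std::vector, so callers no longer need to pair read_array with
// delete[]. A hypothetical caller (file name and count made up for the example):
//
//     FileHandler fh("1_2.dat");
//     auto gids = fh.read_vector<int>(n_presyn);  // instead of int* g = fh.read_array<int>(n_presyn)
//     // use gids.data() / gids.size() as before; storage is freed automatically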
#include "coreneuron/permute/cellorder.hpp" #include "coreneuron/io/nrnsection_mapping.hpp" #include "coreneuron/utils/nrnoc_aux.hpp" +#include "coreneuron/io/phase1.hpp" +#include "coreneuron/io/phase2.hpp" // callbacks into nrn/src/nrniv/nrnbbcore_write.cpp #include "coreneuron/sim/fast_imem.hpp" -#include "coreneuron/io/nrn2core_direct.h" #include "coreneuron/coreneuron.hpp" @@ -71,67 +72,8 @@ void (*nrn2core_get_partrans_setup_info_)(int tid, int*& sid_src, int*& v_indices); -int (*nrn2core_get_dat1_)(int tid, - int& n_presyn, - int& n_netcon, - int*& output_gid, - int*& netcon_srcgid); - -int (*nrn2core_get_dat2_1_)(int tid, - int& ngid, - int& n_real_gid, - int& nnode, - int& ndiam, - int& nmech, - int*& tml_index, - int*& ml_nodecount, - int& nidata, - int& nvdata, - int& nweight); - -int (*nrn2core_get_dat2_2_)(int tid, - int*& v_parent_index, - double*& a, - double*& b, - double*& area, - double*& v, - double*& diamvec); - -int (*nrn2core_get_dat2_mech_)(int tid, - size_t i, - int dsz_inst, - int*& nodeindices, - double*& data, - int*& pdata); - -int (*nrn2core_get_dat2_3_)(int tid, - int nweight, - int*& output_vindex, - double*& output_threshold, - int*& netcon_pnttype, - int*& netcon_pntindex, - double*& weights, - double*& delays); - -int (*nrn2core_get_dat2_corepointer_)(int tid, int& n); - -int (*nrn2core_get_dat2_corepointer_mech_)(int tid, - int type, - int& icnt, - int& dcnt, - int*& iarray, - double*& darray); - -int (*nrn2core_get_dat2_vecplay_)(int tid, int& n); - -int (*nrn2core_get_dat2_vecplay_inst_)(int tid, - int i, - int& vptype, - int& mtype, - int& ix, - int& sz, - double*& yvec, - double*& tvec); + + void (*nrn2core_get_trajectory_requests_)(int tid, int& bsize, @@ -221,9 +163,6 @@ namespace coreneuron { extern corenrn_parameters corenrn_param; int nrn_setup_multiple = 1; /* default */ int nrn_setup_extracon = 0; /* default */ -static int maxgid; -// no gid in any file can be greater than maxgid. maxgid will be set so that -// maxgid * nrn_setup_multiple < 0x7fffffff // nrn_setup_extracon extra connections per NrnThread. // i.e. nrn_setup_extracon * nrn_setup_multiple * nrn_nthread @@ -250,31 +189,14 @@ static int maxgid; // This is done between phase1 and phase2 during the call to // determine_inputpresyn(). -// If MATRIX_LAYOUT is 1 then a,b,d,rhs,v,area is not padded using NRN_SOA_PAD -// When MATRIX_LAYOUT is 0 then mechanism pdata index values into _actual_v -// and _actual_area data need to be updated. -#if !defined(LAYOUT) -#define LAYOUT 1 -#define MATRIX_LAYOUT 1 -#else -#define MATRIX_LAYOUT LAYOUT -#endif - -#if !defined(NRN_SOA_PAD) -// for layout 0, every range variable array must have a size which -// is a multiple of NRN_SOA_PAD doubles -#define NRN_SOA_PAD 8 -#endif - #ifdef _OPENMP -static MUTDEC +static OMP_Mutex mut; #endif - static size_t - model_size(void); +static size_t model_size(void); /// Vector of maps for negative presyns -std::vector > neg_gid2out; +std::vector> neg_gid2out; /// Maps for ouput and input presyns std::map gid2out; std::map gid2in; @@ -285,26 +207,6 @@ std::vector netcon_in_presyn_order_; /// Only for setup vector of netcon source gids std::vector netcon_srcgid; - -// Wrap read_phase1 and read_phase2 calls to allow using nrn_multithread_job. -// Args marshaled by store_phase_args are used by phase1_wrapper -// and phase2_wrapper. 
-static void store_phase_args(int ngroup, - int* gidgroups, - int* imult, - FileHandler* file_reader, - const char* path, - const char* restore_path, - bool byte_swap) { - ngroup_w = ngroup; - gidgroups_w = gidgroups; - imult_w = imult; - file_reader_w = file_reader; - path_w = path; - restore_path_w = restore_path; - byte_swap_w = byte_swap; -} - /* read files.dat file and distribute cellgroups to all mpi ranks */ void nrn_read_filesdat(int& ngrp, int*& grp, int multiple, int*& imult, const char* filesdat) { patstimtype = nrn_get_mechtype("PatternStim"); @@ -373,145 +275,6 @@ void nrn_read_filesdat(int& ngrp, int*& grp, int multiple, int*& imult, const ch fclose(fp); } -static void read_phase1(int* output_gid, int imult, NrnThread& nt); - -static void* direct_phase1(NrnThread* n) { - NrnThread& nt = *n; - int* output_gid; - int valid = - (*nrn2core_get_dat1_)(nt.id, nt.n_presyn, nt.n_netcon, output_gid, netcon_srcgid[nt.id]); - if (valid) { - read_phase1(output_gid, 0, nt); - } - return nullptr; -} - -void read_phase1(FileHandler& F, int imult, NrnThread& nt) { - assert(!F.fail()); - nt.n_presyn = F.read_int(); /// Number of PreSyn-s in NrnThread nt - nt.n_netcon = F.read_int(); /// Number of NetCon-s in NrnThread nt - - int* output_gid = F.read_array(nt.n_presyn); - // the extra netcon_srcgid will be filled in later - netcon_srcgid[nt.id] = new int[nt.n_netcon + nrn_setup_extracon]; - F.read_array(netcon_srcgid[nt.id], nt.n_netcon); - F.close(); - - read_phase1(output_gid, imult, nt); -} - -static void read_phase1(int* output_gid, int imult, NrnThread& nt) { - int zz = imult * maxgid; // offset for each gid - // offset the (non-negative) gids according to multiple - // make sure everything fits into gid space. - for (int i = 0; i < nt.n_presyn; ++i) { - if (output_gid[i] >= 0) { - nrn_assert(output_gid[i] < maxgid); - output_gid[i] += zz; - } - } - - nt.presyns = new PreSyn[nt.n_presyn]; - nt.netcons = new NetCon[nt.n_netcon + nrn_setup_extracon]; - nt.presyns_helper = (PreSynHelper*)ecalloc_align(nt.n_presyn, sizeof(PreSynHelper)); - - int* nc_srcgid = netcon_srcgid[nt.id]; - for (int i = 0; i < nt.n_netcon; ++i) { - if (nc_srcgid[i] >= 0) { - nrn_assert(nc_srcgid[i] < maxgid); - nc_srcgid[i] += zz; - } - } - - for (int i = 0; i < nt.n_presyn; ++i) { - int gid = output_gid[i]; - if (gid == -1) - continue; - - // Note that the negative (type, index) - // coded information goes into the neg_gid2out[tid] hash table. - // See netpar.cpp for the netpar_tid_... function implementations. 
- // Both that table and the process wide gid2out table can be deleted - // before the end of setup - - MUTLOCK - /// Put gid into the gid2out hash table with correspondent output PreSyn - /// Or to the negative PreSyn map - PreSyn* ps = nt.presyns + i; - if (gid >= 0) { - char m[200]; - if (gid2in.find(gid) != gid2in.end()) { - sprintf(m, "gid=%d already exists as an input port", gid); - hoc_execerror( - m, - "Setup all the output ports on this process before using them as input ports."); - } - if (gid2out.find(gid) != gid2out.end()) { - sprintf(m, "gid=%d already exists on this process as an output port", gid); - hoc_execerror(m, 0); - } - gid2out[gid] = ps; - ps->gid_ = gid; - ps->output_index_ = gid; - } else { - nrn_assert(neg_gid2out[nt.id].find(gid) == neg_gid2out[nt.id].end()); - neg_gid2out[nt.id][gid] = ps; - } - MUTUNLOCK - - if (gid < 0) { - nt.presyns[i].output_index_ = -1; - } - } - - delete[] output_gid; - - if (nrn_setup_extracon > 0) { - // very simplistic - // Use this threads positive source gids - zz in nt.netcon order as the - // source gids for extracon. - // The edge cases are: - // The 0th duplicate uses uses source gids for the last duplicate. - // If there are fewer positive source gids than extracon, then keep - // rotating through the nt.netcon . - // If there are no positive source gids, use a source gid of -1. - // Would not be difficult to modify so that random positive source was - // used, and/or random connect to another duplicate. - // Note that we increment the nt.n_netcon at the end of this function. - int sidoffset; // how much to increment the corresponding positive gid - // like ring connectivity - if (imult > 0) { - sidoffset = -maxgid; - } else if (nrn_setup_multiple > 1) { - sidoffset = (nrn_setup_multiple - 1) * maxgid; - } else { - sidoffset = 0; - } - // set up the extracon srcgid_ - int* nc_srcgid = netcon_srcgid[nt.id]; - int j = 0; // rotate through the n_netcon netcon_srcgid - for (int i = 0; i < nrn_setup_extracon; ++i) { - int sid = -1; - for (int k = 0; k < nt.n_netcon; ++k) { - // potentially rotate j through the entire n_netcon but no further - sid = nc_srcgid[j]; - j = (j + 1) % nt.n_netcon; - if (sid >= 0) { - break; - } - } - if (sid < 0) { // only connect to real cells. - sid = -1; - } else { - sid += sidoffset; - } - nc_srcgid[i + nt.n_netcon] = sid; - } - // finally increment the n_netcon - nt.n_netcon += nrn_setup_extracon; - } -} - void netpar_tid_gid2ps(int tid, int gid, PreSyn** ps, InputPreSyn** psi) { /// for gid < 0 returns the PreSyn* in the thread (tid) specific map. *ps = nullptr; @@ -676,35 +439,30 @@ void nrn_setup_cleanup() { void nrn_setup(const char* filesdat, bool is_mapping_needed, - bool byte_swap, + bool /* byte_swap */, bool run_setup_cleanup, const char* datpath, const char* restore_path, double* mindelay) { - /// Number of local cell groups - int ngroup = 0; - - /// Array of cell group numbers (indices) - int* gidgroups = nullptr; - - /// Array of duplicate indices. Normally, with nrn_setup_multiple=1, - // they are ngroup values of 0. - int* imult = nullptr; - double time = nrn_wtime(); - maxgid = 0x7fffffff / nrn_setup_multiple; + int ngroup; + int* gidgroups; + int* imult; nrn_read_filesdat(ngroup, gidgroups, nrn_setup_multiple, imult, filesdat); + UserParams userParams(ngroup, + gidgroups, + imult, + datpath, + strlen(restore_path) == 0 ? datpath : restore_path); + - if (!MUTCONSTRUCTED) { - MUTCONSTRUCT(1) - } // temporary bug work around. 
If any process has multiple threads, no
    // process can have a single thread. So, for now, if one thread, make two.
    // Fortunately, empty threads work fine.
    // Allocate NrnThread* nrn_threads of size ngroup (minimum 2)
    // Note that rank with 0 dataset/cellgroup works fine
-    nrn_threads_create(ngroup <= 1 ? 2 : ngroup);
+    nrn_threads_create(userParams.ngroup <= 1 ? 2 : userParams.ngroup);
 
     nrnthread_chkpnt = new NrnThreadChkpnt[nrn_nthread];
 
@@ -721,7 +479,7 @@ void nrn_setup(const char* filesdat,
 
     /// Reserve vector of maps of size ngroup for negative gid-s
     /// std::vector< std::map<int, PreSyn*> > neg_gid2out;
-    neg_gid2out.resize(ngroup);
+    neg_gid2out.resize(userParams.ngroup);
 
     // bug fix. gid2out is cumulative over all threads and so do not
     // know how many there are til after phase1
@@ -737,35 +495,39 @@
     for (int i = 0; i < nrn_nthread; ++i)
         netcon_srcgid[i] = nullptr;
 
-    FileHandler* file_reader = new FileHandler[ngroup];
-
-    /* nrn_multithread_job supports serial, pthread, and openmp. */
-    store_phase_args(ngroup, gidgroups, imult, file_reader, datpath,
-                     strlen(restore_path) != 0 ? restore_path : datpath, byte_swap);
-
     // gap junctions
     if (nrn_have_gaps) {
         assert(nrn_setup_multiple == 1);
         nrn_partrans::transfer_thread_data_ = new nrn_partrans::TransferThreadData[nrn_nthread];
-        nrn_partrans::setup_info_ = new nrn_partrans::SetupInfo[ngroup];
+        nrn_partrans::setup_info_ = new nrn_partrans::SetupInfo[userParams.ngroup];
         if (!corenrn_embedded) {
-            coreneuron::phase_wrapper<coreneuron::gap>();
+            coreneuron::phase_wrapper<coreneuron::gap>(userParams);
         } else {
             nrn_assert(sizeof(nrn_partrans::sgid_t) == sizeof(int));
-            for (int i = 0; i < ngroup; ++i) {
+            for (int i = 0; i < userParams.ngroup; ++i) {
                 nrn_partrans::SetupInfo& si = nrn_partrans::setup_info_[i];
                 (*nrn2core_get_partrans_setup_info_)(i, si.ntar, si.nsrc, si.type, si.ix_vpre,
                                                      si.sid_target, si.sid_src, si.v_indices);
             }
         }
-        nrn_partrans::gap_mpi_setup(ngroup);
+        nrn_partrans::gap_mpi_setup(userParams.ngroup);
     }
 
     if (!corenrn_embedded) {
-        coreneuron::phase_wrapper<(coreneuron::phase)1>();  /// If not the xlc compiler, it should
-                                                            /// be coreneuron::phase::one
+        coreneuron::phase_wrapper<(coreneuron::phase)1>(userParams);
     } else {
-        nrn_multithread_job(direct_phase1);
+        nrn_multithread_job([](NrnThread* n) {
+            Phase1 p1;
+            p1.read_direct(n->id);
+            NrnThread& nt = *n;
+            {
+#ifdef _OPENMP
+                p1.populate(nt, 0, mut);
+#else
+                p1.populate(nt, 0);
+#endif
+            }
+        });
     }
 
     // from the gid2out map and the netcon_srcgid array,
@@ -776,10 +538,10 @@
     // read the rest of the gidgroup's data and complete the setup for each
     // thread.
     /* nrn_multithread_job supports serial, pthread, and openmp. */
-    coreneuron::phase_wrapper<(coreneuron::phase)2>(corenrn_embedded);
+    coreneuron::phase_wrapper<(coreneuron::phase)2>(userParams, corenrn_embedded);
 
     if (is_mapping_needed)
-        coreneuron::phase_wrapper<(coreneuron::phase)3>();
+        coreneuron::phase_wrapper<(coreneuron::phase)3>(userParams);
 
     *mindelay = set_mindelay(*mindelay);
 
@@ -801,11 +563,9 @@
     /// which is only executed by StochKV.c.
nrn_mk_table_check(); // was done in nrn_thread_memblist_setup in multicore.c - delete[] file_reader; - model_size(); - delete[] gidgroups; - delete[] imult; + delete[] userParams.gidgroups; + delete[] userParams.imult; if (nrnmpi_myid == 0 && !corenrn_param.is_quiet()) { printf(" Setup Done : %.2lf seconds \n", nrn_wtime() - time); @@ -819,8 +579,12 @@ void setup_ThreadData(NrnThread& nt) { if (mf.thread_size_) { ml->_thread = (ThreadDatum*)ecalloc_align(mf.thread_size_, sizeof(ThreadDatum)); if (mf.thread_mem_init_) { - MUTLOCK (*mf.thread_mem_init_)(ml->_thread); - MUTUNLOCK + { +#ifdef _OPENMP + const std::lock_guard lock(mut); +#endif + (*mf.thread_mem_init_)(ml->_thread); + } } } else { ml->_thread = nullptr; @@ -828,12 +592,13 @@ void setup_ThreadData(NrnThread& nt) { } } -void read_phasegap(FileHandler& F, int imult, NrnThread& nt) { - nrn_assert(imult == 0); +void read_phasegap(NrnThread& nt, UserParams& userParams) { + nrn_assert(userParams.imult[nt.id] == 0); nrn_partrans::SetupInfo& si = nrn_partrans::setup_info_[nt.id]; si.ntar = 0; si.nsrc = 0; + auto& F = userParams.file_reader[nt.id]; if (F.fail()) { return; } @@ -864,49 +629,6 @@ void read_phasegap(FileHandler& F, int imult, NrnThread& nt) { #endif } -int nrn_soa_padded_size(int cnt, int layout) { - return soa_padded_size(cnt, layout); -} - -static size_t nrn_soa_byte_align(size_t i) { - if (LAYOUT == 0) { - size_t dbl_align = NRN_SOA_BYTE_ALIGN / sizeof(double); - size_t rem = i % dbl_align; - if (rem) { - i += dbl_align - rem; - } - assert((i * sizeof(double)) % NRN_SOA_BYTE_ALIGN == 0); - } - return i; -} - -// file data is AoS. ie. -// organized as cnt array instances of mtype each of size sz. -// So input index i refers to i_instance*sz + i_item offset -// Return the corresponding SoA index -- taking into account the -// alignment requirements. Ie. i_instance + i_item*align_cnt. - -int nrn_param_layout(int i, int mtype, Memb_list* ml) { - int layout = corenrn.get_mech_data_layout()[mtype]; - if (layout == 1) { - return i; - } - assert(layout == 0); - int sz = corenrn.get_prop_param_size()[mtype]; - return nrn_i_layout(i / sz, ml->nodecount, i % sz, sz, layout); -} - -int nrn_i_layout(int icnt, int cnt, int isz, int sz, int layout) { - if (layout == 1) { - return icnt * sz + isz; - } else if (layout == 0) { - int padded_cnt = nrn_soa_padded_size(cnt, layout); // may want to factor out to save time - return icnt + isz * padded_cnt; - } - assert(0); - return 0; -} - // This function is related to nrn_dblpntr2nrncore in Neuron to determine which values should // be transferred from CoreNeuron. Types correspond to the value to be transferred based on // mech_type enum or non-artificial cell mechanisms. 
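// Example (illustrative): the std::lock_guard in setup_ThreadData above and the
// OMP_Mutex passed to Phase1::populate assume that OMP_Mutex (declared in
// coreneuron/utils/nrnmutdec.h) satisfies BasicLockable. A minimal sketch of
// such a wrapper; the real definition may differ:
//
//     #include <omp.h>
//     class OMP_Mutex {
//       public:
//         OMP_Mutex() { omp_init_lock(&lock_); }
//         ~OMP_Mutex() { omp_destroy_lock(&lock_); }
//         void lock() { omp_set_lock(&lock_); }      // used by std::lock_guard
//         void unlock() { omp_unset_lock(&lock_); }
//       private:
//         omp_lock_t lock_;
//     };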
@@ -960,35 +682,6 @@ void nrn_inverse_i_layout(int i, int& icnt, int cnt, int& isz, int sz, int layou } } -template -inline void mech_layout(FileHandler& F, T* data, int cnt, int sz, int layout) { - if (layout == 1) { /* AoS */ - if (corenrn_embedded) { - return; - } - F.read_array(data, cnt * sz); - } else if (layout == 0) { /* SoA */ - int align_cnt = nrn_soa_padded_size(cnt, layout); - T* d; - if (corenrn_embedded) { - d = new T[cnt * sz]; - for (int i = 0; i < cnt; ++i) { - for (int j = 0; j < sz; ++j) { - d[i * sz + j] = data[i * sz + j]; - } - } - } else { - d = F.read_array(cnt * sz); - } - for (int i = 0; i < cnt; ++i) { - for (int j = 0; j < sz; ++j) { - data[i + j * align_cnt] = d[i * sz + j]; - } - } - delete[] d; - } -} - /** * Cleanup global ion map created during mechanism registration * @@ -1204,959 +897,33 @@ void delete_trajectory_requests(NrnThread& nt) { } } -void read_phase2(FileHandler& F, int imult, NrnThread& nt) { - bool direct = corenrn_embedded; - if (!direct) { - assert(!F.fail()); // actually should assert that it is open - } - nrn_assert(imult >= 0); // avoid imult unused warning - NrnThreadChkpnt& ntc = nrnthread_chkpnt[nt.id]; - ntc.file_id = gidgroups_w[nt.id]; - - int n_outputgid, ndiam, nmech, *tml_index, *ml_nodecount; - if (direct) { - int nidata, nvdata; - (*nrn2core_get_dat2_1_)(nt.id, n_outputgid, nt.ncell, nt.end, ndiam, nmech, tml_index, - ml_nodecount, nidata, nvdata, nt.n_weight); - nt._nidata = nidata; - nt._nvdata = nvdata; - } else { - n_outputgid = F.read_int(); - nt.ncell = F.read_int(); - nt.end = F.read_int(); - ndiam = F.read_int(); // 0 if not needed, else nt.end - nmech = F.read_int(); - tml_index = new int[nmech]; - ml_nodecount = new int[nmech]; - int diff_mech_count = 0; - for (int i = 0; i < nmech; ++i) { - tml_index[i] = F.read_int(); - ml_nodecount[i] = F.read_int(); - if (std::any_of(corenrn.get_different_mechanism_type().begin(), - corenrn.get_different_mechanism_type().end(), - [&](int e) { return e == tml_index[i]; })) { - if (nrnmpi_myid == 0) { - printf("Error: %s is a different MOD file than used by NEURON!\n", - nrn_get_mechname(tml_index[i])); - } - diff_mech_count++; - } - } - - if (diff_mech_count > 0) { - if (nrnmpi_myid == 0) { - printf( - "Error : NEURON and CoreNEURON must use same mod files for compatibility, %d different mod file(s) found. Re-compile special and special-core!\n", - diff_mech_count); - nrn_abort(1); - } - } - - nt._nidata = F.read_int(); - nt._nvdata = F.read_int(); - nt.n_weight = F.read_int(); - } - -#if CHKPNTDEBUG - ntc.n_outputgids = n_outputgid; - ntc.nmech = nmech; -#endif - if (!direct) { - nrn_assert(n_outputgid > 0); // avoid n_outputgid unused warning - } - - /// Checkpoint in coreneuron is defined for both phase 1 and phase 2 since they are written - /// together - // printf("ncell=%d end=%d nmech=%d\n", nt.ncell, nt.end, nmech); - // printf("nart=%d\n", nart); - nt._ml_list = (Memb_list**)ecalloc_align(corenrn.get_memb_funcs().size(), sizeof(Memb_list*)); - -#if CHKPNTDEBUG - ntc.mlmap = new Memb_list_chkpnt*[memb_func.size()]; - for (int i = 0; i < _memb_func.size(); ++i) { - ntc.mlmap[i] = nullptr; - } -#endif - - int shadow_rhs_cnt = 0; - nt.shadow_rhs_cnt = 0; - - nt.stream_id = 0; - nt.compute_gpu = 0; - auto& nrn_prop_param_size_ = corenrn.get_prop_param_size(); - auto& nrn_prop_dparam_size_ = corenrn.get_prop_dparam_size(); - -/* read_phase2 is being called from openmp region - * and hence we can set the stream equal to current thread id. 
- * In fact we could set gid as stream_id when we will have nrn threads - * greater than number of omp threads. - */ -#if defined(_OPENMP) - nt.stream_id = omp_get_thread_num(); -#endif - auto& memb_func = corenrn.get_memb_funcs(); - NrnThreadMembList* tml_last = nullptr; - for (int i = 0; i < nmech; ++i) { - auto tml = (NrnThreadMembList*)emalloc_align(sizeof(NrnThreadMembList)); - tml->next = nullptr; - tml->index = tml_index[i]; - - tml->ml = (Memb_list*)ecalloc_align(1, sizeof(Memb_list)); - tml->ml->_net_receive_buffer = nullptr; - tml->ml->_net_send_buffer = nullptr; - tml->ml->_permute = nullptr; - if (memb_func[tml->index].alloc == nullptr) { - hoc_execerror(memb_func[tml->index].sym, "mechanism does not exist"); - } - tml->ml->nodecount = ml_nodecount[i]; - if (!memb_func[tml->index].sym) { - printf("%s (type %d) is not available\n", nrn_get_mechname(tml->index), tml->index); - exit(1); - } - tml->ml->_nodecount_padded = - nrn_soa_padded_size(tml->ml->nodecount, corenrn.get_mech_data_layout()[tml->index]); - if (memb_func[tml->index].is_point && corenrn.get_is_artificial()[tml->index] == 0) { - // Avoid race for multiple PointProcess instances in same compartment. - if (tml->ml->nodecount > shadow_rhs_cnt) { - shadow_rhs_cnt = tml->ml->nodecount; - } - } - - nt._ml_list[tml->index] = tml->ml; -#if CHKPNTDEBUG - Memb_list_chkpnt* mlc = new Memb_list_chkpnt; - ntc.mlmap[tml->index] = mlc; -#endif - // printf("index=%d nodecount=%d membfunc=%s\n", tml->index, tml->ml->nodecount, - // memb_func[tml->index].sym?memb_func[tml->index].sym:"None"); - if (nt.tml) { - tml_last->next = tml; - } else { - nt.tml = tml; - } - tml_last = tml; - } - delete[] tml_index; - delete[] ml_nodecount; - - if (shadow_rhs_cnt) { - nt._shadow_rhs = - (double*)ecalloc_align(nrn_soa_padded_size(shadow_rhs_cnt, 0), sizeof(double)); - nt._shadow_d = - (double*)ecalloc_align(nrn_soa_padded_size(shadow_rhs_cnt, 0), sizeof(double)); - nt.shadow_rhs_cnt = shadow_rhs_cnt; - } - - nt._data = nullptr; // allocated below after padding - nt.mapping = nullptr; // section segment mapping - - if (nt._nidata) - nt._idata = (int*)ecalloc(nt._nidata, sizeof(int)); - else - nt._idata = nullptr; - // see patternstim.cpp - int extra_nv = (&nt == nrn_threads) ? nrn_extra_thread0_vdata : 0; - if (nt._nvdata + extra_nv) - nt._vdata = (void**)ecalloc_align(nt._nvdata + extra_nv, sizeof(void*)); - else - nt._vdata = nullptr; - // printf("_nidata=%d _nvdata=%d\n", nt._nidata, nt._nvdata); - - // The data format begins with the matrix data - int ne = nrn_soa_padded_size(nt.end, MATRIX_LAYOUT); - size_t offset = 6 * ne; - - if (ndiam) { - // in the rare case that a mechanism has dparam with diam semantics - // then actual_diam array added after matrix in nt._data - // Generally wasteful since only a few diam are pointed to. - // Probably better to move the diam semantics to the p array of the mechanism - offset += ne; - } - - // Memb_list.data points into the nt.data array. 
- // Also count the number of Point_process - int npnt = 0; - for (auto tml = nt.tml; tml; tml = tml->next) { - Memb_list* ml = tml->ml; - int type = tml->index; - int layout = corenrn.get_mech_data_layout()[type]; - int n = ml->nodecount; - int sz = nrn_prop_param_size_[type]; - offset = nrn_soa_byte_align(offset); - ml->data = (double*)0 + offset; // adjust below since nt._data not allocated - offset += nrn_soa_padded_size(n, layout) * sz; - if (corenrn.get_pnt_map()[type] > 0) { - npnt += n; - } - } - nt.pntprocs = (Point_process*)ecalloc_align( - npnt, sizeof(Point_process)); // includes acell with and without gid - nt.n_pntproc = npnt; - // printf("offset=%ld\n", offset); - nt._ndata = offset; - - // now that we know the effect of padding, we can allocate data space, - // fill matrix, and adjust Memb_list data pointers - nt._data = (double*)ecalloc_align(nt._ndata, sizeof(double)); - nt._actual_rhs = nt._data + 0 * ne; - nt._actual_d = nt._data + 1 * ne; - nt._actual_a = nt._data + 2 * ne; - nt._actual_b = nt._data + 3 * ne; - nt._actual_v = nt._data + 4 * ne; - nt._actual_area = nt._data + 5 * ne; - nt._actual_diam = ndiam ? nt._data + 6 * ne : nullptr; - for (auto tml = nt.tml; tml; tml = tml->next) { - Memb_list* ml = tml->ml; - ml->data = nt._data + (ml->data - (double*)0); - } - - // matrix info - nt._v_parent_index = (int*)ecalloc_align(nt.end, sizeof(int)); - if (direct) { - (*nrn2core_get_dat2_2_)(nt.id, nt._v_parent_index, nt._actual_a, nt._actual_b, - nt._actual_area, nt._actual_v, nt._actual_diam); - } else { - F.read_array(nt._v_parent_index, nt.end); - F.read_array(nt._actual_a, nt.end); - F.read_array(nt._actual_b, nt.end); - F.read_array(nt._actual_area, nt.end); - F.read_array(nt._actual_v, nt.end); - if (ndiam) { - F.read_array(nt._actual_diam, nt.end); - } - } -#if CHKPNTDEBUG - ntc.parent = new int[nt.end]; - memcpy(ntc.parent, nt._v_parent_index, nt.end * sizeof(int)); - ntc.area = new double[nt.end]; - memcpy(ntc.area, nt._actual_area, nt.end * sizeof(double)); -#endif - - int synoffset = 0; - std::vector pnt_offset(memb_func.size()); - - // All the mechanism data and pdata. - // Also fill in the pnt_offset - // Complete spec of Point_process except for the acell presyn_ field. - int itml = 0; - int dsz_inst = 0; - for (auto tml = nt.tml; tml; tml = tml->next, ++itml) { - int type = tml->index; - Memb_list* ml = tml->ml; - int is_art = corenrn.get_is_artificial()[type]; - int n = ml->nodecount; - int szp = nrn_prop_param_size_[type]; - int szdp = nrn_prop_dparam_size_[type]; - int layout = corenrn.get_mech_data_layout()[type]; - - if (!is_art && !direct) { - ml->nodeindices = (int*)ecalloc_align(ml->nodecount, sizeof(int)); - } else { - ml->nodeindices = nullptr; - } - if (szdp) { - ml->pdata = (int*)ecalloc_align(nrn_soa_padded_size(n, layout) * szdp, sizeof(int)); - } - - if (direct) { - (*nrn2core_get_dat2_mech_)(nt.id, itml, dsz_inst, ml->nodeindices, ml->data, ml->pdata); - } else { - if (!is_art) { - F.read_array(ml->nodeindices, ml->nodecount); - } - } - if (szdp) { - ++dsz_inst; - } - - mech_layout(F, ml->data, n, szp, layout); - - if (szdp) { - mech_layout(F, ml->pdata, n, szdp, layout); -#if CHKPNTDEBUG // Not substantive. Only for debugging. 
- Memb_list_ckpnt* mlc = ntc.mlmap[type]; - mlc->pdata_not_permuted = (int*)coreneuron::ecalloc_align(n * szdp, sizeof(int)); - if (layout == 1) { // AoS just copy - for (int i = 0; i < n; ++i) { - for (int j = 0; j < szdp; ++j) { - mlc->pdata_not_permuted[i * szdp + j] = ml->pdata[i * szdp + j]; - } - } - } else if (layout == 0) { // SoA transpose and unpad - int align_cnt = nrn_soa_padded_size(n, layout); - for (int i = 0; i < n; ++i) { - for (int j = 0; j < szdp; ++j) { - mlc->pdata_not_permuted[i * szdp + j] = ml->pdata[i + j * align_cnt]; - } - } - } +void read_phase1(NrnThread& nt, UserParams& userParams) { + Phase1 p1; + p1.read_file(userParams.file_reader[nt.id]); + + { // Protect gid2in, gid2out and neg_gid2out +#ifdef _OPENMP + p1.populate(nt, userParams.imult[nt.id], mut); +#else + p1.populate(nt, userParams.imult[nt.id]); #endif - } else { - ml->pdata = nullptr; - } - if (corenrn.get_pnt_map()[type] > 0) { // POINT_PROCESS mechanism including acell - int cnt = ml->nodecount; - Point_process* pnt = nullptr; - pnt = nt.pntprocs + synoffset; - pnt_offset[type] = synoffset; - synoffset += cnt; - for (int i = 0; i < cnt; ++i) { - Point_process* pp = pnt + i; - pp->_type = type; - pp->_i_instance = i; - nt._vdata[ml->pdata[nrn_i_layout(i, cnt, 1, szdp, layout)]] = pp; - pp->_tid = nt.id; - } - } } - - if (nrn_have_gaps) { - nrn_partrans::gap_thread_setup(nt); - } - - // Some pdata may index into data which has been reordered from AoS to - // SoA. The four possibilities are if semantics is -1 (area), -5 (pointer), - // -9 (diam), - // or 0-999 (ion variables). Note that pdata has a layout and the - // type block in nt.data into which it indexes, has a layout. - for (auto tml = nt.tml; tml; tml = tml->next) { - int type = tml->index; - int layout = corenrn.get_mech_data_layout()[type]; - int* pdata = tml->ml->pdata; - int cnt = tml->ml->nodecount; - int szdp = nrn_prop_dparam_size_[type]; - int* semantics = memb_func[type].dparam_semantics; - - // ignore ARTIFICIAL_CELL (has useless area pointer with semantics=-1) - if (corenrn.get_is_artificial()[type]) { - continue; - } - - if (szdp) { - if (!semantics) - continue; // temporary for HDFReport, Binreport which will be skipped in - // bbcore_write of HBPNeuron - nrn_assert(semantics); - } - - for (int i = 0; i < szdp; ++i) { - int s = semantics[i]; - if (s == -1) { // area - int area0 = nt._actual_area - nt._data; - for (int iml = 0; iml < cnt; ++iml) { - int* pd = pdata + nrn_i_layout(iml, cnt, i, szdp, layout); - int ix = *pd; // relative to beginning of _actual_area - nrn_assert((ix >= 0) && (ix < nt.end)); - *pd = area0 + ix; // relative to nt._data - } - } else if (s == -9) { // diam - int diam0 = nt._actual_diam - nt._data; - for (int iml = 0; iml < cnt; ++iml) { - int* pd = pdata + nrn_i_layout(iml, cnt, i, szdp, layout); - int ix = *pd; // relative to beginning of _actual_diam - nrn_assert((ix >= 0) && (ix < nt.end)); - *pd = diam0 + ix; // relative to nt._data - } - } else if (s == -5) { // pointer assumes a pointer to membrane voltage - int v0 = nt._actual_v - nt._data; - for (int iml = 0; iml < cnt; ++iml) { - int* pd = pdata + nrn_i_layout(iml, cnt, i, szdp, layout); - int ix = *pd; // relative to _actual_v - nrn_assert((ix >= 0) && (ix < nt.end)); - *pd = v0 + ix; // relative to nt._data - } - } else if (s >= 0 && s < 1000) { // ion - int etype = s; - /* if ion is SoA, must recalculate pdata values */ - /* if ion is AoS, have to deal with offset */ - Memb_list* eml = nt._ml_list[etype]; - int edata0 = eml->data - nt._data; 
- int ecnt = eml->nodecount; - int esz = nrn_prop_param_size_[etype]; - for (int iml = 0; iml < cnt; ++iml) { - int* pd = pdata + nrn_i_layout(iml, cnt, i, szdp, layout); - int ix = *pd; // relative to the ion data - nrn_assert((ix >= 0) && (ix < ecnt * esz)); - /* Original pd order assumed ecnt groups of esz */ - *pd = edata0 + nrn_param_layout(ix, etype, eml); - } - } - } - } - - /* if desired, apply the node permutation. This involves permuting - at least the node parameter arrays for a, b, and area (and diam) and all - integer vector values that index into nodes. This could have been done - when originally filling the arrays with AoS ordered data, but can also - be done now, after the SoA transformation. The latter has the advantage - that the present order is consistent with all the layout values. Note - that after this portion of the permutation, a number of other node index - vectors will be read and will need to be permuted as well in subsequent - sections of this function. - */ - if (interleave_permute_type) { - nt._permute = interleave_order(nt.id, nt.ncell, nt.end, nt._v_parent_index); - } - if (nt._permute) { - int* p = nt._permute; - permute_data(nt._actual_a, nt.end, p); - permute_data(nt._actual_b, nt.end, p); - permute_data(nt._actual_area, nt.end, p); - permute_data(nt._actual_v, nt.end, - p); // need if restore or finitialize does not initialize voltage - if (nt._actual_diam) { - permute_data(nt._actual_diam, nt.end, p); - } - // index values change as well as ordering - permute_ptr(nt._v_parent_index, nt.end, p); - node_permute(nt._v_parent_index, nt.end, p); - -#if DEBUG -for (int i=0; i < nt.end; ++i) { - printf("parent[%d] = %d\n", i, nt._v_parent_index[i]); } -#endif - - // specify the ml->_permute and sort the nodeindices - for (auto tml = nt.tml; tml; tml = tml->next) { - if (tml->ml->nodeindices) { // not artificial - permute_nodeindices(tml->ml, p); - } - } - - // permute mechanism data, pdata (and values) - for (auto tml = nt.tml; tml; tml = tml->next) { - if (tml->ml->nodeindices) { // not artificial - permute_ml(tml->ml, tml->index, nt); - } - } - - // permute the Point_process._i_instance - for (int i = 0; i < nt.n_pntproc; ++i) { - Point_process& pp = nt.pntprocs[i]; - Memb_list* ml = nt._ml_list[pp._type]; - if (ml->_permute) { - pp._i_instance = ml->_permute[pp._i_instance]; - } - } - } - - if (nrn_have_gaps && interleave_permute_type) { - nrn_partrans::gap_indices_permute(nt); - } - - /* here we setup the mechanism dependencies. if there is a mechanism dependency - * then we allocate an array for tml->dependencies otherwise set it to nullptr. - * In order to find out the "real" dependencies i.e. dependent mechanism - * exist at the same compartment, we compare the nodeindices of mechanisms - * returned by nrn_mech_depend. 
- */ - - /* temporary array for dependencies */ - int* mech_deps = (int*)ecalloc(memb_func.size(), sizeof(int)); - - for (auto tml = nt.tml; tml; tml = tml->next) { - /* initialize to null */ - tml->dependencies = nullptr; - tml->ndependencies = 0; - - /* get dependencies from the models */ - int deps_cnt = nrn_mech_depend(tml->index, mech_deps); - - /* if dependencies, setup dependency array */ - if (deps_cnt) { - /* store "real" dependencies in the vector */ - std::vector actual_mech_deps; - - Memb_list* ml = tml->ml; - int* nodeindices = ml->nodeindices; - - /* iterate over dependencies */ - for (int j = 0; j < deps_cnt; j++) { - /* memb_list of dependency mechanism */ - Memb_list* dml = nt._ml_list[mech_deps[j]]; - - /* dependency mechanism may not exist in the model */ - if (!dml) - continue; - - /* take nodeindices for comparison */ - int* dnodeindices = dml->nodeindices; - - /* set_intersection function needs temp vector to push the common values */ - std::vector node_intersection; - - /* make sure they have non-zero nodes and find their intersection */ - if ((ml->nodecount > 0) && (dml->nodecount > 0)) { - std::set_intersection(nodeindices, nodeindices + ml->nodecount, dnodeindices, - dnodeindices + dml->nodecount, - std::back_inserter(node_intersection)); - } - - /* if they intersect in the nodeindices, it's real dependency */ - if (!node_intersection.empty()) { - actual_mech_deps.push_back(mech_deps[j]); - } - } - - /* copy actual_mech_deps to dependencies */ - if (!actual_mech_deps.empty()) { - tml->ndependencies = actual_mech_deps.size(); - tml->dependencies = (int*)ecalloc(actual_mech_deps.size(), sizeof(int)); - memcpy(tml->dependencies, &actual_mech_deps[0], - sizeof(int) * actual_mech_deps.size()); - } - } - } - - /* free temp dependency array */ - free(mech_deps); - - /// Fill the BA lists - std::vector bamap(memb_func.size()); - for (int i = 0; i < BEFORE_AFTER_SIZE; ++i) { - for (size_t ii = 0; ii < memb_func.size(); ++ii) { - bamap[ii] = nullptr; - } - for (auto bam = corenrn.get_bamech()[i]; bam; bam = bam->next) { - bamap[bam->type] = bam; - } - /* unnecessary but keep in order anyway */ - NrnThreadBAList **ptbl = nt.tbl + i; - for (auto tml = nt.tml; tml; tml = tml->next) { - if (bamap[tml->index]) { - Memb_list* ml = tml->ml; - auto tbl = (NrnThreadBAList*)emalloc(sizeof(NrnThreadBAList)); - tbl->next = nullptr; - tbl->bam = bamap[tml->index]; - tbl->ml = ml; - *ptbl = tbl; - ptbl = &(tbl->next); - } - } - } - - // for fast watch statement checking - // setup a list of types that have WATCH statement - { - int sz = 0; // count the types with WATCH - for (auto tml = nt.tml; tml; tml = tml->next) { - if (corenrn.get_watch_check()[tml->index]) { - ++sz; - } - } - if (sz) { - nt._watch_types = (int*)ecalloc(sz + 1, sizeof(int)); // nullptr terminated - sz = 0; - for (auto tml = nt.tml; tml; tml = tml->next) { - if (corenrn.get_watch_check()[tml->index]) { - nt._watch_types[sz++] = tml->index; - } - } - } - } - auto& pnttype2presyn = corenrn.get_pnttype2presyn(); - auto& nrn_has_net_event_ = corenrn.get_has_net_event(); - // from nrn_has_net_event create pnttype2presyn. - if (pnttype2presyn.empty()) { - pnttype2presyn.resize(memb_func.size(), -1); - } - for (size_t i = 0; i < nrn_has_net_event_.size(); ++i) { - pnttype2presyn[nrn_has_net_event_[i]] = i; - } - // create the nt.pnt2presyn_ix array of arrays. 
- nt.pnt2presyn_ix = (int**)ecalloc(nrn_has_net_event_.size(), sizeof(int*)); - for (size_t i = 0; i < nrn_has_net_event_.size(); ++i) { - Memb_list* ml = nt._ml_list[nrn_has_net_event_[i]]; - if (ml && ml->nodecount > 0) { - nt.pnt2presyn_ix[i] = (int*)ecalloc(ml->nodecount, sizeof(int)); - } - } - - // Real cells are at the beginning of the nt.presyns followed by - // acells (with and without gids mixed together) - // Here we associate the real cells with voltage pointers and - // acell PreSyn with the Point_process. - // nt.presyns order same as output_vindex order - int *output_vindex, *pnttype, *pntindex; - double *output_threshold, *delay; - if (direct) { - (*nrn2core_get_dat2_3_)(nt.id, nt.n_weight, output_vindex, output_threshold, pnttype, - pntindex, nt.weights, delay); - } - if (!direct) { - output_vindex = F.read_array(nt.n_presyn); - } -#if CHKPNTDEBUG - ntc.output_vindex = new int[nt.n_presyn]; - memcpy(ntc.output_vindex, output_vindex, nt.n_presyn * sizeof(int)); -#endif - if (nt._permute) { - // only indices >= 0 (i.e. _actual_v indices) will be changed. - node_permute(output_vindex, nt.n_presyn, nt._permute); - } - if (!direct) { - output_threshold = F.read_array(nt.ncell); - } -#if CHKPNTDEBUG - ntc.output_threshold = new double[nt.ncell]; - memcpy(ntc.output_threshold, output_threshold, nt.ncell * sizeof(double)); -#endif - for (int i = 0; i < nt.n_presyn; ++i) { // real cells - PreSyn* ps = nt.presyns + i; - - int ix = output_vindex[i]; - if (ix == -1 && i < nt.ncell) { // real cell without a presyn - continue; - } - if (ix < 0) { - ix = -ix; - int index = ix / 1000; - int type = ix - index * 1000; - Point_process* pnt = nt.pntprocs + (pnt_offset[type] + index); - ps->pntsrc_ = pnt; - // pnt->_presyn = ps; - int ip2ps = pnttype2presyn[pnt->_type]; - if (ip2ps >= 0) { - nt.pnt2presyn_ix[ip2ps][pnt->_i_instance] = i; - } - if (ps->gid_ < 0) { - ps->gid_ = -1; - } - } else { - assert(ps->gid_ > -1); - ps->thvar_index_ = ix; // index into _actual_v - assert(ix < nt.end); - ps->threshold_ = output_threshold[i]; - } - } - delete[] output_vindex; - delete[] output_threshold; - - // initial net_send_buffer size about 1% of number of presyns - // nt._net_send_buffer_size = nt.ncell/100 + 1; - // but, to avoid reallocation complexity on GPU ... - nt._net_send_buffer_size = nt.ncell; - nt._net_send_buffer = (int*)ecalloc_align(nt._net_send_buffer_size, sizeof(int)); - - // do extracon later as the target and weight info - // is not directly in the file - int nnetcon = nt.n_netcon - nrn_setup_extracon; - - int nweight = nt.n_weight; - // printf("nnetcon=%d nweight=%d\n", nnetcon, nweight); - // it may happen that Point_process structures will be made unnecessary - // by factoring into NetCon. - - // Make NetCon.target_ point to proper Point_process. Only the NetCon - // with pnttype[i] > 0 have a target. 
- if (!direct) { - pnttype = F.read_array(nnetcon); - } - if (!direct) { - pntindex = F.read_array(nnetcon); - } -#if CHKPNTDEBUG - ntc.pnttype = new int[nnetcon]; - ntc.pntindex = new int[nnetcon]; - memcpy(ntc.pnttype, pnttype, nnetcon * sizeof(int)); - memcpy(ntc.pntindex, pntindex, nnetcon * sizeof(int)); -#endif - for (int i = 0; i < nnetcon; ++i) { - int type = pnttype[i]; - if (type > 0) { - int index = pnt_offset[type] + pntindex[i]; /// Potentially uninitialized pnt_offset[], - /// check for previous assignments - NetCon& nc = nt.netcons[i]; - nc.target_ = nt.pntprocs + index; - nc.active_ = true; - } - } - - int extracon_target_nweight = 0; - if (nrn_setup_extracon > 0) { - // Fill in the extracon target_ and active_. - // Simplistic. - // Rotate through the pntindex and use only pnttype for ProbAMPANMDA_EMS - // (which happens to have a weight vector length of 5.) - // Edge case: if there is no such synapse, let the target_ be nullptr - // and the netcon be inactive. - // Same pattern as algorithm for extracon netcon_srcgid above in phase1. - int extracon_target_type = nrn_get_mechtype("ProbAMPANMDA_EMS"); - assert(extracon_target_type > 0); - extracon_target_nweight = corenrn.get_pnt_receive_size()[extracon_target_type]; - int j = 0; - for (int i = 0; i < nrn_setup_extracon; ++i) { - int active = 0; - for (int k = 0; k < nnetcon; ++k) { - if (pnttype[j] == extracon_target_type) { - active = 1; - break; - } - j = (j + 1) % nnetcon; - } - NetCon& nc = nt.netcons[i + nnetcon]; - nc.active_ = active; - if (active) { - nc.target_ = nt.pntprocs + (pnt_offset[extracon_target_type] + pntindex[j]); - } else { - nc.target_ = nullptr; - } - } - } - - delete[] pntindex; - - // weights in netcons order in groups defined by Point_process target type. - nt.n_weight += nrn_setup_extracon * extracon_target_nweight; - if (!direct) { - nt.weights = (double*)ecalloc_align(nt.n_weight, sizeof(double)); - F.read_array(nt.weights, nweight); - } - - int iw = 0; - for (int i = 0; i < nnetcon; ++i) { - NetCon& nc = nt.netcons[i]; - nc.u.weight_index_ = iw; - if (pnttype[i] != 0) { - iw += corenrn.get_pnt_receive_size()[pnttype[i]]; - } else { - iw += 1; - } - } - assert(iw == nweight); - delete[] pnttype; - - // delays in netcons order - if (!direct) { - delay = F.read_array(nnetcon); - } -#if CHKPNTDEBUG - ntc.delay = new double[nnetcon]; - memcpy(ntc.delay, delay, nnetcon * sizeof(double)); -#endif - for (int i = 0; i < nnetcon; ++i) { - NetCon& nc = nt.netcons[i]; - nc.delay_ = delay[i]; - } - delete[] delay; - - if (nrn_setup_extracon > 0) { - // simplistic. 
delay is 1 and weight is 0.001 - for (int i = 0; i < nrn_setup_extracon; ++i) { - NetCon& nc = nt.netcons[nnetcon + i]; - nc.delay_ = 1.0; - nc.u.weight_index_ = nweight + i * extracon_target_nweight; - nt.weights[nc.u.weight_index_] = 2.0; // this value 2.0 is extracted from .dat files - } - } - - // BBCOREPOINTER information - if (direct) { - (*nrn2core_get_dat2_corepointer_)(nt.id, npnt); - } else { - npnt = F.read_int(); - } -#if CHKPNTDEBUG - ntc.nbcp = npnt; - ntc.bcpicnt = new int[npnt]; - ntc.bcpdcnt = new int[npnt]; - ntc.bcptype = new int[npnt]; -#endif - for (auto tml = nt.tml; tml; tml = tml->next) { - int type = tml->index; - if (!corenrn.get_bbcore_read()[type]) { - continue; - } - int* iArray = nullptr; - double* dArray = nullptr; - int icnt, dcnt; - if (direct) { - (*nrn2core_get_dat2_corepointer_mech_)(nt.id, type, icnt, dcnt, iArray, dArray); - } else { - type = F.read_int(); - icnt = F.read_int(); - dcnt = F.read_int(); - if (icnt) { - iArray = F.read_array(icnt); - } - if (dcnt) { - dArray = F.read_array(dcnt); - } - } - if (!corenrn.get_bbcore_write()[type] && nrn_checkpoint_arg_exists) { - fprintf( - stderr, - "Checkpoint is requested involving BBCOREPOINTER but there is no bbcore_write function for %s\n", - memb_func[type].sym); - assert(corenrn.get_bbcore_write()[type]); - } -#if CHKPNTDEBUG - ntc.bcptype[i] = type; - ntc.bcpicnt[i] = icnt; - ntc.bcpdcnt[i] = dcnt; -#endif - int ik = 0; - int dk = 0; - Memb_list* ml = nt._ml_list[type]; - int dsz = nrn_prop_param_size_[type]; - int pdsz = nrn_prop_dparam_size_[type]; - int cntml = ml->nodecount; - int layout = corenrn.get_mech_data_layout()[type]; - for (int j = 0; j < cntml; ++j) { - int jp = j; - if (ml->_permute) { - jp = ml->_permute[j]; - } - double* d = ml->data; - Datum* pd = ml->pdata; - d += nrn_i_layout(jp, cntml, 0, dsz, layout); - pd += nrn_i_layout(jp, cntml, 0, pdsz, layout); - int aln_cntml = nrn_soa_padded_size(cntml, layout); - (*corenrn.get_bbcore_read()[type])(dArray, iArray, &dk, &ik, 0, aln_cntml, d, pd, ml->_thread, - &nt, 0.0); - } - assert(dk == dcnt); - assert(ik == icnt); - if (ik) { - delete[] iArray; - } - if (dk) { - delete[] dArray; - } - } - // VecPlayContinuous instances - // No attempt at memory efficiency - int n; - if (direct) { - (*nrn2core_get_dat2_vecplay_)(nt.id, n); - } else { - n = F.read_int(); - } - nt.n_vecplay = n; - if (n) { - nt._vecplay = new void*[n]; +void read_phase2(NrnThread& nt, UserParams& userParams) { + Phase2 p2; + if (corenrn_embedded) { + p2.read_direct(nt.id, nt); } else { - nt._vecplay = nullptr; - } -#if CHKPNTDEBUG - ntc.vecplay_ix = new int[n]; - ntc.vtype = new int[n]; - ntc.mtype = new int[n]; -#endif - for (int i = 0; i < n; ++i) { - int vtype, mtype, ix, sz; - double *yvec1, *tvec1; - if (direct) { - (*nrn2core_get_dat2_vecplay_inst_)(nt.id, i, vtype, mtype, ix, sz, yvec1, tvec1); - } else { - vtype = F.read_int(); - mtype = F.read_int(); - ix = F.read_int(); - sz = F.read_int(); - } - nrn_assert(vtype == VecPlayContinuousType); -#if CHKPNTDEBUG - ntc.vtype[i] = vtype; -#endif -#if CHKPNTDEBUG - ntc.mtype[i] = mtype; -#endif - Memb_list* ml = nt._ml_list[mtype]; -#if CHKPNTDEBUG - ntc.vecplay_ix[i] = ix; -#endif - IvocVect* yvec = vector_new(sz); - IvocVect* tvec = vector_new(sz); - if (direct) { - double* py = vector_vec(yvec); - double* pt = vector_vec(tvec); - for (int j = 0; j < sz; ++j) { - py[j] = yvec1[j]; - pt[j] = tvec1[j]; - } - // yvec1 and tvec1 are not deleted as that space is within - // NEURON Vector - } else { - 
F.read_array(vector_vec(yvec), sz); - F.read_array(vector_vec(tvec), sz); - } - ix = nrn_param_layout(ix, mtype, ml); - if (ml->_permute) { - ix = nrn_index_permute(ix, mtype, ml); - } - nt._vecplay[i] = new VecPlayContinuous(ml->data + ix, yvec, tvec, nullptr, nt.id); - } - if (!direct) { - // store current checkpoint state to continue reading mapping - F.record_checkpoint(); - - // If not at end of file, then this must be a checkpoint and restore tqueue. - if (!F.eof()) { - checkpoint_restore_tqueue(nt, F); - } - } - - // NetReceiveBuffering - for (auto& net_buf_receive : corenrn.get_net_buf_receive()) { - int type = net_buf_receive.second; - // Does this thread have this type. - Memb_list* ml = nt._ml_list[type]; - if (ml) { // needs a NetReceiveBuffer - NetReceiveBuffer_t* nrb = - (NetReceiveBuffer_t*)ecalloc_align(1, sizeof(NetReceiveBuffer_t)); - ml->_net_receive_buffer = nrb; - nrb->_pnt_offset = pnt_offset[type]; - - // begin with a size of 5% of the number of instances - nrb->_size = ml->nodecount; - // or at least 8 - nrb->_size = std::max(8, nrb->_size); - // but not more than nodecount - nrb->_size = std::min(ml->nodecount, nrb->_size); - - nrb->_pnt_index = (int*)ecalloc_align(nrb->_size, sizeof(int)); - nrb->_displ = (int*)ecalloc_align(nrb->_size + 1, sizeof(int)); - nrb->_nrb_index = (int*)ecalloc_align(nrb->_size, sizeof(int)); - nrb->_weight_index = (int*)ecalloc_align(nrb->_size, sizeof(int)); - nrb->_nrb_t = (double*)ecalloc_align(nrb->_size, sizeof(double)); - nrb->_nrb_flag = (double*)ecalloc_align(nrb->_size, sizeof(double)); - } - } - - // NetSendBuffering - for (int type : corenrn.get_net_buf_send_type()) { - // Does this thread have this type. - Memb_list* ml = nt._ml_list[type]; - if (ml) { // needs a NetSendBuffer - NetSendBuffer_t* nsb = (NetSendBuffer_t*)ecalloc_align(1, sizeof(NetSendBuffer_t)); - ml->_net_send_buffer = nsb; - - // begin with a size equal to twice number of instances - // at present there is no provision for dynamically increasing this. - nsb->_size = ml->nodecount * 2; - nsb->_cnt = 0; - - nsb->_sendtype = (int*)ecalloc_align(nsb->_size, sizeof(int)); - nsb->_vdata_index = (int*)ecalloc_align(nsb->_size, sizeof(int)); - nsb->_pnt_index = (int*)ecalloc_align(nsb->_size, sizeof(int)); - nsb->_weight_index = (int*)ecalloc_align(nsb->_size, sizeof(int)); - // when == 1, NetReceiveBuffer_t is newly allocated (i.e. we need to free previous copy - // and recopy new data - nsb->reallocated = 1; - nsb->_nsb_t = (double*)ecalloc_align(nsb->_size, sizeof(double)); - nsb->_nsb_flag = (double*)ecalloc_align(nsb->_size, sizeof(double)); - } + p2.read_file(userParams.file_reader[nt.id], nt); } + p2.populate(nt, userParams); } /** read mapping information for neurons */ -void read_phase3(FileHandler& F, int imult, NrnThread& nt) { - (void)imult; - +void read_phase3(NrnThread& nt, UserParams& userParams) { /** restore checkpoint state (before restoring queue items */ + auto& F = userParams.file_reader[nt.id]; F.restore_checkpoint(); /** mapping information for all neurons in single NrnThread */ diff --git a/coreneuron/io/nrn_setup.hpp b/coreneuron/io/nrn_setup.hpp index c1c57ad13..212381d75 100644 --- a/coreneuron/io/nrn_setup.hpp +++ b/coreneuron/io/nrn_setup.hpp @@ -32,21 +32,15 @@ THE POSSIBILITY OF SUCH DAMAGE. 
#include #include "coreneuron/sim/multicore.hpp" #include "coreneuron/io/nrn_filehandler.hpp" +#include "coreneuron/io/nrn2core_direct.h" +#include "coreneuron/io/user_params.hpp" +#include "coreneuron/io/mem_layout_util.hpp" namespace coreneuron { -static bool do_not_open; -static int ngroup_w; -static int* gidgroups_w; -static int* imult_w; -static const char* path_w; -static const char* restore_path_w; -static FileHandler* file_reader_w; -static bool byte_swap_w; - -static void read_phase1(FileHandler& F, int imult, NrnThread& nt); -static void read_phase2(FileHandler& F, int imult, NrnThread& nt); -static void read_phase3(FileHandler& F, int imult, NrnThread& nt); -static void read_phasegap(FileHandler& F, int imult, NrnThread& nt); +static void read_phase1(NrnThread& nt, UserParams& userParams); +static void read_phase2(NrnThread& nt, UserParams& userParams); +static void read_phase3(NrnThread& nt, UserParams& userParams); +static void read_phasegap(NrnThread& nt, UserParams& userParams); static void setup_ThreadData(NrnThread& nt); // Functions to load and clean data; @@ -89,58 +83,56 @@ inline std::string getPhaseName() { /// Reading phase selector. template -inline void read_phase_aux(FileHandler& F, int imult, NrnThread& nt); +inline void read_phase_aux(NrnThread& nt, UserParams&); template <> -inline void read_phase_aux(FileHandler& F, int imult, NrnThread& nt) { - read_phase1(F, imult, nt); +inline void read_phase_aux(NrnThread& nt, UserParams& userParams) { + read_phase1(nt, userParams); } template <> -inline void read_phase_aux(FileHandler& F, int imult, NrnThread& nt) { - read_phase2(F, imult, nt); +inline void read_phase_aux(NrnThread& nt, UserParams& userParams) { + read_phase2(nt, userParams); } template <> -inline void read_phase_aux(FileHandler& F, int imult, NrnThread& nt) { - read_phase3(F, imult, nt); +inline void read_phase_aux(NrnThread& nt, UserParams& userParams) { + read_phase3(nt, userParams); } template <> -inline void read_phase_aux(FileHandler& F, int imult, NrnThread& nt) { - read_phasegap(F, imult, nt); +inline void read_phase_aux(NrnThread& nt, UserParams& userParams) { + read_phasegap(nt, userParams); } /// Reading phase wrapper for each neuron group. template -inline void* phase_wrapper_w(NrnThread* nt) { - bool no_open = do_not_open; +inline void* phase_wrapper_w(NrnThread* nt, UserParams& userParams, bool in_memory_transfer) { int i = nt->id; - char fnamebuf[1000]; - if (i < ngroup_w) { - if (!no_open) { - const char* data_dir = path_w; + if (i < userParams.ngroup) { + if (!in_memory_transfer) { + const char* data_dir = userParams.path; // directory to read could be different for phase 2 if we are restoring // all other phases still read from dataset directory because the data // is constant if (P == 2) { - data_dir = restore_path_w; + data_dir = userParams.restore_path; } - std::string fname = std::string(data_dir) + "/" + std::to_string(gidgroups_w[i]) + "_" + getPhaseName
<P>
() + ".dat"; + std::string fname = std::string(data_dir) + "/" + std::to_string(userParams.gidgroups[i]) + "_" + getPhaseName
<P>
() + ".dat"; // Avoid trying to open the gid_gap.dat file if it doesn't exist when there are no // gap junctions in this gid - if (P == gap && !file_reader_w[i].file_exist(fname.c_str())) { - file_reader_w[i].close(); + if (P == gap && !userParams.file_reader[i].file_exist(fname)) { + userParams.file_reader[i].close(); } else { // otherwise, open (or reopen) the file for this phase - file_reader_w[i].open(fname.c_str(), byte_swap_w); + userParams.file_reader[i].open(fname); } } - read_phase_aux
<P>
(file_reader_w[i], imult_w[i], *nt); - if (!no_open) { - file_reader_w[i].close(); + read_phase_aux
<P>
(*nt, userParams); + if (!in_memory_transfer) { + userParams.file_reader[i].close(); } if (P == 2) { setup_ThreadData(*nt); @@ -151,14 +143,8 @@ inline void* phase_wrapper_w(NrnThread* nt) { /// Specific phase reading executed by threads. template -inline static void phase_wrapper(int direct = 0) { - if (direct) { - do_not_open = true; - } - nrn_multithread_job(phase_wrapper_w
<P>
); - if (direct) { - do_not_open = false; - } +inline static void phase_wrapper(UserParams& userParams, int direct = 0) { + nrn_multithread_job(phase_wrapper_w
<P>
, userParams, direct != 0); } } // namespace coreneuron } // namespace coreneuron diff --git a/coreneuron/io/phase1.cpp b/coreneuron/io/phase1.cpp new file mode 100644 index 000000000..422ceb3d1 --- /dev/null +++ b/coreneuron/io/phase1.cpp @@ -0,0 +1,184 @@ +#include +#include + +#include "coreneuron/nrniv/nrniv_decl.h" +#include "coreneuron/sim/multicore.hpp" +#include "coreneuron/io/phase1.hpp" +#include "coreneuron/utils/nrnoc_aux.hpp" + +int (*nrn2core_get_dat1_)(int tid, + int& n_presyn, + int& n_netcon, + int*& output_gid, + int*& netcon_srcgid); + +namespace coreneuron { +void Phase1::read_file(FileHandler& F) { + assert(!F.fail()); + int n_presyn = F.read_int(); /// Number of PreSyn-s in NrnThread nt + int n_netcon = F.read_int(); /// Number of NetCon-s in NrnThread nt + + this->output_gids = F.read_vector(n_presyn); + this->netcon_srcgids = F.read_vector(n_netcon); + + F.close(); +} + +void Phase1::read_direct(int thread_id) { + int* output_gids; + int* netcon_srcgid; + int n_presyn; + int n_netcon; + + // TODO : check error codes for NEURON - CoreNEURON communication + int valid = + (*nrn2core_get_dat1_)(thread_id, n_presyn, n_netcon, output_gids, netcon_srcgid); + if (!valid) { + return; + } + + this->output_gids = std::vector(output_gids, output_gids + n_presyn); + delete[] output_gids; + this->netcon_srcgids = std::vector(netcon_srcgid, netcon_srcgid + n_netcon); + delete[] netcon_srcgid; +} + +void Phase1::shift_gids(int imult) { + // maxgid is the maxgid to have the different multiple in the same gid space. + int maxgid = 0x7fffffff / nrn_setup_multiple; + // this value is the beginning of the new cluster of gids. + // the correct cluster is choose with imult. + int offset_gids = imult * maxgid; // offset for each gid + + // offset the (non-negative) gids according to multiple + // make sure everything fits into gid space. + for (auto& gid: this->output_gids) { + if (gid >= 0) { + nrn_assert(gid < maxgid); + gid += offset_gids; + } + } + + for (auto& srcgid: this->netcon_srcgids) { + if (srcgid >= 0) { + nrn_assert(srcgid < maxgid); + srcgid += offset_gids; + } + } +} + +void Phase1::add_extracon(NrnThread& nt, int imult) { + int maxgid = 0x7fffffff / nrn_setup_multiple; + if (nrn_setup_extracon <= 0) { + return; + } + + // very simplistic + // Use this threads positive source gids - zz in nt.netcon order as the + // source gids for extracon. + // The edge cases are: + // The 0th duplicate uses uses source gids for the last duplicate. + // If there are fewer positive source gids than extracon, then keep + // rotating through the nt.netcon . + // If there are no positive source gids, use a source gid of -1. + // Would not be difficult to modify so that random positive source was + // used, and/or random connect to another duplicate. + // Note that we increment the nt.n_netcon at the end of this function. + int sidoffset = 0; // how much to increment the corresponding positive gid + // like ring connectivity + if (imult > 0) { + sidoffset = -maxgid; + } else if (nrn_setup_multiple > 1) { + sidoffset = (nrn_setup_multiple - 1) * maxgid; + } + // set up the extracon srcgid_ + int* nc_srcgid = netcon_srcgid[nt.id]; + int j = 0; // rotate through the n_netcon netcon_srcgid + for (int i = 0; i < nrn_setup_extracon; ++i) { + int sid = -1; + for (int k = 0; k < nt.n_netcon; ++k) { + // potentially rotate j through the entire n_netcon but no further + sid = nc_srcgid[j]; + j = (j + 1) % nt.n_netcon; + if (sid >= 0) { + break; + } + } + if (sid < 0) { // only connect to real cells. 
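+            // (reached when no positive source gid exists anywhere in this thread's netcon list)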
+ sid = -1; + } else { + sid += sidoffset; + } + nc_srcgid[nt.n_netcon + i] = sid; + } + // finally increment the n_netcon + nt.n_netcon += nrn_setup_extracon; +} + +#ifdef _OPENMP +void Phase1::populate(NrnThread& nt, int imult, OMP_Mutex& mut) { +#else +void Phase1::populate(NrnThread& nt, int imult) { +#endif + nt.n_presyn = this->output_gids.size(); + nt.n_netcon = this->netcon_srcgids.size(); + + shift_gids(imult); + + netcon_srcgid[nt.id] = new int[nt.n_netcon + nrn_setup_extracon]; + std::copy(this->netcon_srcgids.begin(), this->netcon_srcgids.end(), + netcon_srcgid[nt.id]); + + nt.netcons = new NetCon[nt.n_netcon + nrn_setup_extracon]; + nt.presyns_helper = (PreSynHelper*)ecalloc_align(nt.n_presyn, sizeof(PreSynHelper)); + + nt.presyns = new PreSyn[nt.n_presyn]; + PreSyn* ps = nt.presyns; + /// go through all presyns + for (auto& gid: this->output_gids) { + if (gid == -1) { + ++ps; + continue; + } + + { +#ifdef _OPENMP + const std::lock_guard lock(mut); +#endif + // Note that the negative (type, index) + // coded information goes into the neg_gid2out[tid] hash table. + // See netpar.cpp for the netpar_tid_... function implementations. + // Both that table and the process wide gid2out table can be deleted + // before the end of setup + + /// Put gid into the gid2out hash table with correspondent output PreSyn + /// Or to the negative PreSyn map + if (gid >= 0) { + char m[200]; + if (gid2in.find(gid) != gid2in.end()) { + sprintf(m, "gid=%d already exists as an input port", gid); + hoc_execerror( + m, + "Setup all the output ports on this process before using them as input ports."); + } + if (gid2out.find(gid) != gid2out.end()) { + sprintf(m, "gid=%d already exists on this process as an output port", gid); + hoc_execerror(m, 0); + } + ps->gid_ = gid; + ps->output_index_ = gid; + gid2out[gid] = ps; + } else { + nrn_assert(neg_gid2out[nt.id].find(gid) == neg_gid2out[nt.id].end()); + ps->output_index_ = -1; + neg_gid2out[nt.id][gid] = ps; + } + } // end of the mutex + + ++ps; + } + + add_extracon(nt, imult); +} + +} // namespace coreneuron diff --git a/coreneuron/io/phase1.hpp b/coreneuron/io/phase1.hpp new file mode 100644 index 000000000..65a9034ba --- /dev/null +++ b/coreneuron/io/phase1.hpp @@ -0,0 +1,30 @@ +#pragma once + +#include + +#include "coreneuron/io/nrn_filehandler.hpp" +#include "coreneuron/utils/nrnmutdec.h" + +namespace coreneuron { + +struct NrnThread; + +class Phase1 { + public: + void read_file(FileHandler& F); + void read_direct(int thread_id); +#ifdef _OPENMP + void populate(NrnThread& nt, int imult, OMP_Mutex& mut); +#else + void populate(NrnThread& nt, int imult); +#endif + + private: + void shift_gids(int imult); + void add_extracon(NrnThread& nt, int imult); + + std::vector output_gids; + std::vector netcon_srcgids; +}; + +} // namespace coreneuron diff --git a/coreneuron/io/phase2.cpp b/coreneuron/io/phase2.cpp new file mode 100644 index 000000000..445d612a5 --- /dev/null +++ b/coreneuron/io/phase2.cpp @@ -0,0 +1,1243 @@ +#include "coreneuron/io/phase2.hpp" +#include "coreneuron/coreneuron.hpp" +#include "coreneuron/sim/multicore.hpp" +#include "coreneuron/io/nrn_checkpoint.hpp" +#include "coreneuron/utils/nrnoc_aux.hpp" +#include "coreneuron/network/partrans.hpp" +#include "coreneuron/permute/cellorder.hpp" +#include "coreneuron/permute/node_permute.h" +#include "coreneuron/utils/vrecitem.h" +#include "coreneuron/io/mem_layout_util.hpp" + +int (*nrn2core_get_dat2_1_)(int tid, + int& ngid, + int& n_real_gid, + int& nnode, + int& ndiam, + int& nmech, + int*& 
tml_index, + int*& ml_nodecount, + int& nidata, + int& nvdata, + int& nweight); + +int (*nrn2core_get_dat2_2_)(int tid, + int*& v_parent_index, + double*& a, + double*& b, + double*& area, + double*& v, + double*& diamvec); + +int (*nrn2core_get_dat2_mech_)(int tid, + size_t i, + int dsz_inst, + int*& nodeindices, + double*& data, + int*& pdata); + +int (*nrn2core_get_dat2_3_)(int tid, + int nweight, + int*& output_vindex, + double*& output_threshold, + int*& netcon_pnttype, + int*& netcon_pntindex, + double*& weights, + double*& delays); + +int (*nrn2core_get_dat2_corepointer_)(int tid, int& n); + +int (*nrn2core_get_dat2_corepointer_mech_)(int tid, + int type, + int& icnt, + int& dcnt, + int*& iarray, + double*& darray); + +int (*nrn2core_get_dat2_vecplay_)(int tid, int& n); + +int (*nrn2core_get_dat2_vecplay_inst_)(int tid, + int i, + int& vptype, + int& mtype, + int& ix, + int& sz, + double*& yvec, + double*& tvec); + +namespace coreneuron { +template +inline void mech_data_layout_transform(T* data, int cnt, int sz, int layout) { + if (layout == Layout::AoS) { + return; + } + // layout is equal to Layout::SoA + int align_cnt = nrn_soa_padded_size(cnt, layout); + std::vector d(cnt * sz); + // copy matrix + for (int i = 0; i < cnt; ++i) { + for (int j = 0; j < sz; ++j) { + d[i * sz + j] = data[i * sz + j]; + } + } + // transform memory layout + for (int i = 0; i < cnt; ++i) { + for (int j = 0; j < sz; ++j) { + data[i + j * align_cnt] = d[i * sz + j]; + } + } +} + +void Phase2::read_file(FileHandler& F, const NrnThread& nt) { + n_output = F.read_int(); + n_real_output = F.read_int(); + n_node = F.read_int(); + n_diam = F.read_int(); + n_mech = F.read_int(); + mech_types = std::vector(n_mech, 0); + nodecounts = std::vector(n_mech, 0); + for (int i = 0; i < n_mech; ++i) { + mech_types[i] = F.read_int(); + nodecounts[i] = F.read_int(); + } + + // check mechanism compatibility before reading data + check_mechanism(); + + n_idata = F.read_int(); + n_vdata = F.read_int(); + int n_weight = F.read_int(); + v_parent_index = (int*)ecalloc_align(n_node, sizeof(int)); + F.read_array(v_parent_index, n_node); + + int n_data_padded = nrn_soa_padded_size(n_node, MATRIX_LAYOUT); + { + { // Compute size of _data and allocate + int n_data = 6 * n_data_padded; + if (n_diam > 0) { + n_data += n_data_padded; + } + for (size_t i = 0; i < n_mech; ++i) { + int layout = corenrn.get_mech_data_layout()[mech_types[i]]; + int n = nodecounts[i]; + int sz = corenrn.get_prop_param_size()[mech_types[i]]; + n_data = nrn_soa_byte_align(n_data); + n_data += nrn_soa_padded_size(n, layout) * sz; + } + _data = (double*)ecalloc_align(n_data, sizeof(double)); + } + F.read_array(_data + 2 * n_data_padded, n_node); + F.read_array(_data + 3 * n_data_padded, n_node); + F.read_array(_data + 5 * n_data_padded, n_node); + F.read_array(_data + 4 * n_data_padded, n_node); + if (n_diam > 0) { + F.read_array(_data + 6 * n_data_padded, n_node); + } + } + + size_t offset = 6 * n_data_padded; + if (n_diam > 0) { + offset += n_data_padded; + } + for (size_t i = 0; i < n_mech; ++i) { + int layout = corenrn.get_mech_data_layout()[mech_types[i]]; + int n = nodecounts[i]; + int sz = corenrn.get_prop_param_size()[mech_types[i]]; + int dsz = corenrn.get_prop_dparam_size()[mech_types[i]]; + offset = nrn_soa_byte_align(offset); + std::vector nodeindices; + if (!corenrn.get_is_artificial()[mech_types[i]]) { + nodeindices = F.read_vector(n); + } + F.read_array(_data + offset, sz * n); + offset += nrn_soa_padded_size(n, layout) * sz; + std::vector pdata; 
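+        // mechanisms without dparam entries (dsz == 0) have no pdata to read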
+ if (dsz > 0) { + pdata = F.read_vector(dsz * n); + } + tmls.emplace_back(TML{nodeindices, pdata, 0, {}, {}}); + } + output_vindex = F.read_vector(nt.n_presyn); + output_threshold = F.read_vector(n_real_output); + pnttype = F.read_vector(nt.n_netcon - nrn_setup_extracon); + pntindex = F.read_vector(nt.n_netcon - nrn_setup_extracon); + weights = F.read_vector(n_weight); + delay = F.read_vector(nt.n_netcon); + num_point_process = F.read_int(); + + for (size_t i = 0; i < n_mech; ++i) { + if (!corenrn.get_bbcore_read()[mech_types[i]]) { + continue; + } + tmls[i].type = F.read_int(); + int icnt = F.read_int(); + int dcnt = F.read_int(); + if (icnt > 0) { + tmls[i].iArray = F.read_vector(icnt); + } + if (dcnt > 0) { + tmls[i].dArray = F.read_vector(dcnt); + } + } + + int n_vec_play_continuous = F.read_int(); + vec_play_continuous.reserve(n_vec_play_continuous); + for (size_t i = 0; i < n_vec_play_continuous; ++i) { + VecPlayContinuous_ item; + item.vtype = F.read_int(); + item.mtype = F.read_int(); + item.ix = F.read_int(); + int sz = F.read_int(); + item.yvec = IvocVect(sz); + item.tvec = IvocVect(sz); + F.read_array(item.yvec.data(), sz); + F.read_array(item.tvec.data(), sz); + vec_play_continuous.push_back(std::move(item)); + } + + // store current checkpoint state to continue reading mapping + // The checkpoint numbering in phase 3 is a continuing of phase 2, and so will be restored + F.record_checkpoint(); + + if (F.eof()) + return; + + nrn_assert(F.read_int() == n_vec_play_continuous); + + for (int i = 0; i < n_vec_play_continuous; ++i) { + auto &vecPlay = vec_play_continuous[i]; + vecPlay.last_index = F.read_int(); + vecPlay.discon_index = F.read_int(); + vecPlay.ubound_index = F.read_int(); + } + + patstim_index = F.read_int(); + + nrn_assert(F.read_int() == -1); + + for (int i = 0; i < nt.n_presyn; ++i) { + preSynConditionEventFlags.push_back(F.read_int()); + } + + assert(F.read_int() == -1); + restore_events(F); + + assert(F.read_int() == -1); + restore_events(F); +} + +void Phase2::read_direct(int thread_id, const NrnThread& nt) { + int* types_ = nullptr; + int* nodecounts_ = nullptr; + int n_weight; + (*nrn2core_get_dat2_1_)(thread_id, n_output, n_real_output, n_node, n_diam, n_mech, types_, + nodecounts_, n_idata, n_vdata, n_weight); + mech_types = std::vector(types_, types_ + n_mech); + delete[] types_; + + nodecounts = std::vector(nodecounts_, nodecounts_ + n_mech); + delete[] nodecounts_; + + + // TODO: fix it in the future + int n_data_padded = nrn_soa_padded_size(n_node, MATRIX_LAYOUT); + int n_data = 6 * n_data_padded; + if (n_diam > 0) { + n_data += n_data_padded; + } + for (int i = 0; i < n_mech; ++i) { + int layout = corenrn.get_mech_data_layout()[mech_types[i]]; + int n = nodecounts[i]; + int sz = corenrn.get_prop_param_size()[mech_types[i]]; + n_data = nrn_soa_byte_align(n_data); + n_data += nrn_soa_padded_size(n, layout) * sz; + } + _data = (double*)ecalloc_align(n_data, sizeof(double)); + + v_parent_index = (int*)ecalloc_align(n_node, sizeof(int)); + double* actual_a = _data + 2 * n_data_padded; + double* actual_b = _data + 3 * n_data_padded; + double* actual_v = _data + 4 * n_data_padded; + double* actual_area = _data + 5 * n_data_padded; + double* actual_diam = n_diam > 0 ? 
_data + 6 * n_data_padded : nullptr; + (*nrn2core_get_dat2_2_)(thread_id, v_parent_index, actual_a, actual_b, actual_area, actual_v, actual_diam); + + tmls.resize(n_mech); + + auto& param_sizes = corenrn.get_prop_param_size(); + auto& dparam_sizes = corenrn.get_prop_dparam_size(); + int dsz_inst = 0; + size_t offset = 6 * n_data_padded; + if (n_diam > 0) offset += n_data_padded; + for (size_t i = 0; i < n_mech; ++i) { + auto& tml = tmls[i]; + int type = mech_types[i]; + int layout = corenrn.get_mech_data_layout()[type]; + offset = nrn_soa_byte_align(offset); + + tml.type = type; + tml.nodeindices.resize(nodecounts[i]); + tml.pdata.resize(nodecounts[i] * dparam_sizes[type]); + + int* nodeindices_ = nullptr; + double* data_ = _data + offset; + int* pdata_ = const_cast(tml.pdata.data()); + (*nrn2core_get_dat2_mech_)(thread_id, i, dparam_sizes[type] > 0 ? dsz_inst : 0, nodeindices_, data_, pdata_); + if (dparam_sizes[type] > 0) + dsz_inst++; + offset += nrn_soa_padded_size(nodecounts[i], layout) * param_sizes[type]; + if (nodeindices_) { + std::copy(nodeindices_, nodeindices_ + nodecounts[i], tml.nodeindices.data()); + free_memory(nodeindices_); + } + if (corenrn.get_is_artificial()[type]) { + assert(nodeindices_ == nullptr); + } + } + + int* output_vindex_ = nullptr; + double* output_threshold_ = nullptr; + int* pnttype_ = nullptr; + int* pntindex_ = nullptr; + double* weight_ = nullptr; + double* delay_ = nullptr; + (*nrn2core_get_dat2_3_)(thread_id, n_weight, output_vindex_, output_threshold_, pnttype_, + pntindex_, weight_, delay_); + + output_vindex = std::vector(output_vindex_, output_vindex_ + nt.n_presyn); + delete[] output_vindex_; + + output_threshold = std::vector(output_threshold_, output_threshold_ + n_real_output); + delete[] output_threshold_; + + int n_netcon = nt.n_netcon - nrn_setup_extracon; + pnttype = std::vector(pnttype_, pnttype_ + n_netcon); + delete[] pnttype_; + + pntindex = std::vector(pntindex_, pntindex_ + n_netcon); + delete[] pntindex_; + + weights = std::vector(weight_, weight_ + n_weight); + delete[] weight_; + + delay = std::vector(delay_, delay_ + n_netcon); + delete[] delay_; + + (*nrn2core_get_dat2_corepointer_)(nt.id, num_point_process); + + for (size_t i = 0; i < n_mech; ++i) { + // not all mod files have BBCOREPOINTER data to read + if (!corenrn.get_bbcore_read()[mech_types[i]]) { + continue; + } + int icnt; + int* iArray_ = nullptr; + int dcnt; + double* dArray_ = nullptr; + (*nrn2core_get_dat2_corepointer_mech_)(nt.id, tmls[i].type, icnt, dcnt, iArray_, dArray_); + tmls[i].iArray.resize(icnt); + std::copy(iArray_, iArray_ + icnt, tmls[i].iArray.begin()); + delete[] iArray_; + + tmls[i].dArray.resize(dcnt); + std::copy(dArray_, dArray_ + dcnt, tmls[i].dArray.begin()); + delete[] dArray_; + } + + int n_vec_play_continuous; + (*nrn2core_get_dat2_vecplay_)(thread_id, n_vec_play_continuous); + + for (size_t i = 0; i < n_vec_play_continuous; ++i) { + VecPlayContinuous_ item; + // yvec_ and tvec_ are not deleted as that space is within + // NEURON Vector + double *yvec_, *tvec_; + int sz; + (*nrn2core_get_dat2_vecplay_inst_)(thread_id, i, item.vtype, item.mtype, item.ix, sz, yvec_, tvec_); + item.yvec = IvocVect(sz); + item.tvec = IvocVect(sz); + std::copy(yvec_, yvec_ + sz, item.yvec.data()); + std::copy(tvec_, tvec_ + sz, item.tvec.data()); + vec_play_continuous.push_back(std::move(item)); + } +} + +/// Check if MOD file used between NEURON and CoreNEURON is same +void Phase2::check_mechanism() { + int diff_mech_count = 0; + for (int i = 0; i < n_mech; ++i) 
{ + if (std::any_of(corenrn.get_different_mechanism_type().begin(), + corenrn.get_different_mechanism_type().end(), + [&](int e) { return e == mech_types[i]; })) { + if (nrnmpi_myid == 0) { + printf("Error: %s is a different MOD file than used by NEURON!\n", + nrn_get_mechname(mech_types[i])); + } + diff_mech_count++; + } + } + + if (diff_mech_count > 0) { + if (nrnmpi_myid == 0) { + printf( + "Error : NEURON and CoreNEURON must use same mod files for compatibility, %d different mod file(s) found. Re-compile special and special-core!\n", + diff_mech_count); + nrn_abort(1); + } + } + +} + +/// Perform in memory transformation between AoS<>SoA for integer data +void Phase2::transform_int_data(int elem0, int nodecount, int* pdata, int i, int dparam_size, int layout, int n_node_) { + for (int iml = 0; iml < nodecount; ++iml) { + int* pd = pdata + nrn_i_layout(iml, nodecount, i, dparam_size, layout); + int ix = *pd; // relative to beginning of _actual_* + nrn_assert((ix >= 0) && (ix < n_node_)); + *pd = elem0 + ix; // relative to nt._data + } +} + +NrnThreadMembList* Phase2::create_tml(int mech_id, Memb_func& memb_func, int& shadow_rhs_cnt) { + auto tml = (NrnThreadMembList*)emalloc_align(sizeof(NrnThreadMembList)); + tml->next = nullptr; + tml->index = mech_types[mech_id]; + + tml->ml = (Memb_list*)ecalloc_align(1, sizeof(Memb_list)); + tml->ml->_net_receive_buffer = nullptr; + tml->ml->_net_send_buffer = nullptr; + tml->ml->_permute = nullptr; + if (memb_func.alloc == nullptr) { + hoc_execerror(memb_func.sym, "mechanism does not exist"); + } + tml->ml->nodecount = nodecounts[mech_id]; + if (!memb_func.sym) { + printf("%s (type %d) is not available\n", nrn_get_mechname(tml->index), tml->index); + exit(1); + } + tml->ml->_nodecount_padded = + nrn_soa_padded_size(tml->ml->nodecount, corenrn.get_mech_data_layout()[tml->index]); + if (memb_func.is_point && corenrn.get_is_artificial()[tml->index] == 0) { + // Avoid race for multiple PointProcess instances in same compartment. + if (tml->ml->nodecount > shadow_rhs_cnt) { + shadow_rhs_cnt = tml->ml->nodecount; + } + } + + return tml; +} + +void Phase2::set_net_send_buffer(Memb_list** ml_list, const std::vector& pnt_offset) { + // NetReceiveBuffering + for (auto& net_buf_receive : corenrn.get_net_buf_receive()) { + int type = net_buf_receive.second; + // Does this thread have this type. + Memb_list* ml = ml_list[type]; + if (ml) { // needs a NetReceiveBuffer + NetReceiveBuffer_t* nrb = + (NetReceiveBuffer_t*)ecalloc_align(1, sizeof(NetReceiveBuffer_t)); + ml->_net_receive_buffer = nrb; + nrb->_pnt_offset = pnt_offset[type]; + + // begin with a size of 5% of the number of instances + nrb->_size = ml->nodecount; + // or at least 8 + nrb->_size = std::max(8, nrb->_size); + // but not more than nodecount + nrb->_size = std::min(ml->nodecount, nrb->_size); + + nrb->_pnt_index = (int*)ecalloc_align(nrb->_size, sizeof(int)); + nrb->_displ = (int*)ecalloc_align(nrb->_size + 1, sizeof(int)); + nrb->_nrb_index = (int*)ecalloc_align(nrb->_size, sizeof(int)); + nrb->_weight_index = (int*)ecalloc_align(nrb->_size, sizeof(int)); + nrb->_nrb_t = (double*)ecalloc_align(nrb->_size, sizeof(double)); + nrb->_nrb_flag = (double*)ecalloc_align(nrb->_size, sizeof(double)); + } + } + + // NetSendBuffering + for (int type: corenrn.get_net_buf_send_type()) { + // Does this thread have this type. 
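+        // ml_list is indexed by mechanism type; a nullptr entry means this thread has no instances of it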
+ Memb_list* ml = ml_list[type]; + if (ml) { // needs a NetSendBuffer + NetSendBuffer_t* nsb = (NetSendBuffer_t*)ecalloc_align(1, sizeof(NetSendBuffer_t)); + ml->_net_send_buffer = nsb; + + // begin with a size equal to twice number of instances + // at present there is no provision for dynamically increasing this. + nsb->_size = ml->nodecount * 2; + nsb->_cnt = 0; + + nsb->_sendtype = (int*)ecalloc_align(nsb->_size, sizeof(int)); + nsb->_vdata_index = (int*)ecalloc_align(nsb->_size, sizeof(int)); + nsb->_pnt_index = (int*)ecalloc_align(nsb->_size, sizeof(int)); + nsb->_weight_index = (int*)ecalloc_align(nsb->_size, sizeof(int)); + // when == 1, NetReceiveBuffer_t is newly allocated (i.e. we need to free previous copy + // and recopy new data + nsb->reallocated = 1; + nsb->_nsb_t = (double*)ecalloc_align(nsb->_size, sizeof(double)); + nsb->_nsb_flag = (double*)ecalloc_align(nsb->_size, sizeof(double)); + } + } +} + +void Phase2::restore_events(FileHandler& F) { + int type; + while ((type = F.read_int()) != 0) { + double time; + F.read_array(&time, 1); + switch (type) { + case NetConType: { + auto event = std::make_shared(); + event->time = time; + event->netcon_index = F.read_int(); + events.emplace_back(type, event); + break; + } + case SelfEventType: { + auto event = std::make_shared(); + event->time = time; + event->target_type = F.read_int(); + event->point_proc_instance = F.read_int(); + event->target_instance = F.read_int(); + F.read_array(&event->flag, 1); + event->movable = F.read_int(); + event->weight_index = F.read_int(); + events.emplace_back(type, event); + break; + } + case PreSynType: { + auto event = std::make_shared(); + event->time = time; + event->presyn_index = F.read_int(); + events.emplace_back(type, event); + break; + } + case NetParEventType: { + auto event = std::make_shared(); + event->time = time; + events.emplace_back(type, event); + break; + } + case PlayRecordEventType: { + auto event = std::make_shared(); + event->time = time; + event->play_record_type = F.read_int(); + if (event->play_record_type == VecPlayContinuousType) { + event->vecplay_index = F.read_int(); + events.emplace_back(type, event); + } else { + nrn_assert(0); + } + break; + } + default: { + nrn_assert(0); + break; + } + } + } +} + +void Phase2::fill_before_after_lists(NrnThread& nt, const std::vector& memb_func) { + /// Fill the BA lists + std::vector before_after_map(memb_func.size()); + for (int i = 0; i < BEFORE_AFTER_SIZE; ++i) { + for (size_t ii = 0; ii < memb_func.size(); ++ii) { + before_after_map[ii] = nullptr; + } + for (auto bam = corenrn.get_bamech()[i]; bam; bam = bam->next) { + before_after_map[bam->type] = bam; + } + /* unnecessary but keep in order anyway */ + NrnThreadBAList **ptbl = nt.tbl + i; + for (auto tml = nt.tml; tml; tml = tml->next) { + if (before_after_map[tml->index]) { + auto tbl = (NrnThreadBAList*)emalloc(sizeof(NrnThreadBAList)); + tbl->next = nullptr; + tbl->bam = before_after_map[tml->index]; + tbl->ml = tml->ml; + *ptbl = tbl; + ptbl = &(tbl->next); + } + } + } +} + +void Phase2::pdata_relocation(const NrnThread& nt, const std::vector& memb_func) { + // Some pdata may index into data which has been reordered from AoS to + // SoA. The four possibilities are if semantics is -1 (area), -5 (pointer), + // -9 (diam), // or 0-999 (ion variables). + // Note that pdata has a layout and the // type block in nt.data into which + // it indexes, has a layout. 
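+    // For example, an entry with semantics -1 stores an index ix relative to
+    // the beginning of the area array; transform_int_data() rewrites it as
+    // (nt._actual_area - nt._data) + ix so it indexes nt._data directly.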
+ for (auto tml = nt.tml; tml; tml = tml->next) { + int type = tml->index; + int layout = corenrn.get_mech_data_layout()[type]; + int* pdata = tml->ml->pdata; + int cnt = tml->ml->nodecount; + int szdp = corenrn.get_prop_dparam_size()[type]; + int* semantics = memb_func[type].dparam_semantics; + + // compute only for ARTIFICIAL_CELL (has useful area pointer with semantics=-1) + if (!corenrn.get_is_artificial()[type]) { + if (szdp) { + if (!semantics) + continue; // temporary for HDFReport, Binreport which will be skipped in + // bbcore_write of HBPNeuron + nrn_assert(semantics); + } + + for (int i = 0; i < szdp; ++i) { + int s = semantics[i]; + switch(s) { + case -1: // area + transform_int_data(nt._actual_area - nt._data, cnt, pdata, i, szdp, layout, nt.end); + break; + case -9: // diam + transform_int_data(nt._actual_diam - nt._data, cnt, pdata, i, szdp, layout, nt.end); + break; + case -5: // pointer assumes a pointer to membrane voltage + transform_int_data(nt._actual_v - nt._data, cnt, pdata, i, szdp, layout, nt.end); + break; + default: + if (s >= 0 && s < 1000) { // ion + int etype = s; + /* if ion is SoA, must recalculate pdata values */ + /* if ion is AoS, have to deal with offset */ + Memb_list* eml = nt._ml_list[etype]; + int edata0 = eml->data - nt._data; + int ecnt = eml->nodecount; + int esz = corenrn.get_prop_param_size()[etype]; + for (int iml = 0; iml < cnt; ++iml) { + int* pd = pdata + nrn_i_layout(iml, cnt, i, szdp, layout); + int ix = *pd; // relative to the ion data + nrn_assert((ix >= 0) && (ix < ecnt * esz)); + /* Original pd order assumed ecnt groups of esz */ + *pd = edata0 + nrn_param_layout(ix, etype, eml); + } + } + } + } + } + } +} + +void Phase2::set_dependencies(const NrnThread& nt, const std::vector& memb_func) { + /* here we setup the mechanism dependencies. if there is a mechanism dependency + * then we allocate an array for tml->dependencies otherwise set it to nullptr. + * In order to find out the "real" dependencies i.e. dependent mechanism + * exist at the same compartment, we compare the nodeindices of mechanisms + * returned by nrn_mech_depend. 
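+ * For example, a dependency reported by nrn_mech_depend is kept only when
+ * the two mechanisms share at least one node index, i.e. the
+ * std::set_intersection of their sorted nodeindices is non-empty.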
+ */ + + /* temporary array for dependencies */ + int* mech_deps = (int*)ecalloc(memb_func.size(), sizeof(int)); + + for (auto tml = nt.tml; tml; tml = tml->next) { + /* initialize to null */ + tml->dependencies = nullptr; + tml->ndependencies = 0; + + /* get dependencies from the models */ + int deps_cnt = nrn_mech_depend(tml->index, mech_deps); + + /* if dependencies, setup dependency array */ + if (deps_cnt) { + /* store "real" dependencies in the vector */ + std::vector actual_mech_deps; + + Memb_list* ml = tml->ml; + int* nodeindices = ml->nodeindices; + + /* iterate over dependencies */ + for (int j = 0; j < deps_cnt; j++) { + /* memb_list of dependency mechanism */ + Memb_list* dml = nt._ml_list[mech_deps[j]]; + + /* dependency mechanism may not exist in the model */ + if (!dml) + continue; + + /* take nodeindices for comparison */ + int* dnodeindices = dml->nodeindices; + + /* set_intersection function needs temp vector to push the common values */ + std::vector node_intersection; + + /* make sure they have non-zero nodes and find their intersection */ + if ((ml->nodecount > 0) && (dml->nodecount > 0)) { + std::set_intersection(nodeindices, nodeindices + ml->nodecount, dnodeindices, + dnodeindices + dml->nodecount, + std::back_inserter(node_intersection)); + } + + /* if they intersect in the nodeindices, it's real dependency */ + if (!node_intersection.empty()) { + actual_mech_deps.push_back(mech_deps[j]); + } + } + + /* copy actual_mech_deps to dependencies */ + if (!actual_mech_deps.empty()) { + tml->ndependencies = actual_mech_deps.size(); + tml->dependencies = (int*)ecalloc(actual_mech_deps.size(), sizeof(int)); + std::copy(actual_mech_deps.begin(), actual_mech_deps.end(), tml->dependencies); + } + } + } + + /* free temp dependency array */ + free(mech_deps); +} + +void Phase2::handle_extracon(NrnThread& nt, int n_netcon, const std::vector& pnt_offset) { + int extracon_target_nweight = 0; + if (nrn_setup_extracon > 0) { + // Fill in the extracon target_ and active_. + // Simplistic. + // Rotate through the pntindex and use only pnttype for ProbAMPANMDA_EMS + // (which happens to have a weight vector length of 5.) + // Edge case: if there is no such synapse, let the target_ be nullptr + // and the netcon be inactive. + // Same pattern as algorithm for extracon netcon_srcgid above in phase1. + int extracon_target_type = nrn_get_mechtype("ProbAMPANMDA_EMS"); + assert(extracon_target_type > 0); + extracon_target_nweight = corenrn.get_pnt_receive_size()[extracon_target_type]; + int j = 0; + for (int i = 0; i < nrn_setup_extracon; ++i) { + bool extracon_find = false; + for (int k = 0; k < n_netcon; ++k) { + if (pnttype[j] == extracon_target_type) { + extracon_find = true; + break; + } + j = (j + 1) % n_netcon; + } + NetCon& nc = nt.netcons[i + n_netcon]; + nc.active_ = extracon_find ? 1 : 0; + if (extracon_find) { + nc.target_ = nt.pntprocs + (pnt_offset[extracon_target_type] + pntindex[j]); + } else { + nc.target_ = nullptr; + } + } + } + + { + nt.n_weight = weights.size(); + int nweight = nt.n_weight; + // weights in netcons order in groups defined by Point_process target type. 
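+        // For example, a NetCon whose target type has pnt_receive_size 5
+        // occupies 5 consecutive weights, while a NetCon with pnttype 0
+        // (no target) still reserves a single weight slot.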
+ nt.n_weight += nrn_setup_extracon * extracon_target_nweight; + nt.weights = (double*)ecalloc_align(nt.n_weight, sizeof(double)); + std::copy(weights.begin(), weights.end(), nt.weights); + + int iw = 0; + for (int i = 0; i < n_netcon; ++i) { + NetCon& nc = nt.netcons[i]; + nc.u.weight_index_ = iw; + if (pnttype[i] != 0) { + iw += corenrn.get_pnt_receive_size()[pnttype[i]]; + } else { + iw += 1; + } + } + assert(iw == nweight); + +#if CHKPNTDEBUG + ntc.delay = new double[n_netcon]; + memcpy(ntc.delay, delay, n_netcon * sizeof(double)); +#endif + for (int i = 0; i < n_netcon; ++i) { + NetCon& nc = nt.netcons[i]; + nc.delay_ = delay[i]; + } + + if (nrn_setup_extracon > 0) { + // simplistic. delay is 1 and weight is 0.001 + for (int i = 0; i < nrn_setup_extracon; ++i) { + NetCon& nc = nt.netcons[n_netcon + i]; + nc.delay_ = 1.0; + nc.u.weight_index_ = nweight + i * extracon_target_nweight; + nt.weights[nc.u.weight_index_] = 2.0; // this value 2.0 is extracted from .dat files + } + } + } +} + +void Phase2::get_info_from_bbcore(NrnThread& nt, const std::vector& memb_func) { + // BBCOREPOINTER information +#if CHKPNTDEBUG + ntc.nbcp = num_point_process; + ntc.bcpicnt = new int[num_point_process]; + ntc.bcpdcnt = new int[num_point_process]; + ntc.bcptype = new int[num_point_process]; +#endif + for (size_t i = 0; i < n_mech; ++i) { + int type = mech_types[i]; + if (!corenrn.get_bbcore_read()[type]) { + continue; + } + type = tmls[i].type; // This is not an error, but it has to be fixed I think + if (!corenrn.get_bbcore_write()[type] && nrn_checkpoint_arg_exists) { + fprintf( + stderr, + "Checkpoint is requested involving BBCOREPOINTER but there is no bbcore_write function for %s\n", + memb_func[type].sym); + assert(corenrn.get_bbcore_write()[type]); + } +#if CHKPNTDEBUG + ntc.bcptype[i] = type; + ntc.bcpicnt[i] = icnt; + ntc.bcpdcnt[i] = dcnt; +#endif + int ik = 0; + int dk = 0; + Memb_list* ml = nt._ml_list[type]; + int dsz = corenrn.get_prop_param_size()[type]; + int pdsz = corenrn.get_prop_dparam_size()[type]; + int cntml = ml->nodecount; + int layout = corenrn.get_mech_data_layout()[type]; + for (int j = 0; j < cntml; ++j) { + int jp = j; + if (ml->_permute) { + jp = ml->_permute[j]; + } + double* d = ml->data; + Datum* pd = ml->pdata; + d += nrn_i_layout(jp, cntml, 0, dsz, layout); + pd += nrn_i_layout(jp, cntml, 0, pdsz, layout); + int aln_cntml = nrn_soa_padded_size(cntml, layout); + (*corenrn.get_bbcore_read()[type])(tmls[i].dArray.data(), tmls[i].iArray.data(), &dk, &ik, 0, aln_cntml, d, pd, ml->_thread, + &nt, 0.0); + } + assert(dk == tmls[i].dArray.size()); + assert(ik == tmls[i].iArray.size()); + } +} + +void Phase2::set_vec_play(NrnThread& nt) { + // VecPlayContinuous instances + // No attempt at memory efficiency + nt.n_vecplay = vec_play_continuous.size(); + if (nt.n_vecplay) { + nt._vecplay = new void*[nt.n_vecplay]; + } else { + nt._vecplay = nullptr; + } +#if CHKPNTDEBUG + ntc.vecplay_ix = new int[nt.n_vecplay]; + ntc.vtype = new int[nt.n_vecplay]; + ntc.mtype = new int[nt.n_vecplay]; +#endif + for (int i = 0; i < nt.n_vecplay; ++i) { + auto& vecPlay = vec_play_continuous[i]; + nrn_assert(vecPlay.vtype == VecPlayContinuousType); +#if CHKPNTDEBUG + ntc.vtype[i] = vecPlay.vtype; +#endif +#if CHKPNTDEBUG + ntc.mtype[i] = vecPlay.mtype; +#endif + Memb_list* ml = nt._ml_list[vecPlay.mtype]; +#if CHKPNTDEBUG + ntc.vecplay_ix[i] = vecPlay.ix; +#endif + + vecPlay.ix = nrn_param_layout(vecPlay.ix, vecPlay.mtype, ml); + if (ml->_permute) { + vecPlay.ix = nrn_index_permute(vecPlay.ix, 
vecPlay.mtype, ml); + } + nt._vecplay[i] = new VecPlayContinuous(ml->data + vecPlay.ix, std::move(vecPlay.yvec), std::move(vecPlay.tvec), nullptr, nt.id); + } +} + +void Phase2::populate(NrnThread& nt, const UserParams& userParams) { + nrn_assert(userParams.imult[nt.id] >= 0); // avoid imult unused warning + NrnThreadChkpnt& ntc = nrnthread_chkpnt[nt.id]; + ntc.file_id = userParams.gidgroups[nt.id]; + + nt.ncell = n_real_output; + nt.end = n_node; + +#if CHKPNTDEBUG + ntc.n_outputgids = n_output; + ntc.nmech = n_mech; +#endif + + /// Checkpoint in coreneuron is defined for both phase 1 and phase 2 since they are written + /// together + nt._ml_list = (Memb_list**)ecalloc_align(corenrn.get_memb_funcs().size(), sizeof(Memb_list*)); + + auto& memb_func = corenrn.get_memb_funcs(); +#if CHKPNTDEBUG + ntc.mlmap = new Memb_list_chkpnt*[memb_func.size()]; + for (int i = 0; i < _memb_func.size(); ++i) { + ntc.mlmap[i] = nullptr; + } +#endif + + nt.stream_id = 0; + nt.compute_gpu = 0; + auto& nrn_prop_param_size_ = corenrn.get_prop_param_size(); + auto& nrn_prop_dparam_size_ = corenrn.get_prop_dparam_size(); + +/* read_phase2 is being called from openmp region + * and hence we can set the stream equal to current thread id. + * In fact we could set gid as stream_id when we will have nrn threads + * greater than number of omp threads. + */ +#if defined(_OPENMP) + nt.stream_id = omp_get_thread_num(); +#endif + + int shadow_rhs_cnt = 0; + nt.shadow_rhs_cnt = 0; + + NrnThreadMembList* tml_last = nullptr; + for (int i = 0; i < n_mech; ++i) { + auto tml = create_tml(i, memb_func[mech_types[i]], shadow_rhs_cnt); + + nt._ml_list[tml->index] = tml->ml; + +#if CHKPNTDEBUG + Memb_list_chkpnt* mlc = new Memb_list_chkpnt; + ntc.mlmap[tml->index] = mlc; +#endif + + if (nt.tml) { + tml_last->next = tml; + } else { + nt.tml = tml; + } + tml_last = tml; + } + + if (shadow_rhs_cnt) { + nt._shadow_rhs = + (double*)ecalloc_align(nrn_soa_padded_size(shadow_rhs_cnt, 0), sizeof(double)); + nt._shadow_d = + (double*)ecalloc_align(nrn_soa_padded_size(shadow_rhs_cnt, 0), sizeof(double)); + nt.shadow_rhs_cnt = shadow_rhs_cnt; + } + + nt.mapping = nullptr; // section segment mapping + + nt._nidata = n_idata; + if (nt._nidata) + nt._idata = (int*)ecalloc(nt._nidata, sizeof(int)); + else + nt._idata = nullptr; + // see patternstim.cpp + int extra_nv = (&nt == nrn_threads) ? nrn_extra_thread0_vdata : 0; + nt._nvdata = n_vdata; + if (nt._nvdata + extra_nv) + nt._vdata = (void**)ecalloc_align(nt._nvdata + extra_nv, sizeof(void*)); + else + nt._vdata = nullptr; + + // The data format begins with the matrix data + int n_data_padded = nrn_soa_padded_size(nt.end, MATRIX_LAYOUT); + nt._data = _data; + nt._actual_rhs = nt._data + 0 * n_data_padded; + nt._actual_d = nt._data + 1 * n_data_padded; + nt._actual_a = nt._data + 2 * n_data_padded; + nt._actual_b = nt._data + 3 * n_data_padded; + nt._actual_v = nt._data + 4 * n_data_padded; + nt._actual_area = nt._data + 5 * n_data_padded; + nt._actual_diam = n_diam ? nt._data + 6 * n_data_padded : nullptr; + + size_t offset = 6 * n_data_padded; + if (n_diam) { + // in the rare case that a mechanism has dparam with diam semantics + // then actual_diam array added after matrix in nt._data + // Generally wasteful since only a few diam are pointed to. + // Probably better to move the diam semantics to the p array of the mechanism + offset += n_data_padded; + } + + // Memb_list.data points into the nt._data array. 
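+    // Layout: the 6 (or 7, with diam) matrix arrays come first, then each
+    // mechanism block at a byte-aligned offset, occupying
+    // nrn_soa_padded_size(nodecount, layout) * param_size doubles.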
+ // Also count the number of Point_process + int num_point_process = 0; + for (auto tml = nt.tml; tml; tml = tml->next) { + Memb_list* ml = tml->ml; + int type = tml->index; + int layout = corenrn.get_mech_data_layout()[type]; + int n = ml->nodecount; + int sz = nrn_prop_param_size_[type]; + offset = nrn_soa_byte_align(offset); + ml->data = nt._data + offset; + offset += nrn_soa_padded_size(n, layout) * sz; + if (corenrn.get_pnt_map()[type] > 0) { + num_point_process += n; + } + } + nt.pntprocs = (Point_process*)ecalloc_align( + num_point_process, sizeof(Point_process)); // includes acell with and without gid + nt.n_pntproc = num_point_process; + nt._ndata = offset; + + + // matrix info + nt._v_parent_index = v_parent_index; + +#if CHKPNTDEBUG + ntc.parent = new int[nt.end]; + memcpy(ntc.parent, nt._v_parent_index, nt.end * sizeof(int)); + ntc.area = new double[nt.end]; + memcpy(ntc.area, nt._actual_area, nt.end * sizeof(double)); +#endif + + int synoffset = 0; + std::vector pnt_offset(memb_func.size()); + + // All the mechanism data and pdata. + // Also fill in the pnt_offset + // Complete spec of Point_process except for the acell presyn_ field. + int itml = 0; + for (auto tml = nt.tml; tml; tml = tml->next, ++itml) { + int type = tml->index; + Memb_list* ml = tml->ml; + int n = ml->nodecount; + int szp = nrn_prop_param_size_[type]; + int szdp = nrn_prop_dparam_size_[type]; + int layout = corenrn.get_mech_data_layout()[type]; + + ml->nodeindices = (int*)ecalloc_align(ml->nodecount, sizeof(int)); + std::copy(tmls[itml].nodeindices.begin(), tmls[itml].nodeindices.end(), ml->nodeindices); + + mech_data_layout_transform(ml->data, n, szp, layout); + + if (szdp) { + ml->pdata = (int*)ecalloc_align(nrn_soa_padded_size(n, layout) * szdp, sizeof(int)); + std::copy(tmls[itml].pdata.begin(), tmls[itml].pdata.end(), ml->pdata); + mech_data_layout_transform(ml->pdata, n, szdp, layout); + +#if CHKPNTDEBUG // Not substantive. Only for debugging. + Memb_list_ckpnt* mlc = ntc.mlmap[type]; + mlc->pdata_not_permuted = (int*)coreneuron::ecalloc_align(n * szdp, sizeof(int)); + if (layout == Layout::AoS) { // only copy + for (int i = 0; i < n; ++i) { + for (int j = 0; j < szdp; ++j) { + mlc->pdata_not_permuted[i * szdp + j] = ml->pdata[i * szdp + j]; + } + } + } else if (layout == Layout::SoA) { // transpose and unpad + int align_cnt = nrn_soa_padded_size(n, layout); + for (int i = 0; i < n; ++i) { + for (int j = 0; j < szdp; ++j) { + mlc->pdata_not_permuted[i * szdp + j] = ml->pdata[i + j * align_cnt]; + } + } + } +#endif + } else { + ml->pdata = nullptr; + } + if (corenrn.get_pnt_map()[type] > 0) { // POINT_PROCESS mechanism including acell + int cnt = ml->nodecount; + Point_process* pnt = nullptr; + pnt = nt.pntprocs + synoffset; + pnt_offset[type] = synoffset; + synoffset += cnt; + for (int i = 0; i < cnt; ++i) { + Point_process* pp = pnt + i; + pp->_type = type; + pp->_i_instance = i; + nt._vdata[ml->pdata[nrn_i_layout(i, cnt, 1, szdp, layout)]] = pp; + pp->_tid = nt.id; + } + } + } + + if (nrn_have_gaps) { + nrn_partrans::gap_thread_setup(nt); + } + + pdata_relocation(nt, memb_func); + + /* if desired, apply the node permutation. This involves permuting + at least the node parameter arrays for a, b, and area (and diam) and all + integer vector values that index into nodes. This could have been done + when originally filling the arrays with AoS ordered data, but can also + be done now, after the SoA transformation. 
The latter has the advantage + that the present order is consistent with all the layout values. Note + that after this portion of the permutation, a number of other node index + vectors will be read and will need to be permuted as well in subsequent + sections of this function. + */ + if (interleave_permute_type) { + nt._permute = interleave_order(nt.id, nt.ncell, nt.end, nt._v_parent_index); + } + if (nt._permute) { + int* p = nt._permute; + permute_data(nt._actual_a, nt.end, p); + permute_data(nt._actual_b, nt.end, p); + permute_data(nt._actual_area, nt.end, p); + permute_data(nt._actual_v, nt.end, + p); // need if restore or finitialize does not initialize voltage + if (nt._actual_diam) { + permute_data(nt._actual_diam, nt.end, p); + } + // index values change as well as ordering + permute_ptr(nt._v_parent_index, nt.end, p); + node_permute(nt._v_parent_index, nt.end, p); + +#if DEBUG + for (int i=0; i < nt.end; ++i) { + printf("parent[%d] = %d\n", i, nt._v_parent_index[i]); + } +#endif + + // specify the ml->_permute and sort the nodeindices + for (auto tml = nt.tml; tml; tml = tml->next) { + if (tml->ml->nodeindices) { // not artificial + permute_nodeindices(tml->ml, p); + permute_ml(tml->ml, tml->index, nt); + } + } + + // permute the Point_process._i_instance + for (int i = 0; i < nt.n_pntproc; ++i) { + Point_process& pp = nt.pntprocs[i]; + Memb_list* ml = nt._ml_list[pp._type]; + if (ml->_permute) { + pp._i_instance = ml->_permute[pp._i_instance]; + } + } + } + + if (nrn_have_gaps && interleave_permute_type) { + nrn_partrans::gap_indices_permute(nt); + } + + set_dependencies(nt, memb_func); + + fill_before_after_lists(nt, memb_func); + + // for fast watch statement checking + // setup a list of types that have WATCH statement + { + int sz = 0; // count the types with WATCH + for (auto tml = nt.tml; tml; tml = tml->next) { + if (corenrn.get_watch_check()[tml->index]) { + ++sz; + } + } + if (sz) { + nt._watch_types = (int*)ecalloc(sz + 1, sizeof(int)); // nullptr terminated + sz = 0; + for (auto tml = nt.tml; tml; tml = tml->next) { + if (corenrn.get_watch_check()[tml->index]) { + nt._watch_types[sz++] = tml->index; + } + } + } + } + auto& pnttype2presyn = corenrn.get_pnttype2presyn(); + auto& nrn_has_net_event_ = corenrn.get_has_net_event(); + // from nrn_has_net_event create pnttype2presyn. + if (pnttype2presyn.empty()) { + pnttype2presyn.resize(memb_func.size(), -1); + } + for (size_t i = 0; i < nrn_has_net_event_.size(); ++i) { + pnttype2presyn[nrn_has_net_event_[i]] = i; + } + // create the nt.pnt2presyn_ix array of arrays. + nt.pnt2presyn_ix = (int**)ecalloc(nrn_has_net_event_.size(), sizeof(int*)); + for (size_t i = 0; i < nrn_has_net_event_.size(); ++i) { + Memb_list* ml = nt._ml_list[nrn_has_net_event_[i]]; + if (ml && ml->nodecount > 0) { + nt.pnt2presyn_ix[i] = (int*)ecalloc(ml->nodecount, sizeof(int)); + } + } + + // Real cells are at the beginning of the nt.presyns followed by + // acells (with and without gids mixed together) + // Here we associate the real cells with voltage pointers and + // acell PreSyn with the Point_process. + // nt.presyns order same as output_vindex order +#if CHKPNTDEBUG + ntc.output_vindex = new int[nt.n_presyn]; + memcpy(ntc.output_vindex, output_vindex, nt.n_presyn * sizeof(int)); +#endif + if (nt._permute) { + // only indices >= 0 (i.e. _actual_v indices) will be changed. 
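+        // Negative entries encode acell sources as -(index*1000 + type)
+        // and are left untouched by node_permute.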
+        node_permute(output_vindex.data(), nt.n_presyn, nt._permute);
+    }
+#if CHKPNTDEBUG
+    ntc.output_threshold = new double[nt.ncell];
+    memcpy(ntc.output_threshold, output_threshold, nt.ncell * sizeof(double));
+#endif
+    for (int i = 0; i < nt.n_presyn; ++i) {  // real cells
+        PreSyn* ps = nt.presyns + i;
+
+        int ix = output_vindex[i];
+        if (ix == -1 && i < nt.ncell) {  // real cell without a presyn
+            continue;
+        }
+        if (ix < 0) {
+            ix = -ix;
+            int index = ix / 1000;
+            int type = ix % 1000;
+            Point_process* pnt = nt.pntprocs + (pnt_offset[type] + index);
+            ps->pntsrc_ = pnt;
+            // pnt->_presyn = ps;
+            int ip2ps = pnttype2presyn[pnt->_type];
+            if (ip2ps >= 0) {
+                nt.pnt2presyn_ix[ip2ps][pnt->_i_instance] = i;
+            }
+            if (ps->gid_ < 0) {
+                ps->gid_ = -1;
+            }
+        } else {
+            assert(ps->gid_ > -1);
+            ps->thvar_index_ = ix;  // index into _actual_v
+            assert(ix < nt.end);
+            ps->threshold_ = output_threshold[i];
+        }
+    }
+
+    // initial net_send_buffer size about 1% of number of presyns
+    // nt._net_send_buffer_size = nt.ncell/100 + 1;
+    // but, to avoid reallocation complexity on GPU ...
+    nt._net_send_buffer_size = nt.ncell;
+    nt._net_send_buffer = (int*)ecalloc_align(nt._net_send_buffer_size, sizeof(int));
+
+    // do extracon later as the target and weight info
+    // is not directly in the file
+    int nnetcon = nt.n_netcon - nrn_setup_extracon;
+
+    // it may happen that Point_process structures will be made unnecessary
+    // by factoring into NetCon.
+
+#if CHKPNTDEBUG
+    ntc.pnttype = new int[nnetcon];
+    ntc.pntindex = new int[nnetcon];
+    memcpy(ntc.pnttype, pnttype, nnetcon * sizeof(int));
+    memcpy(ntc.pntindex, pntindex, nnetcon * sizeof(int));
+#endif
+    for (int i = 0; i < nnetcon; ++i) {
+        int type = pnttype[i];
+        if (type > 0) {
+            // Note: pnt_offset[type] is only initialized for POINT_PROCESS
+            // types handled in the mechanism loop above.
+            int index = pnt_offset[type] + pntindex[i];
+            NetCon& nc = nt.netcons[i];
+            nc.target_ = nt.pntprocs + index;
+            nc.active_ = true;
+        }
+    }
+
+    handle_extracon(nt, nnetcon, pnt_offset);
+
+    get_info_from_bbcore(nt, memb_func);
+
+    set_vec_play(nt);
+
+    if (!events.empty()) {
+        checkpoint_restore_tqueue(nt, *this);
+    }
+
+    set_net_send_buffer(nt._ml_list, pnt_offset);
+}
+}  // namespace coreneuron
+
diff --git a/coreneuron/io/phase2.hpp b/coreneuron/io/phase2.hpp
new file mode 100644
index 000000000..e299d4995
--- /dev/null
+++ b/coreneuron/io/phase2.hpp
@@ -0,0 +1,111 @@
+#pragma once
+
+#include "coreneuron/io/nrn_filehandler.hpp"
+#include "coreneuron/io/user_params.hpp"
+#include "coreneuron/utils/ivocvect.hpp"
+
+#include <memory>
+#include <utility>
+#include <vector>
+
+namespace coreneuron {
+struct NrnThread;
+struct NrnThreadMembList;
+struct Memb_func;
+struct Memb_list;
+
+class Phase2 {
+  public:
+    void read_file(FileHandler& F, const NrnThread& nt);
+    void read_direct(int thread_id, const NrnThread& nt);
+    void populate(NrnThread& nt, const UserParams& userParams);
+
+    std::vector<int> preSynConditionEventFlags;
+
+    // All of this is public for nrn_checkpoint
+    struct EventTypeBase {
+        double time;
+    };
+    struct NetConType_: public EventTypeBase {
+        int netcon_index;
+    };
+    struct SelfEventType_: public EventTypeBase {
+        int target_type;
+        int point_proc_instance;
+        int target_instance;
+        double flag;
+        int movable;
+        int weight_index;
+    };
+    struct PreSynType_: public EventTypeBase {
+        int presyn_index;
+    };
+    struct NetParEvent_: public EventTypeBase {
+    };
+    struct PlayRecordEventType_: public EventTypeBase {
+        int play_record_type;
+        int vecplay_index;
+    };
+
+    struct VecPlayContinuous_ {
+        int vtype;
+        int mtype;
+        int ix;
+        IvocVect yvec;
+        IvocVect tvec;
+
+        int last_index;
+        int discon_index;
+        int ubound_index;
+    };
+    std::vector<VecPlayContinuous_> vec_play_continuous;
+    int patstim_index;
+
+    std::vector<std::pair<int, std::shared_ptr<EventTypeBase>>> events;
+
+  private:
+    void check_mechanism();
+    NrnThreadMembList* create_tml(int mech_id, Memb_func& memb_func, int& shadow_rhs_cnt);
+    void transform_int_data(int elem0, int nodecount, int* pdata, int i, int dparam_size, int layout, int n_node_);
+    void set_net_send_buffer(Memb_list** ml_list, const std::vector<int>& pnt_offset);
+    void restore_events(FileHandler& F);
+    void fill_before_after_lists(NrnThread& nt, const std::vector<Memb_func>& memb_func);
+    void pdata_relocation(const NrnThread& nt, const std::vector<Memb_func>& memb_func);
+    void set_dependencies(const NrnThread& nt, const std::vector<Memb_func>& memb_func);
+    void handle_extracon(NrnThread& nt, int n_netcon, const std::vector<int>& pnt_offset);
+    void get_info_from_bbcore(NrnThread& nt, const std::vector<Memb_func>& memb_func);
+    void set_vec_play(NrnThread& nt);
+
+    int n_output;
+    int n_real_output;
+    int n_node;
+    int n_diam;  // 0 if not needed, else n_node
+    int n_mech;
+    std::vector<int> mech_types;
+    std::vector<int> nodecounts;
+    int n_idata;
+    int n_vdata;
+    int* v_parent_index;
+    /* TO DO: when this is fixed use it like that
+    std::vector<double> actual_a;
+    std::vector<double> actual_b;
+    std::vector<double> actual_area;
+    std::vector<double> actual_v;
+    std::vector<double> actual_diam;
+    */
+    double* _data;
+    struct TML {
+        std::vector<int> nodeindices;
+        std::vector<int> pdata;
+        int type;
+        std::vector<int> iArray;
+        std::vector<double> dArray;
+    };
+    std::vector<TML> tmls;
+    std::vector<int> output_vindex;
+    std::vector<double> output_threshold;
+    std::vector<int> pnttype;
+    std::vector<int> pntindex;
+    std::vector<double> weights;
+    std::vector<double> delay;
+    int num_point_process;
+};
+}  // namespace coreneuron
diff --git a/coreneuron/io/user_params.hpp b/coreneuron/io/user_params.hpp
new file mode 100644
index 000000000..f8d938fb4
--- /dev/null
+++ b/coreneuron/io/user_params.hpp
@@ -0,0 +1,33 @@
+#pragma once
+
+#include "coreneuron/io/nrn_filehandler.hpp"
+
+#include <vector>
+
+namespace coreneuron {
+
+/// This structure holds data needed in several parts of nrn_setup (phase1 and
+/// phase2). Previously these were global variables; they are grouped here so
+/// they can be passed around as a single argument, although for the most part
+/// they are unrelated to each other.
+struct UserParams {
+    UserParams(int ngroup_, int* gidgroups_, int* imult_,
+               const char* path_, const char* restore_path_)
+        : ngroup(ngroup_)
+        , gidgroups(gidgroups_)
+        , imult(imult_)
+        , path(path_)
+        , restore_path(restore_path_)
+        , file_reader(ngroup_)
+    {}
+
+    /// direct memory mode with neuron, do not open files
+    /// Number of local cell groups
+    const int ngroup;
+    /// Array of cell group numbers (indices)
+    const int* const gidgroups;
+    /// Array of duplicate indices. Normally, with nrn_setup_multiple=1,
+    /// they are ngroup values of 0.
+    const int* const imult;
+    /// path to dataset file
+    const char* const path;
+    /// Dataset path from where simulation is being restored
+    const char* const restore_path;
+    std::vector<FileHandler> file_reader;
+};
+}  // namespace coreneuron
diff --git a/coreneuron/nrniv/nrniv_decl.h b/coreneuron/nrniv/nrniv_decl.h
index a825c5b1a..3f49d03ee 100644
--- a/coreneuron/nrniv/nrniv_decl.h
+++ b/coreneuron/nrniv/nrniv_decl.h
@@ -32,7 +32,6 @@ THE POSSIBILITY OF SUCH DAMAGE.
#include #include #include "coreneuron/network/netcon.hpp" -#include "coreneuron/utils/endianness.hpp" namespace coreneuron { /// Mechanism type to be used from stdindex2ptr and nrn_dblpntr2nrncore (in Neuron) @@ -58,7 +57,7 @@ extern void mk_netcvode(void); extern void nrn_p_construct(void); extern void nrn_setup(const char* filesdat, bool is_mapping_needed, - bool byte_swap, + bool byte_swap, // We keep it for backward API compatibility but it is not used bool run_setup_cleanup = true, const char* datapath = "", const char* restore_path = "", diff --git a/coreneuron/utils/endianness.hpp b/coreneuron/utils/endianness.hpp deleted file mode 100644 index 34c094ce1..000000000 --- a/coreneuron/utils/endianness.hpp +++ /dev/null @@ -1,57 +0,0 @@ -/* -Copyright (c) 2016, Blue Brain Project -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: -1. Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. -2. Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. -3. Neither the name of the copyright holder nor the names of its contributors - may be used to endorse or promote products derived from this software - without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef endianness_h -#define endianness_h - -#include - -namespace endian { - - enum endianness { little_endian, big_endian, mixed_endian }; - - static const union { - unsigned char bytes[4]; - uint32_t value; - } endian_check = {{0, 1, 2, 3}}; - - static const enum endianness native_endian = - endian_check.value == 0x03020100ul - ? little_endian - : endian_check.value == 0x00010203ul ? big_endian : mixed_endian; - - static inline bool is_little_endian() { - return native_endian == little_endian; - } - static inline bool is_big_endian() { - return native_endian == big_endian; - } - -} // namespace endian - -#endif // ifndef endianness_h diff --git a/coreneuron/utils/ivocvect.hpp b/coreneuron/utils/ivocvect.hpp index 146f44929..1b39ecc74 100644 --- a/coreneuron/utils/ivocvect.hpp +++ b/coreneuron/utils/ivocvect.hpp @@ -30,19 +30,37 @@ THE POSSIBILITY OF SUCH DAMAGE. 
#define ivoc_vector_h
 
 #include
+#include <utility>
 #include "coreneuron/utils/nrnmutdec.h"
 
 namespace coreneuron {
 
 template <typename T>
 class fixed_vector {
     size_t n_;
 
-    MUTDEC
   public:
     T* data_; /*making public for openacc copying */
+
+    fixed_vector() : n_(0), data_(nullptr) {}
+    fixed_vector(size_t n) : n_(n) { data_ = new T[n_]; }
+
+    fixed_vector(const fixed_vector& vec) = delete;
+    fixed_vector& operator=(const fixed_vector& vec) = delete;
+    fixed_vector(fixed_vector&& vec)
+        : n_(vec.n_)  // members initialize in declaration order: n_, then data_
+        , data_(nullptr) {
+        std::swap(data_, vec.data_);
+    }
+    fixed_vector& operator=(fixed_vector&& vec) {
+        delete[] data_;  // release the current buffer before taking ownership
+        data_ = nullptr;
+        std::swap(data_, vec.data_);
+        n_ = vec.n_;
+        return *this;
+    }
+
     ~fixed_vector() {
         delete[] data_;
     }
@@ -64,22 +82,6 @@ class fixed_vector {
     size_t size() const {
         return n_;
     }
-
-#if defined(_OPENMP)
-    void mutconstruct(int mkmut) {
-        if (!mut_)
-            MUTCONSTRUCT(mkmut)
-    }
-#else
-    void mutconstruct(int) {
-    }
-#endif
-    void lock() {
-        MUTLOCK
-    }
-    void unlock() {
-        MUTUNLOCK
-    }
 };
 
 typedef fixed_vector<double> IvocVect;
diff --git a/coreneuron/utils/nrnmutdec.h b/coreneuron/utils/nrnmutdec.h
index 98749d0f1..a6da91175 100644
--- a/coreneuron/utils/nrnmutdec.h
+++ b/coreneuron/utils/nrnmutdec.h
@@ -37,6 +37,46 @@ THE POSSIBILITY OF SUCH DAMAGE.
 #define MUTDEC omp_lock_t* mut_;
 #define MUTCONSTRUCTED (mut_ != (omp_lock_t*)0)
 #if defined(__cplusplus)
+
+// This class satisfies the C++ *Mutex* requirements
+class OMP_Mutex {
+  public:
+    // Default constructible
+    OMP_Mutex() {
+        omp_init_lock(&mut_);
+    }
+
+    // Destructible
+    ~OMP_Mutex() {
+        omp_destroy_lock(&mut_);
+    }
+
+    // Not copyable
+    OMP_Mutex(const OMP_Mutex&) = delete;
+    OMP_Mutex& operator=(const OMP_Mutex&) = delete;
+
+    // Not movable
+    OMP_Mutex(OMP_Mutex&&) = delete;
+    OMP_Mutex& operator=(OMP_Mutex&&) = delete;
+
+    // BasicLockable
+    void lock() {
+        omp_set_lock(&mut_);
+    }
+
+    void unlock() {
+        omp_unset_lock(&mut_);
+    }
+
+    // Lockable
+    bool try_lock() {
+        return omp_test_lock(&mut_) != 0;
+    }
+
+  private:
+    omp_lock_t mut_;
+};
+
 #define MUTCONSTRUCT(mkmut) \
     { \
         if (mkmut) { \
diff --git a/coreneuron/utils/swap_endian.h b/coreneuron/utils/swap_endian.h
deleted file mode 100644
index 49339e9ea..000000000
--- a/coreneuron/utils/swap_endian.h
+++ /dev/null
@@ -1,461 +0,0 @@
-/*
-Copyright (c) 2016, Blue Brain Project
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without modification,
-are permitted provided that the following conditions are met:
-1. Redistributions of source code must retain the above copyright notice,
-   this list of conditions and the following disclaimer.
-2. Redistributions in binary form must reproduce the above copyright notice,
-   this list of conditions and the following disclaimer in the documentation
-   and/or other materials provided with the distribution.
-3. Neither the name of the copyright holder nor the names of its contributors
-   may be used to endorse or promote products derived from this software
-   without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef swap_endian_h -#define swap_endian_h - -#include -#include -#include -#include -#include -#include - -#ifdef SWAP_ENDIAN_ASSERT -#include -#endif - -#if !defined(SWAP_ENDIAN_DISABLE_ASM) && (defined(__AVX2__) || defined(__SSSE3__)) -#include -#endif - -/* required for correct choice of PPC assembly */ -#include "coreneuron/utils/endianness.hpp" - -#if !defined(SWAP_ENDIAN_MAX_UNROLL) -#define SWAP_ENDIAN_MAX_UNROLL 8 -#endif - -#if defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 -#define SWAP_ENDIAN_BROKEN_MEMCPY -#endif - -#ifdef SWAP_ENDIAN_BROKEN_MEMCPY -#define memcpy(d, s, n) ::endian::impl::safe_memcpy((d), (s), (n)) -namespace endian { - namespace impl { - static inline void* safe_memcpy(void* d, void* s, size_t n) { - char* d_ = (char*)d; - char* s_ = (char*)s; - while (n-- > 0) - *d_++ = *s_++; - return d; - } - } // namespace impl -} // namespace endian -#endif - -namespace endian { - - namespace impl { - template - struct is_pointer { - enum { value = false }; - }; - - template - struct is_pointer { - enum { value = true }; - }; - - struct not_implemented { - static void eval(...) { - abort(); - } - }; - - /** Check to ensure a class is not derived from not_implemented */ - template - struct is_implemented { - typedef char no; - typedef double yes; - static no check(not_implemented*); - static yes check(...); - enum { value = sizeof(check((C*)0)) == sizeof(yes) }; - }; - - template - struct swap_endian_basic : not_implemented {}; - - template - struct swap_endian_basic { - static void eval(unsigned char* d) { - std::reverse(d, d + K); - } - }; - - template - struct swap_endian_fast : not_implemented {}; - - /** Reverse bytes within values of fixed size in buffer - * - * Basic overload does one item at a time, using std::reverse. - * Specialisations provide faster implementations for particular - * item sizes and unrolls. - * - * If a _fast version is implemented (e.g., architecture specific) - * use that in preference. - * - * If Align is true, we can assume that d is aligned to a multiple - * of K*Unroll bytes. - */ - template - struct swap_endian { - static void eval(unsigned char* d) { -#ifdef SWAP_ENDIAN_ASSERT - assert(!Aligned || (uintptr_t)d % (K * Unroll) == 0); -#endif - if (is_implemented >::value) - swap_endian_fast::eval(d); - else if (is_implemented >::value) - swap_endian_basic::eval(d); - else if (Unroll % 2 == 0 || !Aligned) { - swap_endian::eval(d); - swap_endian::eval(d + K * (Unroll / 2)); - if (Unroll % 2) - swap_endian::eval(d + K * (Unroll - 1)); - } else { - // Unroll is odd: can't make guarantees that we're aligned wrt - // (Unroll/2). - swap_endian::eval(d); - swap_endian::eval(d + K * (Unroll / 2)); - swap_endian::eval(d + K * (Unroll - 1)); - } - } - }; - - // This specialization is required ONLY to convince gcc 4.4.7 - // that it will never divide by zero statically. 
- template - struct swap_endian { - static void eval(unsigned char*) { - } - }; - - template - struct swap_endian<1, Unroll, Aligned> { - static void eval(unsigned char*) { - } - }; - - // specialise swap_endian_basic for integer data sizes - template - struct swap_endian_basic<2, 1, Aligned> { - static void eval(unsigned char* d) { - uint16_t v; - memcpy(&v, d, 2); - v = (uint16_t)((v >> 8u) | (v << 8u)); - memcpy(d, &v, 2); - } - }; - - template - struct swap_endian_basic<4, 1, Aligned> { - static void eval(unsigned char* d) { - uint32_t v; - memcpy(&v, d, 4); - v = (v >> 24) | ((v >> 8) & 0x0000ff00ul) | ((v << 8) & 0x00ff0000ul) | (v << 24); - memcpy(d, &v, 4); - } - }; - - template - struct swap_endian_basic<8, 1, Aligned> { - static void eval(unsigned char* d) { - uint64_t v; - memcpy(&v, d, 8); - v = (v >> 56) | ((v << 40) & 0x00FF000000000000ull) | - ((v << 24) & 0x0000FF0000000000ull) | ((v << 8) & 0x000000FF00000000ull) | - ((v >> 8) & 0x00000000FF000000ull) | ((v >> 24) & 0x0000000000FF0000ull) | - ((v >> 40) & 0x000000000000FF00ull) | (v << 56); - memcpy(d, &v, 8); - } - }; - -#if !defined(SWAP_ENDIAN_DISABLE_ASM) && defined(__PPC64__) - /* generic methods very slow on bgq */ - - template - struct swap_endian_fast<4, 1, Aligned> { - static void eval(unsigned char* d) { - struct chunk_t { - unsigned char x[4]; - }& u = *(chunk_t*)(void*)d; - - uint32_t v; - asm("lwz %[v],%[ldata] \n\t" - "stwbrx %[v],0,%[sdata] \n\t" - : [v] "=&r"(v), "+o"(u) - : [ldata] "o"(u), [sdata] "r"(d) - :); - } - }; - - template <> - struct swap_endian_fast<4, 2, true> { - static void eval(unsigned char* d) { - struct chunk_t { - unsigned char x[8]; - }& u = *(chunk_t*)(void*)d; - - uint64_t v; - if (endian::is_big_endian()) { - asm("ld %[v],%[ldata] \n\t" - "stwbrx %[v],%[word],%[sdata] \n\t" - "srd %[v],%[v],%[shift] \n\t" - "stwbrx %[v],0,%[sdata] \n\t" - : [v] "=&r"(v), "+o"(u) - : [ldata] "o"(u), [sdata] "r"(d), [word] "b"(4), [shift] "b"(32) - :); - } else { - asm("ld %[v],%[ldata] \n\t" - "stwbrx %[v],0,%[sdata] \n\t" - "srd %[v],%[v],%[shift] \n\t" - "stwbrx %[v],%[word],%[sdata] \n\t" - : [v] "=&r"(v), "+o"(u) - : [ldata] "o"(u), [sdata] "r"(d), [word] "b"(4), [shift] "b"(32) - :); - } - } - }; - -#if !defined(__POWER8_VECTOR__) - // 2.06 ISA does not have stdbrx - template <> - struct swap_endian_fast<8, 1, true> { - static void eval(unsigned char* d) { - struct chunk_t { - unsigned char x[8]; - }& u = *(chunk_t*)(void*)d; - - uint64_t v; - if (endian::is_big_endian()) { - asm("ld %[v],%[ldata] \n\t" - "stwbrx %[v],0,%[sdata] \n\t" - "srd %[v],%[v],%[shift] \n\t" - "stwbrx %[v],%[word],%[sdata] \n\t" - : [v] "=&r"(v), "+o"(u) - : [ldata] "o"(u), [sdata] "r"(d), [word] "b"(4), [shift] "b"(32) - :); - } else { - asm("ld %[v],%[ldata] \n\t" - "stwbrx %[v],%[word],%[sdata] \n\t" - "srd %[v],%[v],%[shift] \n\t" - "stwbrx %[v],0,%[sdata] \n\t" - : [v] "=&r"(v), "+o"(u) - : [ldata] "o"(u), [sdata] "r"(d), [word] "b"(4), [shift] "b"(32) - :); - } - } - }; -#else - template <> - struct swap_endian_fast<8, 1, true> { - static void eval(unsigned char* d) { - struct chunk_t { - unsigned char x[8]; - }& u = *(chunk_t*)(void*)d; - - uint64_t v; - asm("ld %[v],%[ldata] \n\t" - "stdbrx %[v],0,%[sdata] \n\t" - : [v] "=&r"(v), "+o"(u) - : [ldata] "o"(u), [sdata] "r"(d) - :); - } - }; - -#endif -#endif - -#if !defined(SWAP_ENDIAN_DISABLE_ASM) && defined(__SSSE3__) - template <> - struct swap_endian_fast<2, 8, true> { - static void eval(unsigned char* d) { - __m128i permute = - _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 
6, 9, 8, 11, 10, 13, 12, 15, 14); - __m128i x = _mm_load_si128((__m128i*)d); - x = _mm_shuffle_epi8(x, permute); - _mm_store_si128((__m128i*)d, x); - } - }; - - template <> - struct swap_endian_fast<4, 4, true> { - static void eval(unsigned char* d) { - __m128i permute = - _mm_setr_epi8(3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12); - __m128i x = _mm_load_si128((__m128i*)d); - x = _mm_shuffle_epi8(x, permute); - _mm_store_si128((__m128i*)d, x); - } - }; - - template <> - struct swap_endian_fast<8, 2, true> { - static void eval(unsigned char* d) { - __m128i permute = - _mm_setr_epi8(7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); - __m128i x = _mm_load_si128((__m128i*)d); - x = _mm_shuffle_epi8(x, permute); - _mm_store_si128((__m128i*)d, x); - } - }; -#endif - -#if !defined(SWAP_ENDIAN_DISABLE_ASM) && defined(__AVX2__) - // Modern implementations suffer little or no penalty from unaligned load. - template - struct swap_endian_fast<4, 8, Aligned> { - static void eval(unsigned char* d) { - __m256i permute = - _mm256_setr_epi8(3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12, 19, 18, - 17, 16, 23, 22, 21, 20, 27, 26, 25, 24, 31, 30, 29, 28); - __m256i x = _mm256_loadu_si256((__m256i*)d); - x = _mm256_shuffle_epi8(x, permute); - _mm256_storeu_si256((__m256i*)d, x); - } - }; - - template - struct swap_endian_fast<8, 4, Aligned> { - static void eval(unsigned char* d) { - __m256i permute = - _mm256_setr_epi8(7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8, 23, 22, - 21, 20, 19, 18, 17, 16, 31, 30, 29, 28, 27, 26, 25, 24); - __m256i x = _mm256_loadu_si256((__m256i*)d); - x = _mm256_shuffle_epi8(x, permute); - _mm256_storeu_si256((__m256i*)d, x); - } - }; - -#endif - - template - void swap_endian_unroll(V* b, V* e) { - static const size_t n_unroll = SWAP_ENDIAN_MAX_UNROLL; - if (e <= b) - return; - - size_t n = e - b; - bool aligned_vsize = ((uintptr_t)b % sizeof(V) == 0); - - if (n >= n_unroll) { - if (!aligned_vsize) { - /* No guarantees on alignment for swap_endian: elements - are not aligned to multiple of the element size. - This can happen with doubles on some 32-bit ABIs. */ - - while (n >= n_unroll) { - swap_endian::eval((unsigned char*)b); - b += n_unroll; - n -= n_unroll; - } - } else { - /* process elements singly until we get to a n_unroll*sizeof(V) - boundary, and then do an alignment-guaranteed swap_endian. */ - - size_t off_align_count = - (size_t)((uintptr_t)b % (sizeof(V) * n_unroll)) / sizeof(V); - if (off_align_count > 0) { - while (off_align_count++ < n_unroll) { - swap_endian::eval((unsigned char*)b++); - --n; - } - } - -/* b should now be n_unroll*sizeof(V) aligned. */ -#ifdef SWAP_ENDIAN_ASSERT - assert((uintptr_t)b % (sizeof(V) * n_unroll) == 0); -#endif - while (n >= n_unroll) { - swap_endian::eval((unsigned char*)b); - b += n_unroll; - n -= n_unroll; - } - } - } - - if (aligned_vsize) { - while (n-- > 0) - swap_endian::eval((unsigned char*)b++); - } else { - while (n-- > 0) - swap_endian::eval((unsigned char*)b++); - } - } - } // namespace impl - - /** Reverse the endianness of a value in-place. 
- * - * /tparam T value type - * /param v value to byte-reorder - */ - template - T& swap_endian(T& v) { - impl::swap_endian::eval((unsigned char*)&v); - return v; - } - - namespace impl { - template - struct swap_endian_range_dispatch { - static void eval(I b, I e) { - while (b != e) - ::endian::swap_endian(*b++); - } - }; - - template - struct swap_endian_range_dispatch { - static void eval(I b, I e) { - swap_endian_unroll(b, e); - } - }; - } // namespace impl - - /** Reverse the endianness of the values in the given range. - * - * /tparam I iterator type - * /param b iterator representing the beginning of the range. - * /param e iterator representing the end of the range. - * - * All values in the iterator range [b,e) are byte-reversed. - */ - template - void swap_endian_range(I b, I e) { - impl::swap_endian_range_dispatch::value>::eval(b, e); - } - -} // namespace endian - -#ifdef SWAP_ENDIAN_BROKEN_MEMCPY -#undef memcpy -#endif - -#endif // ifndef swap_endian_h diff --git a/coreneuron/utils/vrecitem.h b/coreneuron/utils/vrecitem.h index 2f0f3b905..752b3e7ef 100644 --- a/coreneuron/utils/vrecitem.h +++ b/coreneuron/utils/vrecitem.h @@ -78,9 +78,8 @@ class PlayRecord { class VecPlayContinuous : public PlayRecord { public: - VecPlayContinuous(double*, IvocVect* yvec, IvocVect* tvec, IvocVect* discon, int ith); + VecPlayContinuous(double*, IvocVect&& yvec, IvocVect&& tvec, IvocVect* discon, int ith); virtual ~VecPlayContinuous(); - void init(IvocVect* yvec, IvocVect* tvec, IvocVect* tdiscon); virtual void play_init(); virtual void deliver(double tt, NetCvode*); virtual PlayRecordEvent* event() { @@ -99,8 +98,8 @@ class VecPlayContinuous : public PlayRecord { return VecPlayContinuousType; } - IvocVect* y_; - IvocVect* t_; + IvocVect y_; + IvocVect t_; IvocVect* discon_indices_; size_t last_index_; size_t discon_index_; diff --git a/coreneuron/utils/vrecord.cpp b/coreneuron/utils/vrecord.cpp index 7c172933b..86a8e470e 100644 --- a/coreneuron/utils/vrecord.cpp +++ b/coreneuron/utils/vrecord.cpp @@ -69,28 +69,18 @@ void PlayRecord::pr() { } VecPlayContinuous::VecPlayContinuous(double* pd, - IvocVect* yvec, - IvocVect* tvec, + IvocVect&& yvec, + IvocVect&& tvec, IvocVect* discon, int ith) - : PlayRecord(pd, ith) { - // printf("VecPlayContinuous\n"); - init(yvec, tvec, discon); -} - -void VecPlayContinuous::init(IvocVect* yvec, IvocVect* tvec, IvocVect* discon) { - y_ = yvec; - t_ = tvec; - discon_indices_ = discon; - ubound_index_ = 0; - last_index_ = 0; - discon_index_ = 0; - e_ = new PlayRecordEvent(); + : PlayRecord(pd, ith) + , y_(std::move(yvec)), t_(std::move(tvec)), discon_indices_(discon) + , last_index_(0), discon_index_(0), ubound_index_(0) + , e_(new PlayRecordEvent{}) { e_->plr_ = this; } VecPlayContinuous::~VecPlayContinuous() { - // printf("~VecPlayContinuous\n"); delete e_; } @@ -102,13 +92,13 @@ void VecPlayContinuous::play_init() { if (discon_indices_->size() > 0) { ubound_index_ = (int)(*discon_indices_)[discon_index_++]; // printf("play_init %d %g\n", ubound_index_, t_->elem(ubound_index_)); - e_->send((*t_)[ubound_index_], net_cvode_instance, nt); + e_->send(t_[ubound_index_], net_cvode_instance, nt); } else { - ubound_index_ = t_->size() - 1; + ubound_index_ = t_.size() - 1; } } else { ubound_index_ = 0; - e_->send((*t_)[ubound_index_], net_cvode_instance, nt); + e_->send(t_[ubound_index_], net_cvode_instance, nt); } } @@ -123,14 +113,14 @@ void VecPlayContinuous::deliver(double tt, NetCvode* ns) { if (discon_index_ < discon_indices_->size()) { ubound_index_ = 
(int)(*discon_indices_)[discon_index_++]; // printf("after deliver:send %d %g\n", ubound_index_, t_->elem(ubound_index_)); - e_->send((*t_)[ubound_index_], ns, nt); + e_->send(t_[ubound_index_], ns, nt); } else { - ubound_index_ = t_->size() - 1; + ubound_index_ = t_.size() - 1; } } else { - if (ubound_index_ < t_->size() - 1) { + if (ubound_index_ < t_.size() - 1) { ubound_index_++; - e_->send((*t_)[ubound_index_], ns, nt); + e_->send(t_[ubound_index_], ns, nt); } } // clang-format off @@ -150,24 +140,24 @@ void VecPlayContinuous::continuous(double tt) { } double VecPlayContinuous::interpolate(double tt) { - if (tt >= (*t_)[ubound_index_]) { + if (tt >= t_[ubound_index_]) { last_index_ = ubound_index_; if (last_index_ == 0) { // printf("return last tt=%g ubound=%g y=%g\n", tt, t_->elem(ubound_index_), // y_->elem(last_index_)); - return (*y_)[last_index_]; + return y_[last_index_]; } - } else if (tt <= (*t_)[0]) { + } else if (tt <= t_[0]) { last_index_ = 0; // printf("return elem(0) tt=%g t0=%g y=%g\n", tt, t_->elem(0), y_->elem(0)); - return (*y_)[0]; + return y_[0]; } else { search(tt); } - double x0 = (*y_)[last_index_ - 1]; - double x1 = (*y_)[last_index_]; - double t0 = (*t_)[last_index_ - 1]; - double t1 = (*t_)[last_index_]; + double x0 = y_[last_index_ - 1]; + double x1 = y_[last_index_]; + double t0 = t_[last_index_ - 1]; + double t1 = t_[last_index_]; // printf("IvocVectRecorder::continuous tt=%g t0=%g t1=%g theta=%g x0=%g x1=%g\n", tt, t0, t1, // (tt - t0)/(t1 - t0), x0, x1); if (t0 == t1) { @@ -178,10 +168,10 @@ double VecPlayContinuous::interpolate(double tt) { void VecPlayContinuous::search(double tt) { // assert (tt > t_->elem(0) && tt < t_->elem(t_->size() - 1)) - while (tt < (*t_)[last_index_]) { + while (tt < t_[last_index_]) { --last_index_; } - while (tt >= (*t_)[last_index_]) { + while (tt >= t_[last_index_]) { ++last_index_; } } diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 6f439c1b2..c647ac1c8 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -22,7 +22,6 @@ find_package(Boost 1.41.0 QUIET if(Boost_FOUND) if(CORENRN_ENABLE_UNIT_TESTS) include_directories(${Boost_INCLUDE_DIRS}) - add_subdirectory(unit/endian) add_subdirectory(unit/cmdline_interface) add_subdirectory(unit/interleave_info) add_subdirectory(unit/alignment) diff --git a/tests/integration/reportinglib/reporting_test.sh.in b/tests/integration/reportinglib/reporting_test.sh.in index 28319366b..6148536f9 100644 --- a/tests/integration/reportinglib/reporting_test.sh.in +++ b/tests/integration/reportinglib/reporting_test.sh.in @@ -1,5 +1,5 @@ #! /bin/sh export OMP_NUM_THREADS=1 -@SRUN_PREFIX@ @CMAKE_BINARY_DIR@/bin/@CMAKE_SYSTEM_PROCESSOR@/special-core --mpi --read-config @TEST_NAME@.conf -chmod +x ./@TEST_NAME@.check -exit `./@TEST_NAME@.check` +@SRUN_PREFIX@ @CMAKE_BINARY_DIR@/bin/@CMAKE_SYSTEM_PROCESSOR@/special-core --mpi --read-config @CMAKE_CURRENT_BINARY_DIR@/@SIM_NAME@/@TEST_NAME@.conf +chmod +x @CMAKE_CURRENT_BINARY_DIR@/@SIM_NAME@/@TEST_NAME@.check +exit `@CMAKE_CURRENT_BINARY_DIR@/@SIM_NAME@/@TEST_NAME@.check` diff --git a/tests/unit/endian/CMakeLists.txt b/tests/unit/endian/CMakeLists.txt deleted file mode 100644 index 24f82cf72..000000000 --- a/tests/unit/endian/CMakeLists.txt +++ /dev/null @@ -1,21 +0,0 @@ -# ============================================================================= -# Copyright (C) 2016-2019 Blue Brain Project -# -# See top-level LICENSE file for details. 
-# ============================================================================= - -# Need to test the endian code against aggressive compiler optimization -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_${COMPILER_LANGUAGE}_OPT_AGGRESSIVE}") - -file(GLOB endian_test_src "*.cpp") - -foreach(test_src_file ${endian_test_src}) - get_filename_component(test_name ${test_src_file} NAME) - - add_executable(${test_name}_test_bin ${test_src_file}) - target_link_libraries(${test_name}_test_bin ${Boost_UNIT_TEST_FRAMEWORK_LIBRARY}) - - add_test(NAME ${test_name}_test - COMMAND ${TEST_EXEC_PREFIX} ${CMAKE_CURRENT_BINARY_DIR}/${test_name}_test_bin) - -endforeach() diff --git a/tests/unit/endian/endianness_test.cpp b/tests/unit/endian/endianness_test.cpp deleted file mode 100644 index 2bf9bec60..000000000 --- a/tests/unit/endian/endianness_test.cpp +++ /dev/null @@ -1,107 +0,0 @@ -/* -Copyright (c) 2016, Blue Brain Project -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: -1. Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. -2. Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. -3. Neither the name of the copyright holder nor the names of its contributors - may be used to endorse or promote products derived from this software - without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - - -/* confirm that reported native endianness is big- or little-endian, and that - * this corresponds with our expectations across 16-, 32- and 64-bit integers, - * and floats and doubles (assumes IEEE 4- and 8- byte representation) */ - -#define BOOST_TEST_MODULE NativeEndianCheck -#define BOOST_TEST_MAIN - -#include -#include - -#include -#include -#include - -#include "coreneuron/utils/endianness.hpp" - -BOOST_AUTO_TEST_CASE(confirm_big_or_little) { - BOOST_CHECK(endian::little_endian!=endian::big_endian); - BOOST_CHECK(endian::little_endian!=endian::mixed_endian); - BOOST_CHECK(endian::big_endian!=endian::mixed_endian); - - BOOST_CHECK(endian::native_endian==endian::big_endian || - endian::native_endian==endian::little_endian); -} - -template -struct check_data { - static const unsigned char data[sizeof(T)]; - static const T le,be; - - static void confirm_endian(enum endian::endianness E) { - union { - unsigned char bytes[sizeof(T)]; - T value; - } join; - - memcpy(join.bytes,data,sizeof(T)); - switch (E) { - case endian::little_endian: - BOOST_CHECK_EQUAL(join.value,le); - break; - case endian::big_endian: - BOOST_CHECK_EQUAL(join.value,be); - break; - default: - BOOST_ERROR("Endian neither little nor big"); - } - } -}; - -typedef boost::mpl::list test_value_types; - -#define CHECK_DATA(T,le_,be_)\ -template <> const T check_data::le=le_;\ -template <> const T check_data::be=be_;\ -template <> const unsigned char check_data::data[] - -CHECK_DATA(char,'x','x')={'x'}; - -CHECK_DATA(float,(float)1.1,(float)-428967904.0)= - {0xcd,0xcc,0x8c,0x3f}; - -CHECK_DATA(double,1.1,-1.5423487136706484e-180)= - {0x9a,0x99,0x99,0x99,0x99,0x99,0xf1,0x3f}; - -CHECK_DATA(uint16_t,0xf12e,0x2ef1)= - {0x2e,0xf1}; - -CHECK_DATA(uint32_t,0x192a3b4c,0x4c3b2a19)= - {0x4c,0x3b,0x2a,0x19}; - -CHECK_DATA(uint64_t,0xab13cd24ef350146,0x460135ef24cd13ab)= - {0x46,0x01,0x35,0xef,0x24,0xcd,0x13,0xab}; - -BOOST_AUTO_TEST_CASE_TEMPLATE(correct_endianness,T,test_value_types) { - check_data::confirm_endian(endian::native_endian); -} - diff --git a/tests/unit/endian/swap_endian_common.ipp b/tests/unit/endian/swap_endian_common.ipp deleted file mode 100644 index bb8a017c6..000000000 --- a/tests/unit/endian/swap_endian_common.ipp +++ /dev/null @@ -1,305 +0,0 @@ -/* -Copyright (c) 2016, Blue Brain Project -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: -1. Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. -2. Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. -3. Neither the name of the copyright holder nor the names of its contributors - may be used to endorse or promote products derived from this software - without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -THE POSSIBILITY OF SUCH DAMAGE. -*/ - - -/* Wish to run unit tests for swap_endian code under multiple configurations: - * 1. default: SWAP_ENDIAN_DISABLE_ASM and SWAP_ENDIAN_MAX_UNROLL are undef. - * 2. using fallback methods, with SWAP_ENDIAN_DISABLE_ASM defined. - * 3. using a strange unroll factor, with SWAP_ENDIAN_DISABLE_ASM set to 7. - * - * This file presents the common code for testing, to be included in - * the specific test code for each of the above scenarios. - */ - -#define SWAP_ENDIAN_ASSERT -#include "coreneuron/utils/swap_endian.h" - -#ifndef SWAP_ENDIAN_CONFIG -#define SWAP_ENDIAN_CONFIG _ -#endif -#define CONCAT(a,b) a##b -#define CONCAT2(a,b) CONCAT(a,b) - -#define BOOST_TEST_MODULE CONCAT2(SwapEndian,SWAP_ENDIAN_CONFIG) -#define BOOST_TEST_MAIN -#include -#include -#include - -#include -#include -#include - -#include - -#ifdef SWAP_ENDIAN_BROKEN_MEMCPY -#define memcpy(d,s,n) ::endian::impl::safe_memcpy((d),(s),(n)) -#endif - -/* Some of the tests require an unaligned fill: can't ask std::fill to - * do this and not expect issues with alignment */ -template -void fill(T *b,T *e,T value) { - while (b -void run_test_array(T *buf,size_t N,T (*fill_value)(unsigned),T (*check_value)(unsigned)) { - for (size_t i=0;i -void run_test_list(size_t N,T (*fill_value)(unsigned),T (*check_value)(unsigned)) { - std::list data(N); - unsigned j=0; - for (typename std::list::iterator i=data.begin();i!=data.end();++i) *i=fill_value(j++); - - endian::swap_endian_range(data.begin(),data.end()); - - j=0; - for (typename std::list::const_iterator i=data.begin();i!=data.end();++i) { - BOOST_REQUIRE_EQUAL(*i,check_value(j++)); - } -} - -template -void check_small_containers(T (*fill_value)(unsigned),T (*check_value)(unsigned)) { - std::vector data(MAXN); - - for (size_t i=0;i test_value_types; - -template V sample_fill_value(unsigned i=0); -template V sample_check_value(unsigned i=0); - -#define NELEM(arr) (sizeof(arr)/sizeof(arr[0])) - -template <> double sample_fill_value(unsigned i) { - double table[]={ - 1.1, - 12.345e36, - -0.987e-39 - }; - return table[i%NELEM(table)]; -} -template <> double sample_check_value(unsigned i) { - double table[]={ - -1.5423487136706484e-180, - 130023111.64417373, - 2.703780148780151e-145 - }; - return table[i%NELEM(table)]; -} - -template <> uint64_t sample_fill_value(unsigned i) { - uint64_t table[]={ - 0x123456789abcdef0ull, - 0x5342312f1e0dfcebull, - 0xb56cd13e90023347ull, - 0x45b6e72aa9c692b5ull, - 0x2b45634ec623974full - }; - return table[i%NELEM(table)]; -} - -template <> uint64_t sample_check_value(unsigned i) { - uint64_t table[]={ - 0xf0debc9a78563412ull, - 0xebfc0d1e2f314253ull, - 0x473302903ed16cb5ull, - 0xb592c6a92ae7b645ull, - 0x4f9723c64e63452bull - }; - return table[i%NELEM(table)]; -} - - -template <> uint32_t sample_fill_value(unsigned i) { - uint32_t table[]={ - 0x1234567ful, - 0x89abcde0ul, - 0x11223344ul - }; - return table[i%NELEM(table)]; -} - -template <> uint32_t sample_check_value(unsigned i) { - uint32_t table[]={ - 0x7f563412ul, - 0xe0cdab89ul, - 
0x44332211ul - }; - return table[i%NELEM(table)]; -} - -template <> float sample_fill_value(unsigned i) { - float table[]={ - (float)1.1, - (float)12.345e12, - (float)-0.987e-13 - }; - return table[i%NELEM(table)]; -} - -template <> float sample_check_value(unsigned i) { - float table[]={ - (float)-428967904.0, - (float)-1.2233891766300076e-06, - (float)-1.0963376907702216e-11 - }; - return table[i%NELEM(table)]; -} - - -template <> uint16_t sample_fill_value(unsigned i) { - uint16_t table[]={ - 0xb2c6u, - 0xa391u, - 0x3456u - }; - return table[i%NELEM(table)]; -} - -template <> uint16_t sample_check_value(unsigned i) { - uint16_t table[]={ - 0xc6b2u, - 0x91a3u, - 0x5634u - }; - return table[i%NELEM(table)]; -} - - -template <> char sample_fill_value(unsigned i) { - char table[]={ - 'z','a','/' - }; - return table[i%NELEM(table)]; -} - -template <> char sample_check_value(unsigned i) { - char table[]={ - 'z','a','/' - }; - return table[i%NELEM(table)]; -} - - -BOOST_AUTO_TEST_CASE_TEMPLATE(small_collection,T,test_value_types) { - check_small_containers(sample_fill_value,sample_check_value); -} - -/* run test over longer (unrollable) array; using N_guard items at front and back of the - * buffer to check against overwrites */ - -template -void run_longer_test_array(T *buf,size_t N,size_t N_guard, - T (*fill_value)(unsigned),T (*check_value)(unsigned),const T &guard_value) -{ - if (N_guard*2>=N) return; - - T *start=buf+N_guard; - size_t count=N-2*N_guard; - - fill(buf,buf+N_guard,guard_value); - for (size_t i=0;i -void check_longer_unroll(T (*fill_value)(unsigned),T (*check_value)(unsigned),const T &guard_value) { - std::vector data((1+unroll_multiple)*SWAP_ENDIAN_MAX_UNROLL); - - size_t n_guard=8; - for (size_t n_over=0;n_over -void check_unroll_badalign(T (*fill_value)(unsigned),T (*check_value)(unsigned),const T &guard_value) { - static const size_t n_item=unroll_multiple*SWAP_ENDIAN_MAX_UNROLL; - std::vector bytes(sizeof(T)*(n_item+1)); - - size_t n_guard=3; - for (size_t offset=0;offset -void fill_with_guard_byte(T *value) { - unsigned char bytes[sizeof(T)]; - for (size_t i=0;i(sample_fill_value,sample_check_value,guard_value); -} - -BOOST_AUTO_TEST_CASE_TEMPLATE(unaligned_array,T,test_value_types) { - T guard_value; - fill_with_guard_byte(&guard_value); - check_unroll_badalign(sample_fill_value,sample_check_value,guard_value); -} - - - diff --git a/tests/unit/endian/swap_endian_default.cpp b/tests/unit/endian/swap_endian_default.cpp deleted file mode 100644 index ddb53fd86..000000000 --- a/tests/unit/endian/swap_endian_default.cpp +++ /dev/null @@ -1,34 +0,0 @@ -/* -Copyright (c) 2016, Blue Brain Project -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: -1. Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. -2. Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. -3. Neither the name of the copyright holder nor the names of its contributors - may be used to endorse or promote products derived from this software - without specific prior written permission. 
- -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -THE POSSIBILITY OF SUCH DAMAGE. -*/ - - -/* Run endian tests with default configuration of swap_endian.h */ - -#define SWAP_ENDIAN_CONFIG Default -#include "swap_endian_common.ipp" - diff --git a/tests/unit/endian/swap_endian_noasm.cpp b/tests/unit/endian/swap_endian_noasm.cpp deleted file mode 100644 index a337eaf8d..000000000 --- a/tests/unit/endian/swap_endian_noasm.cpp +++ /dev/null @@ -1,40 +0,0 @@ -/* -Copyright (c) 2016, Blue Brain Project -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: -1. Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. -2. Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. -3. Neither the name of the copyright holder nor the names of its contributors - may be used to endorse or promote products derived from this software - without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -THE POSSIBILITY OF SUCH DAMAGE. -*/ - - -/* Run endian tests with assembly and intrinsics disabled. - */ - -#define SWAP_ENDIAN_CONFIG DisableAsm - -#ifndef SWAP_ENDIAN_DISABLE_ASM -#define SWAP_ENDIAN_DISABLE_ASM -#endif - -#include "swap_endian_common.ipp" - diff --git a/tests/unit/endian/swap_endian_nounroll.cpp b/tests/unit/endian/swap_endian_nounroll.cpp deleted file mode 100644 index 3b35c6d1e..000000000 --- a/tests/unit/endian/swap_endian_nounroll.cpp +++ /dev/null @@ -1,37 +0,0 @@ -/* -Copyright (c) 2016, Blue Brain Project -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: -1. Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. -2. 
Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. -3. Neither the name of the copyright holder nor the names of its contributors - may be used to endorse or promote products derived from this software - without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -THE POSSIBILITY OF SUCH DAMAGE. -*/ - - -/* Run endian tests with unroll factor forced to be one. - */ - -#define SWAP_ENDIAN_CONFIG NoUnroll -#define SWAP_ENDIAN_MAX_UNROLL 1 - -#include "swap_endian_common.ipp" - diff --git a/tests/unit/endian/swap_endian_oddunroll.cpp b/tests/unit/endian/swap_endian_oddunroll.cpp deleted file mode 100644 index 91fc49e15..000000000 --- a/tests/unit/endian/swap_endian_oddunroll.cpp +++ /dev/null @@ -1,41 +0,0 @@ -/* -Copyright (c) 2016, Blue Brain Project -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: -1. Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. -2. Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. -3. Neither the name of the copyright holder nor the names of its contributors - may be used to endorse or promote products derived from this software - without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -THE POSSIBILITY OF SUCH DAMAGE. -*/ - - -/* Run endian tests with an unusual, odd unroll factor. - * - * In practice, unroll should be left at the default value or set - * to a power of two. This test ensures correctness even if this - * is not the case. - */ - -#define SWAP_ENDIAN_CONFIG OddUnroll - -#define SWAP_ENDIAN_MAX_UNROLL 5 -#include "swap_endian_common.ipp" -
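
A note on the new UserParams aggregate introduced above: since it bundles what
used to be global state, a call site now constructs it once and threads it
through the setup phases. A minimal sketch under assumed usage (the group ids,
paths, and function name setup_example are made up for illustration):

#include "coreneuron/io/phase2.hpp"  // also pulls in user_params.hpp

using namespace coreneuron;

void setup_example(Phase2& phase2, NrnThread& nt) {
    static int gidgroups[] = {0, 1, 2};  // three cell groups (hypothetical)
    static int imult[] = {0, 0, 0};      // no duplicates (nrn_setup_multiple == 1)
    UserParams params(3, gidgroups, imult,
                      "/path/to/dataset",    // hypothetical dataset path
                      "/path/to/restore");   // hypothetical restore path
    // the UserParams constructor created one FileHandler per group:
    phase2.read_file(params.file_reader[0], nt);
    phase2.populate(nt, params);
}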
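Similarly, with fixed_vector now move-only, ownership of the y/t vectors passes
into VecPlayContinuous instead of being shared through pointers. A sketch of
the intended usage; make_ramp is a hypothetical helper, and data_ is used
directly because the diff keeps it public for OpenACC:

#include <cstddef>
#include <utility>  // std::move

#include "coreneuron/utils/ivocvect.hpp"
#include "coreneuron/utils/vrecitem.h"

using coreneuron::IvocVect;

// hypothetical helper: build n samples 0.0, 0.1, 0.2, ...
IvocVect make_ramp(std::size_t n) {
    IvocVect v(n);
    for (std::size_t i = 0; i < n; ++i)
        v.data_[i] = 0.1 * i;
    return v;  // moved (or elided) out; the copy constructor is deleted
}

void example(double* pd, int tid) {
    IvocVect y = make_ramp(100);
    IvocVect t = make_ramp(100);
    // the constructor takes rvalue references, so the caller must std::move:
    coreneuron::VecPlayContinuous vp(pd, std::move(y), std::move(t),
                                     /*discon*/ nullptr, tid);
    // y and t are now empty shells; vp owns the buffers.
}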
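Because the new OMP_Mutex satisfies BasicLockable (lock()/unlock()), it also
composes with the standard RAII guards. A small sketch, assuming an OpenMP
build where the class is defined; the shared state being protected is
illustrative:

#include <mutex>  // std::lock_guard

#include "coreneuron/utils/nrnmutdec.h"  // OMP_Mutex (OpenMP builds only)

static OMP_Mutex event_mutex;  // hypothetical lock for some shared queue

void push_event_threadsafe() {
    std::lock_guard<OMP_Mutex> guard(event_mutex);  // omp_set/unset_lock via RAII
    // ... mutate the shared state ...
}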
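Finally, the VecPlayContinuous changes above only alter how y_ and t_ are
stored; interpolate()/search() still reduce to plain linear interpolation
between the two time samples bracketing tt. A simplified standalone sketch of
that idea (plain std::vector stand-ins and an illustrative function name, not
the exact CoreNEURON control flow):

#include <cassert>
#include <cstddef>
#include <vector>

// Linearly interpolate y at time tt, given nondecreasing sample times t.
double interpolate_at(const std::vector<double>& t,
                      const std::vector<double>& y,
                      double tt) {
    assert(!t.empty() && t.size() == y.size());
    if (tt <= t.front())
        return y.front();          // clamp below the first sample
    if (tt >= t.back())
        return y.back();           // clamp above the last sample
    std::size_t i = 1;
    while (t[i] <= tt)             // find the first sample strictly past tt
        ++i;
    double t0 = t[i - 1], t1 = t[i];
    if (t0 == t1)                  // repeated time sample: avoid dividing by zero
        return y[i - 1];
    double theta = (tt - t0) / (t1 - t0);
    return y[i - 1] + theta * (y[i] - y[i - 1]);
}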