Skip to content
This repository was archived by the owner on Mar 25, 2025. It is now read-only.
This repository was archived by the owner on Mar 25, 2025. It is now read-only.

NMODL does not update global variables on GPU #678

@olupton

Description

@olupton

NMODL generates OpenACC code from the hhwatch.mod mechanism that gives incorrect results. This leads to test failures when the NEURON test suite is run using CoreNEURON and NMODL, i.e.

cmake ..  -DNRN_ENABLE_TESTS=ON -DNRN_ENABLE_CORENEURON=ON -DCORENRN_ENABLE_GPU=ON -DCORENRN_ENABLE_NMODL=ON
cmake --build . --parallel
ctest -j 8

The testcorenrn_watch::compare_results test fails because the output spikes from the CoreNEURON+GPU tests do not match the NEURON and CoreNEURON+CPU runs, and the reference file. In the current master this is affected by #675, but if that is worked around there is a second issue: these tests configure non-default values for some global parameters, and these values are not updated on the GPU. The OpenACC code generated by NMODL should include:

     /** initialize channel */
     void nrn_init_hhwatch(NrnThread* nt, Memb_list* ml, int type) {
+        #pragma acc update device (hhwatch_global) if(nt->compute_gpu)
         #pragma acc data present(nt, ml, hhwatch_global) if(nt->compute_gpu)
         {

to ensure the device-side values of these parameters are updated.

The full diff of manual changes to the generate C++ file that was tested locally is shown below. This includes three different sets of changes:

The full diff:

--- hhwatch-original.cpp 2021-06-01 09:31:52.879278000 +0200
+++ hhwatch-patched.cpp  2021-06-01 09:27:37.270874000 +0200
@@ -230,9 +230,10 @@


     static inline void net_send_buffering(NetSendBuffer_t* nsb, int type, int vdata_index, int weight_index, int point_index, double t, double flag)  {
-        int i = nsb->_cnt;
-        nsb->_cnt++;
-        if(nsb->_cnt >= nsb->_size) {
+        int i = 0;
+#pragma acc atomic capture
+        i = nsb->_cnt++;
+        if(i >= nsb->_size) {
             printf("Error : netsend buffer size (%d) exceeded\n", nsb->_cnt);
             coreneuron_abort();
         }
@@ -291,49 +292,47 @@
                     if (v >  -55.0) {
                         if ((inst->watch1[4*pnodecount+id]&1) == 0) {
                             net_send_buffering(ml->_net_send_buffer, 0, inst->tqitem[2*pnodecount+id], 0, inst->point_process[1*pnodecount+id], nt->_t+0.0, 2.0);
-                            inst->watch1[4*pnodecount+id] = 3;
-                        }
-                        else {
-                            inst->watch1[4*pnodecount+id] = 2;
                         }
+                        inst->watch1[4*pnodecount+id] = 3;
+                    } else {
+                        inst->watch1[4*pnodecount+id] = 2;
                     }
                 }
                 if (inst->watch2[5*pnodecount+id]&2) {
                     if (v > 10.0) {
                         if ((inst->watch2[5*pnodecount+id]&1) == 0) {
                             net_send_buffering(ml->_net_send_buffer, 0, inst->tqitem[2*pnodecount+id], 0, inst->point_process[1*pnodecount+id], nt->_t+0.0, 3.0);
-                            inst->watch2[5*pnodecount+id] = 3;
-                        }
-                        else {
-                            inst->watch2[5*pnodecount+id] = 2;
                         }
+                        inst->watch2[5*pnodecount+id] = 3;
+                    } else {
+                        inst->watch2[5*pnodecount+id] = 2;
                     }
                 }
                 if (inst->watch3[6*pnodecount+id]&2) {
                     if (v <  -70.0) {
                         if ((inst->watch3[6*pnodecount+id]&1) == 0) {
                             net_send_buffering(ml->_net_send_buffer, 0, inst->tqitem[2*pnodecount+id], 0, inst->point_process[1*pnodecount+id], nt->_t+0.0, 4.0);
-                            inst->watch3[6*pnodecount+id] = 3;
-                        }
-                        else {
-                            inst->watch3[6*pnodecount+id] = 2;
                         }
+                        inst->watch3[6*pnodecount+id] = 3;
+                    } else {
+                        inst->watch3[6*pnodecount+id] = 2;
                     }
                 }
                 if (inst->watch4[7*pnodecount+id]&2) {
                     if (v >  -55.0) {
                         if ((inst->watch4[7*pnodecount+id]&1) == 0) {
                             net_send_buffering(ml->_net_send_buffer, 0, inst->tqitem[2*pnodecount+id], 0, inst->point_process[1*pnodecount+id], nt->_t+0.0, 2.0);
-                            inst->watch4[7*pnodecount+id] = 3;
-                        }
-                        else {
-                            inst->watch4[7*pnodecount+id] = 2;
                         }
+                        inst->watch4[7*pnodecount+id] = 3;
+                    } else {
+                        inst->watch4[7*pnodecount+id] = 2;
                     }
                 }
             }
-
             NetSendBuffer_t* nsb = ml->_net_send_buffer;
+            #pragma acc wait(nt->stream_id)
+            #pragma acc update self(nsb->_cnt) if(nt->compute_gpu)
+            update_net_send_buffer_on_host(nt, nsb);
             for (int i=0; i < nsb->_cnt; i++) {
                 int type = nsb->_sendtype[i];
                 int tid = nt->id;
@@ -345,6 +344,7 @@
                 net_sem_from_gpu(type, vdata_index, weight_index, tid, point_index, t, flag);
             }
             nsb->_cnt = 0;
+            #pragma acc update device(nsb->_cnt) if (nt->compute_gpu)
         }
     }

@@ -425,8 +425,12 @@
                     net_receive_kernel_hhwatch(t, point_process, inst, nt, ml, weight_index, flag);
                 }
             }
-
+            #pragma acc wait(nt->stream_id)
+            nrb->_displ_cnt = 0;
+            nrb->_cnt = 0;
             NetSendBuffer_t* nsb = ml->_net_send_buffer;
+            #pragma acc update self(nsb->_cnt) if(nt->compute_gpu)
+            update_net_send_buffer_on_host(nt, nsb);
             for (int i=0; i < nsb->_cnt; i++) {
                 int type = nsb->_sendtype[i];
                 int tid = nt->id;
@@ -438,15 +442,14 @@
                 net_sem_from_gpu(type, vdata_index, weight_index, tid, point_index, t, flag);
             }
             nsb->_cnt = 0;
-
-            nrb->_displ_cnt = 0;
-            nrb->_cnt = 0;
+            #pragma acc update device(nsb->_cnt) if (nt->compute_gpu)
         }
     }


     /** initialize channel */
     void nrn_init_hhwatch(NrnThread* nt, Memb_list* ml, int type) {
+        #pragma acc update device (hhwatch_global) if(nt->compute_gpu)
         #pragma acc data present(nt, ml, hhwatch_global) if(nt->compute_gpu)
         {
             int nodecount = ml->nodecount;
@@ -470,10 +473,26 @@
                     double v = voltage[node_id];
                     inst->g[id] = hhwatch_global.gpas;
                     inst->e[id] = hhwatch_global.erev;
-                    net_send_buffering(ml->_net_send_buffer, 0, inst->tqitem[2*pnodecount+id], -1, inst->point_process[1*pnodecount+id], nt->_t+0.0, 1.0);
+                    net_send_buffering(ml->_net_send_buffer, 0, inst->tqitem[2*pnodecount+id], 0, inst->point_process[1*pnodecount+id], nt->_t+0.0, 1.0);
                 }
             }
         }
+        #pragma acc wait(nt->stream_id)
+        NetSendBuffer_t* nsb = ml->_net_send_buffer;
+        #pragma acc update self(nsb->_cnt) if(nt->compute_gpu)
+        update_net_send_buffer_on_host(nt, nsb);
+        for (int i=0; i < nsb->_cnt; i++) {
+            int type = nsb->_sendtype[i];
+            int tid = nt->id;
+            double t = nsb->_nsb_t[i];
+            double flag = nsb->_nsb_flag[i];
+            int vdata_index = nsb->_vdata_index[i];
+            int weight_index = nsb->_weight_index[i];
+            int point_index = nsb->_pnt_index[i];
+            net_sem_from_gpu(type, vdata_index, weight_index, tid, point_index, t, flag);
+        }
+        nsb->_cnt = 0;
+        #pragma acc update device(nsb->_cnt) if (nt->compute_gpu)
     }

Metadata

Metadata

Assignees

No one assigned

    Labels

    bugSomething isn't workingcodegenCode generation backendgpu

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions