Skip to content
This repository was archived by the owner on Mar 20, 2023. It is now read-only.

Commit f6e3237

Browse files
Christos Kotsalos authored and
olupton committed
fixing race condition in cell permute 2
1 parent f1a5f61 commit f6e3237

File tree

1 file changed

+25
-23
lines changed

1 file changed

+25
-23
lines changed

coreneuron/permute/cellorder.cpp

Lines changed: 25 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -482,21 +482,22 @@ static void triang_interleaved2(NrnThread* nt, int icore, int ncycle, int* strid
482482
int icycle = ncycle - 1;
483483
int istride = stride[icycle];
484484
int i = lastnode - istride + icore;
485-
#ifndef CORENEURON_ENABLE_GPU
485+
//#ifndef CORENEURON_ENABLE_GPU
486486
int ii = i;
487-
#endif
487+
//#endif
488488

489489
// execute until all tree depths are executed
490490
bool has_subtrees_to_compute = true;
491491

492492
// clang-format off
493493
nrn_pragma_acc(loop seq)
494494
for (; has_subtrees_to_compute; ) { // ncycle loop
495-
#ifndef CORENEURON_ENABLE_GPU
495+
//#ifndef CORENEURON_ENABLE_GPU
496496
// serial test, gpu does this in parallel
497+
nrn_pragma_acc(loop)
497498
for (int icore = 0; icore < warpsize; ++icore) {
498499
int i = ii + icore;
499-
#endif
500+
//#endif
500501
if (icore < istride) { // most efficient if istride equal warpsize
501502
// what is the index
502503
int ip = GPU_PARENT(i);
@@ -508,9 +509,9 @@ static void triang_interleaved2(NrnThread* nt, int icore, int ncycle, int* strid
508509
nrn_pragma_omp(atomic update)
509510
GPU_RHS(ip) -= p * GPU_RHS(i);
510511
}
511-
#ifndef CORENEURON_ENABLE_GPU
512+
//#ifndef CORENEURON_ENABLE_GPU
512513
}
513-
#endif
514+
//#endif
514515
// if finished with all tree depths then ready to break
515516
// (note that break is not allowed in OpenACC)
516517
if (icycle == 0) {
@@ -520,9 +521,9 @@ static void triang_interleaved2(NrnThread* nt, int icore, int ncycle, int* strid
520521
--icycle;
521522
istride = stride[icycle];
522523
i -= istride;
523-
#ifndef CORENEURON_ENABLE_GPU
524+
//#ifndef CORENEURON_ENABLE_GPU
524525
ii -= istride;
525-
#endif
526+
//#endif
526527
}
527528
// clang-format on
528529
}
@@ -535,36 +536,37 @@ static void bksub_interleaved2(NrnThread* nt,
535536
int ncycle,
536537
int* stride,
537538
int firstnode) {
538-
#ifndef CORENEURON_ENABLE_GPU
539+
//#ifndef CORENEURON_ENABLE_GPU
539540
for (int i = root; i < lastroot; i += 1) {
540-
#else
541-
nrn_pragma_acc(loop seq)
542-
for (int i = root; i < lastroot; i += warpsize) {
543-
#endif
541+
//#else
542+
// nrn_pragma_acc(loop seq)
543+
// for (int i = root; i < lastroot; i += warpsize) {
544+
//#endif
544545
GPU_RHS(i) /= GPU_D(i); // the root
545546
}
546547

547548
int i = firstnode + icore;
548-
#ifndef CORENEURON_ENABLE_GPU
549+
//#ifndef CORENEURON_ENABLE_GPU
549550
int ii = i;
550-
#endif
551+
//#endif
551552
for (int icycle = 0; icycle < ncycle; ++icycle) {
552553
int istride = stride[icycle];
553-
#ifndef CORENEURON_ENABLE_GPU
554+
//#ifndef CORENEURON_ENABLE_GPU
555+
nrn_pragma_acc(loop)
554556
// serial test, gpu does this in parallel
555557
for (int icore = 0; icore < warpsize; ++icore) {
556558
int i = ii + icore;
557-
#endif
559+
//#endif
558560
if (icore < istride) {
559561
int ip = GPU_PARENT(i);
560562
GPU_RHS(i) -= GPU_B(i) * GPU_RHS(ip);
561563
GPU_RHS(i) /= GPU_D(i);
562564
}
563565
i += istride;
564-
#ifndef CORENEURON_ENABLE_GPU
566+
//#ifndef CORENEURON_ENABLE_GPU
565567
}
566568
ii += istride;
567-
#endif
569+
//#endif
568570
}
569571
}
570572

@@ -617,14 +619,14 @@ void solve_interleaved2(int ith) {
617619
int lastroot = rootbegin[iwarp + 1];
618620
int firstnode = nodebegin[iwarp];
619621
int lastnode = nodebegin[iwarp + 1];
620-
#ifndef CORENEURON_ENABLE_GPU
622+
//#ifndef CORENEURON_ENABLE_GPU
621623
if (ic == 0) { // serial test mode. triang and bksub do all cores in warp
622-
#endif
624+
//#endif
623625
triang_interleaved2(nt, ic, ncycle, stride, lastnode);
624626
bksub_interleaved2(nt, root + ic, lastroot, ic, ncycle, stride, firstnode);
625-
#ifndef CORENEURON_ENABLE_GPU
627+
//#ifndef CORENEURON_ENABLE_GPU
626628
} // serial test mode
627-
#endif
629+
//#endif
628630
}
629631
nrn_pragma_acc(wait(nt->stream_id))
630632
#ifdef _OPENACC

0 commit comments

Comments
 (0)