Print memory footprint of each mechanism (#833)

iomaganaris · olupton · web-flow · commit 0574812f772d · 2022-06-30T08:27:40.000+02:00
* Print each mechanism's size
* Fix compilation with -DDEBUG
* Improve the calculation of the total memory for each mechanism
* Improved calculation of NrnThreadMembList

Co-authored-by: Olli Lupton <oliver.lupton@epfl.ch>
diff --git a/README.md b/README.md
@@ -395,4 +395,4 @@ You can see current [contributors here](https://github.com/BlueBrain/CoreNeuron/
 
 CoreNEURON is developed in a joint collaboration between the Blue Brain Project and Yale University. This work is supported by funding to the Blue Brain Project, a research center of the École polytechnique fédérale de Lausanne (EPFL), from the Swiss government’s ETH Board of the Swiss Federal Institutes of Technology, NIH grant number R01NS11613 (Yale University), the European Union Seventh Framework Program (FP7/2007-2013) under grant agreement n° 604102 (HBP) and the European Union’s Horizon 2020 Framework Programme for Research and Innovation under Specific Grant Agreement n° 720270 (Human Brain Project SGA1), n° 785907 (Human Brain Project SGA2) and n° 945539 (Human Brain Project SGA3).
 
-Copyright (c) 2016 - 2021 Blue Brain Project/EPFL
+Copyright (c) 2016 - 2022 Blue Brain Project/EPFL
diff --git a/coreneuron/apps/corenrn_parameters.cpp b/coreneuron/apps/corenrn_parameters.cpp
@@ -40,7 +40,7 @@ corenrn_parameters::corenrn_parameters() {
     app.add_set(
         "--verbose",
         this->verbose,
-        {verbose_level::NONE, verbose_level::ERROR, verbose_level::INFO, verbose_level::DEBUG},
+        {verbose_level::NONE, verbose_level::ERROR, verbose_level::INFO, verbose_level::DEBUG_INFO},
         "Verbose level: 0 = NONE, 1 = ERROR, 2 = INFO, 3 = DEBUG. Default is INFO");
     app.add_flag("--model-stats",
                  this->model_stats,
diff --git a/coreneuron/apps/corenrn_parameters.hpp b/coreneuron/apps/corenrn_parameters.hpp
@@ -35,7 +35,13 @@
 namespace coreneuron {
 
 struct corenrn_parameters_data {
-    enum verbose_level : std::uint32_t { NONE = 0, ERROR = 1, INFO = 2, DEBUG = 3, DEFAULT = INFO };
+    enum verbose_level : std::uint32_t {
+        NONE = 0,
+        ERROR = 1,
+        INFO = 2,
+        DEBUG_INFO = 3,  // not named DEBUG; presumably avoids a clash with a -DDEBUG macro definition (confirm)
+        DEFAULT = INFO  // alias: the default verbosity is INFO
+    };
 
     static constexpr int report_buff_size_default = 4;
 
diff --git a/coreneuron/io/mech_report.cpp b/coreneuron/io/mech_report.cpp
@@ -10,6 +10,7 @@
 #include <vector>
 
 #include "coreneuron/coreneuron.hpp"
+#include "coreneuron/io/nrn_setup.hpp"
 #include "coreneuron/mpi/nrnmpi.h"
 #include "coreneuron/apps/corenrn_parameters.hpp"
 
@@ -19,6 +20,7 @@ void write_mech_report() {
     /// mechanim count across all gids, local to rank
     const auto n_memb_func = corenrn.get_memb_funcs().size();
     std::vector<long> local_mech_count(n_memb_func, 0);
+    std::vector<long> local_mech_size(n_memb_func, 0);
 
     /// each gid record goes on separate row, only check non-empty threads
     for (int i = 0; i < nrn_nthread; i++) {
@@ -27,10 +29,12 @@ void write_mech_report() {
             const int type = tml->index;
             const auto& ml = tml->ml;
             local_mech_count[type] += ml->nodecount;
+            // Accumulate like local_mech_count above: this statement runs once per
+            // thread's mechanism list, so plain assignment would keep only the last
+            // thread's size for a given mechanism type.
+            local_mech_size[type] += memb_list_size(tml, true);
         }
     }
 
     std::vector<long> total_mech_count(n_memb_func);
+    std::vector<long> total_mech_size(n_memb_func);
 
 #if NRNMPI
     if (corenrn_param.mpi_enable) {
@@ -39,21 +43,29 @@ void write_mech_report() {
                                   &total_mech_count[0],
                                   local_mech_count.size(),
                                   1);
-
+        nrnmpi_long_allreduce_vec(&local_mech_size[0],
+                                  &total_mech_size[0],
+                                  local_mech_size.size(),
+                                  1);
     } else
 #endif
     {
         total_mech_count = local_mech_count;
+        total_mech_size = local_mech_size;
     }
 
     /// print global stats to stdout
     if (nrnmpi_myid == 0) {
-        printf("\n================ MECHANISMS COUNT BY TYPE ==================\n");
-        printf("%4s %20s %10s\n", "Id", "Name", "Count");
+        printf("\n================= MECHANISMS COUNT BY TYPE ===================\n");
+        printf("%4s %20s %10s %25s\n", "Id", "Name", "Count", "Total memory size (KiB)");
         for (size_t i = 0; i < total_mech_count.size(); i++) {
-            printf("%4lu %20s %10ld\n", i, nrn_get_mechname(i), total_mech_count[i]);
+            // i is a size_t, so it needs the 'z' length modifier; %lu only matches
+            // on platforms where size_t happens to be unsigned long.
+            printf("%4zu %20s %10ld %25.2lf\n",
+                   i,
+                   nrn_get_mechname(i),
+                   total_mech_count[i],
+                   static_cast<double>(total_mech_size[i]) / 1024);
         }
-        printf("=============================================================\n");
+        printf("==============================================================\n");
     }
 }
 
diff --git a/coreneuron/io/nrn_setup.cpp b/coreneuron/io/nrn_setup.cpp
@@ -966,9 +966,37 @@ void read_phase3(NrnThread& nt, UserParams& userParams) {
     nt.summation_report_handler_ = std::make_unique<SummationReportMapping>();
 }
 
-static size_t memb_list_size(NrnThreadMembList* tml) {
+/* Returns the size of the dynamically allocated memory for NrnThreadMembList
+ * Includes:
+ *  - Size of NrnThreadMembList
+ *  - Size of Memb_list
+ *  - Size of nodeindices
+ *  - Size of _permute
+ *  - Size of _thread
+ *  - Size of NetReceive and NetSend Buffers
+ *  - Size of int variables
+ *  - Size of double variables (only if include_data is true; pass false when nt->_data is
+ *    counted separately, because these variables point into nt->_data)
+ */
+size_t memb_list_size(NrnThreadMembList* tml, bool include_data) {
     size_t nbyte = sizeof(NrnThreadMembList) + sizeof(Memb_list);
     nbyte += tml->ml->nodecount * sizeof(int);
+    if (tml->ml->_permute) {
+        nbyte += tml->ml->nodecount * sizeof(int);
+    }
+    if (tml->ml->_thread) {
+        Memb_func& mf = corenrn.get_memb_func(tml->index);
+        nbyte += mf.thread_size_ * sizeof(ThreadDatum);
+    }
+    if (tml->ml->_net_receive_buffer) {
+        nbyte += sizeof(NetReceiveBuffer_t) + tml->ml->_net_receive_buffer->size_of_object();
+    }
+    if (tml->ml->_net_send_buffer) {
+        nbyte += sizeof(NetSendBuffer_t) + tml->ml->_net_send_buffer->size_of_object();
+    }
+    if (include_data) {
+        nbyte += corenrn.get_prop_param_size()[tml->index] * tml->ml->nodecount * sizeof(double);
+    }
     nbyte += corenrn.get_prop_dparam_size()[tml->index] * tml->ml->nodecount * sizeof(Datum);
 #ifdef DEBUG
     int i = tml->index;
@@ -991,7 +1019,7 @@ size_t output_presyn_size(void) {
     size_t nbyte = sizeof(gid2out) + sizeof(int) * gid2out.size() +
                    sizeof(PreSyn*) * gid2out.size();
 #ifdef DEBUG
-    printf(" gid2out table bytes=~%ld size=%d\n", nbyte, gid2out.size());
+    // nbyte is a size_t; gid2out.size() is assumed to return size_t as for standard containers.
+    printf(" gid2out table bytes=~%zu size=%zu\n", nbyte, gid2out.size());
 #endif
     return nbyte;
 }
@@ -1003,7 +1031,7 @@ size_t input_presyn_size(void) {
     size_t nbyte = sizeof(gid2in) + sizeof(int) * gid2in.size() +
                    sizeof(InputPreSyn*) * gid2in.size();
 #ifdef DEBUG
-    printf(" gid2in table bytes=~%ld size=%d\n", nbyte, gid2in.size());
+    // nbyte is a size_t; gid2in.size() is assumed to return size_t as for standard containers.
+    printf(" gid2in table bytes=~%zu size=%zu\n", nbyte, gid2in.size());
 #endif
     return nbyte;
 }
@@ -1031,7 +1059,7 @@ size_t model_size(bool detailed_report) {
         // Memb_list size
         int nmech = 0;
         for (auto tml = nt.tml; tml; tml = tml->next) {
-            nb_nt += memb_list_size(tml);
+            nb_nt += memb_list_size(tml, false);
             ++nmech;
         }
 
diff --git a/coreneuron/io/nrn_setup.hpp b/coreneuron/io/nrn_setup.hpp
@@ -42,6 +42,8 @@ extern void nrn_setup_cleanup();
 
 extern int nrn_i_layout(int i, int cnt, int j, int size, int layout);
 
+size_t memb_list_size(NrnThreadMembList* tml, bool include_data);  // bytes attributed to one mechanism list; include_data also counts its double variables
+
 size_t model_size(bool detailed_report);
 
 namespace coreneuron {
diff --git a/coreneuron/mechanism/mechanism.hpp b/coreneuron/mechanism/mechanism.hpp
@@ -50,6 +50,13 @@ struct NetReceiveBuffer_t {
     int _displ_cnt; /* number of unique _pnt_index */
     int _size;      /* capacity */
     int _pnt_offset;
+    /// Bytes accounted to the arrays sized by _size; sizeof(*this) is not included.
+    /// Per slot of capacity this counts three int entries and two double entries,
+    /// plus one further int array of _size + 1 entries.
+    /// NOTE(review): keep the breakdown in sync with the arrays this struct owns;
+    /// the member list is not fully visible from here.
+    size_t size_of_object() const {
+        // Convert once so all arithmetic is done explicitly in size_t.
+        const auto capacity = static_cast<size_t>(_size);
+        return capacity * sizeof(int) * 3 + (capacity + 1) * sizeof(int) +
+               capacity * sizeof(double) * 2;
+    }
 };
 
 struct NetSendBuffer_t: MemoryManaged {
@@ -78,6 +85,13 @@ struct NetSendBuffer_t: MemoryManaged {
         _nsb_flag = (double*) ecalloc_align(_size, sizeof(double));
     }
 
+    /// Bytes accounted to the arrays sized by _size; sizeof(*this) is not included.
+    /// Per slot of capacity this counts four int entries and two double entries.
+    /// NOTE(review): keep the breakdown in sync with the arrays allocated with
+    /// _size in this struct (e.g. _nsb_flag).
+    size_t size_of_object() const {
+        // Convert once so all arithmetic is done explicitly in size_t.
+        const auto capacity = static_cast<size_t>(_size);
+        return capacity * sizeof(int) * 4 + capacity * sizeof(double) * 2;
+    }
+
     ~NetSendBuffer_t() {
         free_memory(_sendtype);
         free_memory(_vdata_index);

Original file line number	Diff line number	Diff line change
`@@ -395,4 +395,4 @@ You can see current [contributors here](https://github.com/BlueBrain/CoreNeuron/`
`395`	`395`
`396`	`396`	CoreNEURON is developed in a joint collaboration between the Blue Brain Project and Yale University. This work is supported by funding to the Blue Brain Project, a research center of the École polytechnique fédérale de Lausanne (EPFL), from the Swiss government’s ETH Board of the Swiss Federal Institutes of Technology, NIH grant number R01NS11613 (Yale University), the European Union Seventh Framework Program (FP7/2007-2013) under grant agreement n° 604102 (HBP) and the European Union’s Horizon 2020 Framework Programme for Research and Innovation under Specific Grant Agreement n° 720270 (Human Brain Project SGA1), n° 785907 (Human Brain Project SGA2) and n° 945539 (Human Brain Project SGA3).
`397`	`397`
`398`		`-Copyright (c) 2016 - 2021 Blue Brain Project/EPFL`
	`398`	`+Copyright (c) 2016 - 2022 Blue Brain Project/EPFL`