Update docs

lorenzotomada · lorenzotomada · commit c84187f34692 · 2025-07-07T17:03:33.000+02:00
diff --git a/docs/Documentation.ipynb b/docs/Documentation.ipynb
@@ -399,7 +399,6 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "\n",
     "# Tradeoff accuracy vs time\n",
     "\n",
     "The pipeline for computing the eigenvalues and eigenvectors of a symmetric matrix using our proposed method is as follows:\n",
@@ -757,6 +756,65 @@
     "<!-- ![Time profiling](images/plot_time.png) -->"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# A note on the code in C++\n",
+    "As mentioned in the `README.md` file, a few functions have been written in `C++` and then exposed to `Python` using `pybind11`.\n",
+    "The reason for this choice is that compiling them with `numba` either offered no major performance advantage or was simply not possible, because some of the types we use (e.g. `SciPy`'s sparse matrices) are not compatible with `numba`.\n",
+    "\n",
+    "The cell below provides a comparison of the execution time of the original `Python` implementation and the `C++` one."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pyclassify.cxx_utils import secular_solver_cxx\n",
+    "from pyclassify.zero_finder import secular_solver_python\n",
+    "\n",
+    "seed = 2206\n",
+    "np.random.seed(seed)  # seed NumPy's global RNG so that `v` is reproducible\n",
+    "\n",
+    "n = 1000\n",
+    "d = np.arange(n)\n",
+    "rho = 2.\n",
+    "D = np.diag(d)\n",
+    "v = np.random.rand(n)\n",
+    "\n",
+    "indices = range(len(d))\n",
+    "rk_1_update = rho * np.outer(v, v)\n",
+    "L = D + rk_1_update  # symmetric: diagonal matrix plus a rank-one update\n",
+    "\n",
+    "begin_cxx = time()  # NOTE(review): `np` and `time` are assumed to come from earlier cells\n",
+    "computed_eigs_cxx, _, __ = secular_solver_cxx(rho, d, v, indices)\n",
+    "end_cxx = time()\n",
+    "duration_cxx = end_cxx - begin_cxx\n",
+    "\n",
+    "begin_python = time()\n",
+    "computed_eigs_python = secular_solver_python(rho, d, v)\n",
+    "end_python = time()\n",
+    "duration_python = end_python - begin_python\n",
+    "\n",
+    "print(f'Speedup: {duration_python/duration_cxx}')\n",
+    "\n",
+    "# L is symmetric, so use eigvalsh: it returns real eigenvalues in ascending order\n",
+    "exact_eigs = np.linalg.eigvalsh(L)\n",
+    "\n",
+    "for i in range(len(exact_eigs)):\n",
+    "    # Assert that the C++ eigenvalues are correct\n",
+    "    assert (\n",
+    "        np.abs(computed_eigs_cxx[i] - exact_eigs[i]) < 1e-8\n",
+    "    ), \"Error. The eigenvalues were not computed correctly.\"\n",
+    "    # Also assert that they are close to the Python ones\n",
+    "    assert (\n",
+    "        np.abs(computed_eigs_cxx[i] - computed_eigs_python[i]) < 1e-8\n",
+    "    ), \"Error. The eigenvalues were not computed correctly.\""
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {
@@ -781,11 +839,6 @@
     "\n",
     "[7] [Arbenz, Peter. Lecture Notes on Solving Large Scale Eigenvalue Problems - Chapter 5-6. Computer Science Department, ETH Zürich, Spring semester 2016.](https://sissa-my.sharepoint.com/my?FolderCTID=0x012000B3941AA86D63224F9CC9A5B1FEDCCF3B&id=%2Fpersonal%2Fglicausi%5Fsissa%5Fit%2FDocuments%2FPhD%2FDevelopment%20Tools%20for%20Scientific%20Computing%20%2D%20Project%20material%20and%20references%2Fchapters5%2D6%2Epdf&parent=%2Fpersonal%2Fglicausi%5Fsissa%5Fit%2FDocuments%2FPhD%2FDevelopment%20Tools%20for%20Scientific%20Computing%20%2D%20Project%20material%20and%20references)"
    ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": []
   }
  ],
  "metadata": {
diff --git a/docs/plots/memory_profiling.png b/docs/plots/memory_profiling.png
diff --git a/scripts/mpi_running.py b/scripts/mpi_running.py
@@ -10,8 +10,8 @@
 from pyclassify.eigenvalues import Lanczos_PRO
 
 
-seed = 10
-np.random.seed(seed)
+seed = 100
+# np.random.seed(seed)
 
 
 def parallel_eig(diag, off_diag, nprocs):
@@ -85,3 +85,7 @@ def compute_eigvals(A, n_procs):
 
 if max_error < 1e-8:
     print("Pretty small, huh?")
+else:
+    print(
+        "Please notice that no seed has been set, so it might be that this was an unlucky case. Please try again"
+    )
diff --git a/scripts/profiling_memory_and_time.py b/scripts/profiling_memory_and_time.py
@@ -20,7 +20,7 @@
 from mpi4py import MPI
 
 
-seed = 10
+seed = 1000
 np.random.seed(seed)
 
 
@@ -85,7 +85,7 @@
     gc.collect()
     mem_after_lanczos = proc.memory_info().rss / 1024 / 1024  # MB
     delta_mem_lanczos = mem_after_lanczos - mem_before_lanczos
-    delta_t_lanzos = end_lanczos - begin_lanczos
+    delta_t_lanczos = end_lanczos - begin_lanczos
 
     print("Done. Now computing eigenvalues...")
 else:
@@ -116,13 +116,15 @@
 
 # Collect the information across all ranks
 if rank == 0:
-    total_mem_all = delta_mem_lanczos
-    total_time_all = delta_t_lanzos + total_time_children
+    total_mem_all = delta_mem_lanczos + total_mem_children
+    total_time_all = delta_t_lanczos + total_time_children
     print("Eigenvalues computed.")
     process = psutil.Process()
 
     print(f"[D&I] Total memory across all processes: {total_mem_all:.4f} MB")
-    print(f"[D&I] Total time: across all processes: {total_time_all:.4f} s")
+    print(
+        f"[D&I] Total time (rank 0, which also performs Lanczos): {total_time_all:.4f} s"
+    )
     # We also profile numpy and scipy memory consumption
     mem_np, time_np = profile_numpy_eigvals(A_np)
     print(f"[NumPy] eig memory usage: {mem_np:.4f} MB")
@@ -164,7 +166,7 @@
                 "mem_total_mb": round(total_mem_all, 2),
                 "mem_numpy_mb": round(mem_np, 2),
                 "mem_scipy_mb": round(mem_sp, 2),
-                "time_lanczos": round(delta_t_lanzos, 2),
+                "time_lanczos": round(delta_t_lanczos, 2),
                 "time_tridiag": round(total_time_children, 2),
                 "time_total": round(total_time_all, 2),
                 "time_numpy": round(time_np, 2),