|
16 | 16 | import os
|
17 | 17 | import csv
|
18 | 18 | import sys
|
| 19 | +from time import time |
19 | 20 | from mpi4py import MPI
|
20 | 21 |
|
21 | 22 |
|
|
50 | 51 | plot = kwargs["plot"]
|
51 | 52 |
|
52 | 53 | # Now we build the matrix on rank 0
|
53 |
| -# It is a scipy sparse matrix with the structure of a 2D Poisson problem matrix obtained using finite differences |
54 | 54 | if rank == 0:
|
55 | 55 | eig = np.arange(1, dim + 1)
|
56 | 56 | A = np.diag(eig)
|
|
69 | 70 | # We actually call it twice: the first time to ensure that the function is JIT-compiled by Numba, the second one for memory and time profiling
|
70 | 70 | if rank == 0:
|
71 | 71 | print("Precompiling Lanczos...")
|
| 72 | + |
72 | 73 | Q, diag, off_diag = Lanczos_PRO(A_np, np.ones_like(np.diag(A_np)) * 1.0)
|
| 74 | + |
73 | 75 | print("Done. Now reducing using Lanczos...")
|
| 76 | + |
74 | 77 | gc.collect()
|
75 | 78 | proc = psutil.Process()
|
76 | 79 | mem_before_lanczos = proc.memory_info().rss / 1024 / 1024 # MB
|
| 80 | + begin_lanczos = time() |
77 | 81 |
|
78 | 82 | Q, diag, off_diag = Lanczos_PRO(A_np, np.ones_like(np.diag(A_np)) * 1.0)
|
79 | 83 |
|
| 84 | + end_lanczos = time() |
80 | 85 | gc.collect()
|
81 | 86 | mem_after_lanczos = proc.memory_info().rss / 1024 / 1024 # MB
|
82 | 87 | delta_mem_lanczos = mem_after_lanczos - mem_before_lanczos
|
| 88 | + delta_t_lanzos = end_lanczos - begin_lanczos |
| 89 | + |
83 | 90 | print("Done. Now computing eigenvalues...")
|
84 | 91 | else:
|
85 | 92 | diag = off_diag = None
|
|
91 | 98 | gc.collect()
|
92 | 99 | proc = psutil.Process()
|
93 | 100 | mem_before = proc.memory_info().rss / 1024 / 1024 # MB
|
| 101 | +time_before_parallel = time() |
94 | 102 |
|
95 | 103 | eigvals, eigvecs = parallel_tridiag_eigen(
|
96 | 104 | diag, off_diag, comm=comm, min_size=1, tol_factor=1e-10
|
97 | 105 | )
|
98 | 106 |
|
| 107 | +time_after_parallel = time() |
99 | 108 | gc.collect()
|
100 | 109 | mem_after = proc.memory_info().rss / 1024 / 1024
|
101 | 110 | delta_mem = mem_after - mem_before
|
| 111 | +delta_t_parallel = time_after_parallel - time_before_parallel |
102 | 112 |
|
103 | 113 | total_mem_children = comm.reduce(delta_mem, op=MPI.SUM, root=0)
|
| 114 | +total_time_children = comm.reduce(delta_t_parallel, op=MPI.SUM, root=0) |
104 | 115 |
|
105 | 116 | # Collect the information across all ranks
|
106 | 117 | if rank == 0:
|
107 | 118 | total_mem_all = delta_mem_lanczos
|
| 119 | + total_time_all = delta_t_lanzos + total_time_children |
108 | 120 | print("Eigenvalues computed.")
|
109 | 121 | process = psutil.Process()
|
110 | 122 |
|
111 |
| - print(f"Total memory across all processes: {total_mem_all:.2f} MB") |
112 |
| - |
| 123 | + print(f"[D&I] Total memory across all processes: {total_mem_all:.4f} MB") |
| 124 | + print(f"[D&I] Total time: across all processes: {total_time_all:.4f} s") |
113 | 125 | # We also profile numpy and scipy memory consumption and execution time
|
114 |
| - mem_np = profile_numpy_eigvals(A_np) |
115 |
| - print(f"NumPy eig memory usage: {mem_np:.2f} MB") |
| 126 | + mem_np, time_np = profile_numpy_eigvals(A_np) |
| 127 | + print(f"[NumPy] eig memory usage: {mem_np:.4f} MB") |
| 128 | + print(f"[NumPy] eig total time: {time_np:.4f} s") |
116 | 129 |
|
117 |
| - mem_sp = profile_scipy_eigvals(A_np) |
118 |
| - print(f"SciPy eig memory usage: {mem_sp:.2f} MB") |
| 130 | + mem_sp, time_sp = profile_scipy_eigvals(A_np) |
| 131 | + print(f"[SciPy] eig memory usage: {mem_sp:.4f} MB") |
| 132 | + print(f"[SciPy] eig total time: {time_sp:.4f} s") |
119 | 133 |
|
120 | 134 | # Save to the logs folder
|
121 | 135 | os.makedirs("logs", exist_ok=True)
|
122 |
| - log_file = "logs/memory_profile.csv" |
| 136 | + log_file = "logs/profile.csv" |
123 | 137 | fieldnames = [
|
124 | 138 | "matrix_size",
|
125 | 139 | "n_processes",
|
126 |
| - "mem_lanzos_mb", |
| 140 | + "mem_lanczos_mb", |
127 | 141 | "mem_tridiag_mb",
|
128 | 142 | "mem_total_mb",
|
129 | 143 | "mem_numpy_mb",
|
130 | 144 | "mem_scipy_mb",
|
| 145 | + "time_lanczos", |
| 146 | + "time_tridiag", |
| 147 | + "time_total", |
| 148 | + "time_numpy", |
| 149 | + "time_scipy", |
131 | 150 | ]
|
132 | 151 |
|
133 | 152 | write_header = not os.path.exists(log_file)
|
|
139 | 158 | {
|
140 | 159 | "matrix_size": dim,
|
141 | 160 | "n_processes": size,
|
142 |
| - "mem_lanzos_mb": round(delta_mem_lanczos, 2), |
| 161 | + "mem_lanczos_mb": round(delta_mem_lanczos, 2), |
143 | 162 | "mem_tridiag_mb": round(total_mem_children, 2),
|
144 | 163 | "mem_total_mb": round(total_mem_all, 2),
|
145 | 164 | "mem_numpy_mb": round(mem_np, 2),
|
146 | 165 | "mem_scipy_mb": round(mem_sp, 2),
|
| 166 | + "time_lanczos": round(delta_t_lanzos, 2), |
| 167 | + "time_tridiag": round(total_time_children, 2), |
| 168 | + "time_total": round(total_time_all, 2), |
| 169 | + "time_numpy": round(time_np, 2), |
| 170 | + "time_scipy": round(time_sp, 2), |
147 | 171 | }
|
148 | 172 | )
|
149 | 173 |
|
|
153 | 177 | import matplotlib.pyplot as plt
|
154 | 178 | import pandas as pd
|
155 | 179 |
|
156 |
| - df = pd.read_csv("logs/memory_profile.csv") |
| 180 | + df = pd.read_csv("logs/profile.csv") |
157 | 181 | nproc_values = sorted(df["n_processes"].unique())
|
158 | 182 |
|
| 183 | + # First we plot the memory usage, then the execution time |
159 | 184 | plt.figure(figsize=(10, 6))
|
160 | 185 |
|
161 | 186 | numpy_avg = df.groupby("matrix_size")["mem_numpy_mb"].mean()
|
|
206 | 231 |
|
207 | 232 | plt.savefig("logs/memory_profiling.png", bbox_inches="tight")
|
208 | 233 | plt.show()
|
| 234 | + |
| 235 | + plt.figure(figsize=(10, 6)) |
| 236 | + |
| 237 | + numpy_avg = df.groupby("matrix_size")["time_numpy"].mean() |
| 238 | + plt.plot( |
| 239 | + numpy_avg.index, |
| 240 | + numpy_avg.values, |
| 241 | + color="green", |
| 242 | + marker="x", |
| 243 | + linestyle="--", |
| 244 | + label="NumPy", |
| 245 | + ) |
| 246 | + |
| 247 | + scipy_avg = df.groupby("matrix_size")["time_scipy"].mean() |
| 248 | + plt.plot( |
| 249 | + scipy_avg.index, |
| 250 | + scipy_avg.values, |
| 251 | + color="red", |
| 252 | + marker="^", |
| 253 | + linestyle=":", |
| 254 | + label="SciPy", |
| 255 | + ) |
| 256 | + |
| 257 | + for nproc in nproc_values: |
| 258 | + subset = df[df["n_processes"] == nproc].sort_values("matrix_size") |
| 259 | + label = f"Divide et impera ({nproc} proc{'s' if nproc > 1 else ''})" |
| 260 | + plt.plot( |
| 261 | + subset["matrix_size"], |
| 262 | + subset["time_total"], |
| 263 | + marker="o", |
| 264 | + linestyle="-", |
| 265 | + label=label, |
| 266 | + ) |
| 267 | + |
| 268 | + plt.xlabel("Matrix size") |
| 269 | + plt.ylabel("Total time (s)") |
| 270 | + plt.xscale("log") |
| 271 | + plt.title("Execution time vs. Matrix size") |
| 272 | + plt.grid(True) |
| 273 | + plt.tight_layout() |
| 274 | + |
| 275 | + plt.legend( |
| 276 | + bbox_to_anchor=(1.05, 1), |
| 277 | + loc="upper left", |
| 278 | + borderaxespad=0.0, |
| 279 | + title="Method", |
| 280 | + ) |
| 281 | + plt.subplots_adjust(right=0.75) |
| 282 | + |
| 283 | + plt.savefig("logs/time_profiling.png", bbox_inches="tight") |
| 284 | + plt.show() |
0 commit comments