Skip to content

Commit 49e5b0b

Browse files
committed
Merge branch 'final_project' of github.com:lorenzotomada/devtools_scicomp_project_2025 into final_project
2 parents 12be444 + 8ddce11 commit 49e5b0b

File tree

4 files changed

+99
-17
lines changed

4 files changed

+99
-17
lines changed

scripts/profiling_memory.py renamed to scripts/profiling_memory_and_time.py

Lines changed: 87 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
import os
1717
import csv
1818
import sys
19+
from time import time
1920
from mpi4py import MPI
2021

2122

@@ -50,7 +51,6 @@
5051
plot = kwargs["plot"]
5152

5253
# Now we build the matrix on rank 0
53-
# It is a scipy sparse matrix with the structure of a 2D Poisson problem matrix obtained using finite differences
5454
if rank == 0:
5555
eig = np.arange(1, dim + 1)
5656
A = np.diag(eig)
@@ -69,17 +69,24 @@
6969
# We actually call it twice: the first time to ensure that the function is JIT-compiled by Numba, the second one for memory and time profiling
7070
if rank == 0:
7171
print("Precompiling Lanczos...")
72+
7273
Q, diag, off_diag = Lanczos_PRO(A_np, np.ones_like(np.diag(A_np)) * 1.0)
74+
7375
print("Done. Now reducing using Lanczos...")
76+
7477
gc.collect()
7578
proc = psutil.Process()
7679
mem_before_lanczos = proc.memory_info().rss / 1024 / 1024 # MB
80+
begin_lanczos = time()
7781

7882
Q, diag, off_diag = Lanczos_PRO(A_np, np.ones_like(np.diag(A_np)) * 1.0)
7983

84+
end_lanczos = time()
8085
gc.collect()
8186
mem_after_lanczos = proc.memory_info().rss / 1024 / 1024 # MB
8287
delta_mem_lanczos = mem_after_lanczos - mem_before_lanczos
88+
delta_t_lanzos = end_lanczos - begin_lanczos
89+
8390
print("Done. Now computing eigenvalues...")
8491
else:
8592
diag = off_diag = None
@@ -91,43 +98,55 @@
9198
gc.collect()
9299
proc = psutil.Process()
93100
mem_before = proc.memory_info().rss / 1024 / 1024 # MB
101+
time_before_parallel = time()
94102

95103
eigvals, eigvecs = parallel_tridiag_eigen(
96104
diag, off_diag, comm=comm, min_size=1, tol_factor=1e-10
97105
)
98106

107+
time_after_parallel = time()
99108
gc.collect()
100109
mem_after = proc.memory_info().rss / 1024 / 1024
101110
delta_mem = mem_after - mem_before
111+
delta_t_parallel = time_after_parallel - time_before_parallel
102112

103113
total_mem_children = comm.reduce(delta_mem, op=MPI.SUM, root=0)
114+
total_time_children = comm.reduce(delta_t_parallel, op=MPI.SUM, root=0)
104115

105116
# Collect the information across all ranks
106117
if rank == 0:
107118
total_mem_all = delta_mem_lanczos
119+
total_time_all = delta_t_lanzos + total_time_children
108120
print("Eigenvalues computed.")
109121
process = psutil.Process()
110122

111-
print(f"Total memory across all processes: {total_mem_all:.2f} MB")
112-
123+
print(f"[D&I] Total memory across all processes: {total_mem_all:.4f} MB")
124+
print(f"[D&I] Total time: across all processes: {total_time_all:.4f} s")
113125
# We also profile NumPy and SciPy memory consumption and execution time
114-
mem_np = profile_numpy_eigvals(A_np)
115-
print(f"NumPy eig memory usage: {mem_np:.2f} MB")
126+
mem_np, time_np = profile_numpy_eigvals(A_np)
127+
print(f"[NumPy] eig memory usage: {mem_np:.4f} MB")
128+
print(f"[NumPy] eig total time: {time_np:.4f} s")
116129

117-
mem_sp = profile_scipy_eigvals(A_np)
118-
print(f"SciPy eig memory usage: {mem_sp:.2f} MB")
130+
mem_sp, time_sp = profile_scipy_eigvals(A_np)
131+
print(f"[SciPy] eig memory usage: {mem_sp:.4f} MB")
132+
print(f"[SciPy] eig total time: {time_sp:.4f} s")
119133

120134
# Save to the logs folder
121135
os.makedirs("logs", exist_ok=True)
122-
log_file = "logs/memory_profile.csv"
136+
log_file = "logs/profile.csv"
123137
fieldnames = [
124138
"matrix_size",
125139
"n_processes",
126-
"mem_lanzos_mb",
140+
"mem_lanczos_mb",
127141
"mem_tridiag_mb",
128142
"mem_total_mb",
129143
"mem_numpy_mb",
130144
"mem_scipy_mb",
145+
"time_lanczos",
146+
"time_tridiag",
147+
"time_total",
148+
"time_numpy",
149+
"time_scipy",
131150
]
132151

133152
write_header = not os.path.exists(log_file)
@@ -139,11 +158,16 @@
139158
{
140159
"matrix_size": dim,
141160
"n_processes": size,
142-
"mem_lanzos_mb": round(delta_mem_lanczos, 2),
161+
"mem_lanczos_mb": round(delta_mem_lanczos, 2),
143162
"mem_tridiag_mb": round(total_mem_children, 2),
144163
"mem_total_mb": round(total_mem_all, 2),
145164
"mem_numpy_mb": round(mem_np, 2),
146165
"mem_scipy_mb": round(mem_sp, 2),
166+
"time_lanczos": round(delta_t_lanzos, 2),
167+
"time_tridiag": round(total_time_children, 2),
168+
"time_total": round(total_time_all, 2),
169+
"time_numpy": round(time_np, 2),
170+
"time_scipy": round(time_sp, 2),
147171
}
148172
)
149173

@@ -153,9 +177,10 @@
153177
import matplotlib.pyplot as plt
154178
import pandas as pd
155179

156-
df = pd.read_csv("logs/memory_profile.csv")
180+
df = pd.read_csv("logs/profile.csv")
157181
nproc_values = sorted(df["n_processes"].unique())
158182

183+
# First we plot the memory usage, then the execution time
159184
plt.figure(figsize=(10, 6))
160185

161186
numpy_avg = df.groupby("matrix_size")["mem_numpy_mb"].mean()
@@ -206,3 +231,54 @@
206231

207232
plt.savefig("logs/memory_profiling.png", bbox_inches="tight")
208233
plt.show()
234+
235+
plt.figure(figsize=(10, 6))
236+
237+
numpy_avg = df.groupby("matrix_size")["time_numpy"].mean()
238+
plt.plot(
239+
numpy_avg.index,
240+
numpy_avg.values,
241+
color="green",
242+
marker="x",
243+
linestyle="--",
244+
label="NumPy",
245+
)
246+
247+
scipy_avg = df.groupby("matrix_size")["time_scipy"].mean()
248+
plt.plot(
249+
scipy_avg.index,
250+
scipy_avg.values,
251+
color="red",
252+
marker="^",
253+
linestyle=":",
254+
label="SciPy",
255+
)
256+
257+
for nproc in nproc_values:
258+
subset = df[df["n_processes"] == nproc].sort_values("matrix_size")
259+
label = f"Divide et impera ({nproc} proc{'s' if nproc > 1 else ''})"
260+
plt.plot(
261+
subset["matrix_size"],
262+
subset["time_total"],
263+
marker="o",
264+
linestyle="-",
265+
label=label,
266+
)
267+
268+
plt.xlabel("Matrix size")
269+
plt.ylabel("Total time (s)")
270+
plt.xscale("log")
271+
plt.title("Execution time vs. Matrix size")
272+
plt.grid(True)
273+
plt.tight_layout()
274+
275+
plt.legend(
276+
bbox_to_anchor=(1.05, 1),
277+
loc="upper left",
278+
borderaxespad=0.0,
279+
title="Method",
280+
)
281+
plt.subplots_adjust(right=0.75)
282+
283+
plt.savefig("logs/time_profiling.png", bbox_inches="tight")
284+
plt.show()

shell/submit.sbatch

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ for dim in "${matrix_sizes[@]}"; do
6161
sed -i "s/^plot: .*/plot: true/" $CONFIG_FILE
6262
fi
6363

64-
srun --mpi=openmpi -n ${n_p} python scripts/profiling_memory.py
64+
srun --mpi=openmpi -n ${n_p} python scripts/profiling_memory_and_time.py
6565
done
6666
done
6767

shell/submit.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
# Ranges over which we iterate
44
n_processes=(1 2 4 8)
5-
matrix_sizes=(10 50 100 500 1000)
5+
matrix_sizes=(10 50 100 500 1000 1500)
66

77
last_dim="${matrix_sizes[-1]}"
88
last_nproc="${n_processes[-1]}"
@@ -30,7 +30,7 @@ for dim in "${matrix_sizes[@]}"; do
3030
sed -i "s/^plot: .*/plot: true/" $CONFIG_FILE
3131
fi
3232

33-
mpirun -np ${n_p} python scripts/profiling_memory.py
33+
mpirun -np ${n_p} python scripts/profiling_memory_and_time.py
3434
done
3535
done
3636

src/pyclassify/utils.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import psutil
66
import gc
77
import yaml
8+
from time import time
89

910
# import cProfile
1011
# from memory_profiler import memory_usage
@@ -162,14 +163,17 @@ def profile_numpy_eigvals(A):
162163
gc.collect()
163164
process = psutil.Process()
164165
mem_before = process.memory_info().rss / 1024 / 1024
166+
time_begin = time()
165167

166168
# NumPy symmetric eig solver
167169
eigvals, eigvecs = np.linalg.eigh(A)
168170

171+
time_end = time()
172+
elapsed_time = time_end - time_begin
169173
gc.collect()
170174
mem_after = process.memory_info().rss / 1024 / 1024
171175
delta_mem = mem_after - mem_before
172-
return delta_mem
176+
return delta_mem, elapsed_time
173177

174178

175179
def profile_scipy_eigvals(A):
@@ -191,14 +195,16 @@ def profile_scipy_eigvals(A):
191195
gc.collect()
192196
process = psutil.Process()
193197
mem_before = process.memory_info().rss / 1024 / 1024
198+
time_begin = time()
194199

195-
# SciPy symmetric eig solver
196200
eigvals, eigvecs = scipy.linalg.eigh(A)
197201

202+
time_end = time()
203+
elapsed_time = time_end - time_begin
198204
gc.collect()
199205
mem_after = process.memory_info().rss / 1024 / 1024
200206
delta_mem = mem_after - mem_before
201-
return delta_mem
207+
return delta_mem, elapsed_time
202208

203209

204210
######### OUTDATED FUNCTIONS: working but no longer used #########

0 commit comments

Comments
 (0)