@@ -29,139 +29,6 @@ def check_column_directions(A, B):
29
29
B [:, i ] = - B [:, i ]
30
30
31
31
32
- @profile
33
- def deflate_eigenpairs (D , v , beta , tol_factor = 1e-12 ):
34
- """
35
- Applying the deflation step to the divide and conquer algorithm to reduce the size of
36
- the system to be solved and find the trivial eigenvalues and eigenvectors.
37
- Notice that we cannot use jit since we work with scipy sparse matrices.
38
- Input:
39
- -D: element on the diagonal.
40
- -v: rank one update vector
41
- -beta: scalar value that multiplies the rank one update
42
-
43
- Output:
44
- -deflated_eigvals: Vector containing the trivial eigenvalue
45
- -deflated_eigvecs: Matrix containing the trivial eigenvectors
46
- -D_keep: element on the diagonal after the deflation.
47
- -v_keep: rank one update vector after the deflation
48
- -P_3 @ P_2 @ P: Permutation matrix
49
- """
50
-
51
- norm_T = np .linalg .norm (
52
- np .diag (D ) + beta * np .outer (v , v )
53
- ) # Normalize tolerance to matrix size
54
- # abs_v=np.abs(v)
55
- # norm_T=0.5*(np.max(np.abs(D)) + beta * np.max(abs_v)*np.sum(abs_v))
56
- tol = tol_factor * norm_T
57
- keep_indices = []
58
- deflated_eigvals = np .zeros_like (D )
59
- deflated_eigvecs = []
60
- deflated_indices = []
61
- reduced_dimension = len (D )
62
-
63
- # Zero component deflation
64
- e_vec = np .zeros (len (D ))
65
- j = 0
66
- for i in range (len (D )):
67
- if abs (v [i ]) < tol :
68
- deflated_eigvals [j ] = D [i ]
69
- e_vec [i ] = 1.0 # Standard basis vector
70
- deflated_eigvecs .append (e_vec .copy ())
71
- deflated_indices .append (i )
72
- e_vec [i ] = 0.0
73
- j += 1
74
- else :
75
- keep_indices .append (i )
76
-
77
- new_order = keep_indices + deflated_indices
78
- reduced_dimension = len (keep_indices )
79
-
80
- # Create permutation matrix P (use sparse)
81
- n = len (D )
82
- P = sp .lil_array ((n , n )) # Use sparse format
83
- P_2 = sp .eye (n , format = "csr" ) # Efficient multiplication format
84
- P_3 = sp .lil_array ((n , n )) # Permutation matrix
85
-
86
- for new_pos , old_pos in enumerate (new_order ):
87
- P [new_pos , old_pos ] = 1 # Assign 1s to move elements accordingly
88
-
89
- P = P .tocsr () # Convert to CSR format for fast multiplication
90
-
91
- D_keep = D [keep_indices ]
92
- v_keep = v [keep_indices ]
93
-
94
- to_check = set (
95
- np .arange (reduced_dimension , dtype = np .int64 )
96
- ) # Use set for fast lookup
97
- rotation_matrix = []
98
- vec_idx_list = [] # Use list instead of np.append()
99
-
100
- to_check_copy = list (to_check ) # Convert to list for iteration
101
-
102
- for i in to_check_copy [:- 1 ]:
103
- if i not in to_check :
104
- continue # Skip if the index was removed
105
-
106
- # Find duplicates in a vectorized way
107
- idx_duplicate_vec = np .where (np .abs (D_keep [i + 1 :] - D_keep [i ]) < tol )[0 ]
108
- if len (idx_duplicate_vec ):
109
- idx_duplicate_vec += i + 1 # Adjust indices
110
-
111
- for idx_duplicate in idx_duplicate_vec :
112
- to_check .discard (idx_duplicate ) # O(1) removal instead of np.delete()
113
-
114
- # Compute Givens rotation parameters
115
- r = np .hypot (
116
- v_keep [i ], v_keep [idx_duplicate ]
117
- ) # More stable than sqrt(x^2 + y^2)
118
- c = v_keep [i ] / r
119
- s = - v_keep [idx_duplicate ] / r
120
-
121
- v_keep [i ] = r
122
- v_keep [idx_duplicate ] = 0
123
-
124
- # Store transformation
125
- rotation_matrix .append ((i , idx_duplicate , c , s ))
126
- deflated_eigvals [j ] = D_keep [i ]
127
- j += 1
128
-
129
- # Efficient eigenvector computation
130
- eig_vec_local = np .zeros (n )
131
- eig_vec_local [idx_duplicate ] = c
132
- eig_vec_local [i ] = s
133
- deflated_eigvecs .append (P .T @ eig_vec_local )
134
-
135
- vec_idx_list .append (
136
- idx_duplicate
137
- ) # Use list instead of slow np.append()
138
-
139
- new_order = np .concatenate (
140
- (list (to_check ), vec_idx_list )
141
- ) # Efficient concatenation
142
- new_order = new_order .astype (int )
143
- deflated_eigvals = deflated_eigvals [:j ]
144
-
145
- # Apply Givens rotations
146
- for i , j , c , s in rotation_matrix :
147
- G = sp .eye (n , n , format = "csr" ) # Sparse identity matrix
148
- G [i , i ] = G [j , j ] = c
149
- G [i , j ] = - s
150
- G [j , i ] = s
151
- P_2 = P_2 @ G # Sparse multiplication
152
-
153
- for new_pos , old_pos in enumerate (new_order ):
154
- P_3 [new_pos , old_pos ] = 1 # Assign 1s
155
-
156
- P_3 = P_3 .tocsr ()
157
-
158
- to_check = [i for i in to_check ]
159
- reduced_dimension = len (to_check )
160
- D_keep = D_keep [to_check ]
161
- v_keep = v_keep [to_check ]
162
-
163
- return deflated_eigvals , np .array (deflated_eigvecs ), D_keep , v_keep , P_3 @ P_2 @ P
164
-
165
32
166
33
def find_interval_extreme (total_dimension , n_processor ):
167
34
"""
@@ -293,12 +160,6 @@ def parallel_tridiag_eigen(
293
160
eigvals_right = subcomm .bcast (eigvals_right , root = 0 )
294
161
eigvecs_right = subcomm .bcast (eigvecs_right , root = 0 )
295
162
296
- # if rank == 0:
297
- # eigvals_right = comm.recv(source=left_size, tag=77)
298
- # eigvecs_right = comm.recv(source=left_size, tag=78)
299
- # elif rank == left_size:
300
- # comm.send(eigvals_right, dest=0, tag=77)
301
- # comm.send(eigvecs_right, dest=0, tag=78)
302
163
303
164
if rank == 0 :
304
165
@@ -312,10 +173,6 @@ def parallel_tridiag_eigen(
312
173
D , v_vec , beta , tol_factor
313
174
)
314
175
315
- # Pdeflated_eigvals, Pdeflated_eigvecs, PD_keep, Pv_keep, PP = deflate_eigenpairs(
316
- # D, v_vec, beta, tol_factor
317
- # )
318
-
319
176
D_keep = np .array (D_keep )
320
177
321
178
reduced_dim = len (D_keep )
@@ -325,9 +182,6 @@ def parallel_tridiag_eigen(
325
182
idx_inv = np .arange (0 , reduced_dim )
326
183
idx_inv = idx_inv [idx ]
327
184
328
- # T= np.diag(D_keep) + beta * np.outer(v_keep, v_keep)
329
- # lam , _ = np.linalg.eigh(T)
330
-
331
185
lam , changing_position , delta = secular_solver_cxx (
332
186
beta , D_keep [idx ], v_keep [idx ], np .arange (reduced_dim )
333
187
)
@@ -492,14 +346,8 @@ def parallel_tridiag_eigen(
492
346
local_count = eig_val .size # or however many elements you’ll send
493
347
494
348
# 2) Everyone exchanges counts via allgather:
495
- # this returns a Python list of length `size` on every rank
496
349
recvcounts = comm .allgather (local_count )
497
350
498
- # # 1) Gather all the counts to rank 0
499
- # counts = comm.gather(local_count, root=0)
500
-
501
- # # 2) Broadcast that list from rank 0 back to everyone
502
- # recvcounts = comm.bcast(counts, root=0)
503
351
504
352
final_eig_val = np .empty (D_size , dtype = eig_val .dtype )
505
353
@@ -527,15 +375,12 @@ def parallel_tridiag_eigen(
527
375
)
528
376
529
377
# 5) Reshape on every rank (or just on rank 0 if you prefer)
530
- # total_pairs == sum(recvcounts)
531
378
final_eig_vecs = flat_all .reshape (D_size , D_size )
532
379
final_eig_vecs = final_eig_vecs .T
533
380
index_sort = np .argsort (final_eig_val )
534
381
final_eig_vecs = final_eig_vecs [:, index_sort ]
535
382
final_eig_val = final_eig_val [index_sort ]
536
- # if rank==0:
537
- # print(final_eig_val)
538
- # print(final_eig_vecs)
383
+
539
384
return final_eig_val , final_eig_vecs
540
385
541
386
@@ -556,7 +401,143 @@ def parallel_eigen(
556
401
)
557
402
return eigvals , eigvecs
558
403
559
-
404
+
405
+
406
+ # ORIGINAL DEFLATION IMPLEMENTATION
407
+ # @profile
408
+ # def deflate_eigenpairs(D, v, beta, tol_factor=1e-12):
409
+ # """
410
+ # Applying the deflation step to the divide and conquer algorithm to reduce the size of
411
+ # the system to be solved and find the trivial eigenvalues and eigenvectors.
412
+ # Notice that we cannot use jit since we work with scipy sparse matrices.
413
+ # Input:
414
+ # -D: element on the diagonal.
415
+ # -v: rank one update vector
416
+ # -beta: scalar value that multiplies the rank one update
417
+
418
+ # Output:
419
+ # -deflated_eigvals: Vector containing the trivial eigenvalue
420
+ # -deflated_eigvecs: Matrix containing the trivial eigenvectors
421
+ # -D_keep: element on the diagonal after the deflation.
422
+ # -v_keep: rank one update vector after the deflation
423
+ # -P_3 @ P_2 @ P: Permutation matrix
424
+ # """
425
+
426
+ # norm_T = np.linalg.norm(
427
+ # np.diag(D) + beta * np.outer(v, v)
428
+ # ) # Normalize tolerance to matrix size
429
+ # # abs_v=np.abs(v)
430
+ # # norm_T=0.5*(np.max(np.abs(D)) + beta * np.max(abs_v)*np.sum(abs_v))
431
+ # tol = tol_factor * norm_T
432
+ # keep_indices = []
433
+ # deflated_eigvals = np.zeros_like(D)
434
+ # deflated_eigvecs = []
435
+ # deflated_indices = []
436
+ # reduced_dimension = len(D)
437
+
438
+ # # Zero component deflation
439
+ # e_vec = np.zeros(len(D))
440
+ # j = 0
441
+ # for i in range(len(D)):
442
+ # if abs(v[i]) < tol:
443
+ # deflated_eigvals[j] = D[i]
444
+ # e_vec[i] = 1.0 # Standard basis vector
445
+ # deflated_eigvecs.append(e_vec.copy())
446
+ # deflated_indices.append(i)
447
+ # e_vec[i] = 0.0
448
+ # j += 1
449
+ # else:
450
+ # keep_indices.append(i)
451
+
452
+ # new_order = keep_indices + deflated_indices
453
+ # reduced_dimension = len(keep_indices)
454
+
455
+ # # Create permutation matrix P (use sparse)
456
+ # n = len(D)
457
+ # P = sp.lil_array((n, n)) # Use sparse format
458
+ # P_2 = sp.eye(n, format="csr") # Efficient multiplication format
459
+ # P_3 = sp.lil_array((n, n)) # Permutation matrix
460
+
461
+ # for new_pos, old_pos in enumerate(new_order):
462
+ # P[new_pos, old_pos] = 1 # Assign 1s to move elements accordingly
463
+
464
+ # P = P.tocsr() # Convert to CSR format for fast multiplication
465
+
466
+ # D_keep = D[keep_indices]
467
+ # v_keep = v[keep_indices]
468
+
469
+ # to_check = set(
470
+ # np.arange(reduced_dimension, dtype=np.int64)
471
+ # ) # Use set for fast lookup
472
+ # rotation_matrix = []
473
+ # vec_idx_list = [] # Use list instead of np.append()
474
+
475
+ # to_check_copy = list(to_check) # Convert to list for iteration
476
+
477
+ # for i in to_check_copy[:-1]:
478
+ # if i not in to_check:
479
+ # continue # Skip if the index was removed
480
+
481
+ # # Find duplicates in a vectorized way
482
+ # idx_duplicate_vec = np.where(np.abs(D_keep[i + 1 :] - D_keep[i]) < tol)[0]
483
+ # if len(idx_duplicate_vec):
484
+ # idx_duplicate_vec += i + 1 # Adjust indices
485
+
486
+ # for idx_duplicate in idx_duplicate_vec:
487
+ # to_check.discard(idx_duplicate) # O(1) removal instead of np.delete()
488
+
489
+ # # Compute Givens rotation parameters
490
+ # r = np.hypot(
491
+ # v_keep[i], v_keep[idx_duplicate]
492
+ # ) # More stable than sqrt(x^2 + y^2)
493
+ # c = v_keep[i] / r
494
+ # s = -v_keep[idx_duplicate] / r
495
+
496
+ # v_keep[i] = r
497
+ # v_keep[idx_duplicate] = 0
498
+
499
+ # # Store transformation
500
+ # rotation_matrix.append((i, idx_duplicate, c, s))
501
+ # deflated_eigvals[j] = D_keep[i]
502
+ # j += 1
503
+
504
+ # # Efficient eigenvector computation
505
+ # eig_vec_local = np.zeros(n)
506
+ # eig_vec_local[idx_duplicate] = c
507
+ # eig_vec_local[i] = s
508
+ # deflated_eigvecs.append(P.T @ eig_vec_local)
509
+
510
+ # vec_idx_list.append(
511
+ # idx_duplicate
512
+ # ) # Use list instead of slow np.append()
513
+
514
+ # new_order = np.concatenate(
515
+ # (list(to_check), vec_idx_list)
516
+ # ) # Efficient concatenation
517
+ # new_order = new_order.astype(int)
518
+ # deflated_eigvals = deflated_eigvals[:j]
519
+
520
+ # # Apply Givens rotations
521
+ # for i, j, c, s in rotation_matrix:
522
+ # G = sp.eye(n, n, format="csr") # Sparse identity matrix
523
+ # G[i, i] = G[j, j] = c
524
+ # G[i, j] = -s
525
+ # G[j, i] = s
526
+ # P_2 = P_2 @ G # Sparse multiplication
527
+
528
+ # for new_pos, old_pos in enumerate(new_order):
529
+ # P_3[new_pos, old_pos] = 1 # Assign 1s
530
+
531
+ # P_3 = P_3.tocsr()
532
+
533
+ # to_check = [i for i in to_check]
534
+ # reduced_dimension = len(to_check)
535
+ # D_keep = D_keep[to_check]
536
+ # v_keep = v_keep[to_check]
537
+
538
+ # return deflated_eigvals, np.array(deflated_eigvecs), D_keep, v_keep, P_3 @ P_2 @ P
539
+
540
+ # This portion of the script was used during the intense debugging phase.
560
541
if __name__ == "__main__" :
561
542
comm = MPI .COMM_WORLD
562
543
rank = comm .Get_rank ()
0 commit comments