Skip to content

Commit b291022

Browse files
committed
Fixed bug in cxx_utils deflate_eigenpair. Lack of an inverse multiplication
1 parent ec81d32 commit b291022

File tree

2 files changed

+46
-210
lines changed

2 files changed

+46
-210
lines changed

src/pyclassify/cxx_utils.cpp

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -589,6 +589,7 @@ secular_solver(
589589
#include <Eigen/Dense>
590590
#include <Eigen/Sparse>
591591
#include <unordered_set>
592+
#include <set>
592593

593594
/**
594595
* Applies the deflation step in a divide-and-conquer eigenvalue algorithm.
@@ -632,6 +633,7 @@ deflateEigenpairs(
632633
double beta,
633634
double tol_factor = 1e-12
634635
) {
636+
635637
int n = D.size();
636638
// 1) Build full matrix M and compute norm for tolerance
637639
Eigen::MatrixXd M = D.asDiagonal();
@@ -659,6 +661,8 @@ deflateEigenpairs(
659661
}
660662
}
661663

664+
665+
662666
// 4) Build permutation P: [keep_indices, deflated_indices]
663667
std::vector<int> new_order;
664668
new_order.reserve(n);
@@ -667,6 +671,7 @@ deflateEigenpairs(
667671
Eigen::PermutationMatrix<Eigen::Dynamic, Eigen::Dynamic> P(n);
668672
P.indices() = Eigen::VectorXi::Map(new_order.data(), n);
669673

674+
670675
// 5) Extract subproblem D_keep and v_keep
671676
Eigen::VectorXd D_keep(static_cast<int>(keep_indices.size()));
672677
Eigen::VectorXd v_keep(static_cast<int>(keep_indices.size()));
@@ -676,8 +681,7 @@ deflateEigenpairs(
676681
}
677682

678683
// 6) Givens rotations for near-duplicate entries
679-
std::unordered_set<int> to_check;
680-
to_check.reserve(keep_indices.size());
684+
std::set<int> to_check;
681685
for (int i = 0; i < static_cast<int>(keep_indices.size()); ++i)
682686
to_check.insert(i);
683687
std::vector<std::tuple<int,int,double,double>> rotations;
@@ -707,11 +711,14 @@ deflateEigenpairs(
707711
}
708712
}
709713
}
714+
710715

711716
// 7) Final ordering after rotations
712717
std::vector<int> final_order(to_check.begin(), to_check.end());
713718
final_order.insert(final_order.end(), vec_idx_list.begin(), vec_idx_list.end());
714719

720+
721+
715722
// 8) Resize deflated_eigvals to actual number found
716723
deflated_eigvals.conservativeResize(j);
717724

@@ -728,15 +735,23 @@ deflateEigenpairs(
728735
G.coeffRef(k,i) = s;
729736
P2 = P2 * G;
730737
}
738+
739+
740+
// 10) Build permutation matrix P3 from final_order
741+
Eigen::SparseMatrix<double> P3(n, n);
742+
std::vector<Eigen::Triplet<double>> triplets;
743+
for (int new_pos = 0; new_pos < static_cast<int>(final_order.size()); ++new_pos) {
744+
int old_pos = final_order[new_pos];
745+
triplets.emplace_back(new_pos, old_pos, 1.0);
746+
}
747+
P3.setFromTriplets(triplets.begin(), triplets.end());
731748

732-
// 10) Build permutation matrix P3 from final_order
733-
Eigen::PermutationMatrix<Eigen::Dynamic, Eigen::Dynamic> P3(n);
734-
P3.indices() = Eigen::VectorXi::Map(final_order.data(), n);
735749

736750
// 11) Convert P and P3 to sparse<double> and combine all transforms
737751
Eigen::SparseMatrix<double> P_sparse = P.toDenseMatrix().cast<double>().sparseView();
738-
Eigen::SparseMatrix<double> P3_sparse = P3.toDenseMatrix().cast<double>().sparseView();
739-
Eigen::SparseMatrix<double> P_final = P3_sparse * P2 * P_sparse;
752+
753+
Eigen::SparseMatrix<double> P_final = P3 * P2 * P_sparse.transpose();
754+
740755

741756
// 12) Extract final D_keep and v_keep for reduced problem
742757
std::vector<int> final_keep(to_check.begin(), to_check.end());
@@ -755,7 +770,7 @@ deflateEigenpairs(
755770

756771
return std::make_tuple(
757772
deflated_eigvals,
758-
deflated_eigvecs.transpose(),
773+
deflated_eigvecs.transpose(), // Transpose to match expected output
759774
D_keep_final,
760775
v_keep_final,
761776
P_final

src/pyclassify/parallel_tridiag_eigen.py

Lines changed: 23 additions & 202 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@
77
import scipy.sparse as sp
88
from line_profiler import LineProfiler
99
import scipy
10-
10+
from pyclassify.utils import make_symmetric
11+
from pyclassify.eigenvalues import EigenSolver, Lanczos_PRO
1112

1213
profile = LineProfiler()
1314

@@ -301,9 +302,17 @@ def parallel_tridiag_eigen(
301302
D = np.concatenate((eigvals_left, eigvals_right))
302303
D_size=D.size
303304
v_vec = np.concatenate((eigvecs_left[-1, :], eigvecs_right[0, :]))
305+
304306
deflated_eigvals, deflated_eigvecs, D_keep, v_keep, P = deflate_eigenpairs_cxx(
305307
D, v_vec, beta, tol_factor
306308
)
309+
310+
# Pdeflated_eigvals, Pdeflated_eigvecs, PD_keep, Pv_keep, PP = deflate_eigenpairs(
311+
# D, v_vec, beta, tol_factor
312+
# )
313+
314+
315+
307316
D_keep=np.array(D_keep)
308317

309318
reduced_dim = len(D_keep)
@@ -313,13 +322,16 @@ def parallel_tridiag_eigen(
313322
idx_inv = np.arange(0, reduced_dim)
314323
idx_inv = idx_inv[idx]
315324

325+
# T= np.diag(D_keep) + beta * np.outer(v_keep, v_keep)
326+
# lam , _ = np.linalg.eigh(T)
327+
316328
lam, changing_position, delta = secular_solver_cxx(
317-
beta, D_keep[idx], v_keep[idx], np.arange(reduced_dim)
329+
beta, D_keep[idx], v_keep[idx] , np.arange(reduced_dim)
318330
)
319331
lam = np.array(lam)
320332
delta=np.array(delta)
321333
changing_position=np.array(changing_position)
322-
# #diff=lam_s-lam
334+
323335
else:
324336
lam = np.array([])
325337

@@ -350,9 +362,9 @@ def parallel_tridiag_eigen(
350362
v_keep=comm.bcast(v_keep, root=0)
351363
my_count = counts[rank]
352364
type_lam=comm.bcast(lam.dtype, root=0)
353-
# type_D=comm.bcast(D_keep.dtype, root=0)
365+
354366
lam_buffer=np.empty(my_count, dtype=type_lam)
355-
# D_buffer=np.empty(my_count, dtype=type_D)
367+
356368
P=comm.bcast(P, root=0)
357369
D_size=comm.bcast(D_size)
358370
changing_position=comm.bcast(changing_position, root=0)
@@ -381,11 +393,6 @@ def parallel_tridiag_eigen(
381393
root=0
382394
)
383395

384-
# # Scatterv with matching MPI datatype
385-
# comm.Scatterv([lam, counts, displs, MPI._typedict[type_lam] ],
386-
# lam_buffer, root=0)
387-
# comm.Scatterv([D_keep, counts, displs, MPI._typedict[type_lam] ],
388-
# D_buffer, root=0)
389396

390397
initial_point=displs[rank]
391398

@@ -403,8 +410,6 @@ def parallel_tridiag_eigen(
403410

404411
for j_rel in range(lam_buffer.size):
405412
y = np.zeros(D_size)
406-
# y[:reduced_dim]=v_keep/(lam[j]-D_keep)
407-
# y /= np.linalg.norm(y)
408413
j=j_rel + initial_point
409414
diff = lam[j] - D_keep
410415
diff[idx_inv[changing_position[j]]] = delta[j]
@@ -544,192 +549,6 @@ def parallel_tridiag_eigen(
544549

545550

546551

547-
# for eigval, vec in zip(deflated_eigvals, deflated_eigvecs):
548-
# # vec = Q_block @ vec
549-
# vec = np.concatenate((eigvecs_left @ vec[:n1], eigvecs_right @ vec[n1:]))
550-
# eigenpairs.append((eigval, vec))
551-
# eigenpairs.sort(key=lambda x: x[0])
552-
# final_eigvals = np.array([ev for ev, _ in eigenpairs])
553-
# final_eigvecs = np.column_stack([vec for _, vec in eigenpairs])
554-
# else:
555-
# final_eigvals, final_eigvecs = None, None
556-
557-
# final_eigvals = comm.bcast(final_eigvals, root=0)
558-
# final_eigvecs = comm.bcast(final_eigvecs, root=0)
559-
560-
# return final_eigvals, final_eigvecs
561-
562-
563-
564-
565-
566-
567-
# @profile
568-
# def parallel_tridiag_eigen(
569-
# diag,
570-
# off,
571-
# comm=None,
572-
# tol_factor=1e-16,
573-
# min_size=1,
574-
# depth=0,
575-
# profiler=None,
576-
# tol_QR=1e-8,
577-
# max_iterQR=5000,
578-
# ):
579-
# """
580-
# Computes eigenvalues and eigenvectors of a symmetric tridiagonal matrix.
581-
# Input:
582-
# -diag: diagonal part of the tridiagonal matrix
583-
# -off: off diagonal part of the tridiagonal matrix
584-
# -comm: MPI communicator
585-
# -tol_factor: tollerance for the deflating step
586-
587-
# Output:
588-
# -final_eigvals: return the eigenvalues of the tridiagonal matrix
589-
# -final_eigvecs: return the eigenvectors of the tridiagonal matrix
590-
591-
# """
592-
593-
# if comm is None:
594-
# comm = MPI.COMM_WORLD
595-
# rank = comm.Get_rank()
596-
# size = comm.Get_size()
597-
# current_rank = MPI.COMM_WORLD.Get_rank()
598-
# n = len(diag)
599-
# prof_filename = f"Profile_folder/profile.rank{current_rank}.depth{depth}.lprof"
600-
601-
602-
# if n <= min_size or size == 1:
603-
# eigvals, eigvecs = QR_algorithm(diag, off, tol_QR, max_iterQR)
604-
# eigvecs = np.array(eigvecs)
605-
# eigvals = np.array(eigvals)
606-
607-
# index_sort = np.argsort(eigvals)
608-
# eigvecs = eigvecs[:, index_sort]
609-
# eigvals = eigvals[index_sort]
610-
# return eigvals, eigvecs
611-
612-
# k = n // 2
613-
# diag1, diag2 = diag[:k].copy(), diag[k:].copy()
614-
# off1 = off[: k - 1].copy() if k > 1 else np.array([])
615-
# off2 = off[k:] if k < n - 1 else np.array([])
616-
# beta = off[k - 1].copy()
617-
618-
# diag1[-1] -= beta
619-
# diag2[0] -= beta
620-
621-
# # Parallel Recursion
622-
# left_size = size // 2 if size > 1 else 1
623-
# color = 0 if rank < left_size else 1
624-
# subcomm = comm.Split(color=color, key=rank)
625-
626-
# if color == 0:
627-
# eigvals_left, eigvecs_left = parallel_tridiag_eigen(
628-
# diag1,
629-
# off1,
630-
# comm=subcomm,
631-
# tol_factor=tol_factor,
632-
# min_size=min_size,
633-
# depth=depth + 1,
634-
# profiler=profiler,
635-
# )
636-
# else:
637-
# eigvals_right, eigvecs_right = parallel_tridiag_eigen(
638-
# diag2,
639-
# off2,
640-
# comm=subcomm,
641-
# tol_factor=tol_factor,
642-
# min_size=min_size,
643-
# depth=depth + 1,
644-
# profiler=profiler,
645-
# )
646-
647-
648-
649-
650-
# if rank == 0:
651-
# eigvals_right = comm.recv(source=left_size, tag=77)
652-
# eigvecs_right = comm.recv(source=left_size, tag=78)
653-
# elif rank == left_size:
654-
# comm.send(eigvals_right, dest=0, tag=77)
655-
# comm.send(eigvecs_right, dest=0, tag=78)
656-
657-
658-
# if rank == 0:
659-
660-
# # Merge Step
661-
# n1 = len(eigvals_left)
662-
# D = np.concatenate((eigvals_left, eigvals_right))
663-
# v_vec = np.concatenate((eigvecs_left[-1, :], eigvecs_right[0, :]))
664-
# deflated_eigvals, deflated_eigvecs, D_keep, v_keep, P = deflate_eigenpairs_cxx(
665-
# D, v_vec, beta, tol_factor
666-
# )
667-
# D_keep=np.array(D_keep)
668-
669-
# reduced_dim = len(D_keep)
670-
671-
# if D_keep.size > 0:
672-
# idx = np.argsort(D_keep)
673-
# idx_inv = np.arange(0, reduced_dim)
674-
# idx_inv = idx_inv[idx]
675-
676-
# lam, changing_position, delta = secular_solver_cxx(
677-
# beta, D_keep[idx], v_keep[idx], np.arange(reduced_dim)
678-
# )
679-
# lam = np.array(lam)
680-
# delta=np.array(delta)
681-
# changing_position=np.array(changing_position)
682-
# # #diff=lam_s-lam
683-
# else:
684-
# lam = np.array([])
685-
686-
687-
# for k in range(lam.size):
688-
# numerator = lam - D_keep[k]
689-
# denominator = np.concatenate((D_keep[:k], D_keep[k + 1 :])) - D_keep[k]
690-
# numerator[:-1] = numerator[:-1] / denominator
691-
# v_keep[k] = np.sqrt(np.abs(np.prod(numerator) / beta)) * np.sign(v_keep[k])
692-
693-
# eigenpairs = []
694-
695-
# for j in range(lam.size):
696-
# y = np.zeros(D.size)
697-
# # y[:reduced_dim]=v_keep/(lam[j]-D_keep)
698-
# # y /= np.linalg.norm(y)
699-
700-
# diff = lam[j] - D_keep
701-
# diff[idx_inv[changing_position[j]]] = delta[j]
702-
# y[:reduced_dim] = v_keep / (diff)
703-
# y_norm = np.linalg.norm(y)
704-
# if y_norm > 1e-15:
705-
# y /= y_norm
706-
707-
# y = P.T @ y
708-
# vec = np.concatenate((eigvecs_left @ y[:n1], eigvecs_right @ y[n1:]))
709-
# vec /= np.linalg.norm(vec)
710-
# eigenpairs.append((lam[j], vec))
711-
712-
# for eigval, vec in zip(deflated_eigvals, deflated_eigvecs):
713-
# # vec = Q_block @ vec
714-
# vec = np.concatenate((eigvecs_left @ vec[:n1], eigvecs_right @ vec[n1:]))
715-
# eigenpairs.append((eigval, vec))
716-
# eigenpairs.sort(key=lambda x: x[0])
717-
# final_eigvals = np.array([ev for ev, _ in eigenpairs])
718-
# final_eigvecs = np.column_stack([vec for _, vec in eigenpairs])
719-
# else:
720-
# final_eigvals, final_eigvecs = None, None
721-
722-
# final_eigvals = comm.bcast(final_eigvals, root=0)
723-
# final_eigvecs = comm.bcast(final_eigvecs, root=0)
724-
725-
# return final_eigvals, final_eigvecs
726-
727-
728-
729-
730-
731-
732-
733552

734553
def parallel_eigen(
735554
main_diag, off_diag, tol_QR=1e-15, max_iterQR=5000, tol_deflation=1e-15
@@ -752,17 +571,19 @@ def parallel_eigen(
752571
if __name__ == "__main__":
753572
comm = MPI.COMM_WORLD
754573
rank = comm.Get_rank()
755-
n = 900
574+
n = 100
756575
if rank == 0:
757576

758577
# import debugpy
759578
# port = 5678 + rank # 5678 for rank 0, 5679 for rank 1
760579
# debugpy.listen(("localhost", port))
761580
# print(f"Rank {rank} waiting for debugger attach on port {port}")
762581
# debugpy.wait_for_client()
763-
# np.random.seed(42)
764-
main_diag = np.ones(n, dtype=np.float64) * 2.0
765-
off_diag = np.ones(n - 1, dtype=np.float64) *1.0
582+
np.random.seed(42)
583+
# main_diag = np.ones(n, dtype=np.float64) * 2.0
584+
# off_diag = np.ones(n - 1, dtype=np.float64) *1.0
585+
main_diag = (np.random.rand(n) * 2).astype(np.float64)
586+
off_diag = (np.random.rand(n - 1) *1).astype(np.float64)
766587
# eig = np.arange(1, n + 1)
767588
# A = np.diag(eig)
768589
# U = scipy.stats.ortho_group.rvs(n)

0 commit comments

Comments
 (0)