11
11
#include < numeric>
12
12
13
13
std::vector<std::vector<double >>
14
- GivensProduct (const std::vector<std::array<double , 2 >>& Matrix_trigonometric,
14
+ GivensProduct (const std::vector<std::array<double , 2 >> Matrix_trigonometric,
15
15
unsigned int start,
16
16
unsigned int end,
17
17
unsigned int n)
18
18
{
19
- // Create identity matrix Q of size n x n.
20
- std::vector<std::vector<double >> Q (n, std::vector<double >(n, 0.0 ));
21
- for (unsigned int row = 0 ; row < n; ++row) {
22
- Q[row][row] = 1.0 ;
19
+ // Determine the size of the nontrivial block.
20
+ unsigned int m = end - start + 1 ;
21
+
22
+
23
+ // Allocate block B as an m x m matrix stored by column.
24
+ // B[j][r] is element at row r in column j.
25
+ std::vector<std::vector<double >> B (m, std::vector<double >(m, 0.0 ));
26
+
27
+ // Initialize B as the identity (in column–major form).
28
+ for (unsigned int j = 0 ; j < m; ++j) {
29
+ B[j][j] = 1.0 ;
23
30
}
24
31
25
- // For each rotation from start to end-1, update columns i and i+1 .
26
- for ( unsigned int i = start; i < end; ++i)
27
- {
28
- double c = Matrix_trigonometric[i ][0 ];
29
- double s = Matrix_trigonometric[i ][1 ];
32
+ // For each rotation in the range [start, end), reindexed by k = i - start .
33
+ // Each rotation affects columns k and k+1, and only the first (k+2) rows.
34
+ for ( unsigned int k = 0 ; k < m - 1 ; ++k) {
35
+ double c = Matrix_trigonometric[k ][0 ];
36
+ double s = Matrix_trigonometric[k ][1 ];
30
37
31
- // Rotate columns i and i+1 in Q (parallelizing over rows)
32
38
#pragma omp parallel for
33
- for (int row = start; row < i+2 ; ++row)
34
- {
35
- double tmp = Q[row][i];
36
- Q[row][i] = c * tmp - s * Q[row][i + 1 ];
37
- Q[row][i+1 ] = s * tmp + c * Q[row][i + 1 ];
39
+ for (unsigned int r = 0 ; r < k + 2 ; ++r) {
40
+ double tmp = B[k][r];
41
+ B[k][r] = c * tmp - s * B[k+1 ][r];
42
+ B[k+1 ][r] = s * tmp + c * B[k+1 ][r];
38
43
}
39
44
}
40
- return Q;
41
- }
42
45
43
- void operator /=(std::vector<double > & x, const double scale_factor){
44
- std::for_each (x.begin (), x.end (), [scale_factor](double element){return element/scale_factor;});
45
- }
46
- std::vector<double > operator /(const std::vector<double > & x, const double scale_factor){
47
- std::vector<double > z;
48
- std::transform (x.cbegin (), x.cend (), std::front_inserter (z), [scale_factor](double element){return element/scale_factor;});
49
- return z;
50
- }
51
- std::vector<double > operator *(std::vector<double > & x, const double scale_factor){
52
- std::for_each (x.begin (), x.end (), [scale_factor](double element){return element*scale_factor;});
53
- return x;
54
- }
55
-
56
- double norm_2 (std::vector<double > & x){
57
- double sum=std::accumulate (x.begin (), x.end (), 0 , [](double a, double b) { return a + b*b; });
58
- return std::sqrt (sum);
59
- }
60
- void operator -=(std::vector<double > & x, const std::vector<double >& y){
61
- std::transform (x.begin (), x.end (), y.begin (), x.begin (), std::minus<double >());
62
- }
63
- std::vector<double > operator -(std::vector<double > & x, const std::vector<double >& y){
64
- std::vector<double > z;
65
- std::transform (x.begin (), x.end (), y.begin (), std::front_inserter (z), std::minus<double >());
66
- return z;
46
+ return B;
67
47
}
68
- std::vector<double > operator *(const std::vector<std::vector<double >> & A, const std::vector<double > & b){
69
- std::vector<double > x (A.size (), 0 );
70
48
71
- // Parallelize this loop
72
- for (unsigned int i=0 ; i<A.size (); ++i){
73
- x[i]=std::inner_product (A[i].begin (), A[i].end (), b.begin (), 0 );
49
+ void Matrix_matrix_mult (const std::vector<std::vector<double >> &Q, std::vector<std::vector<double >> & Q_posterior, const std::vector<std::vector<double >> &G_i, const unsigned int start, const unsigned int end){
50
+ for (unsigned int j=start; j<end+1 ;j++){
51
+ for (unsigned int i=start; i<end+1 ;i++){
52
+ Q_posterior[i][j]= std::inner_product (Q[i].begin (), Q[i].end (), G_i[j].begin (), 0 );
53
+ }
74
54
}
75
- return x;
76
- }
77
-
78
- double operator *(const std::vector<double > & x, const std::vector<double > & y){
79
-
80
- return std::inner_product (x.cbegin (), x.cend (), y.cbegin (), 0 );
81
-
82
55
}
83
-
84
- void Lanczos_PRO (std::vector<std::vector<double >> A, std::vector<double > q, const unsigned int m, const double toll=1e-6 ){
85
-
86
- q/=norm_2 (q);
87
- std::vector<std::vector<double >> Q{q};
88
-
89
- std::vector<double > r=A*q;
90
-
91
- std::vector<double > alpha, beta;
92
-
93
- alpha.push_back (q*r);
94
- r=r-q*alpha[0 ];
95
-
96
- beta.push_back (norm_2 (r));
97
- std::vector<double > res;
98
-
99
- for (unsigned int j = 1 ; j < m; j++)
100
- {
101
- q= r/beta[j-1 ];
102
- res= Q*q;
103
-
104
- for (auto const ele: res){
105
- if (std::abs (ele)>toll){
56
+ // void operator/=(std::vector<double> & x, const double scale_factor){
57
+ // std::for_each(x.begin(), x.end(), [scale_factor](double element){return element/scale_factor;});
58
+ // }
59
+ // std::vector<double> operator/(const std::vector<double> & x, const double scale_factor){
60
+ // std::vector<double> z;
61
+ // std::transform(x.cbegin(), x.cend(), std::front_inserter(z), [scale_factor](double element){return element/scale_factor;});
62
+ // return z;
63
+ // }
64
+ // std::vector<double> operator*(std::vector<double> & x, const double scale_factor){
65
+ // std::for_each(x.begin(), x.end(), [scale_factor](double element){return element*scale_factor;});
66
+ // return x;
67
+ // }
68
+
69
+ // double norm_2(std::vector<double> & x){
70
+ // double sum=std::accumulate(x.begin(), x.end(), 0, [](double a, double b) { return a + b*b; });
71
+ // return std::sqrt(sum);
72
+ // }
73
+ // void operator-=(std::vector<double> & x, const std::vector<double>& y){
74
+ // std::transform(x.begin(), x.end(), y.begin(), x.begin(), std::minus<double>());
75
+ // }
76
+ // std::vector<double> operator-(std::vector<double> & x, const std::vector<double>& y){
77
+ // std::vector<double> z;
78
+ // std::transform(x.begin(), x.end(), y.begin(), std::front_inserter(z), std::minus<double>());
79
+ // return z;
80
+ // }
81
+ // std::vector<double> operator*(const std::vector<std::vector<double>> & A, const std::vector<double> & b){
82
+ // std::vector<double> x(A.size(), 0);
83
+
84
+ // // Parallelize this loop
85
+ // for(unsigned int i=0; i<A.size(); ++i){
86
+ // x[i]=std::inner_product(A[i].begin(), A[i].end(), b.begin(), 0);
87
+ // }
88
+ // return x;
89
+ // }
90
+
91
+ // double operator*(const std::vector<double> & x, const std::vector<double> & y){
92
+
93
+ // return std::inner_product(x.cbegin(), x.cend(), y.cbegin(), 0);
94
+
95
+ // }
96
+
97
+ // void Lanczos_PRO(std::vector<std::vector<double>> A, std::vector<double> q, const unsigned int m, const double toll=1e-6){
98
+
99
+ // q/=norm_2(q);
100
+ // std::vector<std::vector<double>> Q{q};
101
+
102
+ // std::vector<double> r=A*q;
103
+
104
+ // std::vector<double> alpha, beta;
105
+
106
+ // alpha.push_back(q*r);
107
+ // r=r-q*alpha[0];
108
+
109
+ // beta.push_back(norm_2(r));
110
+ // std::vector<double> res;
111
+
112
+ // for (unsigned int j = 1; j < m; j++)
113
+ // {
114
+ // q= r/beta[j-1];
115
+ // res= Q*q;
116
+
117
+ // for(auto const ele: res){
118
+ // if(std::abs(ele)>toll){
106
119
107
- for (unsigned int i=0 ;i<Q.size (); ++i){
108
- double h=(q*Q[i]);
109
- q=q-Q[i]*h;
110
- }
111
- break ;
112
- }
113
- }
114
- q/=norm_2 (q);
115
- Q.push_back (q);
116
- r=A*q-(Q[j-1 ]*beta[j-1 ]);
117
- alpha.push_back (q * r);
118
- r = r - q *alpha[j];
119
- beta.push_back (norm_2 (r));
120
+ // for(unsigned int i=0;i<Q.size(); ++i){
121
+ // double h=(q*Q[i]);
122
+ // q=q-Q[i]*h;
123
+ // }
124
+ // break;
125
+ // }
126
+ // }
127
+ // q/=norm_2(q);
128
+ // Q.push_back(q);
129
+ // r=A*q-(Q[j-1]*beta[j-1]);
130
+ // alpha.push_back(q * r);
131
+ // r = r - q *alpha[j];
132
+ // beta.push_back(norm_2(r));
133
+
134
+ // if (std::abs(beta[j]) < 1e-15){
135
+ // //return Q, alpha, beta[:-1]
136
+ // break;
137
+ // }
120
138
121
- if (std::abs (beta[j]) < 1e-15 ){
122
-
123
- }
124
-
125
- return Q, alpha, beta[:-1 ]
139
+
126
140
127
141
128
- }
142
+ // }
129
143
130
144
131
145
132
- }
146
+ // }
133
147
134
148
135
149
// std::pair<std::vector<double>, std::vector<std::vector<double>> >
136
150
void QR_algorithm (std::vector<double > diag, std::vector<double > off_diag, const double toll=1e-8 , const unsigned int max_iter=1000 ){
137
151
138
152
const unsigned int n = diag.size ();
139
153
140
- std::vector<std::vector<double >> Q (n, std::vector<double >(n, 0 ));
154
+ std::vector<std::vector<double >> Q (n, std::vector<double >(n, 0 )), Q_posterior (n, std::vector< double >(n, 0 )) ;
141
155
142
156
for (unsigned int i = 0 ; i < n; i++){
143
157
Q[i][i] = 1 ;
@@ -158,6 +172,20 @@ void QR_algorithm(std::vector<double> diag, std::vector<double> off_diag, cons
158
172
double toll_equivalence=1e-10 ;
159
173
double w=0 , z=0 ;
160
174
175
+
176
+ unsigned int n_processor=8 , delta_i=n/n_processor;
177
+ // // std::future<std::vector<std::vector<double>>> future1 = std::async(std::launch::async, GivensProduct, Matrix_trigonometric, 0, 250, 500);
178
+ // // std::future<std::vector<std::vector<double>>> future2 = std::async(std::launch::async, GivensProduct, Matrix_trigonometric, 250, 500 - 1 ,500);
179
+
180
+ std::vector< std::future<std::vector<std::vector<double >>> > vector_thread (n_processor);
181
+ std::vector <unsigned int > index;
182
+
183
+ for (unsigned int i=0 ;i<n_processor;i++){
184
+ index.push_back (i*delta_i);
185
+ }
186
+
187
+ index.push_back (n-1 );
188
+
161
189
while (iter<max_iter && m>0 )
162
190
{
163
191
// prefetching most used value to avoid call overhead
@@ -249,27 +277,25 @@ void QR_algorithm(std::vector<double> diag, std::vector<double> off_diag, cons
249
277
250
278
251
279
252
- // Uncomment to compute the eigenvalue
253
- for (unsigned int i=0 ; i<n-1 ; i++){
254
- for (unsigned j=0 ; j<n;j++){
255
- tmp=Q[j][i];
256
- Q[j][i]=tmp*Matrix_trigonometric[i][0 ]-Q[j][i+1 ]*Matrix_trigonometric[i][1 ];
257
- Q[j][i+1 ]=tmp*Matrix_trigonometric[i][1 ]+Q[j][i+1 ]*Matrix_trigonometric[i][0 ];
258
- }
259
- }
280
+ // // Uncomment to compute the eigenvalue
281
+ // for(unsigned int i=0; i<n-1; i++){
282
+ // for(unsigned j=0; j<n;j++){
283
+ // tmp=Q[j][i];
284
+ // Q[j][i]=tmp*Matrix_trigonometric[i][0]-Q[j][i+1]*Matrix_trigonometric[i][1];
285
+ // Q[j][i+1]=tmp*Matrix_trigonometric[i][1]+Q[j][i+1]*Matrix_trigonometric[i][0];
286
+ // }
287
+ // }
260
288
289
+ for (unsigned int i=0 ;i<n_processor; i++){
290
+ vector_thread[i] = std::async (std::launch::async, GivensProduct, std::vector<std::array<double , 2 >> (Matrix_trigonometric.begin ()+index[i], Matrix_trigonometric.begin () + index[i+1 ]), index[i], index[i+1 ], n);
291
+ }
292
+ std::vector< std::vector<std::vector<double >>> G_i (n_processor);
261
293
262
- unsigned int n_processor, delta_i=n/n_processor;
263
- std::future<std::vector<std::vector<double >>> future1 = std::async (std::launch::async, GivensProduct, Matrix_trigonometric, 0 , 250 - 1 , 500 );
264
- std::future<std::vector<std::vector<double >>> future2 = std::async (std::launch::async, GivensProduct, Matrix_trigonometric, 250 , 500 - 1 ,500 );
265
-
266
- std::vector< std::future<std::vector<std::vector<double >>> > vector_thread;
267
- std::vector <unsigned int > index;
268
- for (unsigned int i=0 ;i<n_processor;i++){
269
- index.push_back (i*delta_i);
294
+ for (unsigned int i=0 ;i<n_processor; i++){
295
+ G_i[i] = vector_thread[i].get ();;
270
296
}
271
297
272
- index. push_back (n- 1 );
298
+
273
299
274
300
iter++;
275
301
if ( std::abs (off_diag[m-1 ]) < toll*( std::abs (diag[m]) + std::abs (diag[m-1 ]) ) )
@@ -287,7 +313,7 @@ void QR_algorithm(std::vector<double> diag, std::vector<double> off_diag, cons
287
313
288
314
int main (){
289
315
290
- std::vector<double > diag (500 , 5 ), offdiag (499 , 20 );
316
+ std::vector<double > diag (5000 , 5 ), offdiag (4999 , 20 );
291
317
292
318
QR_algorithm (diag, offdiag);
293
319
0 commit comments