Automatic Differentiation
 
Loading...
Searching...
No Matches
mrrr.hpp
Go to the documentation of this file.
1#ifndef STAN_MATH_OPENCL_MRRR_HPP
2#define STAN_MATH_OPENCL_MRRR_HPP
3
4#ifdef STAN_OPENCL
5
6#include <cmath>
7#include <queue>
8
14
16
17namespace stan {
18namespace math {
19namespace internal {
20
21using VectorXdd = Eigen::Matrix<double_d, -1, 1>;
22using MatrixXdd = Eigen::Matrix<double_d, -1, -1>;
23
24static const double_d perturbation_range = 1e-20;
25
32 static const double_d rand_norm = perturbation_range / RAND_MAX;
33 static const double_d almost_one = 1 - perturbation_range * 0.5;
34 return almost_one + std::rand() * rand_norm;
35}
36
45inline void get_gresgorin(const Eigen::Ref<const Eigen::VectorXd> diagonal,
46 const Eigen::Ref<const Eigen::VectorXd> subdiagonal,
47 double& min_eigval, double& max_eigval) {
48 using std::fabs;
49 const int n = diagonal.size();
50 min_eigval = diagonal[0] - fabs(subdiagonal[0]);
51 max_eigval = diagonal[0] + fabs(subdiagonal[0]);
52 for (int i = 1; i < n - 1; i++) {
53 min_eigval = std::min(min_eigval, diagonal[i] - fabs(subdiagonal[i])
54 - fabs(subdiagonal[i - 1]));
55 max_eigval = std::max(max_eigval, diagonal[i] + fabs(subdiagonal[i])
56 + fabs(subdiagonal[i - 1]));
57 }
58 min_eigval = std::min(min_eigval, diagonal[n - 1] - fabs(subdiagonal[n - 2]));
59 max_eigval = std::max(max_eigval, diagonal[n - 1] + fabs(subdiagonal[n - 2]));
60}
61
69inline double max_nan(double a, double b) { return isnan(a) || a > b ? a : b; }
70
83inline double get_ldl(const Eigen::Ref<const Eigen::VectorXd> diagonal,
84 const Eigen::Ref<const Eigen::VectorXd> subdiagonal,
85 const double shift, VectorXdd& l, VectorXdd& d_plus) {
86 using std::fabs;
87 d_plus[0] = diagonal[0] - shift;
88 double element_growth = fabs(d_plus[0].high);
89 for (int i = 0; i < subdiagonal.size(); i++) {
90 l[i] = subdiagonal[i] / d_plus[i];
91 d_plus[i + 1] = diagonal[i + 1] - shift - l[i] * subdiagonal[i];
92 element_growth = max_nan(element_growth, fabs(d_plus[i + 1].high));
93 }
94 return element_growth;
95}
96
109inline double find_initial_shift(
110 const Eigen::Ref<const Eigen::VectorXd> diagonal,
111 const Eigen::Ref<const Eigen::VectorXd> subdiagonal, VectorXdd& l0,
112 VectorXdd& d0, const double min_eigval, const double max_eigval,
113 const double max_ele_growth) {
114 double shift = (max_eigval + min_eigval) * 0.5;
115 double element_growth = get_ldl(diagonal, subdiagonal, shift, l0, d0);
116 if (element_growth < max_ele_growth) {
117 return shift;
118 }
119 double plus = (max_eigval - min_eigval) * 1e-15;
120 while (!(element_growth < max_ele_growth)) { // if condition was flipped it
121 // would be wrong for the case
122 // where element_growth is nan
123 plus *= -2;
124 element_growth = get_ldl(diagonal, subdiagonal, shift + plus, l0, d0);
125 }
126 return shift + plus;
127}
128
139inline int get_sturm_count_ldl(const VectorXdd& l, const VectorXdd& d,
140 const double_d shift) {
141 using std::isinf;
142 const int n = l.size();
143 double_d s = -shift;
144 double_d d_plus;
145 int count = 0;
146 for (int i = 0; i < n; i++) {
147 d_plus = s + d.coeff(i);
148 count += d_plus >= 0;
149 if (isinf(s)) { // this happens if d_plus==0 -> in next
150 // iteration d_plus==inf and s==inf
151 s = l.coeff(i) * l.coeff(i) * d.coeff(i) - shift;
152 } else {
153 s = l.coeff(i) * l.coeff(i) * s * (d.coeff(i) / d_plus) - shift;
154 }
155 }
156 d_plus = s + d.coeff(n);
157 count += d_plus >= 0;
158 return count;
159}
160
170inline void eigenval_bisect_refine(const VectorXdd& l, const VectorXdd& d,
171 double_d& low, double_d& high, const int i) {
172 using std::fabs;
173 const double_d eps = 3e-20;
174 while ((high - low) > eps * fabs(high + low)
175 && fabs(high - low)
176 > std::numeric_limits<double>::min()) { // second term is for
177 // the case where the
178 // eigenvalue is 0 and
179 // division yields NaN
180 double_d mid = (high + low) * 0.5;
181 if (get_sturm_count_ldl(l, d, mid) > i) {
182 low = mid;
183 } else {
184 high = mid;
185 }
186 }
187}
188
202inline double get_shifted_ldl(const VectorXdd& l, const VectorXdd& d,
203 const double_d shift, VectorXdd& l_plus,
204 VectorXdd& d_plus) {
205 using std::fabs;
206 using std::isinf;
207 const int n = l.size();
208 double_d s = -shift;
209 double element_growth = 0;
210 for (int i = 0; i < n; i++) {
211 d_plus[i] = s + d[i];
212 element_growth = max_nan(element_growth, fabs(d_plus[i].high));
213 l_plus[i] = l[i] * (d[i] / d_plus[i]);
214 if (isinf(d_plus[i]) && isinf(s)) { // this happens if d_plus[i]==0 -> in
215 // next iteration d_plus==inf and
216 // s==inf
217 s = l[i] * l[i] * d[i] - shift;
218 } else {
219 s = l_plus[i] * l[i] * s - shift;
220 }
221 }
222 d_plus[n] = s + d[n];
223 element_growth = max_nan(element_growth, fabs(d_plus[n].high));
224 return element_growth;
225}
226
242inline void find_shift(const VectorXdd& l, const VectorXdd& d,
243 const double_d low, const double_d high,
244 const double max_ele_growth, const double_d max_shift,
245 VectorXdd& l2, VectorXdd& d2, double_d& shift,
246 double& min_element_growth) {
247 VectorXdd l3(l2.size()), d3(d2.size());
248 const std::vector<double_d> shifts = {
249 low,
250 high - max_shift * 0.1,
251 low + max_shift * 0.1,
252 high - max_shift * 0.25,
253 low + max_shift * 0.25,
254 high - max_shift * 0.5,
255 low + max_shift * 0.5,
256 high - max_shift * 0.75,
257 low + max_shift * 0.75,
258 high - max_shift,
259 low + max_shift,
260 };
261 min_element_growth = std::numeric_limits<double>::infinity();
262 for (double_d sh : shifts) {
263 const double element_growth = get_shifted_ldl(l, d, sh, l3, d3);
264 if (element_growth < min_element_growth) {
265 l2.swap(l3);
266 d2.swap(d3);
267 shift = sh;
268 min_element_growth = element_growth;
269 if (element_growth <= max_ele_growth) {
270 break;
271 }
272 }
273 }
274}
275
276struct mrrr_task {
277 int start, end;
278 double_d shift; // total shift, not just the last one
280 int level;
281};
282
297template <bool need_eigenvectors = true>
298inline void mrrr_cl(const Eigen::Ref<const Eigen::VectorXd> diagonal,
299 const Eigen::Ref<const Eigen::VectorXd> subdiagonal,
300 Eigen::Ref<Eigen::VectorXd> eigenvalues,
301 Eigen::Ref<Eigen::MatrixXd> eigenvectors,
302 const double min_rel_sep = 1e-4,
303 const double maximum_ele_growth = 15) {
304 using std::copysign;
305 using std::fabs;
306 const double shift_error = 1e-19;
307 const int n = diagonal.size();
308 double min_eigval;
309 double max_eigval;
310 const Eigen::VectorXd subdiagonal_squared
311 = subdiagonal.array() * subdiagonal.array();
312 get_gresgorin(diagonal, subdiagonal, min_eigval, max_eigval);
313 if (!need_eigenvectors) {
314 matrix_cl<double> diagonal_cl(diagonal);
315 matrix_cl<double> subdiagonal_squared_cl(subdiagonal_squared);
316 matrix_cl<double> eigenvalues_cl(n, 1);
317 matrix_cl<double_d> l_cl, d_cl, high_cl, low_cl(n, 1);
318 opencl_kernels::eigenvals(cl::NDRange(n), diagonal_cl,
319 subdiagonal_squared_cl, l_cl, d_cl,
320 eigenvalues_cl, low_cl, high_cl, min_eigval,
321 max_eigval, 0, need_eigenvectors);
322 eigenvalues = from_matrix_cl(eigenvalues_cl);
323 return;
324 }
325 VectorXdd l(n - 1), d(n);
326 const double max_ele_growth = maximum_ele_growth * (max_eigval - min_eigval);
327 const double shift0 = find_initial_shift(
328 diagonal, subdiagonal, l, d, min_eigval, max_eigval, max_ele_growth);
329 for (int i = 0; i < n; i++) {
330 if (i != n - 1) {
332 }
334 }
335 VectorXdd high(n), low(n);
336
337 matrix_cl<double> diagonal_cl(diagonal);
338 matrix_cl<double> subdiagonal_squared_cl(subdiagonal_squared);
339 matrix_cl<double_d> l_cl(l);
340 matrix_cl<double_d> d_cl(d);
341 matrix_cl<double> eigenvalues_cl(n, 1);
342 matrix_cl<double_d> high_cl(n, 1);
343 matrix_cl<double_d> low_cl(n, 1);
344 opencl_kernels::eigenvals(cl::NDRange(n), diagonal_cl, subdiagonal_squared_cl,
345 l_cl, d_cl, eigenvalues_cl, low_cl, high_cl,
346 min_eigval, max_eigval, shift0, need_eigenvectors);
347 eigenvalues = from_matrix_cl(eigenvalues_cl);
348 high = from_matrix_cl(high_cl);
349 low = from_matrix_cl(low_cl);
350
351 MatrixXdd l_big(n - 1, n), d_big(n, n);
352
353 std::queue<mrrr_task> block_queue;
354 block_queue.push(mrrr_task{0, n, {shift0, 0}, std::move(l), std::move(d), 0});
355 l.resize(n - 1); // after move out
356 d.resize(n);
357 while (!block_queue.empty()) {
358 const mrrr_task block = block_queue.front();
359 block_queue.pop();
360 double_d shift = std::numeric_limits<double>::infinity();
361 double min_element_growth = std::numeric_limits<double>::infinity();
362 VectorXdd l2(n - 1), d2(n), l_plus(n - 1), u_minus(n - 1);
363 for (int i = block.start; i < block.end; i++) {
364 int cluster_end;
365 for (cluster_end = i + 1; cluster_end < block.end; cluster_end++) {
366 const int prev = cluster_end - 1;
367 const double_d end_threshold
368 = low[prev] * (1 - copysign(min_rel_sep, low[prev]));
369 if (high[cluster_end] < end_threshold) {
370 break;
371 }
372 }
373 cluster_end--; // now this is the index of the last element of the
374 // cluster
375 if (cluster_end > i) { // cluster
376 double_d max_shift
377 = (high[cluster_end - 1] - low[cluster_end]) / min_rel_sep;
378 double_d next_shift;
379 double min_ele_growth;
380 find_shift(block.l, block.d, low[cluster_end], high[i], max_ele_growth,
381 max_shift, l, d, next_shift, min_ele_growth);
382 for (int j = i; j <= cluster_end; j++) {
383 low[j] = low[j] * (double_d(1.0, 0.0) - copysign(shift_error, low[j]))
384 - next_shift - 1e-200;
385 high[j]
386 = high[j] * (double_d(1.0, 0.0) + copysign(shift_error, high[j]))
387 - next_shift + 1e-200;
388 eigenval_bisect_refine(l, d, low[j], high[j], j);
389 }
390 block_queue.push(mrrr_task{i, cluster_end + 1, block.shift + next_shift,
391 std::move(l), std::move(d),
392 block.level + 1});
393 l.resize(n - 1); // after move out
394 d.resize(n);
395
396 i = cluster_end;
397 } else { // isolated eigenvalue
398 const double_d low_gap
399 = i == block.start
400 ? double_d(std::numeric_limits<double>::infinity())
401 : low[i - 1] - high[i];
402 const double_d high_gap
403 = i == block.end - 1
404 ? double_d(std::numeric_limits<double>::infinity())
405 : low[i] - high[i + 1];
406 const double_d min_gap = std::min(low_gap, high_gap);
407 const VectorXdd *l_ptr, *d_ptr;
408 if (!(fabs(min_gap / ((high[i] + low[i]) * 0.5)) > min_rel_sep)) {
409 if (!(fabs(min_gap / ((high[i] + low[i]) * 0.5 - shift)) > min_rel_sep
410 && min_element_growth < max_ele_growth)) {
411 const double_d max_shift = min_gap / min_rel_sep;
412 find_shift(block.l, block.d, low[i], high[i], max_ele_growth,
413 max_shift, l2, d2, shift, min_element_growth);
414 }
415 low[i] = low[i] * (1 - copysign(shift_error, low[i])) - shift;
416 high[i] = high[i] * (1 + copysign(shift_error, high[i])) - shift;
417 eigenval_bisect_refine(l2, d2, low[i], high[i], i);
418 l_ptr = &l2;
419 d_ptr = &d2;
420 } else {
421 l_ptr = &block.l;
422 d_ptr = &block.d;
423 }
424 l_big.col(i) = *l_ptr;
425 d_big.col(i) = *d_ptr;
426 }
427 }
428 }
429 matrix_cl<double_d> l_big_cl(l_big.transpose());
430 matrix_cl<double_d> d_big_cl(d_big.transpose());
431 matrix_cl<double> subdiag_cl(subdiagonal);
432 matrix_cl<double_d> shifted_eigvals_cl((low + high) * 0.5);
433 matrix_cl<double_d> l_plus_cl(n - 1, n), u_minus_cl(n - 1, n), temp_cl(n, n);
434 matrix_cl<double> eigenvectors_cl(n, n);
435 opencl_kernels::get_eigenvectors(cl::NDRange(n), l_big_cl, d_big_cl,
436 subdiag_cl, shifted_eigvals_cl, l_plus_cl,
437 u_minus_cl, temp_cl, eigenvectors_cl);
438 eigenvectors = from_matrix_cl(transpose((eigenvectors_cl)));
439}
440
451template <bool need_eigenvectors = true>
452inline void tridiagonal_eigensolver_cl(const matrix_cl<double>& diagonal_cl,
453 const matrix_cl<double>& subdiagonal_cl,
454 matrix_cl<double>& eigenvalues_cl,
455 matrix_cl<double>& eigenvectors_cl,
456 const double split_threshold = 1e-15) {
457 using std::fabs;
458 using std::sqrt;
459 const int n = diagonal_cl.rows();
460 Eigen::MatrixXd eigenvectors;
461 if (need_eigenvectors) {
462 eigenvectors.resize(n, n);
463 }
464 Eigen::VectorXd eigenvalues(n);
465 Eigen::VectorXd diagonal = from_matrix_cl<Eigen::VectorXd>(diagonal_cl);
466 Eigen::VectorXd subdiagonal = from_matrix_cl<Eigen::VectorXd>(subdiagonal_cl);
467 int last = 0;
468 for (int i = 0; i < subdiagonal.size(); i++) {
469 if (fabs(subdiagonal[i]) < split_threshold * sqrt(fabs(diagonal[i]))
470 * sqrt(fabs(diagonal[i + 1]))) {
471 if (need_eigenvectors) {
472 eigenvectors.block(last, i + 1, i + 1 - last, n - i - 1)
473 = Eigen::MatrixXd::Constant(i + 1 - last, n - i - 1, 0);
474 eigenvectors.block(i + 1, last, n - i - 1, i + 1 - last)
475 = Eigen::MatrixXd::Constant(n - i - 1, i + 1 - last, 0);
476 if (last == i) {
477 eigenvectors(last, last) = 1;
478 eigenvalues[last] = diagonal[last];
479 } else {
480 mrrr_cl<need_eigenvectors>(
481 diagonal.segment(last, i + 1 - last),
482 subdiagonal.segment(last, i - last),
483 eigenvalues.segment(last, i + 1 - last),
484 eigenvectors.block(last, last, i + 1 - last, i + 1 - last));
485 }
486 } else {
487 if (last == i) {
488 eigenvalues[last] = diagonal[last];
489 } else {
490 mrrr_cl<need_eigenvectors>(diagonal.segment(last, i + 1 - last),
491 subdiagonal.segment(last, i - last),
492 eigenvalues.segment(last, i + 1 - last),
494 }
495 }
496 last = i + 1;
497 }
498 }
499 if (last == n - 1) {
500 if (need_eigenvectors) {
501 eigenvectors(last, last) = 1;
502 }
503 eigenvalues[last] = diagonal[last];
504 } else {
505 if (need_eigenvectors) {
506 mrrr_cl<need_eigenvectors>(
507 diagonal.segment(last, n - last),
508 subdiagonal.segment(last, subdiagonal.size() - last),
509 eigenvalues.segment(last, n - last),
510 eigenvectors.block(last, last, n - last, n - last));
511 } else {
512 mrrr_cl<need_eigenvectors>(
513 diagonal.segment(last, n - last),
514 subdiagonal.segment(last, subdiagonal.size() - last),
515 eigenvalues.segment(last, n - last), eigenvectors);
516 }
517 }
518 eigenvalues_cl = to_matrix_cl(std::move(eigenvalues));
519 if (need_eigenvectors) {
520 eigenvectors_cl = to_matrix_cl(std::move(eigenvectors));
521 }
522}
523
524} // namespace internal
525} // namespace math
526} // namespace stan
527
528#endif
529#endif
Represents an arithmetic matrix on the OpenCL device.
Definition matrix_cl.hpp:47
auto transpose(Arg &&a)
Transposes a kernel generator expression.
auto diagonal(T &&a)
Diagonal of a kernel generator expression.
Definition diagonal.hpp:136
matrix_cl< scalar_type_t< T > > to_matrix_cl(T &&src)
Copies the source Eigen matrix, std::vector or scalar to the destination matrix that is stored on the...
Definition copy.hpp:45
auto from_matrix_cl(const T &src)
Copies the source matrix that is stored on the OpenCL device to the destination Eigen matrix.
Definition copy.hpp:61
double_d get_random_perturbation_multiplier()
Generates a random number for perturbing a relatively robust representation.
Definition mrrr.hpp:31
double get_ldl(const Eigen::Ref< const Eigen::VectorXd > diagonal, const Eigen::Ref< const Eigen::VectorXd > subdiagonal, const double shift, VectorXdd &l, VectorXdd &d_plus)
Calculates LDL decomposition of a shifted triagonal matrix T.
Definition mrrr.hpp:83
void find_shift(const VectorXdd &l, const VectorXdd &d, const double_d low, const double_d high, const double max_ele_growth, const double_d max_shift, VectorXdd &l2, VectorXdd &d2, double_d &shift, double &min_element_growth)
Finds good shift and shifts a LDL decomposition so as to keep element growth low.
Definition mrrr.hpp:242
double get_shifted_ldl(const VectorXdd &l, const VectorXdd &d, const double_d shift, VectorXdd &l_plus, VectorXdd &d_plus)
Shifts a LDL decomposition.
Definition mrrr.hpp:202
double find_initial_shift(const Eigen::Ref< const Eigen::VectorXd > diagonal, const Eigen::Ref< const Eigen::VectorXd > subdiagonal, VectorXdd &l0, VectorXdd &d0, const double min_eigval, const double max_eigval, const double max_ele_growth)
Finds a good value for shift of the initial LDL factorization T - shift * I = L * D * L^T.
Definition mrrr.hpp:109
bool isinf(double_d a)
Definition double_d.hpp:328
double copysign(double a, double_d b)
Definition double_d.hpp:329
int get_sturm_count_ldl(const VectorXdd &l, const VectorXdd &d, const double_d shift)
Calculates Sturm count of a LDL decomposition of a tridiagonal matrix - number of eigenvalues larger ...
Definition mrrr.hpp:139
void get_gresgorin(const Eigen::Ref< const Eigen::VectorXd > diagonal, const Eigen::Ref< const Eigen::VectorXd > subdiagonal, double &min_eigval, double &max_eigval)
Calculates bounds on eigenvalues of a symmetric tridiagonal matrix T using Gresgorin discs.
Definition mrrr.hpp:45
Eigen::Matrix< double_d, -1, 1 > VectorXdd
Definition mrrr.hpp:21
void eigenval_bisect_refine(const VectorXdd &l, const VectorXdd &d, double_d &low, double_d &high, const int i)
Refines bounds on the i-th largest eigenvalue of LDL decomposition using bisection.
Definition mrrr.hpp:170
Eigen::Matrix< double_d, -1, -1 > MatrixXdd
Definition mrrr.hpp:22
static const double_d perturbation_range
Definition mrrr.hpp:24
void mrrr_cl(const Eigen::Ref< const Eigen::VectorXd > diagonal, const Eigen::Ref< const Eigen::VectorXd > subdiagonal, Eigen::Ref< Eigen::VectorXd > eigenvalues, Eigen::Ref< Eigen::MatrixXd > eigenvectors, const double min_rel_sep=1e-4, const double maximum_ele_growth=15)
Calculates eigenvalues and eigenvectors of a irreducible tridiagonal matrix T using multiple relative...
Definition mrrr.hpp:298
bool isnan(double_d a)
Definition double_d.hpp:327
double max_nan(double a, double b)
NaN-favouring max.
Definition mrrr.hpp:69
void tridiagonal_eigensolver_cl(const matrix_cl< double > &diagonal_cl, const matrix_cl< double > &subdiagonal_cl, matrix_cl< double > &eigenvalues_cl, matrix_cl< double > &eigenvectors_cl, const double split_threshold=1e-15)
Calculates eigenvalues and eigenvectors of a tridiagonal matrix T using MRRR algorithm.
Definition mrrr.hpp:452
const kernel_cl< in_buffer, in_buffer, in_buffer, in_buffer, in_out_buffer, in_out_buffer, in_out_buffer, out_buffer > get_eigenvectors("get_eigenvectors", {stan::math::internal::double_d_src, get_eigenvectors_kernel_code})
const kernel_cl< in_buffer, in_buffer, in_buffer, in_buffer, out_buffer, out_buffer, out_buffer, double, double, double, char > eigenvals("eigenvals", {stan::math::internal::double_d_src, eigenvals_bisect_kernel_code})
static constexpr double e()
Return the base of the natural logarithm.
Definition constants.hpp:20
auto block(T_x &&x, size_t i, size_t j, size_t nrows, size_t ncols)
Return a nrows x ncols submatrix starting at (i-1, j-1).
Definition block.hpp:24
fvar< T > sqrt(const fvar< T > &x)
Definition sqrt.hpp:17
Eigen::Matrix< complex_return_t< value_type_t< EigMat > >, -1, -1 > eigenvectors(const EigMat &m)
Return the eigenvectors of a (real-valued) matrix.
Eigen::Matrix< complex_return_t< value_type_t< EigMat > >, -1, 1 > eigenvalues(const EigMat &m)
Return the eigenvalues of a (real-valued) matrix.
T plus(T &&x)
Returns the unary plus of the input.
Definition plus.hpp:17
fvar< T > fabs(const fvar< T > &x)
Definition fabs.hpp:15
The lgamma implementation in stan-math is based on either the reentrant safe lgamma_r implementation ...
bool isinf(const stan::math::var &a)
Return 1 if the specified argument is positive infinity or negative infinity and 0 otherwise.
Definition std_isinf.hpp:16