Automatic Differentiation
 
Loading...
Searching...
No Matches
qr_decomposition.hpp
Go to the documentation of this file.
1#ifndef STAN_MATH_OPENCL_QR_DECOMPOSITION_HPP
2#define STAN_MATH_OPENCL_QR_DECOMPOSITION_HPP
3#ifdef STAN_OPENCL
4
13#include <CL/opencl.hpp>
14#include <algorithm>
15#include <vector>
16#include <cmath>
17
18namespace stan {
19namespace math {
20
33template <bool need_Q = true>
35 matrix_cl<double>& R, int r = 100) {
36 using std::copysign;
37 using std::sqrt;
38 int rows = A.rows();
39 int cols = A.cols();
40 if (need_Q) {
41 Q = identity_matrix<matrix_cl<double>>(rows);
42 }
43 R = A;
44 if (rows <= 1) {
45 return;
46 }
47
48 matrix_cl<double> V_alloc(rows, r);
49 matrix_cl<double> W_alloc(rows, r);
50 Eigen::VectorXd temporary(rows > cols ? rows : cols);
51
52 for (int k = 0; k < std::min(rows - 1, cols); k += r) {
53 int actual_r = std::min({r, rows - k - 1, cols - k});
54 matrix_cl<double> V(V_alloc.buffer(), rows - k, actual_r);
55 matrix_cl<double>& Y_cl = V;
56 matrix_cl<double> W_cl(W_alloc.buffer(), rows - k, actual_r);
57
58 Eigen::MatrixXd R_cpu_block
59 = from_matrix_cl(block_zero_based(R, k, k, rows - k, actual_r));
60
61 Eigen::MatrixXd V_cpu(rows - k, actual_r);
62 V_cpu.triangularView<Eigen::StrictlyUpper>()
63 = Eigen::MatrixXd::Constant(V.rows(), V.cols(), 0);
64
65 for (int j = 0; j < actual_r; j++) {
66 Eigen::VectorXd householder = R_cpu_block.block(j, j, rows - k - j, 1);
67 householder.coeffRef(0)
68 -= copysign(householder.norm(), householder.coeff(0));
69 if (householder.rows() != 1) {
70 householder *= SQRT_TWO / householder.norm();
71 }
72
73 V_cpu.col(j).tail(V.rows() - j) = householder;
74
75 Eigen::VectorXd::AlignedMapType temp_map(temporary.data(), actual_r - j);
76 temp_map.noalias()
77 = R_cpu_block.block(j, j, rows - k - j, actual_r - j).transpose()
78 * householder;
79 R_cpu_block.block(j, j + 1, rows - k - j, actual_r - j - 1).noalias()
80 -= householder * temp_map.tail(actual_r - j - 1).transpose();
81 R_cpu_block.coeffRef(j, j) -= householder.coeff(0) * temp_map.coeff(0);
82 }
83
84 matrix_cl<double> R_back_block = to_matrix_cl(R_cpu_block);
85 R_back_block.view(matrix_cl_view::Upper);
86 block_zero_based(R, k, k, rows - k, actual_r) = R_back_block;
87
88 Eigen::MatrixXd& Y_cpu = V_cpu;
89 Eigen::MatrixXd W_cpu = V_cpu;
90 for (int j = 1; j < actual_r; j++) {
91 Eigen::VectorXd::AlignedMapType temp_map(temporary.data(), j);
92 temp_map.noalias() = Y_cpu.leftCols(j).transpose() * V_cpu.col(j);
93 W_cpu.col(j).noalias() -= W_cpu.leftCols(j) * temp_map;
94 }
95 W_cl = to_matrix_cl(W_cpu);
96 V = to_matrix_cl(V_cpu);
97
98 auto R_block
99 = block_zero_based(R, k, k + actual_r, rows - k, cols - k - actual_r);
100 R_block -= Y_cl * (transpose(W_cl) * R_block);
101
102 if (need_Q) {
103 auto Q_block = block_zero_based(Q, 0, k, Q.rows(), Q.cols() - k);
104 Q_block -= (Q_block * W_cl) * transpose(Y_cl);
105 }
106 }
107
109}
110
111} // namespace math
112} // namespace stan
113#endif
114#endif // STAN_MATH_OPENCL_QR_DECOMPOSITION_HPP
const cl::Buffer & buffer() const
const matrix_cl_view & view() const
Definition matrix_cl.hpp:70
Represents an arithmetic matrix on the OpenCL device.
Definition matrix_cl.hpp:47
auto block_zero_based(T &&a, int start_row, int start_col, int rows, int cols)
Block of a kernel generator expression.
auto transpose(Arg &&a)
Transposes a kernel generator expression.
int rows(const T_x &x)
Returns the number of rows in the specified kernel generator expression.
Definition rows.hpp:21
matrix_cl< scalar_type_t< T > > to_matrix_cl(T &&src)
Copies the source Eigen matrix, std::vector or scalar to the destination matrix that is stored on the...
Definition copy.hpp:45
int cols(const T_x &x)
Returns the number of columns in the specified kernel generator expression.
Definition cols.hpp:20
auto from_matrix_cl(const T &src)
Copies the source matrix that is stored on the OpenCL device to the destination Eigen matrix.
Definition copy.hpp:61
static constexpr double SQRT_TWO
The value of the square root of 2, .
void qr_decomposition_cl(const matrix_cl< double > &A, matrix_cl< double > &Q, matrix_cl< double > &R, int r=100)
Calculates QR decomposition of A using the block Householder algorithm.
The lgamma implementation in stan-math is based on either the reentrant safe lgamma_r implementation ...
Definition fvar.hpp:9