1#ifndef STAN_MATH_OPENCL_QR_DECOMPOSITION_HPP
2#define STAN_MATH_OPENCL_QR_DECOMPOSITION_HPP
13#include <CL/opencl.hpp>
33template <
bool need_Q = true>
41 Q = identity_matrix<matrix_cl<double>>(
rows);
52 for (
int k = 0; k < std::min(
rows - 1,
cols); k += r) {
53 int actual_r = std::min({r,
rows - k - 1,
cols - k});
58 Eigen::MatrixXd R_cpu_block
61 Eigen::MatrixXd V_cpu(
rows - k, actual_r);
62 V_cpu.triangularView<Eigen::StrictlyUpper>()
63 = Eigen::MatrixXd::Constant(V.rows(), V.cols(), 0);
65 for (
int j = 0; j < actual_r; j++) {
66 Eigen::VectorXd householder = R_cpu_block.block(j, j,
rows - k - j, 1);
67 householder.coeffRef(0)
68 -= copysign(householder.norm(), householder.coeff(0));
69 if (householder.rows() != 1) {
70 householder *=
SQRT_TWO / householder.norm();
73 V_cpu.col(j).tail(V.rows() - j) = householder;
75 Eigen::VectorXd::AlignedMapType temp_map(temporary.data(), actual_r - j);
77 = R_cpu_block.block(j, j,
rows - k - j, actual_r - j).transpose()
79 R_cpu_block.block(j, j + 1,
rows - k - j, actual_r - j - 1).noalias()
80 -= householder * temp_map.tail(actual_r - j - 1).transpose();
81 R_cpu_block.coeffRef(j, j) -= householder.coeff(0) * temp_map.coeff(0);
88 Eigen::MatrixXd& Y_cpu = V_cpu;
89 Eigen::MatrixXd W_cpu = V_cpu;
90 for (
int j = 1; j < actual_r; j++) {
91 Eigen::VectorXd::AlignedMapType temp_map(temporary.data(), j);
92 temp_map.noalias() = Y_cpu.leftCols(j).transpose() * V_cpu.col(j);
93 W_cpu.col(j).noalias() -= W_cpu.leftCols(j) * temp_map;
100 R_block -= Y_cl * (
transpose(W_cl) * R_block);
104 Q_block -= (Q_block * W_cl) *
transpose(Y_cl);
const cl::Buffer & buffer() const
const matrix_cl_view & view() const
Represents an arithmetic matrix on the OpenCL device.
auto block_zero_based(T &&a, int start_row, int start_col, int rows, int cols)
Block of a kernel generator expression.
auto transpose(Arg &&a)
Transposes a kernel generator expression.
int64_t cols(const T_x &x)
Returns the number of columns in the specified kernel generator expression.
int64_t rows(const T_x &x)
Returns the number of rows in the specified kernel generator expression.
matrix_cl< scalar_type_t< T > > to_matrix_cl(T &&src)
Copies the source Eigen matrix, std::vector or scalar to the destination matrix that is stored on the...
auto from_matrix_cl(const T &src)
Copies the source matrix that is stored on the OpenCL device to the destination Eigen matrix.
static constexpr double SQRT_TWO
The value of the square root of 2, .
void qr_decomposition_cl(const matrix_cl< double > &A, matrix_cl< double > &Q, matrix_cl< double > &R, int r=100)
Calculates QR decomposition of A using the block Householder algorithm.
The lgamma implementation in stan-math is based on either the reentrant safe lgamma_r implementation ...