1#ifndef STAN_MATH_OPENCL_TRIDIAGONALIZATION_HPP
2#define STAN_MATH_OPENCL_TRIDIAGONALIZATION_HPP
33 for (
size_t k = 0; k < A.
rows() - 2; k += r) {
34 const int actual_r = std::min({r,
static_cast<int>(A.
rows() - k - 2)});
38 for (
size_t j = 0; j < actual_r; j++) {
44 cl::NDRange(hh_local), cl::NDRange(hh_local), packed, V_cl, q_cl,
45 packed.
rows(), V_cl.rows(), j, k);
51 cl::NDRange(v_step_1_local * j), cl::NDRange(v_step_1_local),
52 packed, V_cl, Uu, Vu, packed.
rows(), V_cl.rows(), k);
58 cl::NDRange((A.
rows() - k - j - 1 + v_step_2_local - 1)
59 / v_step_2_local * v_step_2_local),
60 cl::NDRange(v_step_2_local), packed, V_cl, Uu, Vu, packed.
rows(),
66 cl::NDRange(v_step_3_local), cl::NDRange(v_step_3_local), packed,
67 V_cl, q_cl, packed.
rows(), V_cl.rows(), k, j);
68 }
catch (cl::Error&
e) {
73 packed, k + actual_r, k, A.
rows() - k - actual_r, actual_r);
75 V_cl, actual_r - 1, 0, V_cl.rows() - actual_r + 1, actual_r);
80 partial_update_cl.
rows(), partial_update_cl.
cols());
103 Eigen::MatrixXd scratch_space(A.
rows(), r);
104 for (
int k = (packed.rows() - 3) / r * r; k >= 0; k -= r) {
105 const int actual_r = std::min({r,
static_cast<int>(packed.rows() - k - 2)});
106 Eigen::MatrixXd W(packed.rows() - k - 1, actual_r);
107 W.col(0) = packed.col(k).tail(W.rows());
108 for (
size_t j = 1; j < actual_r; j++) {
109 scratch_space.col(0).head(j).noalias()
110 = packed.block(k + j + 1, k, packed.rows() - k - j - 1, j).transpose()
111 * packed.col(j + k).tail(packed.rows() - k - j - 1);
112 W.col(j).noalias() = -W.leftCols(j) * scratch_space.col(0).head(j);
113 W.col(j).tail(W.rows() - j)
114 += packed.col(j + k).tail(packed.rows() - k - j - 1);
124 = packed_block_transpose_triang_cl * A_bottom_cl_eval;
const matrix_cl_view & view() const
Represents an arithmetic matrix on the OpenCL device.
void check_opencl_error(const char *function, const cl::Error &e)
Throws the domain error with specifying the OpenCL error that occurred.
auto block_zero_based(T &&a, int start_row, int start_col, int rows, int cols)
Block of a kernel generator expression.
auto transpose(Arg &&a)
Transposes a kernel generator expression.
auto constant(const T a, int rows, int cols)
Matrix of repeated values in kernel generator expressions.
auto from_matrix_cl(const T &src)
Copies the source matrix that is stored on the OpenCL device to the destination Eigen matrix.
void block_householder_tridiag_cl(const matrix_cl< double > &A, matrix_cl< double > &packed, const int r=60)
Tridiagonalize a symmetric matrix using block Housholder algorithm.
void block_apply_packed_Q_cl(const matrix_cl< double > &packed_cl, matrix_cl< double > &A, const int r=200)
Calculates Q*A in-place.
const kernel_cl< in_buffer, in_buffer, out_buffer, out_buffer, int, int, int > tridiagonalization_v_step_1("tridiagonalization_v_step_1", {tridiagonalization_v_step_1_kernel_code}, {{"REDUCTION_STEP_SIZE", 4}, {"LOCAL_SIZE_", 64}})
const kernel_cl< in_buffer, out_buffer, in_buffer, in_buffer, int, int, int, int > tridiagonalization_v_step_2("tridiagonalization_v_step_2", {tridiagonalization_v_step_2_kernel_code}, {{"REDUCTION_STEP_SIZE", 4}, {"LOCAL_SIZE_", 64}})
const kernel_cl< in_out_buffer, in_out_buffer, out_buffer, int, int, int, int > tridiagonalization_v_step_3("tridiagonalization_v_step_3", {tridiagonalization_v_step_3_kernel_code}, {{"REDUCTION_STEP_SIZE", 4}, {"LOCAL_SIZE_", 1024}})
const kernel_cl< in_out_buffer, in_out_buffer, out_buffer, int, int, int, int > tridiagonalization_householder("tridiagonalization_householder", {tridiagonalization_householder_kernel_code}, {{"REDUCTION_STEP_SIZE", 4}, {"LOCAL_SIZE_", 1024}})
static constexpr double e()
Return the base of the natural logarithm.
auto block(T_x &&x, size_t i, size_t j, size_t nrows, size_t ncols)
Return a nrows x ncols submatrix starting at (i-1, j-1).
The lgamma implementation in stan-math is based on either the reentrant safe lgamma_r implementation ...