Automatic Differentiation
 
Loading...
Searching...
No Matches
tridiagonalization.hpp
Go to the documentation of this file.
1#ifndef STAN_MATH_OPENCL_TRIDIAGONALIZATION_HPP
2#define STAN_MATH_OPENCL_TRIDIAGONALIZATION_HPP
3
4#ifdef STAN_OPENCL
5
10
12
13namespace stan {
14namespace math {
15namespace internal {
16
30 matrix_cl<double>& packed,
31 const int r = 60) {
32 packed = A;
33 for (size_t k = 0; k < A.rows() - 2; k += r) {
34 const int actual_r = std::min({r, static_cast<int>(A.rows() - k - 2)});
35 matrix_cl<double> V_cl = constant(0.0, A.rows() - k - 1, actual_r + 1);
36
37 matrix_cl<double> Uu(actual_r, 1), Vu(actual_r, 1), q_cl(1, 1);
38 for (size_t j = 0; j < actual_r; j++) {
39 try {
40 int hh_local
42 "LOCAL_SIZE_");
44 cl::NDRange(hh_local), cl::NDRange(hh_local), packed, V_cl, q_cl,
45 packed.rows(), V_cl.rows(), j, k);
46 if (j != 0) {
47 int v_step_1_local
49 "LOCAL_SIZE_");
51 cl::NDRange(v_step_1_local * j), cl::NDRange(v_step_1_local),
52 packed, V_cl, Uu, Vu, packed.rows(), V_cl.rows(), k);
53 }
54 int v_step_2_local
56 "LOCAL_SIZE_");
58 cl::NDRange((A.rows() - k - j - 1 + v_step_2_local - 1)
59 / v_step_2_local * v_step_2_local),
60 cl::NDRange(v_step_2_local), packed, V_cl, Uu, Vu, packed.rows(),
61 V_cl.rows(), k, j);
62 int v_step_3_local
64 "LOCAL_SIZE_");
66 cl::NDRange(v_step_3_local), cl::NDRange(v_step_3_local), packed,
67 V_cl, q_cl, packed.rows(), V_cl.rows(), k, j);
68 } catch (cl::Error& e) {
69 check_opencl_error("block_householder_tridiag_cl", e);
70 }
71 }
73 packed, k + actual_r, k, A.rows() - k - actual_r, actual_r);
75 V_cl, actual_r - 1, 0, V_cl.rows() - actual_r + 1, actual_r);
76 matrix_cl<double> partial_update_cl = U_cl * transpose(V_block_cl);
77
78 auto block
79 = block_zero_based(packed, k + actual_r, k + actual_r,
80 partial_update_cl.rows(), partial_update_cl.cols());
81 block = block - partial_update_cl - transpose(partial_update_cl);
82 }
83 block_zero_based(packed, packed.rows() - 2, packed.cols() - 1, 1, 1)
84 = block_zero_based(packed, packed.rows() - 1, packed.cols() - 2, 1, 1);
85}
86
100inline void block_apply_packed_Q_cl(const matrix_cl<double>& packed_cl,
101 matrix_cl<double>& A, const int r = 200) {
102 Eigen::MatrixXd packed = from_matrix_cl(packed_cl);
103 Eigen::MatrixXd scratch_space(A.rows(), r);
104 for (int k = (packed.rows() - 3) / r * r; k >= 0; k -= r) {
105 const int actual_r = std::min({r, static_cast<int>(packed.rows() - k - 2)});
106 Eigen::MatrixXd W(packed.rows() - k - 1, actual_r);
107 W.col(0) = packed.col(k).tail(W.rows());
108 for (size_t j = 1; j < actual_r; j++) {
109 scratch_space.col(0).head(j).noalias()
110 = packed.block(k + j + 1, k, packed.rows() - k - j - 1, j).transpose()
111 * packed.col(j + k).tail(packed.rows() - k - j - 1);
112 W.col(j).noalias() = -W.leftCols(j) * scratch_space.col(0).head(j);
113 W.col(j).tail(W.rows() - j)
114 += packed.col(j + k).tail(packed.rows() - k - j - 1);
115 }
116 matrix_cl<double> packed_block_transpose_triang_cl = transpose(
117 block_zero_based(packed_cl, k + 1, k, packed.rows() - k - 1, actual_r));
118 packed_block_transpose_triang_cl.view(matrix_cl_view::Upper);
119 matrix_cl<double> W_cl(W);
120 auto A_bottom_cl
121 = block_zero_based(A, k + 1, 0, A.rows() - k - 1, A.cols());
122 matrix_cl<double> A_bottom_cl_eval = A_bottom_cl;
124 = packed_block_transpose_triang_cl * A_bottom_cl_eval;
125 matrix_cl<double> tmp2 = W_cl * tmp1;
126 A_bottom_cl -= tmp2;
127 }
128}
129
130} // namespace internal
131} // namespace math
132} // namespace stan
133
134#endif
135#endif
const matrix_cl_view & view() const
Definition matrix_cl.hpp:70
Represents an arithmetic matrix on the OpenCL device.
Definition matrix_cl.hpp:47
void check_opencl_error(const char *function, const cl::Error &e)
Throws the domain error with specifying the OpenCL error that occurred.
auto block_zero_based(T &&a, int start_row, int start_col, int rows, int cols)
Block of a kernel generator expression.
auto transpose(Arg &&a)
Transposes a kernel generator expression.
auto constant(const T a, int rows, int cols)
Matrix of repeated values in kernel generator expressions.
Definition constant.hpp:130
auto from_matrix_cl(const T &src)
Copies the source matrix that is stored on the OpenCL device to the destination Eigen matrix.
Definition copy.hpp:61
void block_householder_tridiag_cl(const matrix_cl< double > &A, matrix_cl< double > &packed, const int r=60)
Tridiagonalize a symmetric matrix using block Housholder algorithm.
void block_apply_packed_Q_cl(const matrix_cl< double > &packed_cl, matrix_cl< double > &A, const int r=200)
Calculates Q*A in-place.
const kernel_cl< in_buffer, in_buffer, out_buffer, out_buffer, int, int, int > tridiagonalization_v_step_1("tridiagonalization_v_step_1", {tridiagonalization_v_step_1_kernel_code}, {{"REDUCTION_STEP_SIZE", 4}, {"LOCAL_SIZE_", 64}})
const kernel_cl< in_buffer, out_buffer, in_buffer, in_buffer, int, int, int, int > tridiagonalization_v_step_2("tridiagonalization_v_step_2", {tridiagonalization_v_step_2_kernel_code}, {{"REDUCTION_STEP_SIZE", 4}, {"LOCAL_SIZE_", 64}})
const kernel_cl< in_out_buffer, in_out_buffer, out_buffer, int, int, int, int > tridiagonalization_v_step_3("tridiagonalization_v_step_3", {tridiagonalization_v_step_3_kernel_code}, {{"REDUCTION_STEP_SIZE", 4}, {"LOCAL_SIZE_", 1024}})
const kernel_cl< in_out_buffer, in_out_buffer, out_buffer, int, int, int, int > tridiagonalization_householder("tridiagonalization_householder", {tridiagonalization_householder_kernel_code}, {{"REDUCTION_STEP_SIZE", 4}, {"LOCAL_SIZE_", 1024}})
static constexpr double e()
Return the base of the natural logarithm.
Definition constants.hpp:20
auto block(T_x &&x, size_t i, size_t j, size_t nrows, size_t ncols)
Return a nrows x ncols submatrix starting at (i-1, j-1).
Definition block.hpp:24
The lgamma implementation in stan-math is based on either the reentrant safe lgamma_r implementation ...