Automatic Differentiation
 
Loading...
Searching...
No Matches
cholesky_decompose.hpp
Go to the documentation of this file.
1#ifndef STAN_MATH_OPENCL_CHOLESKY_DECOMPOSE_HPP
2#define STAN_MATH_OPENCL_CHOLESKY_DECOMPOSE_HPP
3#ifdef STAN_OPENCL
4
14#include <CL/opencl.hpp>
15#include <algorithm>
16#include <cmath>
17
18namespace stan {
19namespace math {
20namespace opencl {
42template <typename T, typename = require_floating_point_t<T>>
44 if (A.rows() == 0) {
45 return;
46 }
47 // Repeats the blocked cholesky decomposition until the size of the remaining
48 // submatrix is smaller or equal to the minimum blocks size
49 // or a heuristic of 100.
50 // The Cholesky (OpenCL) algorithm only uses one local block so we need the
51 // matrix To be less than the max thread block size.
53 try {
55 cl::NDRange(A.rows()), A, A.rows());
56 } catch (const cl::Error& e) {
57 check_opencl_error("cholesky_decompose", e);
58 }
60 return;
61 }
62 // NOTE: The code in this section follows the naming conventions
63 // in the report linked in the docs.
64 const int block
66 // Subset the top left block of the input A into A_11
67 matrix_cl<T> A_11 = block_zero_based(A, 0, 0, block, block);
68 // The following function either calls the
69 // blocked cholesky recursively for the submatrix A_11
70 // or calls the kernel directly if the size of the block is small enough
72 // Copies L_11 back to the input matrix
74 = block_zero_based(A_11, 0, 0, block, block);
75
76 const int block_subset = A.rows() - block;
77 matrix_cl<T> A_21 = block_zero_based(A, block, 0, block_subset, block);
78 // computes A_21*((L_11^-1)^T)
79 // and copies the resulting submatrix to the lower left hand corner of A
80 matrix_cl<T> L_21 = A_21 * transpose(tri_inverse(A_11));
81 block_zero_based(A, block, 0, block_subset, block)
82 = block_zero_based(L_21, 0, 0, block_subset, block);
83 matrix_cl<T> A_22
84 = block_zero_based(A, block, block, block_subset, block_subset);
85 // computes A_22 - L_21*(L_21^T)
86 matrix_cl<T> L_22 = A_22 - multiply_transpose(L_21);
87 // copy L_22 into A's lower left hand corner
89 block_zero_based(A, block, block, block_subset, block_subset)
90 = block_zero_based(L_22, 0, 0, block_subset, block_subset);
92}
93} // namespace opencl
94} // namespace math
95} // namespace stan
96
97#endif
98#endif
const matrix_cl_view & view() const
Definition matrix_cl.hpp:70
Represents an arithmetic matrix on the OpenCL device.
Definition matrix_cl.hpp:47
The API to access the methods and values in opencl_context_base.
void check_opencl_error(const char *function, const cl::Error &e)
Throws the domain error with specifying the OpenCL error that occurred.
opencl_context_base::tuning_struct & tuning_opts() noexcept
Returns the thread block size for the Cholesky Decompositions L_11.
auto block_zero_based(T &&a, int start_row, int start_col, int rows, int cols)
Block of a kernel generator expression.
auto transpose(Arg &&a)
Transposes a kernel generator expression.
const kernel_cl< in_out_buffer, int > cholesky_decompose("cholesky_decompose", {indexing_helpers, cholesky_decompose_kernel_code})
See the docs for cholesky_decompose() .
plain_type_t< T > tri_inverse(const T &A)
Computes the inverse of a triangular matrix.
void cholesky_decompose(matrix_cl< T > &A)
Performs an in-place computation of the lower-triangular Cholesky factor (i.e., matrix square root) o...
matrix_cl< T > multiply_transpose(const matrix_cl< T > &A)
Computes the product of a square OpenCL matrix with its transpose.
static constexpr double e()
Return the base of the natural logarithm.
Definition constants.hpp:20
auto block(T_x &&x, size_t i, size_t j, size_t nrows, size_t ncols)
Return a nrows x ncols submatrix starting at (i-1, j-1).
Definition block.hpp:24
The lgamma implementation in stan-math is based on either the reentrant safe lgamma_r implementation ...
Definition fvar.hpp:9