1#ifndef STAN_MATH_OPENCL_KERNELS_DIAGONAL_INVERSE_LOWER_TRI_HPP
2#define STAN_MATH_OPENCL_KERNELS_DIAGONAL_INVERSE_LOWER_TRI_HPP
11namespace opencl_kernels {
13static constexpr const char* diag_inv_kernel_code =
STRINGIFY(
43 __kernel
void diag_inv(__global
double* A, __global
double* tmp_inv,
45 int index = get_local_id(0);
46 int group = get_group_id(0);
47 int block_size = get_local_size(0);
48 int A_offset = group * block_size;
50 int tmp_offset = group * block_size * block_size + index * block_size;
56 for (
int k = 0; k < block_size; k++) {
57 double diag_ele = A(A_offset + k, A_offset + k);
64 tmp_inv[tmp_offset + k] /= diag_ele;
66 barrier(CLK_LOCAL_MEM_FENCE);
69 for (
int i =
max(k + 1, index); i < block_size; i++) {
70 double factor = A(A_offset + i, A_offset + k);
71 tmp_inv[tmp_offset + i] -= tmp_inv[tmp_offset + k] * factor;
73 barrier(CLK_LOCAL_MEM_FENCE);
75 for (
int j = 0; j < block_size; j++) {
77 A(A_offset + j, A_offset + index) = tmp_inv[tmp_offset + j];
90 {{
"THREAD_BLOCK_SIZE", 32}});
const kernel_cl< in_out_buffer, in_out_buffer, int > diag_inv("diag_inv", {indexing_helpers, diag_inv_kernel_code}, {{"THREAD_BLOCK_SIZE", 32}})
See the docs for diag_inv().
int64_t rows(const T_x &x)
Returns the number of rows in the specified kernel generator expression.
static const std::string indexing_helpers
Defines helper macros for common matrix indexing operations.
auto max(T1 x, T2 y)
Returns the maximum value of the two specified scalar arguments.
The lgamma implementation in stan-math is based on either the reentrant safe lgamma_r implementation ...
Creates a functor for kernels.