1#ifndef STAN_MATH_OPENCL_KERNELS_INDEXING_REV_HPP
2#define STAN_MATH_OPENCL_KERNELS_INDEXING_REV_HPP
13namespace opencl_kernels {
16static constexpr const char* indexing_rev_global_atomic_kernel_code =
STRINGIFY(
33 __kernel
void indexing_rev(__global
double* adj,
const __global
int* index,
34 const __global
double* res,
int size) {
35 const int gid = get_global_id(0);
36 const int gsize = get_global_size(0);
37 for (
int i = gid; i <
size; i += gsize) {
48const kernel_cl<in_out_buffer, in_buffer, in_buffer, int>
50 {atomic_add_double_device_function,
51 indexing_rev_global_atomic_kernel_code});
54static constexpr const char* indexing_rev_local_atomic_kernel_code =
STRINGIFY(
71 __kernel
void indexing_rev(__global
double* adj,
const __global
int* index,
72 const __global
double* res,
73 __local
double* adj_loc,
int index_size,
75 const int gid = get_global_id(0);
76 const int lid = get_local_id(0);
77 const int gsize = get_global_size(0);
78 const int lsize = get_local_size(0);
79 for (
int i = lid; i < adj_size; i += lsize) {
82 barrier(CLK_LOCAL_MEM_FENCE);
83 for (
int i = gid; i < index_size; i += gsize) {
86 barrier(CLK_LOCAL_MEM_FENCE);
87 for (
int i = lid; i < adj_size; i += lsize) {
98const kernel_cl<in_out_buffer, in_buffer, in_buffer, cl::LocalSpaceArg, int,
101 {atomic_add_double_device_function,
102 indexing_rev_local_atomic_kernel_code});
105static constexpr const char* indexing_rev_local_independent_kernel_code
127 __global
double* adj,
const __global
int* index,
128 const __global
double* res, __local
double* adj_loc,
int index_size,
130 const int gid = get_global_id(0);
131 const int lid = get_local_id(0);
132 const int gsize = get_global_size(0);
133 const int lsize = get_local_size(0);
134 for (
int i = lid; i < adj_size * lsize; i += lsize) {
137 barrier(CLK_LOCAL_MEM_FENCE);
138 for (
int i = gid; i < index_size; i += gsize) {
139 adj_loc[index[i] + lid * adj_size] += res[i];
141 barrier(CLK_LOCAL_MEM_FENCE);
142 for (
int i = lid; i < adj_size; i += lsize) {
143 double p = adj_loc[i + adj_size];
144 for (
int j = 2; j < lsize; j++) {
145 p += adj_loc[i + j * adj_size];
149 barrier(CLK_LOCAL_MEM_FENCE);
150 for (
int i = lid; i < adj_size; i += lsize) {
161const kernel_cl<in_out_buffer, in_buffer, in_buffer, cl::LocalSpaceArg, int,
164 "indexing_rev", {atomic_add_double_device_function,
165 indexing_rev_local_independent_kernel_code});
void atomic_add_double(__global double *val, double delta)
Atomically add to a double value.
const kernel_cl< in_out_buffer, in_buffer, in_buffer, cl::LocalSpaceArg, int, int > indexing_rev_local_independent("indexing_rev", {atomic_add_double_device_function, indexing_rev_local_independent_kernel_code})
See the docs for add_batch() .
const kernel_cl< in_out_buffer, in_buffer, in_buffer, cl::LocalSpaceArg, int, int > indexing_rev_local_atomic("indexing_rev", {atomic_add_double_device_function, indexing_rev_local_atomic_kernel_code})
See the docs for add_batch() .
void local_atomic_add_double(__local double *val, double delta)
Atomically add to a local double value.
__kernel void indexing_rev(__global double *adj, const __global int *index, const __global double *res, int size)
Increments the adjoint of the indexing operation's argument, given the indices and the adjoints of the indexing result.
const kernel_cl< in_out_buffer, in_buffer, in_buffer, int > indexing_rev_global_atomic("indexing_rev", {atomic_add_double_device_function, indexing_rev_global_atomic_kernel_code})
See the docs for add_batch() .
int64_t size(const T &m)
Returns the size (number of the elements) of a matrix_cl or var_value<matrix_cl<T>>.
The lgamma implementation in stan-math is based on either the reentrant safe lgamma_r implementation ...