Automatic Differentiation
 
Loading...
Searching...
No Matches
indexing_rev.hpp
Go to the documentation of this file.
1#ifndef STAN_MATH_OPENCL_KERNELS_INDEXING_REV_HPP
2#define STAN_MATH_OPENCL_KERNELS_INDEXING_REV_HPP
3#ifdef STAN_OPENCL
4
9#include <string>
10
11namespace stan {
12namespace math {
13namespace opencl_kernels {
14
15// \cond
16static constexpr const char* indexing_rev_global_atomic_kernel_code = STRINGIFY(
17 // \endcond
33 __kernel void indexing_rev(__global double* adj, const __global int* index,
34 const __global double* res, int size) {
35 const int gid = get_global_id(0);
36 const int gsize = get_global_size(0);
37 for (int i = gid; i < size; i += gsize) {
38 atomic_add_double(adj + index[i], res[i]);
39 }
40 }
41 // \cond
42);
43// \endcond
44
48const kernel_cl<in_out_buffer, in_buffer, in_buffer, int>
50 {atomic_add_double_device_function,
51 indexing_rev_global_atomic_kernel_code});
52
53// \cond
54static constexpr const char* indexing_rev_local_atomic_kernel_code = STRINGIFY(
55 // \endcond
71 __kernel void indexing_rev(__global double* adj, const __global int* index,
72 const __global double* res,
73 __local double* adj_loc, int index_size,
74 int adj_size) {
75 const int gid = get_global_id(0);
76 const int lid = get_local_id(0);
77 const int gsize = get_global_size(0);
78 const int lsize = get_local_size(0);
79 for (int i = lid; i < adj_size; i += lsize) {
80 adj_loc[i] = 0;
81 }
82 barrier(CLK_LOCAL_MEM_FENCE);
83 for (int i = gid; i < index_size; i += gsize) {
84 local_atomic_add_double(adj_loc + index[i], res[i]);
85 }
86 barrier(CLK_LOCAL_MEM_FENCE);
87 for (int i = lid; i < adj_size; i += lsize) {
88 atomic_add_double(adj + i, adj_loc[i]);
89 }
90 }
91 // \cond
92);
93// \endcond
94
98const kernel_cl<in_out_buffer, in_buffer, in_buffer, cl::LocalSpaceArg, int,
99 int>
101 {atomic_add_double_device_function,
102 indexing_rev_local_atomic_kernel_code});
103
104// \cond
105static constexpr const char* indexing_rev_local_independent_kernel_code
106 = STRINGIFY(
107 // \endcond
126 __kernel void indexing_rev(
127 __global double* adj, const __global int* index,
128 const __global double* res, __local double* adj_loc, int index_size,
129 int adj_size) {
130 const int gid = get_global_id(0);
131 const int lid = get_local_id(0);
132 const int gsize = get_global_size(0);
133 const int lsize = get_local_size(0);
134 for (int i = lid; i < adj_size * lsize; i += lsize) {
135 adj_loc[i] = 0;
136 }
137 barrier(CLK_LOCAL_MEM_FENCE);
138 for (int i = gid; i < index_size; i += gsize) {
139 adj_loc[index[i] + lid * adj_size] += res[i];
140 }
141 barrier(CLK_LOCAL_MEM_FENCE);
142 for (int i = lid; i < adj_size; i += lsize) {
143 double p = adj_loc[i + adj_size];
144 for (int j = 2; j < lsize; j++) {
145 p += adj_loc[i + j * adj_size];
146 }
147 adj_loc[i] += p;
148 }
149 barrier(CLK_LOCAL_MEM_FENCE);
150 for (int i = lid; i < adj_size; i += lsize) {
151 atomic_add_double(adj + i, adj_loc[i]);
152 }
153 }
154 // \cond
155 ); // NOLINT(whitespace/parens)
156// \endcond
157
161const kernel_cl<in_out_buffer, in_buffer, in_buffer, cl::LocalSpaceArg, int,
162 int>
164 "indexing_rev", {atomic_add_double_device_function,
165 indexing_rev_local_independent_kernel_code});
166} // namespace opencl_kernels
167} // namespace math
168} // namespace stan
169#endif
170#endif
void atomic_add_double(__global double *val, double delta)
Atomically add to a double value.
const kernel_cl< in_out_buffer, in_buffer, in_buffer, cl::LocalSpaceArg, int, int > indexing_rev_local_independent("indexing_rev", {atomic_add_double_device_function, indexing_rev_local_independent_kernel_code})
See the docs for indexing_rev().
const kernel_cl< in_out_buffer, in_buffer, in_buffer, cl::LocalSpaceArg, int, int > indexing_rev_local_atomic("indexing_rev", {atomic_add_double_device_function, indexing_rev_local_atomic_kernel_code})
See the docs for indexing_rev().
void local_atomic_add_double(__local double *val, double delta)
Atomically add to a local double value.
__kernel void indexing_rev(__global double *adj, const __global int *index, const __global double *res, int size)
Increments adjoint of the indexing operation argument given the indices and adjoints of the indexing ...
const kernel_cl< in_out_buffer, in_buffer, in_buffer, int > indexing_rev_global_atomic("indexing_rev", {atomic_add_double_device_function, indexing_rev_global_atomic_kernel_code})
See the docs for indexing_rev().
int64_t size(const T &m)
Returns the size (number of the elements) of a matrix_cl or var_value<matrix_cl<T>>.
Definition size.hpp:19
The lgamma implementation in stan-math is based on either the reentrant safe lgamma_r implementation ...
#define STRINGIFY(...)
Definition stringify.hpp:9