Automatic Differentiation
 
Loading...
Searching...
No Matches
kernel_cl.hpp
Go to the documentation of this file.
1#ifndef STAN_MATH_OPENCL_KERNEL_CL_HPP
2#define STAN_MATH_OPENCL_KERNEL_CL_HPP
3#ifdef STAN_OPENCL
4
13#include <CL/opencl.hpp>
14#include <algorithm>
15#include <map>
16#include <string>
17#include <vector>
18#include <utility>
19
20namespace stan {
21namespace math {
22namespace opencl_kernels {
23namespace internal {
24
33template <typename T, require_not_matrix_cl_t<T>* = nullptr>
34inline const T& get_kernel_args(const T& t) {
35 return t;
36}
37
45template <typename K, require_matrix_cl_t<K>* = nullptr>
46inline const cl::Buffer& get_kernel_args(const K& m) {
47 return m.buffer();
48}
49
57template <typename T, require_not_matrix_cl_t<T>* = nullptr>
58inline void assign_event(const cl::Event& e, const T&) {}
59
60template <typename T, typename K, require_matrix_cl_t<K>* = nullptr,
61 require_same_t<T, in_buffer>* = nullptr>
62inline void assign_event(const cl::Event& e, const K& m) {
63 m.add_read_event(e);
64}
65template <typename T, typename K, require_matrix_cl_t<K>* = nullptr,
66 require_same_t<T, out_buffer>* = nullptr>
67inline void assign_event(const cl::Event& e, K& m) {
68 m.add_write_event(e);
69}
70template <typename T, typename K, require_matrix_cl_t<K>* = nullptr,
71 require_same_t<T, in_out_buffer>* = nullptr>
72inline void assign_event(const cl::Event& e, K& m) {
73 m.add_read_write_event(e);
74}
75
91template <typename T, require_same_t<T, cl::Event>* = nullptr>
92inline void assign_events(const T&) {}
93
94template <typename Arg, typename... Args, typename CallArg,
95 typename... CallArgs>
96inline void assign_events(const cl::Event& new_event, CallArg& m,
97 CallArgs&... args) {
98 assign_event<Arg>(new_event, m);
99 assign_events<Args...>(new_event, args...);
100}
101
111template <typename T, require_not_matrix_cl_t<T>* = nullptr>
112inline tbb::concurrent_vector<cl::Event> select_events(const T& m) {
113 return tbb::concurrent_vector<cl::Event>{};
114}
115template <typename T, typename K, require_matrix_cl_t<K>* = nullptr,
116 require_same_t<T, in_buffer>* = nullptr>
117inline const tbb::concurrent_vector<cl::Event>& select_events(const K& m) {
118 return m.write_events();
119}
120template <typename T, typename K, require_matrix_cl_t<K>* = nullptr,
121 require_any_same_t<T, out_buffer, in_out_buffer>* = nullptr>
122inline tbb::concurrent_vector<cl::Event> select_events(K& m) {
123 static_assert(!std::is_const<K>::value, "Can not write to const matrix_cl!");
124 return m.read_write_events();
125}
126
127} // namespace internal
128
136inline auto compile_kernel(
137 const char* name, const std::vector<std::string>& sources,
138 const std::unordered_map<std::string, int>& options) {
139 auto base_opts = opencl_context.base_opts();
140 for (auto& it : options) {
141 if (base_opts[it.first] > it.second) {
142 base_opts[it.first] = it.second;
143 }
144 }
145 std::string kernel_opts = "";
146 for (auto&& comp_opts : base_opts) {
147 kernel_opts += std::string(" -D") + comp_opts.first + "="
148 + std::to_string(comp_opts.second);
149 }
150 cl::Program program(opencl_context.context(), sources);
151 try {
152 program.build({opencl_context.device()}, kernel_opts.c_str());
153
154 return cl::Kernel(program, name);
155 } catch (const cl::Error& e) {
156 // in case of CL_BUILD_PROGRAM_FAILURE, print the build error
157 if (e.err() == -11) {
158 std::string buildlog = program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(
160 system_error("compile_kernel", name, e.err(), buildlog.c_str());
161 } else {
162 check_opencl_error(name, e);
163 }
164 }
165 return cl::Kernel(); // never reached because check_opencl_error throws
166}
167
173template <typename... Args>
174struct kernel_cl {
175 private:
176 const char* name_;
177 std::vector<std::string> sources_;
178 std::unordered_map<std::string, int> opts_;
179 mutable cl::Kernel kernel_;
180
181 public:
189 kernel_cl(const char* name, std::vector<std::string> sources,
190 std::unordered_map<std::string, int> options = {})
191 : name_(name), sources_(std::move(sources)), opts_(std::move(options)) {}
192
201 template <typename... CallArgs>
202 auto operator()(cl::NDRange global_thread_size, CallArgs&&... args) const {
203 if (kernel_() == NULL) {
206 }
207 cl::EnqueueArgs eargs(opencl_context.queue(),
208 vec_concat(std::vector<cl::Event>{},
209 internal::select_events<Args>(args)...),
210 global_thread_size);
211 cl::KernelFunctor<internal::to_const_buffer_t<Args>&...> kernel_functor(
212 kernel_);
213 cl::Event kern_event
214 = kernel_functor(eargs, internal::get_kernel_args(args)...);
215 internal::assign_events<Args...>(kern_event, args...);
216 return kern_event;
217 }
218
228 template <typename... CallArgs>
229 auto operator()(cl::NDRange global_thread_size, cl::NDRange thread_block_size,
230 CallArgs&&... args) const {
231 if (kernel_() == NULL) {
234 }
235 cl::EnqueueArgs eargs(opencl_context.queue(),
236 vec_concat(std::vector<cl::Event>{},
237 internal::select_events<Args>(args)...),
238 global_thread_size, thread_block_size);
239 cl::KernelFunctor<internal::to_const_buffer_t<Args>&...> kernel_functor(
240 kernel_);
241 cl::Event kern_event
242 = kernel_functor(eargs, internal::get_kernel_args(args)...);
243 internal::assign_events<Args...>(kern_event, args...);
244 return kern_event;
245 }
246
252 int get_option(const std::string option_name) const {
253 return std::min(opts_.at(option_name),
254 opencl_context.base_opts().at(option_name));
255 }
256};
257
258} // namespace opencl_kernels
259} // namespace math
260} // namespace stan
261
262#endif
263#endif
void register_kernel_cache(cl::Kernel *cache)
Registers a cached kernel.
The API to access the methods and values in opencl_context_base.
void check_opencl_error(const char *function, const cl::Error &e)
Throws a domain error that specifies the OpenCL error that occurred.
auto operator()(cl::NDRange global_thread_size, cl::NDRange thread_block_size, CallArgs &&... args) const
Executes a kernel.
kernel_cl(const char *name, std::vector< std::string > sources, std::unordered_map< std::string, int > options={})
Creates a functor for kernels that only need the global work size to be defined.
auto operator()(cl::NDRange global_thread_size, CallArgs &&... args) const
Executes a kernel.
void assign_event(const cl::Event &e, const T &)
Assigns the event to a matrix_cl.
Definition kernel_cl.hpp:58
int get_option(const std::string option_name) const
Retrieves an option used for compiling the kernel.
void assign_events(const T &)
Adds the event to any matrix_cls in the arguments depending on whether they are in_buffer,...
Definition kernel_cl.hpp:92
const T & get_kernel_args(const T &t)
Extracts the kernel's arguments, used in the global and local kernel constructor.
Definition kernel_cl.hpp:34
tbb::concurrent_vector< cl::Event > select_events(const T &m)
Select events from kernel arguments.
auto compile_kernel(const char *name, const std::vector< std::string > &sources, const std::unordered_map< std::string, int > &options)
Compile an OpenCL kernel.
opencl_context_base::map_base_opts & base_opts() noexcept
Returns a reference to the map of kernel defines.
cl::Context & context() noexcept
Returns the reference to the OpenCL context.
std::vector< cl::Device > & device() noexcept
Returns a vector containing the OpenCL device used to create the context.
cl::CommandQueue & queue() noexcept
Returns the reference to the active OpenCL command queue for the device.
static constexpr double e()
Return the base of the natural logarithm.
Definition constants.hpp:20
auto vec_concat(const Vec &v1, const Args &... args)
Get the event stack from a vector of events and other arguments.
void system_error(const char *function, const char *name, const int &y, const char *msg1, const char *msg2)
Throw a system error with a consistently formatted message.
The lgamma implementation in stan-math is based on either the reentrant safe lgamma_r implementation ...
STL namespace.
std::vector< std::string > sources_
std::unordered_map< std::string, int > opts_
Creates functor for kernels.