1#ifndef STAN_MATH_OPENCL_KERNEL_CL_HPP
2#define STAN_MATH_OPENCL_KERNEL_CL_HPP
13#include <CL/opencl.hpp>
22namespace opencl_kernels {
33template <
typename T, require_not_matrix_cl_t<T>* =
nullptr>
45template <
typename K, require_matrix_cl_t<K>* =
nullptr>
57template <
typename T, require_not_matrix_cl_t<T>* =
nullptr>
60template <
typename T,
typename K, require_matrix_cl_t<K>* =
nullptr,
61 require_same_t<T, in_buffer>* =
nullptr>
65template <
typename T,
typename K, require_matrix_cl_t<K>* =
nullptr,
66 require_same_t<T, out_buffer>* =
nullptr>
70template <
typename T,
typename K, require_matrix_cl_t<K>* =
nullptr,
71 require_same_t<T, in_out_buffer>* =
nullptr>
73 m.add_read_write_event(
e);
91template <
typename T, require_same_t<T, cl::Event>* =
nullptr>
94template <
typename Arg,
typename... Args,
typename CallArg,
98 assign_event<Arg>(new_event, m);
111template <
typename T, require_not_matrix_cl_t<T>* =
nullptr>
113 return tbb::concurrent_vector<cl::Event>{};
115template <
typename T,
typename K, require_matrix_cl_t<K>* =
nullptr,
116 require_same_t<T, in_buffer>* =
nullptr>
118 return m.write_events();
120template <
typename T,
typename K, require_matrix_cl_t<K>* =
nullptr,
121 require_any_same_t<T, out_buffer, in_out_buffer>* =
nullptr>
123 static_assert(!std::is_const<K>::value,
"Can not write to const matrix_cl!");
124 return m.read_write_events();
137 const char* name,
const std::vector<std::string>& sources,
138 const std::unordered_map<std::string, int>& options) {
140 for (
auto& it : options) {
141 if (base_opts[it.first] > it.second) {
142 base_opts[it.first] = it.second;
145 std::string kernel_opts =
"";
146 for (
auto&& comp_opts : base_opts) {
147 kernel_opts += std::string(
" -D") + comp_opts.first +
"="
148 + std::to_string(comp_opts.second);
154 return cl::Kernel(program, name);
155 }
catch (
const cl::Error&
e) {
157 if (
e.err() == -11) {
158 std::string buildlog = program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(
160 system_error(
"compile_kernel", name,
e.err(), buildlog.c_str());
173template <
typename... Args>
178 std::unordered_map<std::string, int>
opts_;
189 kernel_cl(
const char* name, std::vector<std::string> sources,
190 std::unordered_map<std::string, int> options = {})
201 template <
typename... CallArgs>
202 auto operator()(cl::NDRange global_thread_size, CallArgs&&... args)
const {
209 internal::select_events<Args>(args)...),
211 cl::KernelFunctor<internal::to_const_buffer_t<Args>&...> kernel_functor(
228 template <
typename... CallArgs>
229 auto operator()(cl::NDRange global_thread_size, cl::NDRange thread_block_size,
230 CallArgs&&... args)
const {
237 internal::select_events<Args>(args)...),
238 global_thread_size, thread_block_size);
239 cl::KernelFunctor<internal::to_const_buffer_t<Args>&...> kernel_functor(
253 return std::min(
opts_.at(option_name),
void register_kernel_cache(cl::Kernel *cache)
Registers a cached kernel.
The API to access the methods and values in opencl_context_base.
void check_opencl_error(const char *function, const cl::Error &e)
Throws a domain error specifying the OpenCL error that occurred.
auto operator()(cl::NDRange global_thread_size, cl::NDRange thread_block_size, CallArgs &&... args) const
Executes a kernel.
kernel_cl(const char *name, std::vector< std::string > sources, std::unordered_map< std::string, int > options={})
Creates a functor for kernels that only need to define the global work size.
auto operator()(cl::NDRange global_thread_size, CallArgs &&... args) const
Executes a kernel.
void assign_event(const cl::Event &e, const T &)
Assigns the event to a matrix_cl.
int get_option(const std::string option_name) const
Retrieves an option used for compiling the kernel.
void assign_events(const T &)
Adds the event to any matrix_cls in the arguments depending on whether they are in_buffer, out_buffer, or in_out_buffer.
const T & get_kernel_args(const T &t)
Extracts the kernel's arguments, used in the global and local kernel constructor.
tbb::concurrent_vector< cl::Event > select_events(const T &m)
Select events from kernel arguments.
auto compile_kernel(const char *name, const std::vector< std::string > &sources, const std::unordered_map< std::string, int > &options)
Compile an OpenCL kernel.
opencl_context_base::map_base_opts & base_opts() noexcept
Returns a reference to the map of kernel defines.
cl::Context & context() noexcept
Returns the reference to the OpenCL context.
std::vector< cl::Device > & device() noexcept
Returns a vector containing the OpenCL device used to create the context.
cl::CommandQueue & queue() noexcept
Returns the reference to the active OpenCL command queue for the device.
static constexpr double e()
Return the base of the natural logarithm.
auto vec_concat(const Vec &v1, const Args &... args)
Get the event stack from a vector of events and other arguments.
void system_error(const char *function, const char *name, const int &y, const char *msg1, const char *msg2)
Throw a system error with a consistently formatted message.
The lgamma implementation in stan-math is based on either the reentrant safe lgamma_r implementation ...
std::vector< std::string > sources_
std::unordered_map< std::string, int > opts_
Creates a functor for kernels.