1#ifndef STAN_MATH_OPENCL_KERNEL_GENERATOR_OPENCL_CODE_HPP
2#define STAN_MATH_OPENCL_KERNEL_GENERATOR_OPENCL_CODE_HPP
31template <
typename T_code,
typename T_scalar>
33 :
public operation_cl<opencl_code_output<T_code, T_scalar>, T_scalar,
55 auto&& code_copy = this->
template get_arg<0>().deep_copy();
69 const std::string& col_index_name,
70 const bool view_handled,
71 const std::string& dummy_var_name_code)
const {
83 return {-this->
rows() + 1, this->
cols() - 1};
88template <
const char* Code,
typename... T_arguments>
90 :
public operation_cl<opencl_code_impl<Code, T_arguments...>, double,
97 = std::tuple<typename std::pair<const char*, T_arguments>::first_type...>;
98 using base::var_name_;
108 :
base(
std::forward<T_arguments>(arguments)...), names_(names) {}
120 const std::string& row_index_name,
const std::string& col_index_name,
121 const bool view_handled,
122 std::tuple_element_t<
123 0, std::pair<const std::string&, T_arguments>>... var_names)
const {
124 return index_apply<
sizeof...(T_arguments)>([
this](
auto... Is) {
126 std::array<std::string,
sizeof...(T_arguments)> input_renames{
128 +
" " + std::get<Is>(names_) +
" = "
129 + this->
template get_arg<Is>().var_name_ +
";\n")...};
130 res.
body = std::accumulate(input_renames.begin(), input_renames.end(),
144template <
const char* Code,
typename... T_arguments>
150 = std::tuple<typename std::pair<const char*, T_arguments>::first_type...>;
161 std::make_shared<internal::opencl_code_impl<Code, T_arguments...>>(
162 names,
std::forward<T_arguments>(arguments)...)),
185 std::unordered_map<const void*, const char*>& generated,
186 std::unordered_map<const void*, const char*>& generated_all,
188 const std::string& col_index_name,
bool view_handled)
const {
189 return impl_->get_kernel_parts(generated, generated_all, name_gen,
190 row_index_name, col_index_name,
204 auto set_args(std::unordered_map<const void*, const char*>& generated,
205 std::unordered_map<const void*, const char*>& generated_all,
206 cl::Kernel& kernel,
int& arg_num)
const {
207 return impl_->set_args(generated, generated_all, kernel, arg_num);
221 return impl_->get_write_events(events);
229 template <
int N =
sizeof...(T_arguments),
230 std::enable_if_t<(N > 0)>* =
nullptr>
232 return impl_->rows();
234 template <
int N =
sizeof...(T_arguments),
235 std::enable_if_t<(N == 0)>* =
nullptr>
245 template <
int N =
sizeof...(T_arguments),
246 std::enable_if_t<(N > 0)>* =
nullptr>
248 return impl_->cols();
250 template <
int N =
sizeof...(T_arguments),
251 std::enable_if_t<(N == 0)>* =
nullptr>
261 template <
int N =
sizeof...(T_arguments),
262 std::enable_if_t<(N > 0)>* =
nullptr>
264 return impl_->thread_rows();
266 template <
int N =
sizeof...(T_arguments),
267 std::enable_if_t<(N == 0)>* =
nullptr>
277 template <
int N =
sizeof...(T_arguments),
278 std::enable_if_t<(N > 0)>* =
nullptr>
280 return impl_->thread_cols();
282 template <
int N =
sizeof...(T_arguments),
283 std::enable_if_t<(N == 0)>* =
nullptr>
302 std::unordered_map<const void*, int>& id_map,
303 int& next_id)
const {
304 return impl_->get_unique_matrix_accesses(uids, id_map, next_id);
312 return index_apply<
sizeof...(T_arguments)>([
this](
auto... Is) {
314 = std::make_tuple(this->
impl_->template get_arg<Is>().deep_copy()...);
316 Code, std::remove_reference_t<decltype(std::get<Is>(args_copy))>...>(
317 this->
impl_->names_, std::move(std::get<Is>(args_copy))...);
325 template <
typename T_scalar>
326 inline auto output(
const char* var_name)
const {
340template <
const char* Code,
typename... T_arguments,
343 std::tuple<
typename std::pair<const char*, T_arguments>::first_type...>
345 T_arguments&&... arguments) {
kernel_parts generate(const std::string &row_index_name, const std::string &col_index_name, const bool view_handled, std::tuple_element_t< 0, std::pair< const std::string &, T_arguments > >... var_names) const
Generates kernel code for this operation.
std::tuple< typename std::pair< const char *, T_arguments >::first_type... > names_tuple
opencl_code_impl(names_tuple names, T_arguments &&... arguments)
Constructor.
Unique name generator for variables used in generated kernels.
std::shared_ptr< internal::opencl_code_impl< Code, T_arguments... > > impl_
auto thread_rows() const
Number of rows threads need to be launched for.
opencl_code_(const names_tuple &names, T_arguments &&... arguments)
Constructor.
auto get_kernel_parts(std::unordered_map< const void *, const char * > &generated, std::unordered_map< const void *, const char * > &generated_all, name_generator &name_gen, const std::string &row_index_name, const std::string &col_index_name, bool view_handled) const
Generates kernel code for this and nested expressions.
opencl_code_(const opencl_code_< Code, T_arguments... > &other)
Copy constructor.
auto get_write_events(std::vector< cl::Event > &events) const
Adds all write events on any matrices used by nested expressions to a list.
auto get_unique_matrix_accesses(std::vector< int > &uids, std::unordered_map< const void *, int > &id_map, int &next_id) const
Collects data that is needed besides types to uniquely identify a kernel generator expression.
auto thread_cols() const
Number of columns threads need to be launched for.
auto output(const char *var_name) const
Get object representing output variable of custom code.
auto add_read_event(cl::Event &e) const
Adds read event to any matrices used by nested expressions.
auto deep_copy() const
Creates a deep copy of this expression.
auto extreme_diagonals() const
Determine indices of extreme sub- and superdiagonals written.
auto set_args(std::unordered_map< const void *, const char * > &generated, std::unordered_map< const void *, const char * > &generated_all, cl::Kernel &kernel, int &arg_num) const
Sets kernel arguments for nested expressions.
auto rows() const
Number of rows of a matrix that would be the result of evaluating this expression.
std::tuple< typename std::pair< const char *, T_arguments >::first_type... > names_tuple
auto cols() const
Number of columns of a matrix that would be the result of evaluating this expression.
Represents custom code in kernel generator expressions.
const char * custom_var_name_
kernel_parts generate(const std::string &row_index_name, const std::string &col_index_name, const bool view_handled, const std::string &dummy_var_name_code) const
Generates kernel code for this operation.
opencl_code_output(T_code code, const char *custom_var_name)
Constructor.
auto deep_copy() const
Creates a deep copy of this expression.
std::pair< int, int > extreme_diagonals() const
Determine indices of extreme sub- and superdiagonals written.
Represents output variable of custom code in kernel generator expressions.
int cols() const
Number of columns of a matrix that would be the result of evaluating this expression.
int rows() const
Number of rows of a matrix that would be the result of evaluating this expression.
Base for all kernel generator operations.
Non-templated base of operation_cl is needed for easy checking if something is a subclass of operatio...
auto opencl_code(std::tuple< typename std::pair< const char *, T_arguments >::first_type... > names, T_arguments &&... arguments)
Custom code in kernel generator expressions.
require_all_t< is_kernel_expression< Types >... > require_all_kernel_expressions_t
Enables a template if all given types are valid kernel generator expressions.
T_operation && as_operation_cl(T_operation &&a)
Converts any valid kernel generator expression into an operation.
static constexpr double e()
Return the base of the natural logarithm.
std::string type_str()
Determines a string name of a type.
constexpr auto index_apply(F &&f)
Calls given callable with an index sequence.
typename scalar_type< T >::type scalar_type_t
The lgamma implementation in stan-math is based on either the reentrant safe lgamma_r implementation ...
Parts of an OpenCL kernel, generated by an expression.