template<typename Derived, typename T, typename Operation>
class stan::math::colwise_reduction< Derived, T, Operation >
Represents a column-wise reduction in kernel generator expressions.
To be efficient, column-wise reductions are only done partially. That means that instead of 1 row, the kernel output will have a few rows that need to be reduced to obtain the final result (actually it is 1 result per work group run - roughly 16 times the number of compute units on the OpenCL device). This final reduction can be done in a separate kernel or after copying to the CPU. Also, column-wise reductions cannot be used as arguments to other operations - they can only be evaluated.
- Template Parameters
-
Derived | derived type |
T | type of first argument |
Operation | type with member function generate that accepts two variable names and returns OpenCL source code for the reduction operation |
Definition at line 61 of file colwise_reduction.hpp.
|
| colwise_reduction (T &&a, const std::string &init) |
| Constructor.
|
|
template<typename T_result > |
kernel_parts | get_whole_kernel_parts (std::unordered_map< const void *, const char * > &generated, std::unordered_map< const void *, const char * > &generated_all, name_generator &ng, const std::string &row_index_name, const std::string &col_index_name, const T_result &result) const |
| Generates kernel code for assigning this expression into result expression.
|
|
kernel_parts | generate (const std::string &row_index_name, const std::string &col_index_name, const bool view_handled, const std::string &var_name_arg) const |
| Generates kernel code for this and nested expressions.
|
|
int | rows () const |
| Number of rows of a matrix that would be the result of evaluating this expression.
|
|
int | thread_rows () const |
| Number of rows threads need to be launched for.
|
|
std::pair< int, int > | extreme_diagonals () const |
| Determine indices of extreme sub- and superdiagonals written.
|
|
int | cols () const |
| Number of columns of a matrix that would be the result of evaluating this expression.
|
|
const auto & | get_arg () const |
| Returns an argument to this operation.
|
|
matrix_cl< std::remove_reference_t< T >::Scalar > | eval () const |
| Evaluates the expression.
|
|
void | evaluate_into (T_lhs &lhs) const |
| Evaluates this expression into given left-hand-side expression.
|
|
std::string | get_kernel_source_for_evaluating_into (const T_lhs &lhs) const |
| Generates kernel source for evaluating this expression into given left-hand-side expression.
|
|
kernel_parts | get_kernel_parts (std::unordered_map< const void *, const char * > &generated, std::unordered_map< const void *, const char * > &generated_all, name_generator &name_gen, const std::string &row_index_name, const std::string &col_index_name, bool view_handled) const |
| Generates kernel code for this and nested expressions.
|
|
void | modify_argument_indices (std::string &row_index_name, std::string &col_index_name) const |
| Does nothing.
|
|
void | set_args (std::unordered_map< const void *, const char * > &generated, std::unordered_map< const void *, const char * > &generated_all, cl::Kernel &kernel, int &arg_num) const |
| Sets kernel arguments for nested expressions.
|
|
void | add_read_event (cl::Event &e) const |
| Adds read event to any matrices used by nested expressions.
|
|
void | get_write_events (std::vector< cl::Event > &events) const |
| Adds all write events on any matrices used by nested expressions to a list.
|
|
int | size () const |
| Size of a matrix that would be the result of evaluating this expression.
|
|
int | thread_cols () const |
| Number of columns threads need to be launched for.
|
|
matrix_cl_view | view () const |
| View of a matrix that would be the result of evaluating this expression.
|
|
void | get_unique_matrix_accesses (std::vector< int > &uids, std::unordered_map< const void *, int > &id_map, int &next_id) const |
| Collects data that is needed besides types to uniquely identify a kernel generator expression.
|
|