1#ifndef STAN_MATH_OPENCL_MATRIX_CL_HPP 
    2#define STAN_MATH_OPENCL_MATRIX_CL_HPP 
   14#include <CL/opencl.hpp> 
   15#include <tbb/concurrent_vector.h> 
   61  template <matrix_cl_view matrix_view = matrix_cl_view::Entire>
 
  111  inline const tbb::concurrent_vector<cl::Event>& 
read_events()
 const {
 
  128    this->read_events_.push_back(new_event);
 
  136    this->write_events_.push_back(new_event);
 
  144    this->read_events_.push_back(new_event);
 
  145    this->write_events_.push_back(new_event);
 
  190  matrix_cl(
const cl::Buffer& A, 
const int R, 
const int C,
 
  204                            sizeof(T) * this->size());
 
  246  template <
typename Vec, require_std_vector_vt<is_eigen, Vec>* = 
nullptr,
 
  247            require_st_same<Vec, T>* = 
nullptr>
 
  250    if (this->
size() == 0) {
 
  255    buffer_cl_ = cl::Buffer(ctx, CL_MEM_READ_WRITE, 
sizeof(T) * 
size());
 
  256    for (
int i = 0, offset_size = 0; i < 
cols_; i++, offset_size += 
rows_) {
 
  258                       "matrix_cl rows", 
rows_);
 
  259      cl::Event write_event;
 
  260      queue.enqueueWriteBuffer(
 
  263          sizeof(T) * offset_size, 
sizeof(T) * 
rows_, A[i].data(), 
nullptr,
 
  267  } 
catch (
const cl::Error& 
e) {
 
  287    if (this->
size() == 0) {
 
  292      int flags = CL_MEM_READ_WRITE;
 
  294        flags |= CL_MEM_ALLOC_HOST_PTR;
 
  297    } 
catch (
const cl::Error& 
e) {
 
  319  template <
typename Mat, require_eigen_t<Mat>* = 
nullptr,
 
  320            require_vt_same<Mat, T>* = 
nullptr>
 
  324    using Mat_type = std::decay_t<ref_type_for_opencl_t<Mat>>;
 
  325    if (this->
size() == 0) {
 
  329        std::is_same<std::decay_t<Mat>, Mat_type>::value
 
  330        && (std::is_lvalue_reference<Mat>::value
 
  349  template <
typename Scal,
 
  354    initialize_buffer<std::is_rvalue_reference<Scal&&>::value>(
 
  355        const_cast<const std::decay_t<Scal>*
>(&A));
 
  372  template <
typename Vec, require_std_vector_t<Vec>* = 
nullptr,
 
  373            require_vt_same<Vec, T>* = 
nullptr>
 
  394  template <
typename Vec, require_std_vector_t<Vec>* = 
nullptr,
 
  395            require_vt_same<Vec, T>* = 
nullptr>
 
  399    initialize_buffer_no_heap_if<std::is_lvalue_reference<Vec>::value>(A);
 
  417  template <
typename U, require_same_t<T, U>* = 
nullptr>
 
  418  explicit matrix_cl(
const U* A, 
const int& R, 
const int& C,
 
  431  template <
typename Expr,
 
  444    buffer_cl_ = std::move(a.buffer_cl_);
 
  463                              sizeof(T) * a.
size());
 
  478  template <
typename Expr,
 
  508    if (this->
size() == 0) {
 
  511    cl::Event zero_event;
 
  512    const std::size_t write_events_size = this->
write_events().size();
 
  513    const std::size_t read_events_size = this->
read_events().size();
 
  514    const std::size_t read_write_size = write_events_size + read_events_size;
 
  518    for (std::size_t i = 0; i < read_events_size; ++i) {
 
  521    for (std::size_t i = read_events_size, j = 0; j < write_events_size;
 
  527                                               sizeof(T) * this->
size(),
 
  529    } 
catch (
const cl::Error& 
e) {
 
  550  template <
bool in_order = false>
 
  552    cl::Event transfer_event;
 
  553    if (this->
size() == 0) {
 
  554      return transfer_event;
 
  559      buffer_cl_ = cl::Buffer(ctx, CL_MEM_READ_WRITE, 
sizeof(T) * 
size());
 
  562                               sizeof(T) * 
size(), A, 
nullptr, &transfer_event);
 
  564    } 
catch (
const cl::Error& 
e) {
 
  567    return transfer_event;
 
  570  template <
bool in_order = false>
 
  572    cl::Event transfer_event;
 
  573    if (this->
size() == 0) {
 
  574      return transfer_event;
 
  580        constexpr auto copy_or_share
 
  581            = CL_MEM_COPY_HOST_PTR * INTEGRATED_OPENCL
 
  582              | (CL_MEM_USE_HOST_PTR * !INTEGRATED_OPENCL);
 
  584            = cl::Buffer(ctx, CL_MEM_READ_WRITE | copy_or_share,
 
  585                         sizeof(T) * 
size(), A);  
 
  587        buffer_cl_ = cl::Buffer(ctx, CL_MEM_READ_WRITE, 
sizeof(T) * 
size());
 
  588        queue.enqueueWriteBuffer(
 
  590            sizeof(T) * 
size(), A, 
nullptr, &transfer_event);
 
  593    } 
catch (
const cl::Error& 
e) {
 
  596    return transfer_event;
 
  613  template <
bool No_heap, 
typename U, std::enable_if_t<No_heap>* = 
nullptr>
 
  615    if (this->
size() == 0) {
 
  622  template <
bool No_heap, 
typename U, std::enable_if_t<!No_heap>* = 
nullptr>
 
  624    using U_val = std::decay_t<ref_type_for_opencl_t<U>>;
 
  625    if (this->
size() == 0) {
 
  628    auto* obj_heap = 
new U_val(std::move(obj));
 
  632        buffer_cl_.setDestructorCallback(&delete_it_destructor<U_val>,
 
  635        e.setCallback(CL_COMPLETE, &delete_it_event<U_val>, obj_heap);
 
  650    cl::Event cstr_event;
 
  651    std::vector<cl::Event>* dep_events = 
new std::vector<cl::Event>(
 
  655                                               A.
size() * 
sizeof(T), dep_events,
 
  661        cstr_event.setCallback(
 
  666    } 
catch (
const cl::Error& 
e) {
 
  682  template <
typename U>
 
  684    delete static_cast<U*
>(container);
 
  693  template <
typename U>
 
  695    delete static_cast<U*
>(container);
 
A variant of matrix_cl that schedules its destructor to be called, so it can be used on the AD stack.
 
Non-templated base class for matrix_cl simplifies checking if something is matrix_cl.
 
matrix_cl(Vec &&A, matrix_cl_view partial_view=matrix_cl_view::Entire)
Construct a matrix_cl of size Nx1 from std::vector.
 
void initialize_buffer_cl(const matrix_cl< T > &A)
Initializes the OpenCL buffer of this matrix by copying the data from given matrix_cl.
 
matrix_cl< T > & operator=(matrix_cl< T > &&a)
Move assignment operator.
 
matrix_cl(const int rows, const int cols, matrix_cl_view partial_view=matrix_cl_view::Entire)
Constructor for the matrix_cl that only allocates the buffer on the OpenCL device.
 
const cl::Buffer & buffer() const
 
const tbb::concurrent_vector< cl::Event > read_write_events() const
Get the events from the event stacks.
 
void wait_for_write_events() const
Waits for the write events and clears the write event stack.
 
void initialize_buffer_no_heap_if(U &&obj)
Initializes the OpenCL buffer of this matrix by copying the data from given object.
 
void add_read_event(cl::Event new_event) const
Add an event to the read event stack.
 
void wait_for_read_write_events() const
Waits for read and write events to finish and clears the read, write, and read/write event stacks.
 
void add_read_write_event(cl::Event new_event) const
Add an event to both the read and write event stacks.
 
void setZero()
Set the values of a matrix_cl to zero.
 
void wait_for_read_events() const
Waits for the read events and clears the read event stack.
 
static void delete_it_destructor(cl_mem buff, void *container)
Deletes the container.
 
~matrix_cl()
Destructor waits for write events to prevent any kernels from writing memory that has already been re...
 
matrix_cl(const cl::Buffer &A, const int R, const int C, matrix_cl_view partial_view=matrix_cl_view::Entire)
Construct a matrix_cl<T> from an existing cl::Buffer object.
 
matrix_cl(Vec &&A)
Constructor for the matrix_cl that creates a copy of a std::vector of Eigen matrices on the OpenCL de...
 
tbb::concurrent_vector< cl::Event > read_events_
 
tbb::concurrent_vector< cl::Event > write_events_
 
matrix_cl(const U *A, const int &R, const int &C, matrix_cl_view partial_view=matrix_cl_view::Entire)
Construct from array with given rows and columns.
 
matrix_cl(Vec &&A, const int &R, const int &C, matrix_cl_view partial_view=matrix_cl_view::Entire)
Construct from std::vector with given rows and columns.
 
void view(const matrix_cl_view &view)
 
const matrix_cl_view & view() const
 
void clear_write_events() const
Clear the write events from the event stacks.
 
cl::Event initialize_buffer(T *A)
 
const matrix_cl< T > & eval() const &
Evaluates this.
 
static void delete_it_event(cl_event e, cl_int status, void *container)
Deletes the container.
 
const tbb::concurrent_vector< cl::Event > & read_events() const
Get the events from the event stacks.
 
void add_write_event(cl::Event new_event) const
Add an event to the write event stack.
 
matrix_cl(Scal &&A, matrix_cl_view partial_view=matrix_cl_view::Diagonal)
Constructor for the matrix_cl that creates a copy of a scalar on the OpenCL device.
 
matrix_cl< T > & operator=(const matrix_cl< T > &a)
Copy assignment operator.
 
matrix_cl(Mat &&A, matrix_cl_view partial_view=matrix_cl_view::Entire)
Constructor for the matrix_cl that creates a copy of the Eigen matrix or Eigen expression on the Open...
 
matrix_cl(const matrix_cl< T > &A)
Copy constructor.
 
matrix_cl(matrix_cl< T > &&A)
Move constructor.
 
void clear_read_write_events() const
Clear the read and write events from the event stacks.
 
cl::Event initialize_buffer(const T *A)
Initializes the OpenCL buffer of this matrix by copying the data from given buffer.
 
const tbb::concurrent_vector< cl::Event > & write_events() const
Get the events from the event stacks.
 
void clear_read_events() const
Clear the read events from the event stacks.
 
Represents an arithmetic matrix on the OpenCL device.
 
The API to access the methods and values in opencl_context_base.
 
void check_opencl_error(const char *function, const cl::Error &e)
Throws the domain error with specifying the OpenCL error that occurred.
 
void zeros_strict_tri()
Stores zeros in the strictly triangular part (excluding the diagonal) of a matrix on the OpenCL device...
 
require_not_t< is_matrix_cl< std::decay_t< T > > > require_not_matrix_cl_t
Require type does not satisfy is_matrix_cl.
 
bool & in_order() noexcept
Return a bool representing whether the writes to the OpenCL device are blocking.
 
cl::Context & context() noexcept
Returns the reference to the OpenCL context.
 
std::vector< cl::Device > & device() noexcept
Returns a vector containing the OpenCL device used to create the context.
 
cl::CommandQueue & queue() noexcept
Returns the reference to the active OpenCL command queue for the device.
 
require_all_t< is_kernel_expression_and_not_scalar< Types >... > require_all_kernel_expressions_and_none_scalar_t
Enables a template if all given types are non-scalar types that are a valid kernel generator expressi...
 
require_t< std::is_same< std::decay_t< T >, std::decay_t< S > > > require_same_t
Require types T and S satisfy std::is_same.
 
static constexpr double e()
Return the base of the natural logarithm.
 
auto vec_concat(const Vec &v1, const Args &... args)
Get the event stack from a vector of events and other arguments.
 
void check_size_match(const char *function, const char *name_i, T_size1 i, const char *name_j, T_size2 j)
Check if the provided sizes match.
 
The lgamma implementation in stan-math is based on either the reentrant safe lgamma_r implementation ...
 
Check if a type is an Eigen::Map with contiguous stride.