1#ifndef STAN_MATH_OPENCL_MATRIX_CL_HPP
2#define STAN_MATH_OPENCL_MATRIX_CL_HPP
14#include <CL/opencl.hpp>
15#include <tbb/concurrent_vector.h>
61 template <matrix_cl_view matrix_view = matrix_cl_view::Entire>
111 inline const tbb::concurrent_vector<cl::Event>&
read_events()
const {
128 this->read_events_.push_back(new_event);
136 this->write_events_.push_back(new_event);
144 this->read_events_.push_back(new_event);
145 this->write_events_.push_back(new_event);
190 matrix_cl(
const cl::Buffer& A,
const int R,
const int C,
204 sizeof(T) * this->size());
246 template <
typename Vec, require_std_vector_vt<is_eigen, Vec>* =
nullptr,
247 require_st_same<Vec, T>* =
nullptr>
250 if (this->
size() == 0) {
255 buffer_cl_ = cl::Buffer(ctx, CL_MEM_READ_WRITE,
sizeof(T) *
size());
256 for (
int i = 0, offset_size = 0; i <
cols_; i++, offset_size +=
rows_) {
258 "matrix_cl rows",
rows_);
259 cl::Event write_event;
260 queue.enqueueWriteBuffer(
263 sizeof(T) * offset_size,
sizeof(T) *
rows_, A[i].data(),
nullptr,
267 }
catch (
const cl::Error&
e) {
287 if (this->
size() == 0) {
292 int flags = CL_MEM_READ_WRITE;
294 flags |= CL_MEM_ALLOC_HOST_PTR;
297 }
catch (
const cl::Error&
e) {
319 template <
typename Mat, require_eigen_t<Mat>* =
nullptr,
320 require_vt_same<Mat, T>* =
nullptr>
324 using Mat_type = std::decay_t<ref_type_for_opencl_t<Mat>>;
325 if (this->
size() == 0) {
329 std::is_same<std::decay_t<Mat>, Mat_type>::value
330 && (std::is_lvalue_reference<Mat>::value
349 template <
typename Scal,
354 initialize_buffer<std::is_rvalue_reference<Scal&&>::value>(
355 const_cast<const std::decay_t<Scal>*
>(&A));
372 template <
typename Vec, require_std_vector_t<Vec>* =
nullptr,
373 require_vt_same<Vec, T>* =
nullptr>
394 template <
typename Vec, require_std_vector_t<Vec>* =
nullptr,
395 require_vt_same<Vec, T>* =
nullptr>
399 initialize_buffer_no_heap_if<std::is_lvalue_reference<Vec>::value>(A);
417 template <
typename U, require_same_t<T, U>* =
nullptr>
418 explicit matrix_cl(
const U* A,
const int& R,
const int& C,
431 template <
typename Expr,
444 buffer_cl_ = std::move(a.buffer_cl_);
463 sizeof(T) * a.
size());
478 template <
typename Expr,
508 if (this->
size() == 0) {
511 cl::Event zero_event;
512 const std::size_t write_events_size = this->
write_events().size();
513 const std::size_t read_events_size = this->
read_events().size();
514 const std::size_t read_write_size = write_events_size + read_events_size;
518 for (std::size_t i = 0; i < read_events_size; ++i) {
521 for (std::size_t i = read_events_size, j = 0; j < write_events_size;
527 sizeof(T) * this->
size(),
529 }
catch (
const cl::Error&
e) {
550 template <
bool in_order = false>
552 cl::Event transfer_event;
553 if (this->
size() == 0) {
554 return transfer_event;
559 buffer_cl_ = cl::Buffer(ctx, CL_MEM_READ_WRITE,
sizeof(T) *
size());
562 sizeof(T) *
size(), A,
nullptr, &transfer_event);
564 }
catch (
const cl::Error&
e) {
567 return transfer_event;
570 template <
bool in_order = false>
572 cl::Event transfer_event;
573 if (this->
size() == 0) {
574 return transfer_event;
580 constexpr auto copy_or_share
581 = CL_MEM_COPY_HOST_PTR * INTEGRATED_OPENCL
582 | (CL_MEM_USE_HOST_PTR * !INTEGRATED_OPENCL);
584 = cl::Buffer(ctx, CL_MEM_READ_WRITE | copy_or_share,
585 sizeof(T) *
size(), A);
587 buffer_cl_ = cl::Buffer(ctx, CL_MEM_READ_WRITE,
sizeof(T) *
size());
588 queue.enqueueWriteBuffer(
590 sizeof(T) *
size(), A,
nullptr, &transfer_event);
593 }
catch (
const cl::Error&
e) {
596 return transfer_event;
613 template <
bool No_heap,
typename U, std::enable_if_t<No_heap>* =
nullptr>
615 if (this->
size() == 0) {
622 template <
bool No_heap,
typename U, std::enable_if_t<!No_heap>* =
nullptr>
624 using U_val = std::decay_t<ref_type_for_opencl_t<U>>;
625 if (this->
size() == 0) {
628 auto* obj_heap =
new U_val(std::move(obj));
632 buffer_cl_.setDestructorCallback(&delete_it_destructor<U_val>,
635 e.setCallback(CL_COMPLETE, &delete_it_event<U_val>, obj_heap);
650 cl::Event cstr_event;
651 std::vector<cl::Event>* dep_events =
new std::vector<cl::Event>(
655 A.
size() *
sizeof(T), dep_events,
661 cstr_event.setCallback(
666 }
catch (
const cl::Error&
e) {
682 template <
typename U>
684 delete static_cast<U*
>(container);
693 template <
typename U>
695 delete static_cast<U*
>(container);
A variant of matrix_cl that schedules its destructor to be called, so it can be used on the AD stack.
Non-templated base class for matrix_cl simplifies checking if something is matrix_cl.
matrix_cl(Vec &&A, matrix_cl_view partial_view=matrix_cl_view::Entire)
Construct a matrix_cl of size Nx1 from std::vector.
void initialize_buffer_cl(const matrix_cl< T > &A)
Initializes the OpenCL buffer of this matrix by copying the data from given matrix_cl.
matrix_cl< T > & operator=(matrix_cl< T > &&a)
Move assignment operator.
matrix_cl(const int rows, const int cols, matrix_cl_view partial_view=matrix_cl_view::Entire)
Constructor for the matrix_cl that only allocates the buffer on the OpenCL device.
const cl::Buffer & buffer() const
const tbb::concurrent_vector< cl::Event > read_write_events() const
Get the events from the event stacks.
void wait_for_write_events() const
Waits for the write events and clears the write event stack.
void initialize_buffer_no_heap_if(U &&obj)
Initializes the OpenCL buffer of this matrix by copying the data from given object.
void add_read_event(cl::Event new_event) const
Add an event to the read event stack.
void wait_for_read_write_events() const
Waits for read and write events to finish and clears the read, write, and read/write event stacks.
void add_read_write_event(cl::Event new_event) const
Add an event to the read/write event stack.
void setZero()
Set the values of a matrix_cl to zero.
void wait_for_read_events() const
Waits for the read events and clears the read event stack.
static void delete_it_destructor(cl_mem buff, void *container)
Deletes the container.
~matrix_cl()
Destructor waits for write events to prevent any kernels from writing memory that has already been re...
matrix_cl(const cl::Buffer &A, const int R, const int C, matrix_cl_view partial_view=matrix_cl_view::Entire)
Construct a matrix_cl<T> from an existing cl::Buffer object.
matrix_cl(Vec &&A)
Constructor for the matrix_cl that creates a copy of a std::vector of Eigen matrices on the OpenCL de...
tbb::concurrent_vector< cl::Event > read_events_
tbb::concurrent_vector< cl::Event > write_events_
matrix_cl(const U *A, const int &R, const int &C, matrix_cl_view partial_view=matrix_cl_view::Entire)
Construct from array with given rows and columns.
matrix_cl(Vec &&A, const int &R, const int &C, matrix_cl_view partial_view=matrix_cl_view::Entire)
Construct from std::vector with given rows and columns.
void view(const matrix_cl_view &view)
const matrix_cl_view & view() const
void clear_write_events() const
Clear the write events from the event stacks.
cl::Event initialize_buffer(T *A)
const matrix_cl< T > & eval() const &
Evaluates this.
static void delete_it_event(cl_event e, cl_int status, void *container)
Deletes the container.
const tbb::concurrent_vector< cl::Event > & read_events() const
Get the events from the event stacks.
void add_write_event(cl::Event new_event) const
Add an event to the write event stack.
matrix_cl(Scal &&A, matrix_cl_view partial_view=matrix_cl_view::Diagonal)
Constructor for the matrix_cl that creates a copy of a scalar on the OpenCL device.
matrix_cl< T > & operator=(const matrix_cl< T > &a)
Copy assignment operator.
matrix_cl(Mat &&A, matrix_cl_view partial_view=matrix_cl_view::Entire)
Constructor for the matrix_cl that creates a copy of the Eigen matrix or Eigen expression on the Open...
matrix_cl(const matrix_cl< T > &A)
Copy constructor.
matrix_cl(matrix_cl< T > &&A)
Move constructor.
void clear_read_write_events() const
Clear the read and write events from the event stacks.
cl::Event initialize_buffer(const T *A)
Initializes the OpenCL buffer of this matrix by copying the data from given buffer.
const tbb::concurrent_vector< cl::Event > & write_events() const
Get the events from the event stacks.
void clear_read_events() const
Clear the read events from the event stacks.
Represents an arithmetic matrix on the OpenCL device.
The API to access the methods and values in opencl_context_base.
void check_opencl_error(const char *function, const cl::Error &e)
Throws a domain error specifying the OpenCL error that occurred.
void zeros_strict_tri()
Stores zeros in the strict triangular part (excluding the diagonal) of a matrix on the OpenCL devic...
require_not_t< is_matrix_cl< std::decay_t< T > > > require_not_matrix_cl_t
Require that the type does not satisfy is_matrix_cl.
bool & in_order() noexcept
Return a bool representing whether writes to the OpenCL device are blocking.
cl::Context & context() noexcept
Returns the reference to the OpenCL context.
std::vector< cl::Device > & device() noexcept
Returns a vector containing the OpenCL device used to create the context.
cl::CommandQueue & queue() noexcept
Returns the reference to the active OpenCL command queue for the device.
require_all_t< is_kernel_expression_and_not_scalar< Types >... > require_all_kernel_expressions_and_none_scalar_t
Enables a template if all given types are non-scalar types that are a valid kernel generator expressi...
require_t< std::is_same< std::decay_t< T >, std::decay_t< S > > > require_same_t
Require that types T and S satisfy std::is_same.
static constexpr double e()
Return the base of the natural logarithm.
auto vec_concat(const Vec &v1, const Args &... args)
Get the event stack from a vector of events and other arguments.
void check_size_match(const char *function, const char *name_i, T_size1 i, const char *name_j, T_size2 j)
Check if the provided sizes match.
The lgamma implementation in stan-math is based on either the reentrant safe lgamma_r implementation ...
Check if a type is an Eigen::Map with contiguous stride.