1#ifndef STAN_MATH_OPENCL_COPY_HPP
2#define STAN_MATH_OPENCL_COPY_HPP
20#include <CL/opencl.hpp>
44template <
typename T, require_st_arithmetic<T>* =
nullptr>
58template <
typename T_ret,
typename T, require_eigen_t<T_ret>* =
nullptr,
59 require_matrix_cl_t<T>* =
nullptr,
60 require_st_same<T_ret, T>* =
nullptr>
64 = Eigen::Matrix<scalar_type_t<T_ret>, T_ret::RowsAtCompileTime,
65 T_ret::ColsAtCompileTime>;
66 T_ret_col_major dst(src.rows(), src.cols());
67 if (src.size() == 0) {
72 && src.rows() == src.cols()) {
74 = std::conditional_t<std::is_same<T_val, bool>::value, char, T_val>;
79 for (
int j = 0; j < src.cols(); ++j) {
80 for (
int k = 0; k < j; ++k) {
81 dst.coeffRef(k, j) = 0;
83 for (
int i = j; i < src.cols(); ++i) {
84 dst.coeffRef(i, j) = packed[pos++];
88 for (
int j = 0; j < src.cols(); ++j) {
89 for (
int i = 0; i <= j; ++i) {
90 dst.coeffRef(i, j) = packed[pos++];
92 for (
int k = j + 1; k < src.cols(); ++k) {
93 dst.coeffRef(k, j) = 0;
101 std::vector<cl::Event> copy_write_events(src.write_events().begin(),
102 src.write_events().end());
104 sizeof(T_val) * dst.size(), dst.data(),
105 ©_write_events, ©_event);
107 src.clear_write_events();
108 }
catch (
const cl::Error&
e) {
112 dst.template triangularView<Eigen::StrictlyLower>()
113 = T_ret_col_major::Zero(dst.rows(), dst.cols());
116 dst.template triangularView<Eigen::StrictlyUpper>()
117 = T_ret_col_major::Zero(dst.rows(), dst.cols());
132template <
typename T_ret,
typename T,
136 return from_matrix_cl<T_ret>(src.eval());
145template <
typename T_dst,
typename T, require_arithmetic_t<T>* =
nullptr,
146 require_same_t<T_dst, T>* =
nullptr>
154 cl::Event copy_event;
156 std::vector<cl::Event> copy_write_events(src.
write_events().begin(),
159 sizeof(T), &dst, ©_write_events, ©_event);
162 }
catch (
const cl::Error&
e) {
177template <
typename T_dst,
typename T,
183 T_dst dst(src.rows());
184 if (src.rows() == 0) {
188 cl::Event copy_event;
189 const cl::CommandQueue& queue = opencl_context.queue();
190 std::vector<cl::Event> copy_write_events(src.write_events().begin(),
191 src.write_events().end());
192 queue.enqueueReadBuffer(src.buffer(), opencl_context.in_order(), 0,
193 sizeof(T) * src.rows(), dst.data(),
194 ©_write_events, ©_event);
196 src.clear_write_events();
197 }
catch (
const cl::Error&
e) {
213template <
typename T_dst,
typename T,
214 require_std_vector_vt<is_eigen_vector, T_dst>* =
nullptr,
215 require_all_st_same<T_dst, T>* =
nullptr>
217 Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic> tmp =
from_matrix_cl(src);
219 dst.reserve(src.cols());
220 for (
int i = 0; i < src.cols(); i++) {
221 dst.emplace_back(tmp.col(i));
234template <
typename T, require_all_kernel_expressions_t<T>* =
nullptr>
237 Eigen::Matrix<scalar_type_t<T>, Eigen::Dynamic, Eigen::Dynamic>>(src);
248template <
typename T, require_matrix_cl_t<T>* =
nullptr>
251 const int packed_size = src.rows() * (src.rows() + 1) / 2;
254 = std::conditional_t<std::is_same<T_val, bool>::value, char, T_val>;
255 std::vector<T_not_bool> dst(packed_size);
256 if (dst.size() == 0) {
263 packed, src, src.
rows(), src.rows(),
265 const std::vector<cl::Event> mat_events
268 cl::Event copy_event;
270 sizeof(T_val) * packed_size, dst.data(),
271 &mat_events, ©_event);
273 src.clear_write_events();
274 }
catch (
const cl::Error&
e) {
303 const int packed_size =
rows * (
rows + 1) / 2;
305 src.size(),
"rows * (rows + 1) / 2", packed_size);
307 if (dst.
size() == 0) {
312 cl::Event packed_event;
314 queue.enqueueWriteBuffer(
317 sizeof(Vec_scalar) * packed_size, src.data(),
nullptr, &packed_event);
322 }
catch (
const cl::Error&
e) {
339template <
typename T, require_matrix_cl_t<T>* =
nullptr>
const cl::Buffer & buffer() const
const tbb::concurrent_vector< cl::Event > read_write_events() const
Get the events from the event stacks.
void clear_write_events() const
Clear the write events from the event stacks.
void add_write_event(cl::Event new_event) const
Add an event to the write event stack.
const tbb::concurrent_vector< cl::Event > & write_events() const
Get the events from the event stacks.
Represents an arithmetic matrix on the OpenCL device.
The API to access the methods and values in opencl_context_base.
void check_triangular(const char *function, const char *name, const T &A)
Check if the matrix_cl is either upper triangular or lower triangular.
void check_opencl_error(const char *function, const cl::Error &e)
Throws a domain error that specifies the OpenCL error that occurred.
require_not_t< is_matrix_cl< std::decay_t< T > > > require_not_matrix_cl_t
Require type does not satisfy is_matrix_cl.
bool & in_order() noexcept
Returns a bool representing whether writes to the OpenCL device are blocking.
cl::CommandQueue & queue() noexcept
Returns the reference to the active OpenCL command queue for the device.
require_all_t< is_kernel_expression< Types >... > require_all_kernel_expressions_t
Enables a template if all given types are valid kernel generator expressions.
const kernel_cl< out_buffer, in_buffer, int, int, matrix_cl_view > unpack("unpack", {indexing_helpers, unpack_kernel_code})
See the docs for unpack() .
const kernel_cl< out_buffer, in_buffer, int, int, matrix_cl_view > pack("pack", {indexing_helpers, pack_kernel_code})
See the docs for pack() .
int64_t rows(const T_x &x)
Returns the number of rows in the specified kernel generator expression.
auto packed_copy(const T &src)
Packs the square flat triangular matrix on the OpenCL device and copies it to the std::vector.
plain_type_t< T > copy_cl(const T &src)
Copies the source matrix to the destination matrix.
matrix_cl< scalar_type_t< T > > to_matrix_cl(T &&src)
Copies the source Eigen matrix, std::vector or scalar to the destination matrix that is stored on the OpenCL device.
bool contains_nonzero(const matrix_cl_view view, const matrix_cl_view part)
Check whether a view contains certain nonzero part.
auto from_matrix_cl(const T &src)
Copies the source matrix that is stored on the OpenCL device to the destination Eigen matrix.
require_all_t< std::is_same< scalar_type_t< std::decay_t< T > >, scalar_type_t< std::decay_t< Types > > >... > require_all_st_same
All scalar types of T and all of the Types satisfy std::is_same.
require_t< container_type_check_base< is_std_vector, value_type_t, TypeCheck, Check... > > require_std_vector_vt
Require type satisfies is_std_vector.
typename value_type< T >::type value_type_t
Helper function for accessing underlying type.
require_t< container_type_check_base< is_vector, value_type_t, TypeCheck, Check... > > require_vector_vt
Require type satisfies is_vector.
static constexpr double e()
Return the base of the natural logarithm.
auto vec_concat(const Vec &v1, const Args &... args)
Get the event stack from a vector of events and other arguments.
void check_size_match(const char *function, const char *name_i, T_size1 i, const char *name_j, T_size2 j)
Check if the provided sizes match.
typename plain_type< T >::type plain_type_t
typename scalar_type< T >::type scalar_type_t
The lgamma implementation in stan-math is based on either the reentrant safe lgamma_r implementation ...