Automatic Differentiation
 
Loading...
Searching...
No Matches
copy.hpp
Go to the documentation of this file.
1#ifndef STAN_MATH_OPENCL_COPY_HPP
2#define STAN_MATH_OPENCL_COPY_HPP
3#ifdef STAN_OPENCL
4
19
20#include <CL/opencl.hpp>
21#include <algorithm>
22#include <iostream>
23#include <type_traits>
24#include <vector>
25
26namespace stan {
27namespace math {
28
44template <typename T, require_st_arithmetic<T>* = nullptr>
46 return matrix_cl<scalar_type_t<T>>(std::forward<T>(src));
47}
48
58template <typename T_ret, typename T, require_eigen_t<T_ret>* = nullptr,
59 require_matrix_cl_t<T>* = nullptr,
60 require_st_same<T_ret, T>* = nullptr>
61inline auto from_matrix_cl(const T& src) {
62 using T_val = value_type_t<T>;
63 using T_ret_col_major
64 = Eigen::Matrix<scalar_type_t<T_ret>, T_ret::RowsAtCompileTime,
65 T_ret::ColsAtCompileTime>;
66 T_ret_col_major dst(src.rows(), src.cols());
67 if (src.size() == 0) {
68 return dst;
69 }
70 if ((src.view() == matrix_cl_view::Lower
71 || src.view() == matrix_cl_view::Upper)
72 && src.rows() == src.cols()) {
73 using T_not_bool
74 = std::conditional_t<std::is_same<T_val, bool>::value, char, T_val>;
75 std::vector<T_not_bool> packed = packed_copy(src);
76
77 size_t pos = 0;
78 if (src.view() == matrix_cl_view::Lower) {
79 for (int j = 0; j < src.cols(); ++j) {
80 for (int k = 0; k < j; ++k) {
81 dst.coeffRef(k, j) = 0;
82 }
83 for (int i = j; i < src.cols(); ++i) {
84 dst.coeffRef(i, j) = packed[pos++];
85 }
86 }
87 } else {
88 for (int j = 0; j < src.cols(); ++j) {
89 for (int i = 0; i <= j; ++i) {
90 dst.coeffRef(i, j) = packed[pos++];
91 }
92 for (int k = j + 1; k < src.cols(); ++k) {
93 dst.coeffRef(k, j) = 0;
94 }
95 }
96 }
97 } else {
98 try {
99 cl::Event copy_event;
100 const cl::CommandQueue& queue = opencl_context.queue();
101 std::vector<cl::Event> copy_write_events(src.write_events().begin(),
102 src.write_events().end());
103 queue.enqueueReadBuffer(src.buffer(), opencl_context.in_order(), 0,
104 sizeof(T_val) * dst.size(), dst.data(),
105 &copy_write_events, &copy_event);
106 copy_event.wait();
107 src.clear_write_events();
108 } catch (const cl::Error& e) {
109 check_opencl_error("copy (OpenCL)->Eigen", e);
110 }
111 if (!contains_nonzero(src.view(), matrix_cl_view::Lower)) {
112 dst.template triangularView<Eigen::StrictlyLower>()
113 = T_ret_col_major::Zero(dst.rows(), dst.cols());
114 }
115 if (!contains_nonzero(src.view(), matrix_cl_view::Upper)) {
116 dst.template triangularView<Eigen::StrictlyUpper>()
117 = T_ret_col_major::Zero(dst.rows(), dst.cols());
118 }
119 }
120 return dst;
121}
122
132template <typename T_ret, typename T,
135inline auto from_matrix_cl(const T& src) {
136 return from_matrix_cl<T_ret>(src.eval());
137}
138
145template <typename T_dst, typename T, require_arithmetic_t<T>* = nullptr,
146 require_same_t<T_dst, T>* = nullptr>
147inline T_dst from_matrix_cl(const matrix_cl<T>& src) {
148 T dst;
149 check_size_match("from_matrix_cl<scalar>", "src.rows()", src.rows(),
150 "dst.rows()", 1);
151 check_size_match("from_matrix_cl<scalar>", "src.cols()", src.cols(),
152 "dst.cols()", 1);
153 try {
154 cl::Event copy_event;
155 const cl::CommandQueue& queue = opencl_context.queue();
156 std::vector<cl::Event> copy_write_events(src.write_events().begin(),
157 src.write_events().end());
158 queue.enqueueReadBuffer(src.buffer(), opencl_context.in_order(), 0,
159 sizeof(T), &dst, &copy_write_events, &copy_event);
160 copy_event.wait();
161 src.clear_write_events();
162 } catch (const cl::Error& e) {
163 check_opencl_error("from_matrix_cl<scalar>", e);
164 }
165 return dst;
166}
167
177template <typename T_dst, typename T,
180inline T_dst from_matrix_cl(const matrix_cl<T>& src) {
181 check_size_match("from_matrix_cl<std::vector>", "src.cols()", src.cols(),
182 "dst.cols()", 1);
183 T_dst dst(src.rows());
184 if (src.rows() == 0) {
185 return dst;
186 }
187 try {
188 cl::Event copy_event;
189 const cl::CommandQueue& queue = opencl_context.queue();
190 std::vector<cl::Event> copy_write_events(src.write_events().begin(),
191 src.write_events().end());
192 queue.enqueueReadBuffer(src.buffer(), opencl_context.in_order(), 0,
193 sizeof(T) * src.rows(), dst.data(),
194 &copy_write_events, &copy_event);
195 copy_event.wait();
196 src.clear_write_events();
197 } catch (const cl::Error& e) {
198 check_opencl_error("from_matrix_cl<std::vector>", e);
199 }
200 return dst;
201}
202
213template <typename T_dst, typename T,
214 require_std_vector_vt<is_eigen_vector, T_dst>* = nullptr,
215 require_all_st_same<T_dst, T>* = nullptr>
216inline T_dst from_matrix_cl(const matrix_cl<T>& src) {
217 Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic> tmp = from_matrix_cl(src);
218 T_dst dst;
219 dst.reserve(src.cols());
220 for (int i = 0; i < src.cols(); i++) {
221 dst.emplace_back(tmp.col(i));
222 }
223 return dst;
224}
225
234template <typename T, require_all_kernel_expressions_t<T>* = nullptr>
235auto from_matrix_cl(const T& src) {
236 return from_matrix_cl<
237 Eigen::Matrix<scalar_type_t<T>, Eigen::Dynamic, Eigen::Dynamic>>(src);
238}
239
248template <typename T, require_matrix_cl_t<T>* = nullptr>
249inline auto packed_copy(const T& src) {
250 check_triangular("packed_copy", "src", src);
251 const int packed_size = src.rows() * (src.rows() + 1) / 2;
252 using T_val = value_type_t<T>;
253 using T_not_bool
254 = std::conditional_t<std::is_same<T_val, bool>::value, char, T_val>;
255 std::vector<T_not_bool> dst(packed_size);
256 if (dst.size() == 0) {
257 return dst;
258 }
259 try {
260 const cl::CommandQueue& queue = opencl_context.queue();
261 matrix_cl<T_val> packed(packed_size, 1);
262 stan::math::opencl_kernels::pack(cl::NDRange(src.rows(), src.rows()),
263 packed, src, src.rows(), src.rows(),
264 src.view());
265 const std::vector<cl::Event> mat_events
266 = vec_concat(std::vector<cl::Event>{}, packed.read_write_events(),
267 src.write_events());
268 cl::Event copy_event;
269 queue.enqueueReadBuffer(packed.buffer(), opencl_context.in_order(), 0,
270 sizeof(T_val) * packed_size, dst.data(),
271 &mat_events, &copy_event);
272 copy_event.wait();
273 src.clear_write_events();
274 } catch (const cl::Error& e) {
275 check_opencl_error("packed_copy (OpenCL->std::vector)", e);
276 }
277 return dst;
278}
279
299template <matrix_cl_view matrix_view, typename Vec,
300 typename Vec_scalar = scalar_type_t<Vec>,
302inline matrix_cl<Vec_scalar> packed_copy(Vec&& src, int rows) {
303 const int packed_size = rows * (rows + 1) / 2;
304 check_size_match("copy (packed std::vector -> OpenCL)", "src.size()",
305 src.size(), "rows * (rows + 1) / 2", packed_size);
306 matrix_cl<Vec_scalar> dst(rows, rows, matrix_view);
307 if (dst.size() == 0) {
308 return dst;
309 }
310 try {
311 matrix_cl<Vec_scalar> packed(packed_size, 1);
312 cl::Event packed_event;
313 const cl::CommandQueue& queue = opencl_context.queue();
314 queue.enqueueWriteBuffer(
315 packed.buffer(),
316 opencl_context.in_order() || std::is_rvalue_reference<Vec&&>::value, 0,
317 sizeof(Vec_scalar) * packed_size, src.data(), nullptr, &packed_event);
318 packed.add_write_event(packed_event);
319 stan::math::opencl_kernels::unpack(cl::NDRange(dst.rows(), dst.rows()), dst,
320 packed, dst.rows(), dst.rows(),
321 matrix_view);
322 } catch (const cl::Error& e) {
323 check_opencl_error("packed_copy (std::vector->OpenCL)", e);
324 }
325 return dst;
326}
327
339template <typename T, require_matrix_cl_t<T>* = nullptr>
340inline plain_type_t<T> copy_cl(const T& src) {
341 return plain_type_t<T>(src);
342}
343
344} // namespace math
345} // namespace stan
346#endif
347#endif
const cl::Buffer & buffer() const
const tbb::concurrent_vector< cl::Event > read_write_events() const
Get the events from the event stacks.
void clear_write_events() const
Clear the write events from the event stacks.
Definition matrix_cl.hpp:77
void add_write_event(cl::Event new_event) const
Add an event to the write event stack.
const tbb::concurrent_vector< cl::Event > & write_events() const
Get the events from the event stacks.
Represents an arithmetic matrix on the OpenCL device.
Definition matrix_cl.hpp:47
The API to access the methods and values in opencl_context_base.
void check_triangular(const char *function, const char *name, const T &A)
Check if the matrix_cl is either upper triangular or lower triangular.
void check_opencl_error(const char *function, const cl::Error &e)
Throws the domain error with specifying the OpenCL error that occurred.
require_not_t< is_matrix_cl< std::decay_t< T > > > require_not_matrix_cl_t
Require type does not satisfy is_matrix_cl.
bool & in_order() noexcept
Return a bool representing whether the write to the OpenCL device are blocking.
cl::CommandQueue & queue() noexcept
Returns the reference to the active OpenCL command queue for the device.
require_all_t< is_kernel_expression< Types >... > require_all_kernel_expressions_t
Enables a template if all given types are are a valid kernel generator expressions.
const kernel_cl< out_buffer, in_buffer, int, int, matrix_cl_view > unpack("unpack", {indexing_helpers, unpack_kernel_code})
See the docs for unpack() .
const kernel_cl< out_buffer, in_buffer, int, int, matrix_cl_view > pack("pack", {indexing_helpers, pack_kernel_code})
See the docs for pack() .
int64_t rows(const T_x &x)
Returns the number of rows in the specified kernel generator expression.
Definition rows.hpp:22
auto packed_copy(const T &src)
Packs the square flat triangular matrix on the OpenCL device and copies it to the std::vector.
Definition copy.hpp:249
plain_type_t< T > copy_cl(const T &src)
Copies the source matrix to the destination matrix.
Definition copy.hpp:340
matrix_cl< scalar_type_t< T > > to_matrix_cl(T &&src)
Copies the source Eigen matrix, std::vector or scalar to the destination matrix that is stored on the...
Definition copy.hpp:45
bool contains_nonzero(const matrix_cl_view view, const matrix_cl_view part)
Check whether a view contains certain nonzero part.
auto from_matrix_cl(const T &src)
Copies the source matrix that is stored on the OpenCL device to the destination Eigen matrix.
Definition copy.hpp:61
require_all_t< std::is_same< scalar_type_t< std::decay_t< T > >, scalar_type_t< std::decay_t< Types > > >... > require_all_st_same
All scalar types of T and all of the Types satisfy std::is_same.
require_t< container_type_check_base< is_std_vector, value_type_t, TypeCheck, Check... > > require_std_vector_vt
Require type satisfies is_std_vector.
typename value_type< T >::type value_type_t
Helper function for accessing underlying type.
require_t< container_type_check_base< is_vector, value_type_t, TypeCheck, Check... > > require_vector_vt
Require type satisfies is_vector.
static constexpr double e()
Return the base of the natural logarithm.
Definition constants.hpp:20
auto vec_concat(const Vec &v1, const Args &... args)
Get the event stack from a vector of events and other arguments.
void check_size_match(const char *function, const char *name_i, T_size1 i, const char *name_j, T_size2 j)
Check if the provided sizes match.
typename plain_type< T >::type plain_type_t
typename scalar_type< T >::type scalar_type_t
The lgamma implementation in stan-math is based on either the reentrant safe lgamma_r implementation ...