math/copy_8hpp_source.html

#ifndef STAN_MATH_OPENCL_COPY_HPP

#define STAN_MATH_OPENCL_COPY_HPP

#ifdef STAN_OPENCL


#include <stan/math/opencl/buffer_types.hpp>

#include <stan/math/opencl/kernel_cl.hpp>

#include <stan/math/opencl/matrix_cl.hpp>

#include <stan/math/opencl/matrix_cl_view.hpp>

#include <stan/math/opencl/opencl_context.hpp>

#include <stan/math/opencl/value_type.hpp>

#include <stan/math/opencl/scalar_type.hpp>

#include <stan/math/opencl/kernels/pack.hpp>

#include <stan/math/opencl/kernels/unpack.hpp>

#include <stan/math/opencl/err/check_opencl.hpp>

#include <stan/math/opencl/err/check_triangular.hpp>

#include <stan/math/prim/meta.hpp>

#include <stan/math/prim/fun/Eigen.hpp>

#include <stan/math/prim/fun/vec_concat.hpp>


#include <CL/opencl.hpp>

#include <algorithm>

#include <iostream>

#include <type_traits>

#include <vector>


namespace stan {

namespace math {


/**
 * Copies the source object to a matrix stored on the OpenCL device.
 *
 * The result is produced by forwarding `src` to the constructor of
 * `matrix_cl<scalar_type_t<T>>`, so value category of the argument is
 * preserved.
 *
 * @tparam T type of the source; the overload is enabled through
 * `require_st_arithmetic<T>`
 * @param src object to copy to the device
 * @return `matrix_cl` with the scalar type of `src`
 */
template <typename T, require_st_arithmetic<T>* = nullptr>
inline matrix_cl<scalar_type_t<T>> to_matrix_cl(T&& src) {
  using device_matrix_t = matrix_cl<scalar_type_t<T>>;
  return device_matrix_t(std::forward<T>(src));
}


template <typename T_ret, typename T, require_eigen_t<T_ret>* = nullptr,

          require_matrix_cl_t<T>* = nullptr,

          require_st_same<T_ret, T>* = nullptr>

inline auto from_matrix_cl(const T& src) {

  using T_val = value_type_t<T>;

  using T_ret_col_major

      = Eigen::Matrix<scalar_type_t<T_ret>, T_ret::RowsAtCompileTime,

                      T_ret::ColsAtCompileTime>;

  T_ret_col_major dst(src.rows(), src.cols());

  if (src.size() == 0) {

    return dst;

  }

  if ((src.view() == matrix_cl_view::Lower

       || src.view() == matrix_cl_view::Upper)

      && src.rows() == src.cols()) {

    using T_not_bool

        = std::conditional_t<std::is_same<T_val, bool>::value, char, T_val>;

    std::vector<T_not_bool> packed = packed_copy(src);


    size_t pos = 0;

    if (src.view() == matrix_cl_view::Lower) {

      for (int j = 0; j < src.cols(); ++j) {

        for (int k = 0; k < j; ++k) {

          dst.coeffRef(k, j) = 0;

        }

        for (int i = j; i < src.cols(); ++i) {

          dst.coeffRef(i, j) = packed[pos++];

        }

      }

    } else {

      for (int j = 0; j < src.cols(); ++j) {

        for (int i = 0; i <= j; ++i) {

          dst.coeffRef(i, j) = packed[pos++];

        }

        for (int k = j + 1; k < src.cols(); ++k) {

          dst.coeffRef(k, j) = 0;

        }

      }

    }

  } else {

    try {

      cl::Event copy_event;

      const cl::CommandQueue& queue = opencl_context.queue();

      std::vector<cl::Event> copy_write_events(src.write_events().begin(),

                                               src.write_events().end());

      queue.enqueueReadBuffer(src.buffer(), opencl_context.in_order(), 0,

                              sizeof(T_val) * dst.size(), dst.data(),

                              &copy_write_events, &copy_event);

      copy_event.wait();

      src.clear_write_events();

    } catch (const cl::Error& e) {

      check_opencl_error("copy (OpenCL)->Eigen", e);

    }

    if (!contains_nonzero(src.view(), matrix_cl_view::Lower)) {

      dst.template triangularView<Eigen::StrictlyLower>()

          = T_ret_col_major::Zero(dst.rows(), dst.cols());

    }

    if (!contains_nonzero(src.view(), matrix_cl_view::Upper)) {

      dst.template triangularView<Eigen::StrictlyUpper>()

          = T_ret_col_major::Zero(dst.rows(), dst.cols());

    }

  }

  return dst;

}


/**
 * Copies the result of a kernel generator expression to the host.
 *
 * The expression is first evaluated with `eval()`; the evaluated result is
 * then passed to `from_matrix_cl<T_ret>`.
 *
 * @tparam T_ret destination type requested by the caller
 * @tparam T type of the kernel generator expression (not a `matrix_cl`)
 * @param src expression to evaluate and copy
 * @return whatever `from_matrix_cl<T_ret>` returns for the evaluated result
 */
template <typename T_ret, typename T,
          require_all_kernel_expressions_t<T>* = nullptr,
          require_not_matrix_cl_t<T>* = nullptr>
inline auto from_matrix_cl(const T& src) {
  const auto& evaluated = src.eval();
  return from_matrix_cl<T_ret>(evaluated);
}


template <typename T_dst, typename T, require_arithmetic_t<T>* = nullptr,

          require_same_t<T_dst, T>* = nullptr>

inline T_dst from_matrix_cl(const matrix_cl<T>& src) {

  T dst;

  check_size_match("from_matrix_cl<scalar>", "src.rows()", src.rows(),

                   "dst.rows()", 1);

  check_size_match("from_matrix_cl<scalar>", "src.cols()", src.cols(),

                   "dst.cols()", 1);

  try {

    cl::Event copy_event;

    const cl::CommandQueue& queue = opencl_context.queue();

    std::vector<cl::Event> copy_write_events(src.write_events().begin(),

                                             src.write_events().end());

    queue.enqueueReadBuffer(src.buffer(), opencl_context.in_order(), 0,

                            sizeof(T), &dst, &copy_write_events, &copy_event);

    copy_event.wait();

    src.clear_write_events();

  } catch (const cl::Error& e) {

    check_opencl_error("from_matrix_cl<scalar>", e);

  }

  return dst;

}


template <typename T_dst, typename T,

          require_std_vector_vt<std::is_arithmetic, T_dst>* = nullptr,

          require_all_st_same<T_dst, T>* = nullptr>

inline T_dst from_matrix_cl(const matrix_cl<T>& src) {

  check_size_match("from_matrix_cl<std::vector>", "src.cols()", src.cols(),

                   "dst.cols()", 1);

  T_dst dst(src.rows());

  if (src.rows() == 0) {

    return dst;

  }

  try {

    cl::Event copy_event;

    const cl::CommandQueue& queue = opencl_context.queue();

    std::vector<cl::Event> copy_write_events(src.write_events().begin(),

                                             src.write_events().end());

    queue.enqueueReadBuffer(src.buffer(), opencl_context.in_order(), 0,

                            sizeof(T) * src.rows(), dst.data(),

                            &copy_write_events, &copy_event);

    copy_event.wait();

    src.clear_write_events();

  } catch (const cl::Error& e) {

    check_opencl_error("from_matrix_cl<std::vector>", e);

  }

  return dst;

}


template <typename T_dst, typename T,

          require_std_vector_vt<is_eigen_vector, T_dst>* = nullptr,

          require_all_st_same<T_dst, T>* = nullptr>

inline T_dst from_matrix_cl(const matrix_cl<T>& src) {

  Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic> tmp = from_matrix_cl(src);

  T_dst dst;

  dst.reserve(src.cols());

  for (int i = 0; i < src.cols(); i++) {

    dst.emplace_back(tmp.col(i));

  }

  return dst;

}


template <typename T, require_all_kernel_expressions_t<T>* = nullptr>

auto from_matrix_cl(const T& src) {

  return from_matrix_cl<

      Eigen::Matrix<scalar_type_t<T>, Eigen::Dynamic, Eigen::Dynamic>>(src);

}


template <typename T, require_matrix_cl_t<T>* = nullptr>

inline auto packed_copy(const T& src) {

  check_triangular("packed_copy", "src", src);

  const int packed_size = src.rows() * (src.rows() + 1) / 2;

  using T_val = value_type_t<T>;

  using T_not_bool

      = std::conditional_t<std::is_same<T_val, bool>::value, char, T_val>;

  std::vector<T_not_bool> dst(packed_size);

  if (dst.size() == 0) {

    return dst;

  }

  try {

    const cl::CommandQueue& queue = opencl_context.queue();

    matrix_cl<T_val> packed(packed_size, 1);

    stan::math::opencl_kernels::pack(cl::NDRange(src.rows(), src.rows()),

                                     packed, src, src.rows(), src.rows(),

                                     src.view());

    const std::vector<cl::Event> mat_events

        = vec_concat(std::vector<cl::Event>{}, packed.read_write_events(),

                     src.write_events());

    cl::Event copy_event;

    queue.enqueueReadBuffer(packed.buffer(), opencl_context.in_order(), 0,

                            sizeof(T_val) * packed_size, dst.data(),

                            &mat_events, &copy_event);

    copy_event.wait();

    src.clear_write_events();

  } catch (const cl::Error& e) {

    check_opencl_error("packed_copy (OpenCL->std::vector)", e);

  }

  return dst;

}


/**
 * Copies a packed triangular matrix from a host vector to the OpenCL device
 * and unpacks it into a square matrix.
 *
 * `check_size_match` verifies that `src.size()` equals
 * `rows * (rows + 1) / 2`. The packed data is written to a temporary device
 * matrix and expanded by the `unpack` kernel.
 *
 * The host-to-device write is blocking when `opencl_context.in_order()` is
 * true or when `src` is bound as an rvalue (presumably so that a temporary
 * source is not destroyed before the transfer finishes -- confirm).
 *
 * @tparam matrix_view view given to the resulting matrix and to the kernel
 * @tparam Vec type of the host vector with arithmetic elements
 * @tparam Vec_scalar scalar type of `Vec`
 * @param src packed elements on the host
 * @param rows number of rows and columns of the resulting matrix
 * @return `rows` x `rows` matrix on the OpenCL device
 * @note a `cl::Error` raised by the write or the kernel is handed to
 * `check_opencl_error`.
 */
template <matrix_cl_view matrix_view, typename Vec,
          typename Vec_scalar = scalar_type_t<Vec>,
          require_vector_vt<std::is_arithmetic, Vec>* = nullptr>
inline matrix_cl<Vec_scalar> packed_copy(Vec&& src, int rows) {
  const int packed_size = rows * (rows + 1) / 2;
  check_size_match("copy (packed std::vector -> OpenCL)", "src.size()",
                   src.size(), "rows * (rows + 1) / 2", packed_size);
  matrix_cl<Vec_scalar> dst(rows, rows, matrix_view);
  if (dst.size() == 0) {
    return dst;
  }
  try {
    matrix_cl<Vec_scalar> packed_dev(packed_size, 1);
    cl::Event write_done;
    const cl::CommandQueue& cmd_queue = opencl_context.queue();
    const bool blocking_write
        = opencl_context.in_order() || std::is_rvalue_reference<Vec&&>::value;
    cmd_queue.enqueueWriteBuffer(packed_dev.buffer(), blocking_write, 0,
                                 sizeof(Vec_scalar) * packed_size, src.data(),
                                 nullptr, &write_done);
    // Register the transfer so the kernel below is ordered after it.
    packed_dev.add_write_event(write_done);
    stan::math::opencl_kernels::unpack(cl::NDRange(dst.rows(), dst.rows()), dst,
                                       packed_dev, dst.rows(), dst.rows(),
                                       matrix_view);
  } catch (const cl::Error& e) {
    check_opencl_error("packed_copy (std::vector->OpenCL)", e);
  }
  return dst;
}


/**
 * Copies the source matrix to a new destination matrix.
 *
 * The copy is made by constructing `plain_type_t<T>` from `src`.
 *
 * @tparam T type of the `matrix_cl` source
 * @param src matrix to copy
 * @return newly constructed `plain_type_t<T>`
 */
template <typename T, require_matrix_cl_t<T>* = nullptr>
inline plain_type_t<T> copy_cl(const T& src) {
  using plain_t = plain_type_t<T>;
  return plain_t(src);
}


}  // namespace math

}  // namespace stan

#endif

#endif

Eigen.hpp

buffer_types.hpp

check_opencl.hpp

check_triangular.hpp

stan::math::matrix_cl::buffer
const cl::Buffer & buffer() const
Definition matrix_cl.hpp:177

stan::math::matrix_cl::read_write_events
const tbb::concurrent_vector< cl::Event > read_write_events() const
Get the events from the event stacks.
Definition matrix_cl.hpp:119

stan::math::matrix_cl::size
int size() const
Definition matrix_cl.hpp:68

stan::math::matrix_cl::cols
int cols() const
Definition matrix_cl.hpp:66

stan::math::matrix_cl::rows
int rows() const
Definition matrix_cl.hpp:64

stan::math::matrix_cl::clear_write_events
void clear_write_events() const
Clear the write events from the event stacks.
Definition matrix_cl.hpp:77

stan::math::matrix_cl::add_write_event
void add_write_event(cl::Event new_event) const
Add an event to the write event stack.
Definition matrix_cl.hpp:135

stan::math::matrix_cl::write_events
const tbb::concurrent_vector< cl::Event > & write_events() const
Get the events from the event stacks.
Definition matrix_cl.hpp:103

stan::math::matrix_cl
Represents an arithmetic matrix on the OpenCL device.
Definition matrix_cl.hpp:47

stan::math::opencl_context
The API to access the methods and values in opencl_context_base.
Definition opencl_context.hpp:210

stan::math::check_triangular
void check_triangular(const char *function, const char *name, const T &A)
Check if the matrix_cl is either upper triangular or lower triangular.
Definition check_triangular.hpp:23

stan::math::check_opencl_error
void check_opencl_error(const char *function, const cl::Error &e)
Throws a domain error specifying the OpenCL error that occurred.
Definition check_opencl.hpp:23

stan::require_not_matrix_cl_t
require_not_t< is_matrix_cl< std::decay_t< T > > > require_not_matrix_cl_t
Require type does not satisfy is_matrix_cl.
Definition is_matrix_cl.hpp:41

stan::math::opencl_context::in_order
bool & in_order() noexcept
Returns a bool representing whether writes to the OpenCL device are blocking.
Definition opencl_context.hpp:407

stan::math::opencl_context::queue
cl::CommandQueue & queue() noexcept
Returns the reference to the active OpenCL command queue for the device.
Definition opencl_context.hpp:363

stan::require_all_kernel_expressions_t
require_all_t< is_kernel_expression< Types >... > require_all_kernel_expressions_t
Enables a template if all given types are valid kernel generator expressions.
Definition is_kernel_expression.hpp:66

stan::math::opencl_kernels::unpack
const kernel_cl< out_buffer, in_buffer, int, int, matrix_cl_view > unpack("unpack", {indexing_helpers, unpack_kernel_code})
See the docs for unpack() .

stan::math::opencl_kernels::pack
const kernel_cl< out_buffer, in_buffer, int, int, matrix_cl_view > pack("pack", {indexing_helpers, pack_kernel_code})
See the docs for pack() .

stan::math::rows
int64_t rows(const T_x &x)
Returns the number of rows in the specified kernel generator expression.
Definition rows.hpp:22

stan::math::packed_copy
auto packed_copy(const T &src)
Packs the square flat triangular matrix on the OpenCL device and copies it to the std::vector.
Definition copy.hpp:249

stan::math::copy_cl
plain_type_t< T > copy_cl(const T &src)
Copies the source matrix to the destination matrix.
Definition copy.hpp:340

stan::math::to_matrix_cl
matrix_cl< scalar_type_t< T > > to_matrix_cl(T &&src)
Copies the source Eigen matrix, std::vector or scalar to the destination matrix that is stored on the...
Definition copy.hpp:45

stan::math::contains_nonzero
bool contains_nonzero(const matrix_cl_view view, const matrix_cl_view part)
Check whether a view contains certain nonzero part.
Definition matrix_cl_view.hpp:45

stan::math::from_matrix_cl
auto from_matrix_cl(const T &src)
Copies the source matrix that is stored on the OpenCL device to the destination Eigen matrix.
Definition copy.hpp:61

stan::require_all_st_same
require_all_t< std::is_same< scalar_type_t< std::decay_t< T > >, scalar_type_t< std::decay_t< Types > > >... > require_all_st_same
All scalar types of T and all of the Types satisfy std::is_same.
Definition require_generics.hpp:65

stan::require_std_vector_vt
require_t< container_type_check_base< is_std_vector, value_type_t, TypeCheck, Check... > > require_std_vector_vt
Require type satisfies is_std_vector.
Definition is_vector.hpp:679

stan::value_type_t
typename value_type< T >::type value_type_t
Helper function for accessing underlying type.
Definition value_type.hpp:35

stan::require_vector_vt
require_t< container_type_check_base< is_vector, value_type_t, TypeCheck, Check... > > require_vector_vt
Require type satisfies is_vector.
Definition is_vector.hpp:473

kernel_cl.hpp

matrix_cl.hpp

matrix_cl_view.hpp

stan::math::e
static constexpr double e()
Return the base of the natural logarithm.
Definition constants.hpp:20

stan::math::vec_concat
auto vec_concat(const Vec &v1, const Args &... args)
Get the event stack from a vector of events and other arguments.
Definition vec_concat.hpp:57

stan::math::matrix_cl_view
matrix_cl_view
Definition matrix_cl_view.hpp:11

stan::math::matrix_cl_view::Upper
@ Upper

stan::math::matrix_cl_view::Lower
@ Lower

stan::math::check_size_match
void check_size_match(const char *function, const char *name_i, T_size1 i, const char *name_j, T_size2 j)
Check if the provided sizes match.
Definition check_size_match.hpp:24

stan::plain_type_t
typename plain_type< std::decay_t< T > >::type plain_type_t
Definition plain_type.hpp:23

stan::scalar_type_t
typename scalar_type< T >::type scalar_type_t
Definition scalar_type.hpp:25

stan
The lgamma implementation in stan-math is based on either the reentrant safe lgamma_r implementation ...
Definition unit_vector_constrain.hpp:15

scalar_type.hpp

value_type.hpp

opencl_context.hpp

pack.hpp

meta.hpp

unpack.hpp

vec_concat.hpp