math/opencl_2prim_2cumulative__sum_8hpp_source.html

#ifndef STAN_MATH_OPENCL_PRIM_CUMULATIVE_SUM_HPP

#define STAN_MATH_OPENCL_PRIM_CUMULATIVE_SUM_HPP

#ifdef STAN_OPENCL

#include <stan/math/opencl/prim/size.hpp>

#include <stan/math/opencl/kernels/cumulative_sum.hpp>

#include <stan/math/prim/meta.hpp>

#include <stan/math/prim/err.hpp>


namespace stan {

namespace math {


template <typename T_vec,

          require_all_kernel_expressions_and_none_scalar_t<T_vec>* = nullptr>

inline auto cumulative_sum(T_vec&& v) {

  using T_scal = scalar_type_t<T_vec>;

  check_vector("cumulative_sum(OpenCL)", "v", v);


  matrix_cl<T_scal> res(v.rows(), v.cols());

  if (v.size() == 0) {

    return res;

  }


  if (!is_matrix_cl<T_vec>::value) {

    res = v;

  }

  const int local_size

      = opencl_kernels::cumulative_sum<T_scal>::kernel1.get_option(

          "LOCAL_SIZE_");

  const int work_groups = std::min(

      (v.size() + local_size - 1) / local_size,

      static_cast<int>(

          opencl_context.device()[0].getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>())

          * 16);

  const int local_size2

      = opencl_kernels::cumulative_sum<T_scal>::kernel2.get_option(

          "LOCAL_SIZE_");

  const matrix_cl<T_scal>& in

      = static_select<is_matrix_cl<T_vec>::value>(v, res);


  matrix_cl<T_scal> tmp_threads(local_size * work_groups, 1);

  matrix_cl<T_scal> tmp_wgs(work_groups, 1);

  try {

    opencl_kernels::cumulative_sum<T_scal>::kernel1(

        cl::NDRange(local_size * work_groups), cl::NDRange(local_size), tmp_wgs,

        tmp_threads, in, v.size());

    opencl_kernels::cumulative_sum<T_scal>::kernel2(cl::NDRange(local_size2),

                                                    cl::NDRange(local_size2),

                                                    tmp_wgs, work_groups);

    opencl_kernels::cumulative_sum<T_scal>::kernel3(

        cl::NDRange(local_size * work_groups), cl::NDRange(local_size), res, in,

        tmp_threads, tmp_wgs, v.size());

  } catch (const cl::Error& e) {

    check_opencl_error("cumulative_sum", e);

  }

  return res;

}


}  // namespace math

}  // namespace stan


#endif

#endif

stan::math::matrix_cl::size
int size() const
Definition matrix_cl.hpp:68

stan::math::matrix_cl
Represents an arithmetic matrix on the OpenCL device.
Definition matrix_cl.hpp:47

stan::math::opencl_context
The API to access the methods and values in opencl_context_base.
Definition opencl_context.hpp:210

stan::math::check_opencl_error
void check_opencl_error(const char *function, const cl::Error &e)
Throws a domain error that specifies the OpenCL error that occurred.
Definition check_opencl.hpp:23

stan::math::opencl_context::device
std::vector< cl::Device > & device() noexcept
Returns a vector containing the OpenCL device used to create the context.
Definition opencl_context.hpp:393

stan::math::e
static constexpr double e()
Return the base of the natural logarithm.
Definition constants.hpp:20

stan::math::check_vector
void check_vector(const char *function, const char *name, const Mat &x)
Check the input is either a row vector or column vector or a matrix with a single row or column.
Definition check_vector.hpp:32

stan::math::cumulative_sum
auto cumulative_sum(T_vec &&v)
Return the cumulative sum of the specified vector.
Definition cumulative_sum.hpp:25

stan::scalar_type_t
typename scalar_type< T >::type scalar_type_t
Definition scalar_type.hpp:25

stan
The lgamma implementation in stan-math is based on either the reentrant safe lgamma_r implementation ...
Definition unit_vector_constrain.hpp:15

cumulative_sum.hpp

size.hpp

err.hpp

meta.hpp

stan::is_matrix_cl
Checks if the decayed type of T is a matrix_cl.
Definition is_matrix_cl.hpp:26

stan::math::opencl_kernels::cumulative_sum
struct containing cumulative_sum kernels, grouped by scalar type.
Definition cumulative_sum.hpp:168