math/opencl_2prim_2sum_8hpp_source.html

#ifndef STAN_MATH_OPENCL_PRIM_SUM_HPP

#define STAN_MATH_OPENCL_PRIM_SUM_HPP

#ifdef STAN_OPENCL


#include <stan/math/prim/meta.hpp>

#include <stan/math/prim/fun/sum.hpp>

#include <stan/math/opencl/matrix_cl.hpp>

#include <stan/math/opencl/kernel_generator.hpp>


namespace stan {

namespace math {


/**
 * Calculates the sum of all elements of a kernel generator expression.
 *
 * A `matrix_cl` holding fewer than 1000 elements is copied to the host and
 * summed there. In every other case the expression is reduced on the OpenCL
 * device with `sum_2d`, and the outcome of that reduction is copied to the
 * host, where the final sum is taken.
 *
 * @tparam T type of the expression; constrained by
 *   `require_all_kernel_expressions_and_none_scalar_t`
 * @param m expression to sum
 * @return sum of the elements of `m`
 */
template <typename T,
          require_all_kernel_expressions_and_none_scalar_t<T>* = nullptr>
inline value_type_t<T> sum(const T& m) {
  using scalar_t = value_type_t<T>;

  if constexpr (is_matrix_cl<T>::value) {
    // launching another kernel does not pay off for a small matrix
    const bool is_small = m.size() < 1000;
    if (is_small) {
      return sum(from_matrix_cl(m));
    }
  }

  matrix_cl<scalar_t> reduced;
  if (m.rows() > 8) {
    reduced = sum_2d(m);
  } else {
    // with this few rows the untransposed reduction would use just a few
    // threads in a work group, so reduce the transpose instead
    reduced = sum_2d(transpose(m));
  }

  // finish on the host: copy the device-side result back and sum it
  return sum(from_matrix_cl(reduced));
}


}  // namespace math

}  // namespace stan


#endif

#endif

stan::math::sum_2d
auto sum_2d(T &&a)
Two dimensional sum - reduction of a kernel generator expression.
Definition reduction_2d.hpp:233

stan::math::transpose
auto transpose(Arg &&a)
Transposes a kernel generator expression.
Definition transpose.hpp:139

stan::math::from_matrix_cl
auto from_matrix_cl(const T &src)
Copies the source matrix that is stored on the OpenCL device to the destination Eigen matrix.
Definition copy.hpp:61

kernel_generator.hpp

matrix_cl.hpp

stan::math::sum
auto sum(const std::vector< T > &m)
Return the sum of the entries of the specified standard vector.
Definition sum.hpp:23

stan
The lgamma implementation in stan-math is based on either the reentrant safe lgamma_r implementation ...
Definition unit_vector_constrain.hpp:15

sum.hpp

meta.hpp