math/colwise__reduction_8hpp_source.html

#ifndef STAN_MATH_OPENCL_KERNEL_GENERATOR_COLWISE_REDUCTION_HPP

#define STAN_MATH_OPENCL_KERNEL_GENERATOR_COLWISE_REDUCTION_HPP

#ifdef STAN_OPENCL


#include <stan/math/prim/meta.hpp>

#include <stan/math/opencl/opencl_context.hpp>

#include <stan/math/opencl/matrix_cl_view.hpp>

#include <stan/math/opencl/kernel_generator/type_str.hpp>

#include <stan/math/opencl/kernel_generator/name_generator.hpp>

#include <stan/math/opencl/kernel_generator/operation_cl.hpp>

#include <stan/math/opencl/kernel_generator/as_operation_cl.hpp>

#include <stan/math/opencl/kernel_generator/rowwise_reduction.hpp>

#include <stan/math/opencl/kernel_generator/calc_if.hpp>

#include <map>

#include <string>

#include <type_traits>

#include <utility>


namespace stan {

namespace math {

namespace internal {

/**
 * Empty tag type. `colwise_reduction` derives from it, and
 * `is_colwise_reduction_impl` detects column wise reductions by testing for
 * this base with `std::is_base_of`.
 */
class colwise_reduction_base {};


/**
 * Determine number of work groups in rows direction that will be run for
 * colwise reduction of given size.
 *
 * The count is the number of work groups needed to cover `n_rows` with
 * `LOCAL_SIZE_` rows each, capped at 16 times the number of compute units of
 * the first device, and then divided (rounding up) by `n_cols`.
 *
 * @param n_rows number of rows of the expression to reduce
 * @param n_cols number of columns of the expression to reduce
 * @return number of work groups in rows direction; 1 if `n_cols` is 0
 */
inline int colwise_reduction_wgs_rows(int n_rows, int n_cols) {
  if (n_cols == 0) {
    // Dividing by n_cols below would be a division by zero. Return 1, the
    // same value colwise_reduction::rows() uses for an argument without
    // columns.
    return 1;
  }
  const int local = opencl_context.base_opts().at("LOCAL_SIZE_");
  const int preferred_work_groups
      = opencl_context.device()[0].getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>() * 16;
  // number of work groups needed to cover all rows, capped at the preferred
  // number of work groups
  const int row_work_groups
      = std::min(preferred_work_groups, (n_rows + local - 1) / local);
  // round up row_work_groups / n_cols
  return (row_work_groups + n_cols - 1) / n_cols;
}

}  // namespace internal


/**
 * Represents a column wise reduction in kernel generator expressions.
 *
 * The generated code accumulates values into a per work item variable,
 * combines the work items of a work group through a `__local` array and has
 * the work item with `lid_i == 0` write the work group's result.
 *
 * @tparam Derived derived type, passed on to `operation_cl`
 * @tparam T type of the argument expression; must expose a `Scalar` type
 * @tparam Operation type with a static `generate(a, b)` member that produces
 * the code combining two values
 */
template <typename Derived, typename T, typename Operation>

class colwise_reduction

    : public internal::colwise_reduction_base,

      public operation_cl<Derived, typename std::remove_reference_t<T>::Scalar,

                          T> {

 public:

  using Scalar = typename std::remove_reference_t<T>::Scalar;

  using base = operation_cl<Derived, Scalar, T>;

  using base::var_name_;

  // NOTE(review): presumably read by the code that launches the kernel to
  // decide on the work group size - confirm against the users of this flag.
  static constexpr bool require_specific_local_size = true;


 protected:

  // Text emitted as the initial value of the accumulator in generated code.
  std::string init_;

  using base::derived;


 public:

  using base::cols;

  /**
   * Constructor.
   * @param a the expression to reduce
   * @param init text used in the generated code as the initial value of the
   * accumulator
   */
  explicit colwise_reduction(T&& a, const std::string& init)

      : base(std::forward<T>(a)), init_(init) {}


  /**
   * Generates kernel code for assigning this expression into result
   * expression.
   *
   * Only the `args` of the parts generated for `result` are used. The write
   * of the reduced value is emitted here and appended to `reduction_1d`.
   *
   * @tparam T_result type of the result expression
   * @param[in,out] generated passed on to `get_kernel_parts` of this
   * expression and `get_kernel_parts_lhs` of the result
   * @param[in,out] generated_all passed on in the same way as `generated`
   * @param ng name generator for this kernel
   * @param row_index_name row index variable name
   * @param col_index_name column index variable name
   * @param result expression into which this is to be assigned
   * @return parts of the kernel for this and nested expressions, with the
   * result's arguments and the write of the result appended
   */
  template <typename T_result>

  kernel_parts get_whole_kernel_parts(

      std::unordered_map<const void*, const char*>& generated,

      std::unordered_map<const void*, const char*>& generated_all,

      name_generator& ng, const std::string& row_index_name,

      const std::string& col_index_name, const T_result& result) const {

    kernel_parts parts = derived().get_kernel_parts(

        generated, generated_all, ng, row_index_name, col_index_name, false);

    kernel_parts out_parts = result.get_kernel_parts_lhs(

        generated, generated_all, ng, row_index_name, col_index_name);


    parts.args += out_parts.args;

    // The work item with local id 0 stores element 0 of the local array,
    // which holds the work group's result after the reduction emitted by
    // generate(), into the result buffer at j * n_groups_i + wg_id_i.
    parts.reduction_1d += "if (lid_i == 0) {\n"

                     + result.var_name_

                     + "_global[j * n_groups_i + wg_id_i] = "

                     + derived().var_name_ + "_local[0];\n"

                     "}\n";

    return parts;

  }


  /**
   * Generates kernel code for this and nested expressions.
   *
   * `row_index_name`, `col_index_name` and `view_handled` are not used by
   * this implementation.
   *
   * @param row_index_name row index variable name
   * @param col_index_name column index variable name
   * @param view_handled whether the caller already handled the matrix view
   * @param var_name_arg name of the variable holding the argument's value
   * @return part of kernel with code for this expression
   */
  inline kernel_parts generate(const std::string& row_index_name,

                               const std::string& col_index_name,

                               const bool view_handled,

                               const std::string& var_name_arg) const {

    kernel_parts res;

    // A __local array with LOCAL_SIZE_ elements and an accumulator variable.
    res.declarations = "__local " + type_str<Scalar>() + " " + var_name_

                       + "_local[LOCAL_SIZE_];\n" + type_str<Scalar>() + " "

                       + var_name_ + ";\n";

    res.initialization = var_name_ + " = " + init_ + ";\n";

    // Combine the accumulator with the argument's value.
    res.body = var_name_ + " = " + Operation::generate(var_name_, var_name_arg)

               + ";\n";

    // Every work item publishes its accumulator to the local array. Then
    // `step` starts at lsize_i / REDUCTION_STEP_SIZE and is divided by
    // REDUCTION_STEP_SIZE in each iteration; work items with lid_i < step
    // combine their element with the elements at lid_i + step * i for
    // i = 1 .. REDUCTION_STEP_SIZE - 1. A barrier follows each iteration.
    res.reduction_1d =

          var_name_ + "_local[lid_i] = " + var_name_ + ";\n"

          "barrier(CLK_LOCAL_MEM_FENCE);\n"

          "for (int step = lsize_i / REDUCTION_STEP_SIZE; "

                "step > 0; step /= REDUCTION_STEP_SIZE) {\n"

          "  if (lid_i < step) {\n"

          "    for (int i = 1; i < REDUCTION_STEP_SIZE; i++) {\n"

          "      " + var_name_ + "_local[lid_i] = " +

        Operation::generate(var_name_ + "_local[lid_i]",

                            var_name_ + "_local[lid_i + step * i]") + ";\n"

          "    }\n"

          "  }\n"

          "  barrier(CLK_LOCAL_MEM_FENCE);\n"

          "}\n";

    return res;

  }


  /**
   * Number of rows of a matrix that would be the result of evaluating this
   * expression.
   * @return 1 if the argument has 0 columns, -1 if the argument reports -1
   * columns, otherwise the value of `internal::colwise_reduction_wgs_rows`
   * for the argument's size
   */
  inline int rows() const {

    int arg_rows = this->template get_arg<0>().rows();

    int arg_cols = this->template get_arg<0>().cols();

    if (arg_cols == 0) {

      return 1;

    }

    if (arg_cols == -1) {

      return -1;

    }

    return internal::colwise_reduction_wgs_rows(arg_rows, arg_cols);

  }


  /**
   * Number of rows threads need to be launched for.
   * @return number of rows of the argument expression
   */
  inline int thread_rows() const { return this->template get_arg<0>().rows(); }


  /**
   * Determine indices of extreme sub- and superdiagonals written.
   * @return pair `{-rows() + 1, cols() - 1}`
   */
  inline std::pair<int, int> extreme_diagonals() const {

    return {-rows() + 1, cols() - 1};

  }

};  // class colwise_reduction


/**
 * Represents column wise sum - reduction in kernel generator expressions.
 * @tparam T type of the expression being reduced
 */
template <typename T>
class colwise_sum_ : public colwise_reduction<colwise_sum_<T>, T, sum_op> {
  using base = colwise_reduction<colwise_sum_<T>, T, sum_op>;
  using base::arguments_;

 public:
  /**
   * Constructor. The accumulator of the reduction starts at "0".
   * @param a the expression to reduce
   */
  explicit colwise_sum_(T&& a) : base(std::forward<T>(a), "0") {}

  /**
   * Creates a deep copy of this expression.
   * @return a `colwise_sum_` wrapping a deep copy of the argument
   */
  inline auto deep_copy() const {
    auto&& copied_arg = this->template get_arg<0>().deep_copy();
    using copied_arg_t = std::remove_reference_t<decltype(copied_arg)>;
    return colwise_sum_<copied_arg_t>(std::move(copied_arg));
  }
};


/**
 * Column wise sum - reduction of a kernel generator expression.
 *
 * The argument is converted with `as_operation_cl` and forwarded exactly
 * once. No deep copy of the argument is made here.
 *
 * @tparam T type of input expression
 * @param a the expression to reduce
 * @return a `colwise_sum_` expression wrapping `a`
 */
template <typename T, require_all_kernel_expressions_t<T>* = nullptr>
inline auto colwise_sum(T&& a) {
  return colwise_sum_<as_operation_cl_t<T>>(
      as_operation_cl(std::forward<T>(a)));
}


/**
 * Represents column wise product - reduction in kernel generator expressions.
 * @tparam T type of the expression being reduced
 */
template <typename T>
class colwise_prod_ : public colwise_reduction<colwise_prod_<T>, T, prod_op> {
  using base = colwise_reduction<colwise_prod_<T>, T, prod_op>;
  using base::arguments_;

 public:
  /**
   * Constructor. The accumulator of the reduction starts at "1".
   * @param a the expression to reduce
   */
  explicit colwise_prod_(T&& a) : base(std::forward<T>(a), "1") {}

  /**
   * Creates a deep copy of this expression.
   * @return a `colwise_prod_` wrapping a deep copy of the argument
   */
  inline auto deep_copy() const {
    auto&& copied_arg = this->template get_arg<0>().deep_copy();
    using copied_arg_t = std::remove_reference_t<decltype(copied_arg)>;
    return colwise_prod_<copied_arg_t>(std::move(copied_arg));
  }
};


/**
 * Column wise product - reduction of a kernel generator expression.
 *
 * The argument is converted with `as_operation_cl` and forwarded exactly
 * once. No deep copy of the argument is made here.
 *
 * @tparam T type of input expression
 * @param a the expression to reduce
 * @return a `colwise_prod_` expression wrapping `a`
 */
template <typename T, require_all_kernel_expressions_t<T>* = nullptr>
inline auto colwise_prod(T&& a) {
  return colwise_prod_<as_operation_cl_t<T>>(
      as_operation_cl(std::forward<T>(a)));
}


/**
 * Represents column wise max - reduction in kernel generator expressions.
 * @tparam T type of the expression being reduced
 */
template <typename T>
class colwise_max_ : public colwise_reduction<
                         colwise_max_<T>, T,
                         max_op<typename std::remove_reference_t<T>::Scalar>> {
  using base
      = colwise_reduction<colwise_max_<T>, T,
                          max_op<typename std::remove_reference_t<T>::Scalar>>;
  using base::arguments_;

 public:
  using op = max_op<typename std::remove_reference_t<T>::Scalar>;

  /**
   * Constructor. The accumulator of the reduction starts at `op::init()`.
   * @param a the expression to reduce
   */
  explicit colwise_max_(T&& a) : base(std::forward<T>(a), op::init()) {}

  /**
   * Creates a deep copy of this expression.
   * @return a `colwise_max_` wrapping a deep copy of the argument
   */
  inline auto deep_copy() const {
    auto&& copied_arg = this->template get_arg<0>().deep_copy();
    using copied_arg_t = std::remove_reference_t<decltype(copied_arg)>;
    return colwise_max_<copied_arg_t>(std::move(copied_arg));
  }
};


/**
 * Column wise max - reduction of a kernel generator expression.
 *
 * The argument is converted with `as_operation_cl` and forwarded exactly
 * once. No deep copy of the argument is made here.
 *
 * @tparam T type of input expression
 * @param a the expression to reduce
 * @return a `colwise_max_` expression wrapping `a`
 */
template <typename T, require_all_kernel_expressions_t<T>* = nullptr>
inline auto colwise_max(T&& a) {
  return colwise_max_<as_operation_cl_t<T>>(
      as_operation_cl(std::forward<T>(a)));
}


/**
 * Represents column wise min - reduction in kernel generator expressions.
 * @tparam T type of the expression being reduced
 */
template <typename T>
class colwise_min_ : public colwise_reduction<
                         colwise_min_<T>, T,
                         min_op<typename std::remove_reference_t<T>::Scalar>> {
  using base
      = colwise_reduction<colwise_min_<T>, T,
                          min_op<typename std::remove_reference_t<T>::Scalar>>;
  using base::arguments_;

 public:
  using op = min_op<typename std::remove_reference_t<T>::Scalar>;

  /**
   * Constructor. The accumulator of the reduction starts at `op::init()`.
   * @param a the expression to reduce
   */
  explicit colwise_min_(T&& a) : base(std::forward<T>(a), op::init()) {}

  /**
   * Creates a deep copy of this expression.
   * @return a `colwise_min_` wrapping a deep copy of the argument
   */
  inline auto deep_copy() const {
    auto&& copied_arg = this->template get_arg<0>().deep_copy();
    using copied_arg_t = std::remove_reference_t<decltype(copied_arg)>;
    return colwise_min_<copied_arg_t>(std::move(copied_arg));
  }
};


template <typename T, require_all_kernel_expressions_t<T>* = nullptr>

inline auto colwise_min(T&& a) {

  return colwise_min_<as_operation_cl_t<T>>(

      as_operation_cl(std::forward<T>(a)));

}


namespace internal {

template <typename T>

struct is_colwise_reduction_impl

    : public std::is_base_of<internal::colwise_reduction_base,

                             std::decay_t<T>> {};

template <typename T>

struct is_colwise_reduction_impl<calc_if_<true, T>>

    : public std::is_base_of<internal::colwise_reduction_base,

                             std::decay_t<T>> {};

}  // namespace internal


/**
 * Check whether a type is a column wise reduction. `T` is decayed before it
 * is handed to `internal::is_colwise_reduction_impl`.
 * @tparam T type to check
 */
template <typename T>
using is_colwise_reduction
    = internal::is_colwise_reduction_impl<typename std::decay<T>::type>;


}  // namespace math

}  // namespace stan

#endif

#endif

as_operation_cl.hpp

calc_if.hpp

stan::math::calc_if_
Represents a calc_if in kernel generator expressions.
Definition calc_if.hpp:31

stan::math::colwise_max_::deep_copy
auto deep_copy() const
Creates a deep copy of this expression.
Definition colwise_reduction.hpp:296

stan::math::colwise_max_::colwise_max_
colwise_max_(T &&a)
Definition colwise_reduction.hpp:289

stan::math::colwise_max_
Represents column wise max - reduction in kernel generator expressions.
Definition colwise_reduction.hpp:281

stan::math::colwise_min_::deep_copy
auto deep_copy() const
Creates a deep copy of this expression.
Definition colwise_reduction.hpp:345

stan::math::colwise_min_::colwise_min_
colwise_min_(T &&a)
Definition colwise_reduction.hpp:338

stan::math::colwise_min_
Represents column wise min - reduction in kernel generator expressions.
Definition colwise_reduction.hpp:330

stan::math::colwise_prod_::colwise_prod_
colwise_prod_(T &&a)
Definition colwise_reduction.hpp:240

stan::math::colwise_prod_::deep_copy
auto deep_copy() const
Creates a deep copy of this expression.
Definition colwise_reduction.hpp:247

stan::math::colwise_prod_
Represents column wise product - reduction in kernel generator expressions.
Definition colwise_reduction.hpp:235

stan::math::colwise_reduction::extreme_diagonals
std::pair< int, int > extreme_diagonals() const
Determine indices of extreme sub- and superdiagonals written.
Definition colwise_reduction.hpp:181

stan::math::colwise_reduction::generate
kernel_parts generate(const std::string &row_index_name, const std::string &col_index_name, const bool view_handled, const std::string &var_name_arg) const
Generates kernel code for this and nested expressions.
Definition colwise_reduction.hpp:126

stan::math::colwise_reduction::require_specific_local_size
static constexpr bool require_specific_local_size
Definition colwise_reduction.hpp:69

stan::math::colwise_reduction::get_whole_kernel_parts
kernel_parts get_whole_kernel_parts(std::unordered_map< const void *, const char * > &generated, std::unordered_map< const void *, const char * > &generated_all, name_generator &ng, const std::string &row_index_name, const std::string &col_index_name, const T_result &result) const
Generates kernel code for assigning this expression into result expression.
Definition colwise_reduction.hpp:98

stan::math::colwise_reduction::thread_rows
int thread_rows() const
Number of rows threads need to be launched for.
Definition colwise_reduction.hpp:175

stan::math::colwise_reduction::derived
Derived & derived()
Casts the instance into its derived type.
Definition operation_cl.hpp:119

stan::math::colwise_reduction::Scalar
typename std::remove_reference_t< T >::Scalar Scalar
Definition colwise_reduction.hpp:66

stan::math::colwise_reduction::colwise_reduction
colwise_reduction(T &&a, const std::string &init)
Constructor.
Definition colwise_reduction.hpp:82

stan::math::colwise_reduction::init_
std::string init_
Definition colwise_reduction.hpp:72

stan::math::colwise_reduction::var_name_
std::string var_name_
Definition operation_cl.hpp:111

stan::math::colwise_reduction::rows
int rows() const
Number of rows of a matrix that would be the result of evaluating this expression.
Definition colwise_reduction.hpp:159

stan::math::colwise_reduction::cols
int cols() const
Number of columns of a matrix that would be the result of evaluating this expression.
Definition operation_cl.hpp:381

stan::math::colwise_reduction
Represents a column wise reduction in kernel generator expressions.
Definition colwise_reduction.hpp:64

stan::math::colwise_sum_::colwise_sum_
colwise_sum_(T &&a)
Definition colwise_reduction.hpp:196

stan::math::colwise_sum_::deep_copy
auto deep_copy() const
Creates a deep copy of this expression.
Definition colwise_reduction.hpp:203

stan::math::colwise_sum_
Represents column wise sum - reduction in kernel generator expressions.
Definition colwise_reduction.hpp:191

stan::math::internal::colwise_reduction_base
Definition colwise_reduction.hpp:26

stan::math::name_generator
Unique name generator for variables used in generated kernels.
Definition name_generator.hpp:16

stan::math::opencl_context
The API to access the methods and values in opencl_context_base.
Definition opencl_context.hpp:210

stan::math::operation_cl::derived
Derived & derived()
Casts the instance into its derived type.
Definition operation_cl.hpp:119

stan::math::operation_cl::var_name_
std::string var_name_
Definition operation_cl.hpp:111

stan::math::operation_cl::arguments_
std::tuple< Args... > arguments_
Definition operation_cl.hpp:110

stan::math::operation_cl::cols
int cols() const
Number of columns of a matrix that would be the result of evaluating this expression.
Definition operation_cl.hpp:381

stan::math::operation_cl
Base for all kernel generator operations.
Definition operation_cl.hpp:103

stan::math::opencl_context::base_opts
opencl_context_base::map_base_opts & base_opts() noexcept
Returns a reference to the map of kernel defines.
Definition opencl_context.hpp:369

stan::math::opencl_context::device
std::vector< cl::Device > & device() noexcept
Returns a vector containing the OpenCL device used to create the context.
Definition opencl_context.hpp:393

stan::math::colwise_min
auto colwise_min(T &&a)
Column wise min - reduction of a kernel generator expression.
Definition colwise_reduction.hpp:366

stan::math::colwise_prod
auto colwise_prod(T &&a)
Column wise product - reduction of a kernel generator expression.
Definition colwise_reduction.hpp:268

stan::math::colwise_max
auto colwise_max(T &&a)
Column wise max - reduction of a kernel generator expression.
Definition colwise_reduction.hpp:317

stan::math::colwise_sum
auto colwise_sum(T &&a)
Column wise sum - reduction of a kernel generator expression.
Definition colwise_reduction.hpp:224

stan::math::as_operation_cl
T_operation && as_operation_cl(T_operation &&a)
Converts any valid kernel generator expression into an operation.
Definition as_operation_cl.hpp:31

matrix_cl_view.hpp

name_generator.hpp

stan::math::internal::colwise_reduction_wgs_rows
int colwise_reduction_wgs_rows(int n_rows, int n_cols)
Determine number of work groups in rows direction that will be run for colwise reduction of given size.
Definition colwise_reduction.hpp:35

stan
The lgamma implementation in stan-math is based on either the reentrant safe lgamma_r implementation ...
Definition unit_vector_constrain.hpp:15

std
STL namespace.

opencl_context.hpp

operation_cl.hpp

meta.hpp

rowwise_reduction.hpp

stan::math::internal::is_colwise_reduction_impl
Definition colwise_reduction.hpp:375

stan::math::kernel_parts::body
std::string body
Definition operation_cl.hpp:41

stan::math::kernel_parts::reduction_1d
std::string reduction_1d
Definition operation_cl.hpp:45

stan::math::kernel_parts::args
std::string args
Definition operation_cl.hpp:50

stan::math::kernel_parts::declarations
std::string declarations
Definition operation_cl.hpp:35

stan::math::kernel_parts::initialization
std::string initialization
Definition operation_cl.hpp:36

stan::math::kernel_parts
Parts of an OpenCL kernel, generated by an expression.
Definition operation_cl.hpp:32

stan::math::max_op
Operation for max reduction.
Definition rowwise_reduction.hpp:395

stan::math::min_op
Operation for min reduction.
Definition rowwise_reduction.hpp:462

stan::math::prod_op
Operation for product reduction.
Definition rowwise_reduction.hpp:339

stan::math::sum_op
Operation for sum reduction.
Definition rowwise_reduction.hpp:285

type_str.hpp