math/reduction__2d_8hpp_source.html

#ifndef STAN_MATH_OPENCL_KERNEL_GENERATOR_REDUCTION_2D_HPP

#define STAN_MATH_OPENCL_KERNEL_GENERATOR_REDUCTION_2D_HPP

#ifdef STAN_OPENCL


#include <stan/math/prim/meta.hpp>

#include <stan/math/opencl/opencl_context.hpp>

#include <stan/math/opencl/matrix_cl_view.hpp>

#include <stan/math/opencl/kernel_generator/type_str.hpp>

#include <stan/math/opencl/kernel_generator/name_generator.hpp>

#include <stan/math/opencl/kernel_generator/operation_cl.hpp>

#include <stan/math/opencl/kernel_generator/as_operation_cl.hpp>

#include <stan/math/opencl/kernel_generator/colwise_reduction.hpp>

#include <stan/math/opencl/kernel_generator/rowwise_reduction.hpp>

#include <stan/math/opencl/kernel_generator/calc_if.hpp>

#include <map>

#include <string>

#include <type_traits>

#include <utility>


namespace stan {

namespace math {

namespace internal {

/**
 * Empty tag type. `reduction_2d` publicly derives from it, which lets
 * `is_reduction_2d_impl` recognize two dimensional reductions with
 * `std::is_base_of` without knowing their template arguments.
 */
class reduction_2d_base {};

}  // namespace internal


/**
 * Represents a two dimensional reduction in kernel generator expressions.
 *
 * The OpenCL code emitted by this class keeps one accumulator per work item,
 * combines the accumulators through a `__local` array and finally lets the
 * work item with `lid_i == 0` store element 0 of that array into the result
 * buffer. The size of the matrix of partial results is reported by `rows()`
 * and `cols()`.
 *
 * @tparam Derived derived type; forwarded to `operation_cl` and reachable
 * through `derived()`
 * @tparam T type of the expression being reduced
 * @tparam Operation type with a static `generate(a, b)` member that returns
 * the OpenCL code combining the two operands named `a` and `b`
 */
template <typename Derived, typename T, typename Operation>
class reduction_2d
    : public internal::reduction_2d_base,
      public operation_cl<Derived, typename std::remove_reference_t<T>::Scalar,
                          T> {
 public:
  using Scalar = typename std::remove_reference_t<T>::Scalar;
  using base = operation_cl<Derived, Scalar, T>;
  using base::var_name_;
  // NOTE(review): consumed outside of this file; judging by the name it tells
  // the kernel launcher that this operation needs a particular local size.
  static constexpr bool require_specific_local_size = true;

 protected:
  // OpenCL source code of the value the accumulator is initialized with.
  std::string init_;
  using base::derived;

 public:
  using base::cols;

  /**
   * Constructor.
   * @param a the expression to reduce
   * @param init OpenCL source code of the initial value of the accumulator
   */
  explicit reduction_2d(T&& a, const std::string& init)
      : base(std::forward<T>(a)), init_(init) {}

  /**
   * Generates kernel code for assigning this expression into result
   * expression.
   *
   * The parts of this expression are generated first, then the parts of the
   * left hand side; only the kernel arguments of the left hand side are kept.
   * The store of the partial result is appended to the `reduction_2d` part.
   *
   * @tparam T_result type of the result expression
   * @param[in,out] generated passed on to the kernel part generators
   * @param[in,out] generated_all passed on to the kernel part generators
   * @param ng name generator for this kernel
   * @param row_index_name row index variable name
   * @param col_index_name column index variable name
   * @param result expression into which the result is to be assigned
   * @return part of kernel with code for this and nested expressions
   */
  template <typename T_result>
  kernel_parts get_whole_kernel_parts(
      std::unordered_map<const void*, const char*>& generated,
      std::unordered_map<const void*, const char*>& generated_all,
      name_generator& ng, const std::string& row_index_name,
      const std::string& col_index_name, const T_result& result) const {
    // The order of these two calls is significant: both receive the same
    // bookkeeping maps and name generator.
    kernel_parts kernel = derived().get_kernel_parts(
        generated, generated_all, ng, row_index_name, col_index_name, false);
    const kernel_parts lhs = result.get_kernel_parts_lhs(
        generated, generated_all, ng, row_index_name, col_index_name);
    kernel.args += lhs.args;

    // Only the work item with lid_i == 0 writes the value found in slot 0 of
    // the local array. `wg_id_i`, `wg_id_j` and `n_groups_i` are expected to
    // be defined by the enclosing kernel source.
    std::string store = "if (lid_i == 0) {\n";
    store += result.var_name_;
    store += "_global[wg_id_j * n_groups_i + wg_id_i] = ";
    store += derived().var_name_;
    store += "_local[0];\n"
             "}\n";
    kernel.reduction_2d += store;
    return kernel;
  }

  /**
   * Generates kernel code for this and nested expressions.
   *
   * Three parts are produced: the declarations of the local array and of the
   * accumulator, the body that folds the argument into the accumulator, and
   * the code that combines the local array in steps of
   * `REDUCTION_STEP_SIZE`. `LOCAL_SIZE_`, `REDUCTION_STEP_SIZE`, `lid_i` and
   * `lsize_i` are used by the emitted code but not defined by it.
   *
   * @param row_index_name row index variable name (not used by this operation)
   * @param col_index_name column index variable name (not used by this
   * operation)
   * @param view_handled not used by this operation
   * @param var_name_arg name of the variable that is combined into the
   * accumulator
   * @return part of kernel with code for this expression
   */
  inline kernel_parts generate(const std::string& row_index_name,
                               const std::string& col_index_name,
                               const bool view_handled,
                               const std::string& var_name_arg) const {
    const auto scalar_name = type_str<Scalar>();
    const auto accumulate = Operation::generate(var_name_, var_name_arg);
    const std::string own_slot = var_name_ + "_local[lid_i]";
    const std::string other_slot = var_name_ + "_local[lid_i + step * i]";
    const auto combine = Operation::generate(own_slot, other_slot);

    kernel_parts code;

    // Local scratch array plus the private accumulator.
    code.declarations
        = "__local " + scalar_name + " " + var_name_ + "_local[LOCAL_SIZE_];\n";
    code.declarations += scalar_name + " " + var_name_ + " = " + init_ + ";\n";

    // Fold the next value into the private accumulator.
    code.body = var_name_ + " = " + accumulate + ";\n";

    // Publish the accumulator and combine the local array step by step; the
    // barriers separate the writes of one step from the reads of the next.
    std::string& tree = code.reduction_2d;
    tree = var_name_ + "_local[lid_i] = " + var_name_ + ";\n";
    tree += "barrier(CLK_LOCAL_MEM_FENCE);\n";
    tree += "for (int step = lsize_i / REDUCTION_STEP_SIZE; "
            "step > 0; step /= REDUCTION_STEP_SIZE) {\n";
    tree += "  if (lid_i < step) {\n";
    tree += "    for (int i = 1; i < REDUCTION_STEP_SIZE; i++) {\n";
    tree += "      " + var_name_ + "_local[lid_i] = " + combine + ";\n";
    tree += "    }\n";
    tree += "  }\n";
    tree += "  barrier(CLK_LOCAL_MEM_FENCE);\n";
    tree += "}\n";
    return code;
  }

  /**
   * Number of rows of a matrix that would be the result of evaluating this
   * expression.
   * @return 1 if the argument has no columns, `base::dynamic` if the number
   * of columns of the argument is dynamic, otherwise the value of
   * `internal::colwise_reduction_wgs_rows` for the size of the argument
   */
  inline int rows() const {
    const auto& arg = this->template get_arg<0>();
    const int n_rows = arg.rows();
    const int n_cols = arg.cols();
    if (n_cols == 0) {
      return 1;
    } else if (n_cols == base::dynamic) {
      return base::dynamic;
    } else {
      return internal::colwise_reduction_wgs_rows(n_rows, n_cols);
    }
  }

  /**
   * Number of columns of a matrix that would be the result of evaluating this
   * expression.
   * @return 0 if the argument has no columns or
   * `internal::colwise_reduction_wgs_rows` yields 0, `base::dynamic` if the
   * number of columns of the argument is dynamic, otherwise the number of
   * columns of the argument divided by the value of
   * `internal::colwise_reduction_wgs_rows`, rounded up
   */
  inline int cols() const {
    const auto& arg = this->template get_arg<0>();
    const int n_rows = arg.rows();
    const int n_cols = arg.cols();
    if (n_cols == 0) {
      return 0;
    } else if (n_cols == base::dynamic) {
      return base::dynamic;
    }
    const int groups = internal::colwise_reduction_wgs_rows(n_rows, n_cols);
    if (groups == 0) {
      return 0;
    }
    // Integer division rounded up.
    const int rounded_up = n_cols + groups - 1;
    return rounded_up / groups;
  }

  /**
   * Number of rows threads need to be launched for.
   * @return number of rows of the argument
   */
  inline int thread_rows() const {
    const auto& arg = this->template get_arg<0>();
    return arg.rows();
  }

  /**
   * Number of columns threads need to be launched for.
   * @return number of columns of the argument
   */
  inline int thread_cols() const {
    const auto& arg = this->template get_arg<0>();
    return arg.cols();
  }

  /**
   * Determine indices of extreme sub- and superdiagonals written.
   * @return pair of indices - bottom and top diagonal, computed from `rows()`
   * and `cols()` of this expression
   */
  inline std::pair<int, int> extreme_diagonals() const {
    const int bottom = -rows() + 1;
    const int top = cols() - 1;
    return {bottom, top};
  }
};  // class reduction_2d


/**
 * Represents two dimensional sum - reduction in kernel generator expressions.
 * @tparam T type of the expression being reduced
 */
template <typename T>
class sum_2d_ : public reduction_2d<sum_2d_<T>, T, sum_op> {
  using base = reduction_2d<sum_2d_<T>, T, sum_op>;
  using base::arguments_;

 public:
  /**
   * Constructor. The accumulator of the reduction starts at "0".
   * @param a expression to reduce
   */
  explicit sum_2d_(T&& a) : base(std::forward<T>(a), "0") {}

  /**
   * Creates a deep copy of this expression.
   * @return a `sum_2d_` that owns a deep copy of the argument
   */
  inline auto deep_copy() const {
    auto&& copied_arg = this->template get_arg<0>().deep_copy();
    using copied_arg_t = std::remove_reference_t<decltype(copied_arg)>;
    return sum_2d_<copied_arg_t>(std::move(copied_arg));
  }
};


/**
 * Two dimensional sum - reduction of a kernel generator expression.
 *
 * @tparam T type of input expression; must be a kernel generator expression
 * @param a the expression to reduce
 * @return `sum_2d_` expression wrapping `a` converted with `as_operation_cl`
 */
template <typename T, require_all_kernel_expressions_t<T>* = nullptr>
inline auto sum_2d(T&& a) {
  // Forward `a` exactly once. No copy of the expression is needed to build
  // the reduction, and forwarding an rvalue a second time could hand a
  // moved-from object to the constructor.
  return sum_2d_<as_operation_cl_t<T>>(as_operation_cl(std::forward<T>(a)));
}


/**
 * Represents two dimensional product - reduction in kernel generator
 * expressions.
 * @tparam T type of the expression being reduced
 */
template <typename T>
class prod_2d_ : public reduction_2d<prod_2d_<T>, T, prod_op> {
  using base = reduction_2d<prod_2d_<T>, T, prod_op>;
  using base::arguments_;

 public:
  /**
   * Constructor. The accumulator of the reduction starts at "1".
   * @param a expression to reduce
   */
  explicit prod_2d_(T&& a) : base(std::forward<T>(a), "1") {}

  /**
   * Creates a deep copy of this expression.
   * @return a `prod_2d_` that owns a deep copy of the argument
   */
  inline auto deep_copy() const {
    auto&& copied_arg = this->template get_arg<0>().deep_copy();
    using copied_arg_t = std::remove_reference_t<decltype(copied_arg)>;
    return prod_2d_<copied_arg_t>(std::move(copied_arg));
  }
};


/**
 * Two dimensional product - reduction of a kernel generator expression.
 *
 * @tparam T type of input expression; must be a kernel generator expression
 * @param a the expression to reduce
 * @return `prod_2d_` expression wrapping `a` converted with `as_operation_cl`
 */
template <typename T, require_all_kernel_expressions_t<T>* = nullptr>
inline auto prod_2d(T&& a) {
  // Forward `a` exactly once. No copy of the expression is needed to build
  // the reduction, and forwarding an rvalue a second time could hand a
  // moved-from object to the constructor.
  return prod_2d_<as_operation_cl_t<T>>(as_operation_cl(std::forward<T>(a)));
}


/**
 * Represents two dimensional max - reduction in kernel generator expressions.
 * @tparam T type of the expression being reduced
 */
template <typename T>
class max_2d_
    : public reduction_2d<max_2d_<T>, T,
                          max_op<typename std::remove_reference_t<T>::Scalar>> {
  using base
      = reduction_2d<max_2d_<T>, T,
                     max_op<typename std::remove_reference_t<T>::Scalar>>;
  using base::arguments_;

 public:
  // Reduction operation specialized for the scalar type of the argument.
  using op = max_op<typename std::remove_reference_t<T>::Scalar>;

  /**
   * Constructor. The accumulator of the reduction starts at `op::init()`.
   * @param a expression to reduce
   */
  explicit max_2d_(T&& a) : base(std::forward<T>(a), op::init()) {}

  /**
   * Creates a deep copy of this expression.
   * @return a `max_2d_` that owns a deep copy of the argument
   */
  inline auto deep_copy() const {
    auto&& copied_arg = this->template get_arg<0>().deep_copy();
    using copied_arg_t = std::remove_reference_t<decltype(copied_arg)>;
    return max_2d_<copied_arg_t>(std::move(copied_arg));
  }
};


/**
 * Two dimensional max - reduction of a kernel generator expression.
 *
 * @tparam T type of input expression; must be a kernel generator expression
 * @param a the expression to reduce
 * @return `max_2d_` expression wrapping `a` converted with `as_operation_cl`
 */
template <typename T, require_all_kernel_expressions_t<T>* = nullptr>
inline auto max_2d(T&& a) {
  // Forward `a` exactly once. No copy of the expression is needed to build
  // the reduction, and forwarding an rvalue a second time could hand a
  // moved-from object to the constructor.
  return max_2d_<as_operation_cl_t<T>>(as_operation_cl(std::forward<T>(a)));
}


/**
 * Represents two dimensional min - reduction in kernel generator expressions.
 * @tparam T type of the expression being reduced
 */
template <typename T>
class min_2d_
    : public reduction_2d<min_2d_<T>, T,
                          min_op<typename std::remove_reference_t<T>::Scalar>> {
  using base
      = reduction_2d<min_2d_<T>, T,
                     min_op<typename std::remove_reference_t<T>::Scalar>>;
  using base::arguments_;

 public:
  // Reduction operation specialized for the scalar type of the argument.
  using op = min_op<typename std::remove_reference_t<T>::Scalar>;

  /**
   * Constructor. The accumulator of the reduction starts at `op::init()`.
   * @param a expression to reduce
   */
  explicit min_2d_(T&& a) : base(std::forward<T>(a), op::init()) {}

  /**
   * Creates a deep copy of this expression.
   * @return a `min_2d_` that owns a deep copy of the argument
   */
  inline auto deep_copy() const {
    auto&& copied_arg = this->template get_arg<0>().deep_copy();
    using copied_arg_t = std::remove_reference_t<decltype(copied_arg)>;
    return min_2d_<copied_arg_t>(std::move(copied_arg));
  }
};


template <typename T, require_all_kernel_expressions_t<T>* = nullptr>

inline auto min_2d(T&& a) {

  return min_2d_<as_operation_cl_t<T>>(as_operation_cl(std::forward<T>(a)));

}


namespace internal {

template <typename T>

struct is_reduction_2d_impl

    : public std::is_base_of<internal::reduction_2d_base, std::decay_t<T>> {};

template <typename T>

struct is_reduction_2d_impl<calc_if_<true, T>>

    : public std::is_base_of<internal::reduction_2d_base, std::decay_t<T>> {};

}  // namespace internal


/**
 * Check whether a type is a two dimensional reduction. References and
 * cv-qualifiers are removed from `T` before the check.
 * @tparam T type to check
 */
template <typename T>
using is_reduction_2d
    = internal::is_reduction_2d_impl<typename std::decay<T>::type>;


}  // namespace math

}  // namespace stan

#endif

#endif

as_operation_cl.hpp

calc_if.hpp

stan::math::calc_if_
Represents a calc_if in kernel generator expressions.
Definition calc_if.hpp:31

stan::math::internal::reduction_2d_base
Definition reduction_2d.hpp:27

stan::math::max_2d_::deep_copy
auto deep_copy() const
Creates a deep copy of this expression.
Definition reduction_2d.hpp:302

stan::math::max_2d_::max_2d_
max_2d_(T &&a)
Definition reduction_2d.hpp:296

stan::math::max_2d_
Represents two dimensional max - reduction in kernel generator expressions.
Definition reduction_2d.hpp:288

stan::math::min_2d_::deep_copy
auto deep_copy() const
Creates a deep copy of this expression.
Definition reduction_2d.hpp:349

stan::math::min_2d_::min_2d_
min_2d_(T &&a)
Definition reduction_2d.hpp:343

stan::math::min_2d_
Represents two dimensional min - reduction in kernel generator expressions.
Definition reduction_2d.hpp:335

stan::math::name_generator
Unique name generator for variables used in generated kernels.
Definition name_generator.hpp:16

stan::math::operation_cl::dynamic
static constexpr int dynamic
Definition operation_cl.hpp:136

stan::math::operation_cl::derived
Derived & derived()
Casts the instance into its derived type.
Definition operation_cl.hpp:119

stan::math::operation_cl::var_name_
std::string var_name_
Definition operation_cl.hpp:111

stan::math::operation_cl::arguments_
std::tuple< Args... > arguments_
Definition operation_cl.hpp:110

stan::math::operation_cl::cols
int cols() const
Number of columns of a matrix that would be the result of evaluating this expression.
Definition operation_cl.hpp:381

stan::math::operation_cl
Base for all kernel generator operations.
Definition operation_cl.hpp:103

stan::math::prod_2d_::deep_copy
auto deep_copy() const
Creates a deep copy of this expression.
Definition reduction_2d.hpp:255

stan::math::prod_2d_::prod_2d_
prod_2d_(T &&a)
Definition reduction_2d.hpp:249

stan::math::prod_2d_
Represents two dimensional product - reduction in kernel generator expressions.
Definition reduction_2d.hpp:244

stan::math::reduction_2d::get_whole_kernel_parts
kernel_parts get_whole_kernel_parts(std::unordered_map< const void *, const char * > &generated, std::unordered_map< const void *, const char * > &generated_all, name_generator &ng, const std::string &row_index_name, const std::string &col_index_name, const T_result &result) const
Generates kernel code for assigning this expression into result expression.
Definition reduction_2d.hpp:82

stan::math::reduction_2d::init_
std::string init_
Definition reduction_2d.hpp:56

stan::math::reduction_2d::thread_rows
int thread_rows() const
Number of rows threads need to be launched for.
Definition reduction_2d.hpp:179

stan::math::reduction_2d::extreme_diagonals
std::pair< int, int > extreme_diagonals() const
Determine indices of extreme sub- and superdiagonals written.
Definition reduction_2d.hpp:191

stan::math::reduction_2d::derived
Derived & derived()
Casts the instance into its derived type.
Definition operation_cl.hpp:119

stan::math::reduction_2d::thread_cols
int thread_cols() const
Number of columns threads need to be launched for.
Definition reduction_2d.hpp:185

stan::math::reduction_2d::cols
int cols() const
Number of columns of a matrix that would be the result of evaluating this expression.
Definition reduction_2d.hpp:159

stan::math::reduction_2d::var_name_
std::string var_name_
Definition operation_cl.hpp:111

stan::math::reduction_2d::Scalar
typename std::remove_reference_t< T >::Scalar Scalar
Definition reduction_2d.hpp:50

stan::math::reduction_2d::rows
int rows() const
Number of rows of a matrix that would be the result of evaluating this expression.
Definition reduction_2d.hpp:142

stan::math::reduction_2d::reduction_2d
reduction_2d(T &&a, const std::string &init)
Constructor.
Definition reduction_2d.hpp:66

stan::math::reduction_2d::generate
kernel_parts generate(const std::string &row_index_name, const std::string &col_index_name, const bool view_handled, const std::string &var_name_arg) const
Generates kernel code for this and nested expressions.
Definition reduction_2d.hpp:110

stan::math::reduction_2d::require_specific_local_size
static constexpr bool require_specific_local_size
Definition reduction_2d.hpp:53

stan::math::reduction_2d
Represents a two dimensional reduction in kernel generator expressions.
Definition reduction_2d.hpp:48

stan::math::sum_2d_::sum_2d_
sum_2d_(T &&a)
Definition reduction_2d.hpp:206

stan::math::sum_2d_::deep_copy
auto deep_copy() const
Creates a deep copy of this expression.
Definition reduction_2d.hpp:212

stan::math::sum_2d_
Represents two dimensional sum - reduction in kernel generator expressions.
Definition reduction_2d.hpp:201

colwise_reduction.hpp

stan::math::sum_2d
auto sum_2d(T &&a)
Two dimensional sum - reduction of a kernel generator expression.
Definition reduction_2d.hpp:233

stan::math::min_2d
auto min_2d(T &&a)
Two dimensional min - reduction of a kernel generator expression.
Definition reduction_2d.hpp:370

stan::math::prod_2d
auto prod_2d(T &&a)
Two dimensional product - reduction of a kernel generator expression.
Definition reduction_2d.hpp:276

stan::math::as_operation_cl
T_operation && as_operation_cl(T_operation &&a)
Converts any valid kernel generator expression into an operation.
Definition as_operation_cl.hpp:31

stan::math::max_2d
auto max_2d(T &&a)
Two dimensional max - reduction of a kernel generator expression.
Definition reduction_2d.hpp:323

matrix_cl_view.hpp

name_generator.hpp

stan::math::internal::colwise_reduction_wgs_rows
int colwise_reduction_wgs_rows(int n_rows, int n_cols)
Determine number of work groups in rows direction that will be run for colwise reduction of given siz...
Definition colwise_reduction.hpp:35

stan
The lgamma implementation in stan-math is based on either the reentrant safe lgamma_r implementation ...
Definition unit_vector_constrain.hpp:15

std
STL namespace.

opencl_context.hpp

operation_cl.hpp

meta.hpp

rowwise_reduction.hpp

stan::math::internal::is_reduction_2d_impl
Definition reduction_2d.hpp:377

stan::math::kernel_parts::body
std::string body
Definition operation_cl.hpp:41

stan::math::kernel_parts::reduction_2d
std::string reduction_2d
Definition operation_cl.hpp:48

stan::math::kernel_parts::args
std::string args
Definition operation_cl.hpp:50

stan::math::kernel_parts::declarations
std::string declarations
Definition operation_cl.hpp:35

stan::math::kernel_parts
Parts of an OpenCL kernel, generated by an expression.
Definition operation_cl.hpp:32

stan::math::max_op
Operation for max reduction.
Definition rowwise_reduction.hpp:395

stan::math::min_op
Operation for min reduction.
Definition rowwise_reduction.hpp:462

stan::math::prod_op
Operation for product reduction.
Definition rowwise_reduction.hpp:339

stan::math::sum_op
Operation for sum reduction.
Definition rowwise_reduction.hpp:285

type_str.hpp