math/rowwise__reduction_8hpp_source.html

#ifndef STAN_MATH_OPENCL_KERNEL_GENERATOR_ROWWISE_REDUCTION_HPP

#define STAN_MATH_OPENCL_KERNEL_GENERATOR_ROWWISE_REDUCTION_HPP

#ifdef STAN_OPENCL


#include <stan/math/prim/meta.hpp>

#include <stan/math/opencl/matrix_cl_view.hpp>

#include <stan/math/opencl/kernel_generator/as_operation_cl.hpp>

#include <stan/math/opencl/kernel_generator/broadcast.hpp>

#include <stan/math/opencl/kernel_generator/binary_operation.hpp>

#include <stan/math/opencl/kernel_generator/name_generator.hpp>

#include <stan/math/opencl/kernel_generator/operation_cl.hpp>

#include <stan/math/opencl/kernel_generator/type_str.hpp>

#include <map>

#include <string>

#include <type_traits>

#include <utility>


namespace stan {

namespace math {

namespace internal {


/**
 * Implementation of an optimization for usage of rowwise reduction in
 * matrix-vector multiplication.
 *
 * This primary template is the fallback used for every argument type that has
 * no dedicated specialization: it reports that the optimization cannot be
 * applied and supplies placeholder members so that code referring to them
 * still compiles.
 *
 * @tparam Arg type of the argument of the rowwise reduction
 */
template <typename Arg>
struct matvec_mul_opt {
  // the generic case: no optimization is available
  enum { is_possible = 0 };

  /**
   * Placeholder view; always reports `matrix_cl_view::Entire`.
   * @return `matrix_cl_view::Entire`
   */
  static matrix_cl_view view(const Arg&) { return matrix_cl_view::Entire; }

  /**
   * Placeholder kernel generation; produces no code at all.
   * All parameters are ignored.
   * @return value-initialized (empty) kernel parts
   */
  static kernel_parts get_kernel_parts(
      const Arg& a, std::unordered_map<const void*, const char*>& generated,
      std::unordered_map<const void*, const char*>& generated_all,
      name_generator& name_gen, const std::string& row_index_name,
      const std::string& col_index_name) {
    return kernel_parts{};
  }
};


/**
 * Specialization of the rowwise-reduction optimization for the case where the
 * reduced expression is an elementwise product of a matrix expression and a
 * broadcast vector expression (`elt_multiply_<Mat, broadcast_<VecT, true,
 * false>>`). For this shape the optimization is available.
 *
 * @tparam Mat type of the first (matrix) factor
 * @tparam VecT type of the expression wrapped by the broadcast
 */
template <typename Mat, typename VecT>
struct matvec_mul_opt<elt_multiply_<Mat, broadcast_<VecT, true, false>>> {
  // product with a broadcast vector: the optimization applies
  enum { is_possible = 1 };
  using Arg = elt_multiply_<Mat, broadcast_<VecT, true, false>>;

  /**
   * Return view of the vector, i.e. of the expression wrapped by the
   * broadcast that is the second factor of the product.
   * @param a the elementwise product
   * @return view of the broadcast's argument
   */
  static matrix_cl_view view(const Arg& a) {
    const auto& wrapped_vec = a.template get_arg<1>().template get_arg<0>();
    return wrapped_vec.view();
  }

  /**
   * Generates kernel code for the argument of rowwise reduction. Every nested
   * `get_kernel_parts` / `generate` call is made with `view_handled == true`.
   *
   * Variable names are requested from `name_gen` in a fixed order: the
   * product first, then whatever the matrix factor requests, then the
   * broadcast, then whatever the vector requests.
   *
   * @param mul the elementwise product being reduced
   * @param[in,out] generated expressions already generated in this scope
   * @param[in,out] generated_all forwarded unchanged to nested expressions
   * @param name_gen source of unique variable names
   * @param row_index_name name of the row index variable in the kernel
   * @param col_index_name name of the column index variable in the kernel
   * @return generated kernel parts; empty if `mul` was already generated
   */
  static kernel_parts get_kernel_parts(
      const Arg& mul, std::unordered_map<const void*, const char*>& generated,
      std::unordered_map<const void*, const char*>& generated_all,
      name_generator& name_gen, const std::string& row_index_name,
      const std::string& col_index_name) {
    kernel_parts parts{};
    if (generated.count(&mul) != 0) {
      // this product has already been emitted in the current scope
      return parts;
    }
    mul.var_name_ = name_gen.generate();
    generated[&mul] = "";

    const auto& lhs = mul.template get_arg<0>();
    const auto& bcast = mul.template get_arg<1>();

    parts = lhs.get_kernel_parts(generated, generated_all, name_gen,
                                 row_index_name, col_index_name, true);

    if (generated.count(&bcast) == 0) {
      bcast.var_name_ = name_gen.generate();
      generated[&bcast] = "";

      const auto& inner_vec = bcast.template get_arg<0>();
      // the broadcast rewrites the indices its argument is accessed with,
      // so work on private copies of the index names
      std::string bc_row_index = row_index_name;
      std::string bc_col_index = col_index_name;
      bcast.modify_argument_indices(bc_row_index, bc_col_index);

      parts += inner_vec.get_kernel_parts(generated, generated_all, name_gen,
                                          bc_row_index, bc_col_index, true);
      parts += bcast.generate(row_index_name, col_index_name, true,
                              inner_vec.var_name_);
    }

    parts += mul.generate(row_index_name, col_index_name, true, lhs.var_name_,
                          bcast.var_name_);
    return parts;
  }
};


}  // namespace internal


/**
 * Represents a rowwise reduction in kernel generator expressions.
 *
 * The generated kernel code declares an accumulator initialized to `init_`,
 * loops over the columns of the argument with the loop variable
 * `<var_name_>_j`, and in every iteration folds the argument's value into the
 * accumulator using `operation::generate`.
 *
 * @tparam Derived type of the derived class (CRTP)
 * @tparam T type of the expression being reduced (possibly a reference type)
 * @tparam operation type providing
 * `static std::string generate(const std::string&, const std::string&)`
 * that returns the OpenCL expression combining accumulator and element
 * @tparam PassZero when true, the generated loop bounds are narrowed using
 * the argument's view (and, if `internal::matvec_mul_opt` applies, the view
 * of the broadcast vector), so columns outside the nonzero part are skipped.
 * NOTE(review): presumably only valid for operations where combining with
 * zero leaves the accumulator unchanged - confirm.
 */
template <typename Derived, typename T, typename operation, bool PassZero>
class rowwise_reduction
    : public operation_cl<Derived, typename std::remove_reference_t<T>::Scalar,
                          T> {
 public:
  using T_no_ref = std::remove_reference_t<T>;
  using Scalar = typename T_no_ref::Scalar;
  using base = operation_cl<Derived, Scalar, T>;
  using base::var_name_;

 protected:
  // OpenCL source text of the accumulator's initial value
  std::string init_;

 public:
  using base::rows;

  /**
   * Constructor.
   * @param a the expression to reduce
   * @param init OpenCL source text for the initial value of the accumulator
   */
  explicit rowwise_reduction(T&& a, const std::string& init)
      : base(std::forward<T>(a)), init_(init) {}

  /**
   * Generates kernel code for this and nested expressions.
   *
   * The argument is generated with a fresh `generated` map and with
   * `<var_name_>_j` as its column index, so its code ends up inside the
   * reduction loop. The resulting `body_prefix` is folded into `body`.
   *
   * @param[in,out] generated expressions already generated in this scope
   * @param[in,out] generated_all forwarded unchanged to nested expressions
   * @param name_gen source of unique variable names
   * @param row_index_name name of the row index variable in the kernel
   * @param col_index_name name of the column index variable in the kernel
   * @param view_handled forwarded to the argument (or-ed with `PassZero`)
   * when the matrix-vector optimization is not used
   * @return generated kernel parts; empty if this expression was already
   * generated
   */
  inline kernel_parts get_kernel_parts(
      std::unordered_map<const void*, const char*>& generated,
      std::unordered_map<const void*, const char*>& generated_all,
      name_generator& name_gen, const std::string& row_index_name,
      const std::string& col_index_name, bool view_handled) const {
    kernel_parts res{};
    if (generated.count(this) == 0) {
      this->var_name_ = name_gen.generate();
      generated[this] = "";

      // separate bookkeeping for everything generated inside the loop
      std::unordered_map<const void*, const char*> generated2;
      if (PassZero && internal::matvec_mul_opt<T_no_ref>::is_possible) {
        res = internal::matvec_mul_opt<T_no_ref>::get_kernel_parts(
            this->template get_arg<0>(), generated2, generated_all, name_gen,
            row_index_name, var_name_ + "_j");
      } else {
        res = this->template get_arg<0>().get_kernel_parts(
            generated2, generated_all, name_gen, row_index_name,
            var_name_ + "_j", view_handled || PassZero);
      }
      kernel_parts my_part
          = generate(row_index_name, col_index_name, view_handled,
                     this->template get_arg<0>().var_name_);
      res += my_part;
      // the loop header lives in body_prefix; merge it in front of the body
      // so nothing of this expression is left in the prefix
      res.body = res.body_prefix + res.body;
      res.body_prefix = "";
    }
    return res;
  }

  /**
   * Generates kernel code for this expression.
   *
   * `body_prefix` receives the accumulator declaration, the loop bounds and
   * the loop header; `body` receives the accumulation statement and the
   * closing brace of the loop; `args` receives the kernel arguments
   * `<var>_view`, `<var>_cols` and, when the matrix-vector optimization is
   * used, `<var>_vec_view`.
   *
   * @param row_index_name name of the row index variable in the kernel
   * @param col_index_name not used by this function
   * @param view_handled not used by this function
   * @param var_name_arg name of the variable holding the argument's value
   * @return part of kernel with code for this expression
   */
  inline kernel_parts generate(const std::string& row_index_name,
                               const std::string& col_index_name,
                               const bool view_handled,
                               const std::string& var_name_arg) const {
    kernel_parts res;
    res.body_prefix
        = type_str<Scalar>() + " " + var_name_ + " = " + init_ + ";\n";
    if (PassZero) {
      // start at column 0 only if the view contains the lower triangle,
      // otherwise start at the current row index
      res.body_prefix += "int " + var_name_ + "_start = contains_nonzero("
                         + var_name_ + "_view, LOWER) ? 0 : " + row_index_name
                         + ";\n";
      if (internal::matvec_mul_opt<T_no_ref>::is_possible) {
        // first bound by the product's view ...
        res.body_prefix += "int " + var_name_ + "_end_temp = contains_nonzero("
                           + var_name_ + "_view, UPPER) ? " + var_name_
                           + "_cols : min(" + var_name_ + "_cols, "
                           + row_index_name + " + 1);\n";
        // ... then clamp to at most 1 if the vector's view lacks UPPER
        res.body_prefix += "int " + var_name_ + "_end = contains_nonzero("
                           + var_name_ + "_vec_view, UPPER) ? " + var_name_
                           + "_end_temp : min(1, " + var_name_
                           + "_end_temp);\n";
      } else {
        res.body_prefix += "int " + var_name_ + "_end = contains_nonzero("
                           + var_name_ + "_view, UPPER) ? " + var_name_
                           + "_cols : min(" + var_name_ + "_cols, "
                           + row_index_name + " + 1);\n";
      }
      res.body_prefix += "for(int " + var_name_ + "_j = " + var_name_
                         + "_start; " + var_name_ + "_j < " + var_name_
                         + "_end; " + var_name_ + "_j++){\n";
    } else {
      // no view-based narrowing: iterate over all columns
      res.body_prefix += "for(int " + var_name_ + "_j = 0; " + var_name_
                         + "_j < " + var_name_ + "_cols; " + var_name_
                         + "_j++){\n";
    }
    res.body += var_name_ + " = " + operation::generate(var_name_, var_name_arg)
                + ";\n}\n";
    res.args = "int " + var_name_ + "_view, int " + var_name_ + "_cols, ";
    if (PassZero && internal::matvec_mul_opt<T_no_ref>::is_possible) {
      res.args += "int " + var_name_ + "_vec_view, ";
    }
    return res;
  }

  /**
   * Sets kernel arguments for this and nested expressions.
   *
   * Arguments are set in the order in which `get_kernel_parts` /
   * `generate` declare them: the nested expression's arguments, then the
   * argument's view, its number of columns and, when the matrix-vector
   * optimization is used, the view of the vector.
   *
   * @param[in,out] generated expressions whose arguments were already set
   * @param[in,out] generated_all forwarded unchanged to nested expressions
   * @param kernel kernel to set arguments on
   * @param[in,out] arg_num index of the next argument to set; incremented
   * for every argument set
   */
  inline void set_args(
      std::unordered_map<const void*, const char*>& generated,
      std::unordered_map<const void*, const char*>& generated_all,
      cl::Kernel& kernel, int& arg_num) const {
    if (generated.count(this) == 0) {
      generated[this] = "";
      std::unordered_map<const void*, const char*> generated2;
      this->template get_arg<0>().set_args(generated2, generated_all, kernel,
                                           arg_num);
      kernel.setArg(arg_num++, this->template get_arg<0>().view());
      kernel.setArg(arg_num++, this->template get_arg<0>().cols());
      // Must test the same trait instantiation (T_no_ref, not T) as
      // generate() does when it declares the `_vec_view` kernel argument.
      // With a reference type T the primary template would be selected and
      // the argument would be declared but never set.
      if (PassZero && internal::matvec_mul_opt<T_no_ref>::is_possible) {
        kernel.setArg(arg_num++, internal::matvec_mul_opt<T_no_ref>::view(
                                     this->template get_arg<0>()));
      }
    }
  }

  /**
   * Number of columns of a matrix that would be the result of evaluating this
   * expression.
   * @return always 1
   */
  inline int cols() const { return 1; }

  /**
   * Determine indices of extreme sub- and superdiagonals written.
   * @return pair `{-rows() + 1, cols() - 1}`
   */
  inline std::pair<int, int> extreme_diagonals() const {
    return {-rows() + 1, cols() - 1};
  }
};


/**
 * Operation for sum reduction.
 */
struct sum_op {
  /**
   * Generates sum reduction kernel code.
   * @param a first operand, as source text
   * @param b second operand, as source text
   * @return source text of the expression `a + b`
   */
  inline static std::string generate(const std::string& a,
                                     const std::string& b) {
    std::string expr(a);
    expr += " + ";
    expr += b;
    return expr;
  }
};


/**
 * Represents rowwise sum reduction in kernel generator expressions.
 * The accumulator starts at "0" and elements are combined with `sum_op`.
 * @tparam T type of the expression being reduced
 */
template <typename T>
class rowwise_sum_
    : public rowwise_reduction<rowwise_sum_<T>, T, sum_op, true> {
  using base = rowwise_reduction<rowwise_sum_<T>, T, sum_op, true>;
  using base::arguments_;

 public:
  /**
   * Constructor.
   * @param a the expression to reduce
   */
  explicit rowwise_sum_(T&& a) : base(std::forward<T>(a), "0") {}

  /**
   * Creates a deep copy of this expression.
   * @return a new rowwise sum over a deep copy of the argument
   */
  inline auto deep_copy() const {
    auto&& copied = this->template get_arg<0>().deep_copy();
    using copied_t = std::remove_reference_t<decltype(copied)>;
    return rowwise_sum_<copied_t>(std::move(copied));
  }
};


/**
 * Rowwise sum reduction of a kernel generator expression.
 * The argument is converted to an operation and deep-copied before being
 * wrapped.
 * @tparam T type of the input expression
 * @param a the expression to reduce
 * @return rowwise sum expression over a deep copy of `a`
 */
template <typename T,
          typename = require_all_kernel_expressions_and_none_scalar_t<T>>
inline auto rowwise_sum(T&& a) {
  auto&& operand = as_operation_cl(std::forward<T>(a)).deep_copy();
  using operand_t = std::remove_reference_t<decltype(operand)>;
  return rowwise_sum_<operand_t>(std::move(operand));
}


/**
 * Operation for product reduction.
 */
struct prod_op {
  /**
   * Generates prod reduction kernel code.
   * @param a first operand, as source text
   * @param b second operand, as source text
   * @return source text of the expression `a * b`
   */
  inline static std::string generate(const std::string& a,
                                     const std::string& b) {
    std::string expr(a);
    expr.append(" * ").append(b);
    return expr;
  }
};


/**
 * Represents rowwise product reduction in kernel generator expressions.
 * The accumulator starts at "1" and elements are combined with `prod_op`.
 * @tparam T type of the expression being reduced
 */
template <typename T>
class rowwise_prod_
    : public rowwise_reduction<rowwise_prod_<T>, T, prod_op, false> {
  using base = rowwise_reduction<rowwise_prod_<T>, T, prod_op, false>;
  using base::arguments_;

 public:
  /**
   * Constructor.
   * @param a the expression to reduce
   */
  explicit rowwise_prod_(T&& a) : base(std::forward<T>(a), "1") {}

  /**
   * Creates a deep copy of this expression.
   * @return a new rowwise product over a deep copy of the argument
   */
  inline auto deep_copy() const {
    auto&& copied = this->template get_arg<0>().deep_copy();
    using copied_t = std::remove_reference_t<decltype(copied)>;
    return rowwise_prod_<copied_t>(std::move(copied));
  }
};


/**
 * Rowwise product reduction of a kernel generator expression.
 * The argument is converted to an operation and deep-copied before being
 * wrapped.
 * @tparam T type of the input expression
 * @param a the expression to reduce
 * @return rowwise product expression over a deep copy of `a`
 */
template <typename T,
          typename = require_all_kernel_expressions_and_none_scalar_t<T>>
inline auto rowwise_prod(T&& a) {
  auto&& operand = as_operation_cl(std::forward<T>(a)).deep_copy();
  using operand_t = std::remove_reference_t<decltype(operand)>;
  return rowwise_prod_<operand_t>(std::move(operand));
}


/**
 * Operation for max reduction.
 * @tparam T scalar type of the values being reduced
 */
template <typename T>
struct max_op {
  /**
   * Generates max reduction kernel code.
   * @param a first operand, as source text
   * @param b second operand, as source text
   * @return a call to `fmax` for floating point `T`, to `max` otherwise
   */
  inline static std::string generate(const std::string& a,
                                     const std::string& b) {
    const std::string call_args = "(" + a + ", " + b + ")";
    return (std::is_floating_point<T>::value ? "fmax" : "max") + call_args;
  }

  /**
   * Source text of the initial accumulator value for the reduction.
   * @return "-INFINITY" for floating point `T`, "INT_MIN" otherwise
   */
  inline static std::string init() {
    return std::is_floating_point<T>::value ? "-INFINITY" : "INT_MIN";
  }
};


/**
 * Represents rowwise max reduction in kernel generator expressions.
 * The accumulator starts at `max_op<Scalar>::init()` and elements are
 * combined with `max_op<Scalar>`, where `Scalar` is the scalar type of `T`.
 * @tparam T type of the expression being reduced
 */
template <typename T>
class rowwise_max_
    : public rowwise_reduction<
          rowwise_max_<T>, T,
          max_op<typename std::remove_reference_t<T>::Scalar>, false> {
  using op = max_op<typename std::remove_reference_t<T>::Scalar>;
  using base = rowwise_reduction<rowwise_max_<T>, T, op, false>;
  using base::arguments_;

 public:
  /**
   * Constructor.
   * @param a the expression to reduce
   */
  explicit rowwise_max_(T&& a) : base(std::forward<T>(a), op::init()) {}

  /**
   * Creates a deep copy of this expression.
   * @return a new rowwise max over a deep copy of the argument
   */
  inline auto deep_copy() const {
    auto&& copied = this->template get_arg<0>().deep_copy();
    using copied_t = std::remove_reference_t<decltype(copied)>;
    return rowwise_max_<copied_t>(std::move(copied));
  }
};


/**
 * Rowwise max reduction of a kernel generator expression.
 * The argument is converted to an operation and deep-copied before being
 * wrapped.
 * @tparam T type of the input expression
 * @param a the expression to reduce
 * @return rowwise max expression over a deep copy of `a`
 */
template <typename T,
          typename = require_all_kernel_expressions_and_none_scalar_t<T>>
inline auto rowwise_max(T&& a) {
  auto&& operand = as_operation_cl(std::forward<T>(a)).deep_copy();
  using operand_t = std::remove_reference_t<decltype(operand)>;
  return rowwise_max_<operand_t>(std::move(operand));
}

/**
 * Operation for min reduction.
 * @tparam T scalar type of the values being reduced
 */
template <typename T>
struct min_op {
  /**
   * Generates min reduction kernel code.
   * @param a first operand, as source text
   * @param b second operand, as source text
   * @return a call to `fmin` for floating point `T`, to `min` otherwise
   */
  inline static std::string generate(const std::string& a,
                                     const std::string& b) {
    const std::string call_args = "(" + a + ", " + b + ")";
    return (std::is_floating_point<T>::value ? "fmin" : "min") + call_args;
  }

  /**
   * Source text of the initial accumulator value for the reduction.
   * @return "INFINITY" for floating point `T`, "INT_MAX" otherwise
   */
  inline static std::string init() {
    return std::is_floating_point<T>::value ? "INFINITY" : "INT_MAX";
  }
};


/**
 * Represents rowwise min reduction in kernel generator expressions.
 * The accumulator starts at `min_op<Scalar>::init()` and elements are
 * combined with `min_op<Scalar>`, where `Scalar` is the scalar type of `T`.
 * @tparam T type of the expression being reduced
 */
template <typename T>
class rowwise_min_
    : public rowwise_reduction<
          rowwise_min_<T>, T,
          min_op<typename std::remove_reference_t<T>::Scalar>, false> {
  using op = min_op<typename std::remove_reference_t<T>::Scalar>;
  using base = rowwise_reduction<rowwise_min_<T>, T, op, false>;
  using base::arguments_;

 public:
  /**
   * Constructor.
   * @param a the expression to reduce
   */
  explicit rowwise_min_(T&& a) : base(std::forward<T>(a), op::init()) {}

  /**
   * Creates a deep copy of this expression.
   * @return a new rowwise min over a deep copy of the argument
   */
  inline auto deep_copy() const {
    auto&& copied = this->template get_arg<0>().deep_copy();
    using copied_t = std::remove_reference_t<decltype(copied)>;
    return rowwise_min_<copied_t>(std::move(copied));
  }
};


/**
 * Rowwise min reduction of a kernel generator expression.
 * The argument is converted to an operation and deep-copied before being
 * wrapped.
 * @tparam T type of the input expression
 * @param a the expression to reduce
 * @return rowwise min expression over a deep copy of `a`
 */
template <typename T,
          typename = require_all_kernel_expressions_and_none_scalar_t<T>>
inline auto rowwise_min(T&& a) {
  auto&& operand = as_operation_cl(std::forward<T>(a)).deep_copy();
  using operand_t = std::remove_reference_t<decltype(operand)>;
  return rowwise_min_<operand_t>(std::move(operand));
}

}  // namespace math

}  // namespace stan


#endif

#endif

as_operation_cl.hpp

binary_operation.hpp

broadcast.hpp

stan::math::binary_operation::generate
kernel_parts generate(const std::string &row_index_name, const std::string &col_index_name, const bool view_handled, const std::string &var_name_a, const std::string &var_name_b) const
Generates kernel code for this expression.
Definition binary_operation.hpp:74

stan::math::binary_operation::var_name_
std::string var_name_
Definition operation_cl.hpp:111

stan::math::broadcast_
Represents a broadcasting operation in kernel generator expressions.
Definition broadcast.hpp:33

stan::math::elt_multiply_
Definition binary_operation.hpp:204

stan::math::name_generator::generate
std::string generate()
Generates a unique variable name.
Definition name_generator.hpp:22

stan::math::name_generator
Unique name generator for variables used in generated kernels.
Definition name_generator.hpp:16

stan::math::operation_cl::view
matrix_cl_view view() const
View of a matrix that would be the result of evaluating this expression.
Definition operation_cl.hpp:435

stan::math::operation_cl::var_name_
std::string var_name_
Definition operation_cl.hpp:111

stan::math::operation_cl::arguments_
std::tuple< Args... > arguments_
Definition operation_cl.hpp:110

stan::math::operation_cl::rows
int rows() const
Number of rows of a matrix that would be the result of evaluating this expression.
Definition operation_cl.hpp:367

stan::math::operation_cl
Base for all kernel generator operations.
Definition operation_cl.hpp:103

stan::math::rowwise_max_::rowwise_max_
rowwise_max_(T &&a)
Definition rowwise_reduction.hpp:432

stan::math::rowwise_max_::deep_copy
auto deep_copy() const
Creates a deep copy of this expression.
Definition rowwise_reduction.hpp:437

stan::math::rowwise_max_::op
max_op< typename std::remove_reference_t< T >::Scalar > op
Definition rowwise_reduction.hpp:427

stan::math::rowwise_max_
Represents rowwise max reduction in kernel generator expressions.
Definition rowwise_reduction.hpp:426

stan::math::rowwise_min_::op
min_op< typename std::remove_reference_t< T >::Scalar > op
Definition rowwise_reduction.hpp:494

stan::math::rowwise_min_::rowwise_min_
rowwise_min_(T &&a)
Definition rowwise_reduction.hpp:499

stan::math::rowwise_min_::deep_copy
auto deep_copy() const
Creates a deep copy of this expression.
Definition rowwise_reduction.hpp:504

stan::math::rowwise_min_
Represents rowwise min reduction in kernel generator expressions.
Definition rowwise_reduction.hpp:493

stan::math::rowwise_prod_::rowwise_prod_
rowwise_prod_(T &&a)
Definition rowwise_reduction.hpp:363

stan::math::rowwise_prod_::deep_copy
auto deep_copy() const
Creates a deep copy of this expression.
Definition rowwise_reduction.hpp:369

stan::math::rowwise_prod_
Represents rowwise product reduction in kernel generator expressions.
Definition rowwise_reduction.hpp:358

stan::math::rowwise_reduction::init_
std::string init_
Definition rowwise_reduction.hpp:132

stan::math::rowwise_reduction::generate
kernel_parts generate(const std::string &row_index_name, const std::string &col_index_name, const bool view_handled, const std::string &var_name_arg) const
Generates kernel code for this expression.
Definition rowwise_reduction.hpp:195

stan::math::rowwise_reduction::set_args
void set_args(std::unordered_map< const void *, const char * > &generated, std::unordered_map< const void *, const char * > &generated_all, cl::Kernel &kernel, int &arg_num) const
Sets kernel arguments for this and nested expressions.
Definition rowwise_reduction.hpp:248

stan::math::rowwise_reduction::extreme_diagonals
std::pair< int, int > extreme_diagonals() const
Determine indices of extreme sub- and superdiagonals written.
Definition rowwise_reduction.hpp:277

stan::math::rowwise_reduction::get_kernel_parts
kernel_parts get_kernel_parts(std::unordered_map< const void *, const char * > &generated, std::unordered_map< const void *, const char * > &generated_all, name_generator &name_gen, const std::string &row_index_name, const std::string &col_index_name, bool view_handled) const
Generates kernel code for this and nested expressions.
Definition rowwise_reduction.hpp:156

stan::math::rowwise_reduction::Scalar
typename T_no_ref::Scalar Scalar
Definition rowwise_reduction.hpp:127

stan::math::rowwise_reduction::var_name_
std::string var_name_
Definition operation_cl.hpp:111

stan::math::rowwise_reduction::T_no_ref
std::remove_reference_t< T > T_no_ref
Definition rowwise_reduction.hpp:126

stan::math::rowwise_reduction::rowwise_reduction
rowwise_reduction(T &&a, const std::string &init)
Constructor.
Definition rowwise_reduction.hpp:141

stan::math::rowwise_reduction::cols
int cols() const
Number of columns of a matrix that would be the result of evaluating this expression.
Definition rowwise_reduction.hpp:271

stan::math::rowwise_reduction::rows
int rows() const
Number of rows of a matrix that would be the result of evaluating this expression.
Definition operation_cl.hpp:367

stan::math::rowwise_reduction
Represents a rowwise reduction in kernel generator expressions.
Definition rowwise_reduction.hpp:124

stan::math::rowwise_sum_::deep_copy
auto deep_copy() const
Creates a deep copy of this expression.
Definition rowwise_reduction.hpp:315

stan::math::rowwise_sum_::rowwise_sum_
rowwise_sum_(T &&a)
Definition rowwise_reduction.hpp:309

stan::math::rowwise_sum_
Represents rowwise sum reduction in kernel generator expressions.
Definition rowwise_reduction.hpp:304

stan::math::rowwise_max
auto rowwise_max(T &&a)
Rowwise max reduction of a kernel generator expression.
Definition rowwise_reduction.hpp:452

stan::math::rowwise_min
auto rowwise_min(T &&a)
Rowwise min reduction of a kernel generator expression.
Definition rowwise_reduction.hpp:519

stan::math::rowwise_sum
auto rowwise_sum(T &&a)
Rowwise sum reduction of a kernel generator expression.
Definition rowwise_reduction.hpp:330

stan::math::rowwise_prod
auto rowwise_prod(T &&a)
Rowwise product reduction of a kernel generator expression.
Definition rowwise_reduction.hpp:384

stan::math::broadcast
auto broadcast(T &&a)
Broadcast an expression in specified dimension(s).
Definition broadcast.hpp:125

stan::math::as_operation_cl
T_operation && as_operation_cl(T_operation &&a)
Converts any valid kernel generator expression into an operation.
Definition as_operation_cl.hpp:31

stan::require_all_kernel_expressions_and_none_scalar_t
require_all_t< is_kernel_expression_and_not_scalar< Types >... > require_all_kernel_expressions_and_none_scalar_t
Enables a template if all given types are non-scalar types that are a valid kernel generator expressi...
Definition is_kernel_expression.hpp:58

matrix_cl_view.hpp

name_generator.hpp

stan::math::matrix_cl_view
matrix_cl_view
Definition matrix_cl_view.hpp:11

stan::math::matrix_cl_view::Entire
@ Entire

stan
The lgamma implementation in stan-math is based on either the reentrant safe lgamma_r implementation ...
Definition unit_vector_constrain.hpp:15

std
STL namespace.

operation_cl.hpp

meta.hpp

stan::math::internal::matvec_mul_opt< elt_multiply_< Mat, broadcast_< VecT, true, false > > >::view
static matrix_cl_view view(const Arg &a)
Return view of the vector.
Definition rowwise_reduction.hpp:55

stan::math::internal::matvec_mul_opt< elt_multiply_< Mat, broadcast_< VecT, true, false > > >::get_kernel_parts
static kernel_parts get_kernel_parts(const Arg &mul, std::unordered_map< const void *, const char * > &generated, std::unordered_map< const void *, const char * > &generated_all, name_generator &name_gen, const std::string &row_index_name, const std::string &col_index_name)
Generates kernel code for the argument of rowwise reduction, applying the optimization - ignoring the...
Definition rowwise_reduction.hpp:73

stan::math::internal::matvec_mul_opt::is_possible
@ is_possible
Definition rowwise_reduction.hpp:29

stan::math::internal::matvec_mul_opt::view
static matrix_cl_view view(const Arg &)
Definition rowwise_reduction.hpp:31

stan::math::internal::matvec_mul_opt::get_kernel_parts
static kernel_parts get_kernel_parts(const Arg &a, std::unordered_map< const void *, const char * > &generated, std::unordered_map< const void *, const char * > &generated_all, name_generator &name_gen, const std::string &row_index_name, const std::string &col_index_name)
Definition rowwise_reduction.hpp:33

stan::math::internal::matvec_mul_opt
Implementation of an optimization for usage of rowwise reduction in matrix-vector multiplication.
Definition rowwise_reduction.hpp:27

stan::math::kernel_parts::body
std::string body
Definition operation_cl.hpp:41

stan::math::kernel_parts::body_prefix
std::string body_prefix
Definition operation_cl.hpp:39

stan::math::kernel_parts::args
std::string args
Definition operation_cl.hpp:50

stan::math::kernel_parts
Parts of an OpenCL kernel, generated by an expression.
Definition operation_cl.hpp:32

stan::math::max_op::generate
static std::string generate(const std::string &a, const std::string &b)
Generates max reduction kernel code.
Definition rowwise_reduction.hpp:402

stan::math::max_op::init
static std::string init()
Definition rowwise_reduction.hpp:410

stan::math::max_op
Operation for max reduction.
Definition rowwise_reduction.hpp:395

stan::math::min_op::generate
static std::string generate(const std::string &a, const std::string &b)
Generates min reduction kernel code.
Definition rowwise_reduction.hpp:469

stan::math::min_op::init
static std::string init()
Definition rowwise_reduction.hpp:477

stan::math::min_op
Operation for min reduction.
Definition rowwise_reduction.hpp:462

stan::math::prod_op::generate
static std::string generate(const std::string &a, const std::string &b)
Generates prod reduction kernel code.
Definition rowwise_reduction.hpp:346

stan::math::prod_op
Operation for product reduction.
Definition rowwise_reduction.hpp:339

stan::math::sum_op::generate
static std::string generate(const std::string &a, const std::string &b)
Generates sum reduction kernel code.
Definition rowwise_reduction.hpp:292

stan::math::sum_op
Operation for sum reduction.
Definition rowwise_reduction.hpp:285

type_str.hpp