math/opencl_2prim_2dirichlet__lpdf_8hpp_source.html

#ifndef STAN_MATH_OPENCL_PRIM_DIRICHLET_LPDF_HPP

#define STAN_MATH_OPENCL_PRIM_DIRICHLET_LPDF_HPP

#ifdef STAN_OPENCL


#include <stan/math/prim/meta.hpp>

#include <stan/math/prim/err.hpp>

#include <stan/math/prim/fun/constants.hpp>

#include <stan/math/prim/fun/elt_divide.hpp>

#include <stan/math/prim/fun/elt_multiply.hpp>

#include <stan/math/prim/fun/lgamma.hpp>

#include <stan/math/opencl/kernel_generator.hpp>

#include <stan/math/prim/functor/partials_propagator.hpp>

#include <stan/math/prim/err/constraint_tolerance.hpp>


namespace stan {

namespace math {


/**
 * Log of the Dirichlet density, evaluated with OpenCL kernel generator
 * expressions.
 *
 * `theta` and `alpha` are treated column by column: the column sums of
 * `theta` are checked to equal 1 and the normalizing term is built from the
 * column sums of `alpha`. Either argument may have fewer columns than the
 * other; it is then passed through `rowwise_optional_broadcast` so that it is
 * combined with every column of the wider argument.
 *
 * Returns 0 without validating values when both arguments are empty or when
 * `propto` removes every term (see `include_summand`).
 *
 * @tparam propto whether terms that do not depend on non-constant arguments
 *   may be dropped
 * @tparam T_prob_cl type of the probabilities (prim or rev kernel expression)
 * @tparam T_prior_size_cl type of the prior sample sizes (prim or rev kernel
 *   expression)
 * @param theta probabilities; must be nonnegative and every column must sum
 *   to 1 within `CONSTRAINT_TOLERANCE`
 * @param alpha prior sample sizes; must be positive
 * @return log density (with partials attached through the partials
 *   propagator when an argument is not constant)
 * @throw the exception raised by `check_consistent_sizes` / `check_cl` when
 *   the sizes are inconsistent or a value constraint above is violated
 */
template <bool propto, typename T_prob_cl, typename T_prior_size_cl,
          require_all_prim_or_rev_kernel_expression_t<
              T_prob_cl, T_prior_size_cl>* = nullptr,
          require_any_not_stan_scalar_t<T_prob_cl, T_prior_size_cl>* = nullptr>
inline return_type_t<T_prob_cl, T_prior_size_cl> dirichlet_lpdf(
    const T_prob_cl& theta, const T_prior_size_cl& alpha) {
  static constexpr const char* function = "dirichlet_lpdf(OpenCL)";

  check_consistent_sizes(function, "probabilities", theta, "prior sample sizes",
                         alpha);

  if (max_size(theta, alpha) == 0) {
    return 0.0;
  }
  if (!include_summand<propto, T_prob_cl, T_prior_size_cl>::value) {
    return 0.0;
  }

  const auto& theta_val = value_of(theta);
  const auto& alpha_val = value_of(alpha);

  // Value checks are only declared here; each one is evaluated when it is
  // assigned to inside a results(...) = expressions(...) statement below.
  auto check_alpha_positive
      = check_cl(function, "prior sample sizes", alpha_val, "positive");
  auto alpha_positive = alpha_val > 0.0;
  auto check_theta_nonnegative
      = check_cl(function, "probabilities", theta_val, "nonnegative");
  auto theta_nonnegative = theta_val >= 0.0;

  auto theta_csum = colwise_sum(theta_val);
  auto alpha_m_1 = rowwise_optional_broadcast(alpha_val - 1.0);
  auto theta_log = rowwise_optional_broadcast(log(theta_val));

  matrix_cl<double> theta_csum_cl;
  matrix_cl<double> alpha_csum_cl;
  matrix_cl<double> lgamma_alpha_csum_cl;
  matrix_cl<double> theta_log_alpha_m_1_sum_cl;
  matrix_cl<double> theta_deriv_cl;
  matrix_cl<double> alpha_deriv_cl;
  if (theta.cols() > alpha.cols()) {
    // alpha is the narrower argument: evaluate everything that depends on
    // alpha alone first, then combine it with theta in a second statement.
    auto alpha_csum = colwise_sum(alpha_val);
    auto lgamma_alpha_csum = colwise_sum(lgamma(alpha_val));
    // Sized up front, presumably so that the alpha_deriv expression below can
    // be constructed even when calc_if skips the digamma evaluation.
    matrix_cl<double> digamma_alpha_cl(alpha.rows(), alpha.cols());
    results(check_alpha_positive, alpha_csum_cl, lgamma_alpha_csum_cl,
            digamma_alpha_cl)
        = expressions(
            alpha_positive,
            calc_if<include_summand<propto, T_prior_size_cl>::value>(
                alpha_csum),
            calc_if<include_summand<propto, T_prior_size_cl>::value>(
                lgamma_alpha_csum),
            calc_if<!is_constant<T_prior_size_cl>::value>(digamma(alpha_val)));

    auto theta_log_alpha_m_1_sum = sum_2d(elt_multiply(theta_log, alpha_m_1));

    auto theta_deriv
        = elt_divide(alpha_m_1, rowwise_optional_broadcast(theta_val));
    auto alpha_deriv = theta_log - rowwise_optional_broadcast(digamma_alpha_cl);

    results(check_theta_nonnegative, theta_csum_cl, theta_log_alpha_m_1_sum_cl,
            theta_deriv_cl, alpha_deriv_cl)
        = expressions(
            theta_nonnegative, theta_csum,
            calc_if<include_summand<propto, T_prob_cl, T_prior_size_cl>::value>(
                theta_log_alpha_m_1_sum),
            calc_if<!is_constant<T_prob_cl>::value>(theta_deriv),
            calc_if<!is_constant<T_prior_size_cl>::value>(alpha_deriv));

    // The column sums may still hold several rows of partial results; keep
    // reducing until a single row is left.
    if (include_summand<propto, T_prior_size_cl>::value) {
      while (alpha_csum_cl.rows() > 1) {
        // Fresh scratch matrices every pass, as in the other reduction loops
        // of this function, so nothing is assigned into a moved-from object.
        matrix_cl<double> alpha_csum_cl2;
        matrix_cl<double> lgamma_alpha_csum_cl2;
        results(alpha_csum_cl2, lgamma_alpha_csum_cl2) = expressions(
            calc_if<include_summand<propto, T_prior_size_cl>::value>(
                colwise_sum(alpha_csum_cl)),
            calc_if<include_summand<propto, T_prior_size_cl>::value>(
                colwise_sum(lgamma_alpha_csum_cl)));
        alpha_csum_cl = std::move(alpha_csum_cl2);
        lgamma_alpha_csum_cl = std::move(lgamma_alpha_csum_cl2);
      }
    }
    while (theta_csum_cl.rows() > 1) {
      theta_csum_cl = colwise_sum(theta_csum_cl).eval();
    }
  } else {
    auto alpha_csum = colwise_sum(alpha_val);
    auto lgamma_alpha_csum = colwise_sum(lgamma(alpha_val));
    if (alpha.cols() > theta.cols()) {
      // theta is the narrower argument: evaluate what depends on theta alone
      // first, then combine it with alpha in a second statement.
      // Sized up front, presumably so that log_theta_bc below can be
      // constructed even when calc_if skips the log evaluation.
      matrix_cl<double> log_theta_cl(theta.rows(), theta.cols());
      results(check_theta_nonnegative, theta_csum_cl, log_theta_cl)
          = expressions(
              theta_nonnegative, theta_csum,
              calc_if<
                  include_summand<propto, T_prob_cl, T_prior_size_cl>::value>(
                  theta_log));

      auto log_theta_bc = rowwise_optional_broadcast(log_theta_cl);
      auto theta_log_alpha_m_1_sum
          = sum_2d(elt_multiply(log_theta_bc, alpha_m_1));

      auto theta_deriv
          = elt_divide(alpha_m_1, rowwise_optional_broadcast(theta_val));
      auto alpha_deriv
          = log_theta_bc - rowwise_optional_broadcast(digamma(alpha_val));

      results(check_alpha_positive, alpha_csum_cl, lgamma_alpha_csum_cl,
              theta_log_alpha_m_1_sum_cl, theta_deriv_cl, alpha_deriv_cl)
          = expressions(
              alpha_positive,
              calc_if<include_summand<propto, T_prior_size_cl>::value>(
                  alpha_csum),
              calc_if<include_summand<propto, T_prior_size_cl>::value>(
                  lgamma_alpha_csum),
              calc_if<
                  include_summand<propto, T_prob_cl, T_prior_size_cl>::value>(
                  theta_log_alpha_m_1_sum),
              calc_if<!is_constant<T_prob_cl>::value>(theta_deriv),
              calc_if<!is_constant<T_prior_size_cl>::value>(alpha_deriv));

      while (alpha_csum_cl.rows() > 1) {
        matrix_cl<double> alpha_csum_cl2;
        matrix_cl<double> lgamma_alpha_csum_cl2;
        results(alpha_csum_cl2, lgamma_alpha_csum_cl2) = expressions(
            calc_if<include_summand<propto, T_prior_size_cl>::value>(
                colwise_sum(alpha_csum_cl)),
            calc_if<include_summand<propto, T_prior_size_cl>::value>(
                colwise_sum(lgamma_alpha_csum_cl)));
        if (include_summand<propto, T_prior_size_cl>::value) {
          alpha_csum_cl = std::move(alpha_csum_cl2);
          lgamma_alpha_csum_cl = std::move(lgamma_alpha_csum_cl2);
        }
      }
      // The column sums of theta were already written to theta_csum_cl by the
      // first statement of this branch; finish the reduction on the host
      // instead of evaluating the colwise_sum expression a second time.
      double theta_sum = sum(from_matrix_cl(theta_csum_cl));
      check_cl(function, "sum of probabilities", theta_sum, "equal to 1")
          = (fabs(theta_sum - 1.0) <= CONSTRAINT_TOLERANCE);
    } else {
      // Same number of columns: everything fits into a single statement.
      auto theta_log_alpha_m_1_sum = sum_2d(elt_multiply(theta_log, alpha_m_1));

      auto theta_deriv
          = elt_divide(alpha_m_1, rowwise_optional_broadcast(theta_val));
      auto alpha_deriv
          = theta_log - rowwise_optional_broadcast(digamma(alpha_val));

      results(check_alpha_positive, check_theta_nonnegative, theta_csum_cl,
              alpha_csum_cl, lgamma_alpha_csum_cl, theta_log_alpha_m_1_sum_cl,
              theta_deriv_cl, alpha_deriv_cl)
          = expressions(
              alpha_positive, theta_nonnegative, theta_csum,
              calc_if<include_summand<propto, T_prior_size_cl>::value>(
                  alpha_csum),
              calc_if<include_summand<propto, T_prior_size_cl>::value>(
                  lgamma_alpha_csum),
              calc_if<
                  include_summand<propto, T_prob_cl, T_prior_size_cl>::value>(
                  theta_log_alpha_m_1_sum),
              calc_if<!is_constant<T_prob_cl>::value>(theta_deriv),
              calc_if<!is_constant<T_prior_size_cl>::value>(alpha_deriv));

      while (theta_csum_cl.rows() > 1) {
        matrix_cl<double> theta_csum_cl2;
        matrix_cl<double> alpha_csum_cl2;
        matrix_cl<double> lgamma_alpha_csum_cl2;
        results(theta_csum_cl2, alpha_csum_cl2, lgamma_alpha_csum_cl2)
            = expressions(
                colwise_sum(theta_csum_cl),
                calc_if<include_summand<propto, T_prior_size_cl>::value>(
                    colwise_sum(alpha_csum_cl)),
                calc_if<include_summand<propto, T_prior_size_cl>::value>(
                    colwise_sum(lgamma_alpha_csum_cl)));
        theta_csum_cl = std::move(theta_csum_cl2);
        if (include_summand<propto, T_prior_size_cl>::value) {
          alpha_csum_cl = std::move(alpha_csum_cl2);
          lgamma_alpha_csum_cl = std::move(lgamma_alpha_csum_cl2);
        }
      }
    }
  }

  // When theta is the narrower argument its sum was already checked on the
  // host inside that branch.
  if (theta.cols() >= alpha.cols()) {
    // transpose is there just because working on col vectors is more efficient
    // than on row vectors with kernel generator
    check_cl(function, "sum of probabilities", transpose(theta_csum_cl),
             "equal to 1")
        = (fabs(transpose(theta_csum_cl) - 1.0) <= CONSTRAINT_TOLERANCE);
  }

  double lp = 0.0;

  if (include_summand<propto, T_prior_size_cl>::value) {
    if (theta.cols() > alpha.cols()) {
      // alpha is shared by every column of theta, so its normalizing term is
      // counted once per theta column.
      lp += (lgamma(from_matrix_cl(alpha_csum_cl)) * theta.cols()
             - from_matrix_cl(lgamma_alpha_csum_cl) * theta.cols())
                .sum();
    } else {
      lp += (lgamma(from_matrix_cl(alpha_csum_cl))
             - from_matrix_cl(lgamma_alpha_csum_cl))
                .sum();
    }
  }
  if (include_summand<propto, T_prob_cl, T_prior_size_cl>::value) {
    lp += from_matrix_cl(theta_log_alpha_m_1_sum_cl).sum();
  }

  auto ops_partials = make_partials_propagator(theta, alpha);

  if (!is_constant<T_prob_cl>::value) {
    if (theta.cols() < alpha.cols()) {
      // theta was broadcast across alpha's columns; collapse the per-column
      // derivatives back to theta's shape.
      partials<0>(ops_partials) = rowwise_sum(theta_deriv_cl);
    } else {
      partials<0>(ops_partials) = std::move(theta_deriv_cl);
    }
  }
  if (!is_constant<T_prior_size_cl>::value) {
    if (theta.cols() > alpha.cols()) {
      // alpha was broadcast across theta's columns; the digamma term is
      // scaled by the number of columns and the rest is summed over them.
      matrix_cl<double> tmp_cl
          = digamma(alpha_csum_cl) * static_cast<double>(theta.cols());
      partials<1>(ops_partials)
          = colwise_broadcast(tmp_cl) + rowwise_sum(alpha_deriv_cl);
    } else {
      matrix_cl<double> tmp_cl = digamma(alpha_csum_cl);
      partials<1>(ops_partials) = colwise_broadcast(tmp_cl) + alpha_deriv_cl;
    }
  }
  return ops_partials.build(lp);
}


}  // namespace math

}  // namespace stan

#endif

#endif

stan::math::matrix_cl::rows
int rows() const
Definition matrix_cl.hpp:64

stan::math::matrix_cl
Represents an arithmetic matrix on the OpenCL device.
Definition matrix_cl.hpp:47

constants.hpp

constraint_tolerance.hpp

stan::math::elt_multiply
elt_multiply_< as_operation_cl_t< T_a >, as_operation_cl_t< T_b > > elt_multiply(T_a &&a, T_b &&b)
Definition binary_operation.hpp:204

stan::math::sum_2d
auto sum_2d(T &&a)
Two dimensional sum - reduction of a kernel generator expression.
Definition reduction_2d.hpp:233

stan::math::check_cl
auto check_cl(const char *function, const char *var_name, T &&y, const char *must_be)
Constructs a check on opencl matrix or expression.
Definition check_cl.hpp:219

stan::math::results
results_cl< T_results... > results(T_results &&... results)
Deduces types for constructing results_cl object.
Definition multi_result_kernel.hpp:668

stan::math::transpose
auto transpose(Arg &&a)
Transposes a kernel generator expression.
Definition transpose.hpp:139

stan::math::elt_divide
elt_divide_< as_operation_cl_t< T_a >, as_operation_cl_t< T_b > > elt_divide(T_a &&a, T_b &&b)
Definition binary_operation.hpp:209

stan::math::rowwise_sum
auto rowwise_sum(T &&a)
Rowwise sum reduction of a kernel generator expression.
Definition rowwise_reduction.hpp:330

stan::math::calc_if
calc_if_< true, as_operation_cl_t< T > > calc_if(T &&a)
Definition calc_if.hpp:121

stan::math::colwise_broadcast
auto colwise_broadcast(T &&a)
Broadcast an expression in the colwise dimension.
Definition broadcast.hpp:163

stan::math::colwise_sum
auto colwise_sum(T &&a)
Column wise sum - reduction of a kernel generator expression.
Definition colwise_reduction.hpp:224

stan::math::expressions
expressions_cl< T_expressions... > expressions(T_expressions &&... expressions)
Deduces types for constructing expressions_cl object.
Definition multi_result_kernel.hpp:289

stan::math::dirichlet_lpdf
return_type_t< T_prob_cl, T_prior_size_cl > dirichlet_lpdf(const T_prob_cl &theta, const T_prior_size_cl &alpha)
The log of the Dirichlet density for the given theta and a vector of prior sample sizes,...
Definition dirichlet_lpdf.hpp:61

stan::math::from_matrix_cl
auto from_matrix_cl(const T &src)
Copies the source matrix that is stored on the OpenCL device to the destination Eigen matrix.
Definition copy.hpp:61

stan::require_all_prim_or_rev_kernel_expression_t
require_all_t< is_prim_or_rev_kernel_expression< std::decay_t< Types > >... > require_all_prim_or_rev_kernel_expression_t
Require type satisfies is_prim_or_rev_kernel_expression.
Definition is_kernel_expression.hpp:148

stan::return_type_t
typename return_type< Ts... >::type return_type_t
Convenience type for the return type of the specified template parameters.
Definition return_type.hpp:218

kernel_generator.hpp

stan::math::value_of
T value_of(const fvar< T > &v)
Return the value of the specified variable.
Definition value_of.hpp:18

stan::math::log
fvar< T > log(const fvar< T > &x)
Definition log.hpp:18

stan::math::rowwise_optional_broadcast
auto rowwise_optional_broadcast(T &&a)
Broadcast an expression in the rowwise dimension if the number of columns equals 1.
Definition optional_broadcast.hpp:223

stan::math::check_consistent_sizes
void check_consistent_sizes(const char *)
Trivial no input case, this function is a no-op.
Definition check_consistent_sizes.hpp:15

stan::math::lgamma
fvar< T > lgamma(const fvar< T > &x)
Return the natural logarithm of the gamma function applied to the specified argument.
Definition lgamma.hpp:21

stan::math::sum
auto sum(const std::vector< T > &m)
Return the sum of the entries of the specified standard vector.
Definition sum.hpp:23

stan::math::max_size
int64_t max_size(const T1 &x1, const Ts &... xs)
Calculate the size of the largest input.
Definition max_size.hpp:20

stan::math::CONSTRAINT_TOLERANCE
const double CONSTRAINT_TOLERANCE
The tolerance for checking arithmetic bounds in rank and in simplexes.
Definition constraint_tolerance.hpp:18

stan::math::make_partials_propagator
auto make_partials_propagator(Ops &&... ops)
Construct a partials_propagator.
Definition partials_propagator.hpp:119

stan::math::digamma
fvar< T > digamma(const fvar< T > &x)
Return the derivative of the log gamma function at the specified argument.
Definition digamma.hpp:23

stan::math::fabs
fvar< T > fabs(const fvar< T > &x)
Definition fabs.hpp:16

stan
The lgamma implementation in stan-math is based on either the reentrant safe lgamma_r implementation ...
Definition unit_vector_constrain.hpp:15

err.hpp

elt_divide.hpp

elt_multiply.hpp

lgamma.hpp

partials_propagator.hpp

meta.hpp

stan::is_constant
Metaprogramming struct to detect whether a given type is constant in the mathematical sense (not the ...
Definition is_constant.hpp:30

stan::math::include_summand
Template metaprogram to calculate whether a summand needs to be included in a proportional (log) prob...
Definition include_summand.hpp:39