categorical_logit_glm_lpmf.hpp
#ifndef STAN_MATH_PRIM_PROB_CATEGORICAL_LOGIT_GLM_LPMF_HPP
#define STAN_MATH_PRIM_PROB_CATEGORICAL_LOGIT_GLM_LPMF_HPP

#include <stan/math/prim/meta.hpp>
#include <stan/math/prim/err.hpp>
#include <stan/math/prim/fun/as_column_vector_or_scalar.hpp>
#include <stan/math/prim/fun/scalar_seq_view.hpp>
#include <stan/math/prim/fun/size.hpp>
#include <stan/math/prim/fun/size_zero.hpp>
#include <stan/math/prim/fun/to_ref.hpp>
#include <stan/math/prim/fun/value_of.hpp>
#include <stan/math/prim/functor/partials_propagator.hpp>
#include <cmath>

namespace stan {
namespace math {
/**
 * Returns the log PMF of the Generalized Linear Model (GLM) with categorical
 * distribution and logit (softmax) link function.
 */
template <bool propto, typename T_y, typename T_x, typename T_alpha,
          typename T_beta, require_matrix_t<T_x>* = nullptr,
          require_col_vector_t<T_alpha>* = nullptr,
          require_matrix_t<T_beta>* = nullptr>
return_type_t<T_x, T_alpha, T_beta> categorical_logit_glm_lpmf(
    const T_y& y, const T_x& x, const T_alpha& alpha, const T_beta& beta) {
  using T_partials_return = partials_return_t<T_x, T_alpha, T_beta>;
  using Eigen::Array;
  using Eigen::Dynamic;
  using Eigen::Matrix;
  using std::exp;
  using std::isfinite;
  using std::log;
  using T_y_ref = ref_type_t<T_y>;
  using T_x_ref = ref_type_if_not_constant_t<T_x>;
  using T_alpha_ref = ref_type_if_not_constant_t<T_alpha>;
  using T_beta_ref = ref_type_if_not_constant_t<T_beta>;
  using T_beta_partials = partials_type_t<scalar_type_t<T_beta>>;
  constexpr int T_x_rows = T_x::RowsAtCompileTime;

  const size_t N_instances = T_x_rows == 1 ? stan::math::size(y) : x.rows();
  const size_t N_attributes = x.cols();
  const size_t N_classes = beta.cols();

  static constexpr const char* function = "categorical_logit_glm_lpmf";
  check_consistent_size(function, "Vector of dependent variables", y,
                        N_instances);
  check_consistent_size(function, "Intercept vector", alpha, N_classes);
  check_size_match(function, "x.cols()", N_attributes, "beta.rows()",
                   beta.rows());
  if (size_zero(y) || N_classes == 1) {
    return 0;
  }
  T_y_ref y_ref = y;
  check_bounded(function, "categorical outcome out of support", y_ref, 1,
                N_classes);

  if (!include_summand<propto, T_x, T_alpha, T_beta>::value) {
    return 0;
  }

  T_x_ref x_ref = x;
  T_alpha_ref alpha_ref = alpha;
  T_beta_ref beta_ref = beta;

  const auto& x_val = to_ref_if<!is_constant<T_beta>::value>(value_of(x_ref));
  const auto& alpha_val = value_of(alpha_ref);
  const auto& beta_val
      = to_ref_if<!is_constant<T_x>::value>(value_of(beta_ref));

  const auto& alpha_val_vec = as_column_vector_or_scalar(alpha_val).transpose();

  Array<T_partials_return, T_x_rows, Dynamic> lin
      = (x_val * beta_val).rowwise() + alpha_val_vec;
  Array<T_partials_return, T_x_rows, 1> lin_max
      = lin.rowwise().maxCoeff();  // This is used to prevent overflow when
                                   // calculating softmax/log_sum_exp and
                                   // similar expressions
  Array<T_partials_return, T_x_rows, Dynamic> exp_lin
      = exp(lin.colwise() - lin_max);
  Array<T_partials_return, T_x_rows, 1> inv_sum_exp_lin
      = 1 / exp_lin.rowwise().sum();

  T_partials_return logp = log(inv_sum_exp_lin).sum() - lin_max.sum();
  if (T_x_rows == 1) {
    logp *= N_instances;
  }
  scalar_seq_view<T_y_ref> y_seq(y_ref);
  for (int i = 0; i < N_instances; i++) {
    if (T_x_rows == 1) {
      logp += lin(0, y_seq[i] - 1);
    } else {
      logp += lin(i, y_seq[i] - 1);
    }
  }
  // TODO(Tadej) maybe we can replace previous block with the following line
  // when we have newer Eigen: T_partials_return logp =
  // lin(Eigen::all, y - 1).sum() + log(inv_sum_exp_lin).sum() - lin_max.sum();

  if (!isfinite(logp)) {
    check_finite(function, "Weight vector", beta_ref);
    check_finite(function, "Intercept", alpha_ref);
    check_finite(function, "Matrix of independent variables", x_ref);
  }

  // Compute the derivatives.
  auto ops_partials = make_partials_propagator(x_ref, alpha_ref, beta_ref);
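
  // Gradient with respect to x: for instance i this is beta[:, y_i - 1] minus
  // the softmax-weighted combination of the columns of beta.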
  if (!is_constant_all<T_x>::value) {
    if (T_x_rows == 1) {
      Array<T_beta_partials, 1, Dynamic> beta_y = beta_val.col(y_seq[0] - 1);
      for (int i = 1; i < N_instances; i++) {
        beta_y += beta_val.col(y_seq[i] - 1).array();
      }
      edge<0>(ops_partials).partials_
          = beta_y
            - (exp_lin.matrix() * beta_val.transpose()).array().colwise()
                  * inv_sum_exp_lin * N_instances;
    } else {
      Array<T_beta_partials, Dynamic, Dynamic> beta_y(N_instances,
                                                      N_attributes);
      for (int i = 0; i < N_instances; i++) {
        beta_y.row(i) = beta_val.col(y_seq[i] - 1);
      }
      edge<0>(ops_partials).partials_
          = beta_y
            - (exp_lin.matrix() * beta_val.transpose()).array().colwise()
                  * inv_sum_exp_lin;
      // TODO(Tadej) maybe we can replace previous block with the following
      // line when we have newer Eigen: partials<0>(ops_partials) = beta_val(y
      // - 1, all) - (exp_lin.matrix() * beta.transpose()).colwise() *
      // inv_sum_exp_lin;
    }
  }
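
  // Gradients with respect to alpha and beta share -softmax(lin): for alpha the
  // contribution is the indicator of the observed class minus the softmax
  // probability; for beta it is x^T times that same difference.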
  if (!is_constant_all<T_alpha, T_beta>::value) {
    Array<T_partials_return, T_x_rows, Dynamic> neg_softmax_lin
        = exp_lin.colwise() * -inv_sum_exp_lin;
    if (!is_constant_all<T_alpha>::value) {
      if (T_x_rows == 1) {
        edge<1>(ops_partials).partials_
            = neg_softmax_lin.colwise().sum() * N_instances;
      } else {
        partials<1>(ops_partials) = neg_softmax_lin.colwise().sum();
      }
      for (int i = 0; i < N_instances; i++) {
        partials<1>(ops_partials)[y_seq[i] - 1] += 1;
      }
    }
    if (!is_constant_all<T_beta>::value) {
      Matrix<T_partials_return, Dynamic, Dynamic> beta_derivative
          = x_val.transpose().template cast<T_partials_return>()
            * neg_softmax_lin.matrix();
      if (T_x_rows == 1) {
        beta_derivative *= N_instances;
      }

      for (int i = 0; i < N_instances; i++) {
        if (T_x_rows == 1) {
          beta_derivative.col(y_seq[i] - 1) += x_val;
        } else {
          beta_derivative.col(y_seq[i] - 1) += x_val.row(i);
        }
      }
      // TODO(Tadej) maybe we can replace previous loop with the following
      // line when we have newer Eigen: partials<2>(ops_partials)(Eigen::all,
      // y - 1) += x_val.colwise.sum().transpose();

      partials<2>(ops_partials) = std::move(beta_derivative);
    }
  }

  return ops_partials.build(logp);
}

template <typename T_y, typename T_x, typename T_alpha, typename T_beta>
return_type_t<T_x, T_alpha, T_beta> categorical_logit_glm_lpmf(
    const T_y& y, const T_x& x, const T_alpha& alpha, const T_beta& beta) {
  return categorical_logit_glm_lpmf<false>(y, x, alpha, beta);
}

}  // namespace math
}  // namespace stan

#endif
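For reference, the quantity accumulated in logp above is the categorical-logit (softmax) log likelihood evaluated at the observed classes. A sketch in the code's notation, where eta is the linear predictor built from x, beta, and alpha:

\[
\eta = x\,\beta + \mathbf{1}\,\alpha^{\top}, \qquad
\log p(y \mid x, \alpha, \beta)
  = \sum_{i=1}^{N} \Big( \eta_{i,\,y_i} - \log \sum_{c=1}^{C} \exp(\eta_{i,c}) \Big).
\]

The row-wise maximum lin_max is subtracted before exponentiating because \(\log \sum_{c} \exp(\eta_{i,c}) = m_i + \log \sum_{c} \exp(\eta_{i,c} - m_i)\) for any \(m_i\); choosing \(m_i = \max_c \eta_{i,c}\) keeps exp_lin from overflowing. When x has a single compile-time row, the same row of eta is shared by all N instances, which is why the normalizing terms are scaled by N_instances.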
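A minimal calling sketch follows. It is not part of the header: the top-level <stan/math.hpp> include and the example data are illustrative assumptions, and building it requires the usual Stan Math dependencies (Eigen, Boost, TBB) on the include path.

#include <stan/math.hpp>  // assumed top-level include; provides the prim GLM lpmf

#include <Eigen/Dense>
#include <iostream>
#include <vector>

int main() {
  // N = 3 instances, K = 2 attributes, C = 3 classes (made-up example data).
  std::vector<int> y{1, 3, 2};  // outcomes in {1, ..., C}
  Eigen::MatrixXd x(3, 2);      // N x K design matrix
  x << 0.5, -1.0,
       1.2,  0.3,
      -0.7,  2.0;
  Eigen::VectorXd alpha(3);     // C intercepts
  alpha << 0.1, -0.2, 0.0;
  Eigen::MatrixXd beta(2, 3);   // K x C weight matrix
  beta << 0.4, -0.3,  0.1,
          0.2,  0.5, -0.6;

  // Non-propto overload: forwards to categorical_logit_glm_lpmf<false>.
  double lp = stan::math::categorical_logit_glm_lpmf(y, x, alpha, beta);
  std::cout << "log PMF = " << lp << std::endl;
  return 0;
}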