Automatic Differentiation
 
Loading...
Searching...
No Matches
dirichlet_lpdf.hpp
Go to the documentation of this file.
1#ifndef STAN_MATH_OPENCL_PRIM_DIRICHLET_LPDF_HPP
2#define STAN_MATH_OPENCL_PRIM_DIRICHLET_LPDF_HPP
3#ifdef STAN_OPENCL
4
15
16namespace stan {
17namespace math {
18
/**
 * Log of the Dirichlet density for the given theta and prior sample sizes
 * alpha, evaluated with OpenCL kernel-generator expressions. Errors are
 * reported under the function name "dirichlet_lpdf(OpenCL)".
 *
 * NOTE(review): this listing is incomplete. The embedded line numbers skip in
 * several places (e.g. 58 -> 60, 61 -> 63, 71 -> 73, 103 -> 105), so parts of
 * the signature, some expression arguments and some guarding conditions are
 * not visible. Comments below describe only what the visible lines show.
 *
 * @tparam propto not referenced on any visible line. NOTE(review): presumably
 * used by the omitted guard lines to drop constant terms - confirm.
 * @tparam T_prob_cl type of theta
 * @tparam T_prior_size_cl type of alpha
 * @param theta probabilities; checked to be nonnegative and to sum to 1
 * within CONSTRAINT_TOLERANCE
 * @param alpha prior sample sizes; checked to be positive
 * @return 0.0 when max_size(theta, alpha) is 0, otherwise the result of
 * ops_partials.build(lp)
 */
58template <bool propto, typename T_prob_cl, typename T_prior_size_cl,
60 T_prob_cl, T_prior_size_cl>* = nullptr,
61 require_any_not_stan_scalar_t<T_prob_cl, T_prior_size_cl>* = nullptr>
63 const T_prob_cl& theta, const T_prior_size_cl& alpha) {
64 static constexpr const char* function = "dirichlet_lpdf(OpenCL)";
65
66 check_consistent_sizes(function, "probabilities", theta, "prior sample sizes",
67 alpha);
68
// Nothing to evaluate when the largest input has size zero.
69 if (max_size(theta, alpha) == 0) {
70 return 0.0;
71 }
// NOTE(review): listing line 72 is missing, so the condition guarding this
// second early return is not visible here.
73 return 0.0;
74 }
75
76 const auto& theta_val = value_of(theta);
77 const auto& alpha_val = value_of(alpha);
78
// Each check object is paired with its boolean expression in one of the
// results(...) = expressions(...) assignments further down.
79 auto check_alpha_positive
80 = check_cl(function, "prior sample sizes", alpha_val, "positive");
81 auto alpha_positive = alpha_val > 0.0;
82 auto check_theta_nonnegative
83 = check_cl(function, "probabilities", theta_val, "nonnegative");
84 auto theta_nonnegative = theta_val >= 0.0;
85
// Expressions shared by all branches: column sums of theta, alpha - 1 and
// log(theta), the latter two passed through rowwise_optional_broadcast.
86 auto theta_csum = colwise_sum(theta_val);
87 auto alpha_m_1 = rowwise_optional_broadcast(alpha_val - 1.0);
88 auto theta_log = rowwise_optional_broadcast(log(theta_val));
89
// Outputs filled in by whichever branch below runs.
90 matrix_cl<double> theta_csum_cl;
91 matrix_cl<double> alpha_csum_cl;
92 matrix_cl<double> lgamma_alpha_csum_cl;
93 matrix_cl<double> theta_log_alpha_m_1_sum_cl;
94 matrix_cl<double> theta_deriv_cl;
95 matrix_cl<double> alpha_deriv_cl;
// Branch: theta has more columns than alpha.
96 if (theta.cols() > alpha.cols()) {
97 auto alpha_csum = colwise_sum(alpha_val);
98 auto lgamma_alpha_csum = colwise_sum(lgamma(alpha_val));
99 matrix_cl<double> digamma_alpha_cl(alpha.rows(), alpha.cols());
// NOTE(review): listing lines 104, 106 and 108 are missing; the wrappers
// around alpha_csum / lgamma_alpha_csum and the expression assigned to
// digamma_alpha_cl are not visible.
100 results(check_alpha_positive, alpha_csum_cl, lgamma_alpha_csum_cl,
101 digamma_alpha_cl)
102 = expressions(
103 alpha_positive,
105 alpha_csum),
107 lgamma_alpha_csum),
109
110 auto theta_log_alpha_m_1_sum = sum_2d(elt_multiply(theta_log, alpha_m_1));
111
112 auto theta_deriv
113 = elt_divide(alpha_m_1, rowwise_optional_broadcast(theta_val));
114 auto alpha_deriv = theta_log - rowwise_optional_broadcast(digamma_alpha_cl);
115
116 results(check_theta_nonnegative, theta_csum_cl, theta_log_alpha_m_1_sum_cl,
117 theta_deriv_cl, alpha_deriv_cl)
118 = expressions(
119 theta_nonnegative, theta_csum,
121 theta_log_alpha_m_1_sum),
124
126 matrix_cl<double> alpha_csum_cl2;
127 matrix_cl<double> lgamma_alpha_csum_cl2;
// Re-apply colwise_sum until the alpha sums have a single row.
128 while (alpha_csum_cl.rows() > 1) {
129 results(alpha_csum_cl2, lgamma_alpha_csum_cl2) = expressions(
131 colwise_sum(alpha_csum_cl)),
133 colwise_sum(lgamma_alpha_csum_cl)));
134 alpha_csum_cl = std::move(alpha_csum_cl2);
135 lgamma_alpha_csum_cl = std::move(lgamma_alpha_csum_cl2);
136 }
137 }
// Same repeated reduction for the theta column sums.
138 while (theta_csum_cl.rows() > 1) {
139 theta_csum_cl = colwise_sum(theta_csum_cl).eval();
140 }
141 } else {
142 auto alpha_csum = colwise_sum(alpha_val);
143 auto lgamma_alpha_csum = colwise_sum(lgamma(alpha_val));
// Branch: alpha has more columns than theta. log(theta) is first written to
// log_theta_cl and then broadcast for use against alpha.
144 if (alpha.cols() > theta.cols()) {
145 matrix_cl<double> log_theta_cl(theta.rows(), theta.cols());
146 results(check_theta_nonnegative, theta_csum_cl, log_theta_cl)
147 = expressions(
148 theta_nonnegative, theta_csum,
149 calc_if<
151 theta_log));
152
153 auto log_theta_bc = rowwise_optional_broadcast(log_theta_cl);
154 auto theta_log_alpha_m_1_sum
155 = sum_2d(elt_multiply(log_theta_bc, alpha_m_1));
156
157 auto theta_deriv
158 = elt_divide(alpha_m_1, rowwise_optional_broadcast(theta_val));
159 auto alpha_deriv
160 = log_theta_bc - rowwise_optional_broadcast(digamma(alpha_val));
161
162 results(check_alpha_positive, alpha_csum_cl, lgamma_alpha_csum_cl,
163 theta_log_alpha_m_1_sum_cl, theta_deriv_cl, alpha_deriv_cl)
164 = expressions(
165 alpha_positive,
167 alpha_csum),
169 lgamma_alpha_csum),
170 calc_if<
172 theta_log_alpha_m_1_sum),
175
176 while (alpha_csum_cl.rows() > 1) {
177 matrix_cl<double> alpha_csum_cl2;
178 matrix_cl<double> lgamma_alpha_csum_cl2;
179 results(alpha_csum_cl2, lgamma_alpha_csum_cl2) = expressions(
181 colwise_sum(alpha_csum_cl)),
183 colwise_sum(lgamma_alpha_csum_cl)));
185 alpha_csum_cl = std::move(alpha_csum_cl2);
186 lgamma_alpha_csum_cl = std::move(lgamma_alpha_csum_cl2);
187 }
188 }
// In this branch the "sum of probabilities" check is done on a scalar:
// the theta column sums are copied off the device and summed.
189 double theta_sum = sum(from_matrix_cl(theta_csum));
190 check_cl(function, "sum of probabilities", theta_sum, "equal to 1")
191 = (fabs(theta_sum - 1.0) <= CONSTRAINT_TOLERANCE);
192 } else {
// Remaining branch: both checks and all outputs are evaluated in a single
// results(...) = expressions(...) assignment.
193 auto theta_log_alpha_m_1_sum = sum_2d(elt_multiply(theta_log, alpha_m_1));
194
195 auto theta_deriv
196 = elt_divide(alpha_m_1, rowwise_optional_broadcast(theta_val));
197 auto alpha_deriv
198 = theta_log - rowwise_optional_broadcast(digamma(alpha_val));
199
200 results(check_alpha_positive, check_theta_nonnegative, theta_csum_cl,
201 alpha_csum_cl, lgamma_alpha_csum_cl, theta_log_alpha_m_1_sum_cl,
202 theta_deriv_cl, alpha_deriv_cl)
203 = expressions(
204 alpha_positive, theta_nonnegative, theta_csum,
206 alpha_csum),
208 lgamma_alpha_csum),
209 calc_if<
211 theta_log_alpha_m_1_sum),
214
215 while (theta_csum_cl.rows() > 1) {
216 matrix_cl<double> theta_csum_cl2;
217 matrix_cl<double> alpha_csum_cl2;
218 matrix_cl<double> lgamma_alpha_csum_cl2;
219 results(theta_csum_cl2, alpha_csum_cl2, lgamma_alpha_csum_cl2)
220 = expressions(
221 colwise_sum(theta_csum_cl),
223 colwise_sum(alpha_csum_cl)),
225 colwise_sum(lgamma_alpha_csum_cl)));
226 theta_csum_cl = std::move(theta_csum_cl2);
228 alpha_csum_cl = std::move(alpha_csum_cl2);
229 lgamma_alpha_csum_cl = std::move(lgamma_alpha_csum_cl2);
230 }
231 }
232 }
233 }
234
// Check on the reduced theta column sums, performed only when theta has at
// least as many columns as alpha (the other case is checked above via
// theta_sum).
235 if (theta.cols() >= alpha.cols()) {
236 // transpose is there just because working on col vectors is more efficient
237 // than on row vectors with kernel generator
238 check_cl(function, "sum of probabilities", transpose(theta_csum_cl),
239 "equal to 1")
240 = (fabs(transpose(theta_csum_cl) - 1.0) <= CONSTRAINT_TOLERANCE);
241 }
242
// Accumulate the log density from the reduced results copied off the device.
243 double lp = 0.0;
244
// NOTE(review): listing lines 245 and 256 are missing; the closing braces at
// 255 and 258 suggest each lp term sits inside a guard that is not visible.
// When theta has more columns than alpha, the lgamma terms are scaled by
// theta.cols().
246 if (theta.cols() > alpha.cols()) {
247 lp += (lgamma(from_matrix_cl(alpha_csum_cl)) * theta.cols()
248 - from_matrix_cl(lgamma_alpha_csum_cl) * theta.cols())
249 .sum();
250 } else {
251 lp += (lgamma(from_matrix_cl(alpha_csum_cl))
252 - from_matrix_cl(lgamma_alpha_csum_cl))
253 .sum();
254 }
255 }
257 lp += from_matrix_cl(theta_log_alpha_m_1_sum_cl).sum();
258 }
259
260 auto ops_partials = make_partials_propagator(theta, alpha);
261
// Partials for theta: summed rowwise when theta has fewer columns than alpha,
// otherwise theta_deriv_cl is moved in as is.
// NOTE(review): listing lines 262 and 269 (presumably guards) are missing.
263 if (theta.cols() < alpha.cols()) {
264 partials<0>(ops_partials) = rowwise_sum(theta_deriv_cl);
265 } else {
266 partials<0>(ops_partials) = std::move(theta_deriv_cl);
267 }
268 }
// Partials for alpha: digamma of the alpha column sums, broadcast colwise,
// plus alpha_deriv_cl. With more theta columns than alpha columns the digamma
// term is scaled by theta.cols() and alpha_deriv_cl is summed rowwise.
270 if (theta.cols() > alpha.cols()) {
271 matrix_cl<double> tmp_cl
272 = digamma(alpha_csum_cl) * static_cast<double>(theta.cols());
273 partials<1>(ops_partials)
274 = colwise_broadcast(tmp_cl) + rowwise_sum(alpha_deriv_cl);
275 } else {
276 matrix_cl<double> tmp_cl = digamma(alpha_csum_cl);
277 partials<1>(ops_partials) = colwise_broadcast(tmp_cl) + alpha_deriv_cl;
278 }
279 }
280 return ops_partials.build(lp);
281}
282
283} // namespace math
284} // namespace stan
285#endif
286#endif
Represents an arithmetic matrix on the OpenCL device.
Definition matrix_cl.hpp:47
elt_multiply_< as_operation_cl_t< T_a >, as_operation_cl_t< T_b > > elt_multiply(T_a &&a, T_b &&b)
auto sum_2d(T &&a)
Two dimensional sum - reduction of a kernel generator expression.
auto check_cl(const char *function, const char *var_name, T &&y, const char *must_be)
Constructs a check on opencl matrix or expression.
Definition check_cl.hpp:219
results_cl< T_results... > results(T_results &&... results)
Deduces types for constructing results_cl object.
auto transpose(Arg &&a)
Transposes a kernel generator expression.
elt_divide_< as_operation_cl_t< T_a >, as_operation_cl_t< T_b > > elt_divide(T_a &&a, T_b &&b)
auto rowwise_sum(T &&a)
Rowwise sum reduction of a kernel generator expression.
calc_if_< true, as_operation_cl_t< T > > calc_if(T &&a)
Definition calc_if.hpp:121
auto colwise_broadcast(T &&a)
Broadcast an expression in the colwise dimension.
auto colwise_sum(T &&a)
Column wise sum - reduction of a kernel generator expression.
expressions_cl< T_expressions... > expressions(T_expressions &&... expressions)
Deduces types for constructing expressions_cl object.
return_type_t< T_prob_cl, T_prior_size_cl > dirichlet_lpdf(const T_prob_cl &theta, const T_prior_size_cl &alpha)
The log of the Dirichlet density for the given theta and a vector of prior sample sizes,...
auto from_matrix_cl(const T &src)
Copies the source matrix that is stored on the OpenCL device to the destination Eigen matrix.
Definition copy.hpp:61
require_all_t< is_prim_or_rev_kernel_expression< std::decay_t< Types > >... > require_all_prim_or_rev_kernel_expression_t
Require type satisfies is_prim_or_rev_kernel_expression.
typename return_type< Ts... >::type return_type_t
Convenience type for the return type of the specified template parameters.
T value_of(const fvar< T > &v)
Return the value of the specified variable.
Definition value_of.hpp:18
fvar< T > log(const fvar< T > &x)
Definition log.hpp:18
auto rowwise_optional_broadcast(T &&a)
Broadcast an expression in the rowwise dimension if the number of columns is equal to 1.
void check_consistent_sizes(const char *)
Trivial no input case, this function is a no-op.
fvar< T > lgamma(const fvar< T > &x)
Return the natural logarithm of the gamma function applied to the specified argument.
Definition lgamma.hpp:21
auto sum(const std::vector< T > &m)
Return the sum of the entries of the specified standard vector.
Definition sum.hpp:23
int64_t max_size(const T1 &x1, const Ts &... xs)
Calculate the size of the largest input.
Definition max_size.hpp:20
const double CONSTRAINT_TOLERANCE
The tolerance for checking arithmetic bounds in rank and in simplexes.
auto make_partials_propagator(Ops &&... ops)
Construct a partials_propagator.
fvar< T > digamma(const fvar< T > &x)
Return the derivative of the log gamma function at the specified argument.
Definition digamma.hpp:23
fvar< T > fabs(const fvar< T > &x)
Definition fabs.hpp:16
The lgamma implementation in stan-math is based on either the reentrant safe lgamma_r implementation ...
Metaprogramming struct to detect whether a given type is constant in the mathematical sense (not the ...
Template metaprogram to calculate whether a summand needs to be included in a proportional (log) prob...