Automatic Differentiation
 
Loading...
Searching...
No Matches
dirichlet_lpdf.hpp
Go to the documentation of this file.
1#ifndef STAN_MATH_OPENCL_PRIM_DIRICHLET_LPDF_HPP
2#define STAN_MATH_OPENCL_PRIM_DIRICHLET_LPDF_HPP
3#ifdef STAN_OPENCL
4
14
15namespace stan {
16namespace math {
17
57template <bool propto, typename T_prob_cl, typename T_prior_size_cl,
59 T_prob_cl, T_prior_size_cl>* = nullptr,
60 require_any_not_stan_scalar_t<T_prob_cl, T_prior_size_cl>* = nullptr>
62 const T_prob_cl& theta, const T_prior_size_cl& alpha) {
63 static constexpr const char* function = "dirichlet_lpdf(OpenCL)";
64
65 check_consistent_sizes(function, "probabilities", theta, "prior sample sizes",
66 alpha);
67
68 if (max_size(theta, alpha) == 0) {
69 return 0.0;
70 }
72 return 0.0;
73 }
74
75 const auto& theta_val = value_of(theta);
76 const auto& alpha_val = value_of(alpha);
77
78 auto check_alpha_positive
79 = check_cl(function, "prior sample sizes", alpha_val, "positive");
80 auto alpha_positive = alpha_val > 0.0;
81 auto check_theta_nonnegative
82 = check_cl(function, "probabilities", theta_val, "nonnegative");
83 auto theta_nonnegative = theta_val >= 0.0;
84
85 auto theta_csum = colwise_sum(theta_val);
86 auto alpha_m_1 = rowwise_optional_broadcast(alpha_val - 1.0);
87 auto theta_log = rowwise_optional_broadcast(log(theta_val));
88
89 matrix_cl<double> theta_csum_cl;
90 matrix_cl<double> alpha_csum_cl;
91 matrix_cl<double> lgamma_alpha_csum_cl;
92 matrix_cl<double> theta_log_alpha_m_1_sum_cl;
93 matrix_cl<double> theta_deriv_cl;
94 matrix_cl<double> alpha_deriv_cl;
95 if (theta.cols() > alpha.cols()) {
96 auto alpha_csum = colwise_sum(alpha_val);
97 auto lgamma_alpha_csum = colwise_sum(lgamma(alpha_val));
98 matrix_cl<double> digamma_alpha_cl(alpha.rows(), alpha.cols());
99 results(check_alpha_positive, alpha_csum_cl, lgamma_alpha_csum_cl,
100 digamma_alpha_cl)
101 = expressions(
102 alpha_positive,
104 alpha_csum),
106 lgamma_alpha_csum),
108
109 auto theta_log_alpha_m_1_sum = sum_2d(elt_multiply(theta_log, alpha_m_1));
110
111 auto theta_deriv
112 = elt_divide(alpha_m_1, rowwise_optional_broadcast(theta_val));
113 auto alpha_deriv = theta_log - rowwise_optional_broadcast(digamma_alpha_cl);
114
115 results(check_theta_nonnegative, theta_csum_cl, theta_log_alpha_m_1_sum_cl,
116 theta_deriv_cl, alpha_deriv_cl)
117 = expressions(
118 theta_nonnegative, theta_csum,
120 theta_log_alpha_m_1_sum),
123
125 matrix_cl<double> alpha_csum_cl2;
126 matrix_cl<double> lgamma_alpha_csum_cl2;
127 while (alpha_csum_cl.rows() > 1) {
128 results(alpha_csum_cl2, lgamma_alpha_csum_cl2) = expressions(
130 colwise_sum(alpha_csum_cl)),
132 colwise_sum(lgamma_alpha_csum_cl)));
133 alpha_csum_cl = std::move(alpha_csum_cl2);
134 lgamma_alpha_csum_cl = std::move(lgamma_alpha_csum_cl2);
135 }
136 }
137 while (theta_csum_cl.rows() > 1) {
138 theta_csum_cl = colwise_sum(theta_csum_cl).eval();
139 }
140 } else {
141 auto alpha_csum = colwise_sum(alpha_val);
142 auto lgamma_alpha_csum = colwise_sum(lgamma(alpha_val));
143 if (alpha.cols() > theta.cols()) {
144 matrix_cl<double> log_theta_cl(theta.rows(), theta.cols());
145 results(check_theta_nonnegative, theta_csum_cl, log_theta_cl)
146 = expressions(
147 theta_nonnegative, theta_csum,
148 calc_if<
150 theta_log));
151
152 auto log_theta_bc = rowwise_optional_broadcast(log_theta_cl);
153 auto theta_log_alpha_m_1_sum
154 = sum_2d(elt_multiply(log_theta_bc, alpha_m_1));
155
156 auto theta_deriv
157 = elt_divide(alpha_m_1, rowwise_optional_broadcast(theta_val));
158 auto alpha_deriv
159 = log_theta_bc - rowwise_optional_broadcast(digamma(alpha_val));
160
161 results(check_alpha_positive, alpha_csum_cl, lgamma_alpha_csum_cl,
162 theta_log_alpha_m_1_sum_cl, theta_deriv_cl, alpha_deriv_cl)
163 = expressions(
164 alpha_positive,
166 alpha_csum),
168 lgamma_alpha_csum),
169 calc_if<
171 theta_log_alpha_m_1_sum),
174
175 while (alpha_csum_cl.rows() > 1) {
176 matrix_cl<double> alpha_csum_cl2;
177 matrix_cl<double> lgamma_alpha_csum_cl2;
178 results(alpha_csum_cl2, lgamma_alpha_csum_cl2) = expressions(
180 colwise_sum(alpha_csum_cl)),
182 colwise_sum(lgamma_alpha_csum_cl)));
184 alpha_csum_cl = std::move(alpha_csum_cl2);
185 lgamma_alpha_csum_cl = std::move(lgamma_alpha_csum_cl2);
186 }
187 }
188 double theta_sum = sum(from_matrix_cl(theta_csum));
189 check_cl(function, "sum of probabilities", theta_sum, "equal to 1")
190 = (fabs(theta_sum - 1.0) <= CONSTRAINT_TOLERANCE);
191 } else {
192 auto theta_log_alpha_m_1_sum = sum_2d(elt_multiply(theta_log, alpha_m_1));
193
194 auto theta_deriv
195 = elt_divide(alpha_m_1, rowwise_optional_broadcast(theta_val));
196 auto alpha_deriv
197 = theta_log - rowwise_optional_broadcast(digamma(alpha_val));
198
199 results(check_alpha_positive, check_theta_nonnegative, theta_csum_cl,
200 alpha_csum_cl, lgamma_alpha_csum_cl, theta_log_alpha_m_1_sum_cl,
201 theta_deriv_cl, alpha_deriv_cl)
202 = expressions(
203 alpha_positive, theta_nonnegative, theta_csum,
205 alpha_csum),
207 lgamma_alpha_csum),
208 calc_if<
210 theta_log_alpha_m_1_sum),
213
214 while (theta_csum_cl.rows() > 1) {
215 matrix_cl<double> theta_csum_cl2;
216 matrix_cl<double> alpha_csum_cl2;
217 matrix_cl<double> lgamma_alpha_csum_cl2;
218 results(theta_csum_cl2, alpha_csum_cl2, lgamma_alpha_csum_cl2)
219 = expressions(
220 colwise_sum(theta_csum_cl),
222 colwise_sum(alpha_csum_cl)),
224 colwise_sum(lgamma_alpha_csum_cl)));
225 theta_csum_cl = std::move(theta_csum_cl2);
227 alpha_csum_cl = std::move(alpha_csum_cl2);
228 lgamma_alpha_csum_cl = std::move(lgamma_alpha_csum_cl2);
229 }
230 }
231 }
232 }
233
234 if (theta.cols() >= alpha.cols()) {
235 // transpose is there just because working on col vectors is more efficient
236 // than on row vectors with kernel generator
237 check_cl(function, "sum of probabilities", transpose(theta_csum_cl),
238 "equal to 1")
239 = (fabs(transpose(theta_csum_cl) - 1.0) <= CONSTRAINT_TOLERANCE);
240 }
241
242 double lp = 0.0;
243
245 if (theta.cols() > alpha.cols()) {
246 lp += (lgamma(from_matrix_cl(alpha_csum_cl)) * theta.cols()
247 - from_matrix_cl(lgamma_alpha_csum_cl) * theta.cols())
248 .sum();
249 } else {
250 lp += (lgamma(from_matrix_cl(alpha_csum_cl))
251 - from_matrix_cl(lgamma_alpha_csum_cl))
252 .sum();
253 }
254 }
256 lp += from_matrix_cl(theta_log_alpha_m_1_sum_cl).sum();
257 }
258
259 auto ops_partials = make_partials_propagator(theta, alpha);
260
262 if (theta.cols() < alpha.cols()) {
263 partials<0>(ops_partials) = rowwise_sum(theta_deriv_cl);
264 } else {
265 partials<0>(ops_partials) = std::move(theta_deriv_cl);
266 }
267 }
269 if (theta.cols() > alpha.cols()) {
270 matrix_cl<double> tmp_cl
271 = digamma(alpha_csum_cl) * static_cast<double>(theta.cols());
272 partials<1>(ops_partials)
273 = colwise_broadcast(tmp_cl) + rowwise_sum(alpha_deriv_cl);
274 } else {
275 matrix_cl<double> tmp_cl = digamma(alpha_csum_cl);
276 partials<1>(ops_partials) = colwise_broadcast(tmp_cl) + alpha_deriv_cl;
277 }
278 }
279 return ops_partials.build(lp);
280}
281
282} // namespace math
283} // namespace stan
284#endif
285#endif
Represents an arithmetic matrix on the OpenCL device.
Definition matrix_cl.hpp:47
elt_multiply_< as_operation_cl_t< T_a >, as_operation_cl_t< T_b > > elt_multiply(T_a &&a, T_b &&b)
auto sum_2d(T &&a)
Two dimensional sum - reduction of a kernel generator expression.
auto check_cl(const char *function, const char *var_name, T &&y, const char *must_be)
Constructs a check on an OpenCL matrix or expression.
Definition check_cl.hpp:219
results_cl< T_results... > results(T_results &&... results)
Deduces types for constructing results_cl object.
auto transpose(Arg &&a)
Transposes a kernel generator expression.
elt_divide_< as_operation_cl_t< T_a >, as_operation_cl_t< T_b > > elt_divide(T_a &&a, T_b &&b)
auto rowwise_sum(T &&a)
Rowwise sum reduction of a kernel generator expression.
calc_if_< true, as_operation_cl_t< T > > calc_if(T &&a)
Definition calc_if.hpp:121
auto colwise_broadcast(T &&a)
Broadcast an expression in the colwise dimension.
auto colwise_sum(T &&a)
Column wise sum - reduction of a kernel generator expression.
expressions_cl< T_expressions... > expressions(T_expressions &&... expressions)
Deduces types for constructing expressions_cl object.
return_type_t< T_prob_cl, T_prior_size_cl > dirichlet_lpdf(const T_prob_cl &theta, const T_prior_size_cl &alpha)
The log of the Dirichlet density for the given theta and a vector of prior sample sizes,...
auto from_matrix_cl(const T &src)
Copies the source matrix that is stored on the OpenCL device to the destination Eigen matrix.
Definition copy.hpp:61
require_all_t< is_prim_or_rev_kernel_expression< std::decay_t< Types > >... > require_all_prim_or_rev_kernel_expression_t
Require that the type satisfies is_prim_or_rev_kernel_expression.
typename return_type< Ts... >::type return_type_t
Convenience type for the return type of the specified template parameters.
T value_of(const fvar< T > &v)
Return the value of the specified variable.
Definition value_of.hpp:18
fvar< T > log(const fvar< T > &x)
Definition log.hpp:15
auto rowwise_optional_broadcast(T &&a)
Broadcast an expression in the rowwise dimension if the number of columns equals 1.
void check_consistent_sizes(const char *)
Trivial no input case, this function is a no-op.
fvar< T > lgamma(const fvar< T > &x)
Return the natural logarithm of the gamma function applied to the specified argument.
Definition lgamma.hpp:21
auto sum(const std::vector< T > &m)
Return the sum of the entries of the specified standard vector.
Definition sum.hpp:23
int64_t max_size(const T1 &x1, const Ts &... xs)
Calculate the size of the largest input.
Definition max_size.hpp:20
const double CONSTRAINT_TOLERANCE
The tolerance for checking arithmetic bounds in rank and in simplexes.
auto make_partials_propagator(Ops &&... ops)
Construct a partials_propagator.
fvar< T > digamma(const fvar< T > &x)
Return the derivative of the log gamma function at the specified argument.
Definition digamma.hpp:23
fvar< T > fabs(const fvar< T > &x)
Definition fabs.hpp:15
The lgamma implementation in stan-math is based on either the reentrant safe lgamma_r implementation ...
Metaprogramming struct to detect whether a given type is constant in the mathematical sense (not the ...
Template metaprogram to calculate whether a summand needs to be included in a proportional (log) prob...