1#ifndef STAN_MATH_OPENCL_PRIM_NORMAL_LCDF_HPP
2#define STAN_MATH_OPENCL_PRIM_NORMAL_LCDF_HPP
17 double x2 = normal_lcdf_scaled_diff * normal_lcdf_scaled_diff;
18 double normal_lcdf_n = 0;
26 if (normal_lcdf_scaled_diff > 0.0) {
28 normal_lcdf_n =
log1p(-0.5 *
erfc(normal_lcdf_scaled_diff));
29 if (
isnan(normal_lcdf_n)) {
32 }
else if (normal_lcdf_scaled_diff > -20.0) {
34 normal_lcdf_n =
log(
erfc(-normal_lcdf_scaled_diff)) - M_LN2;
35 }
else if (10.0 *
log(fabs(normal_lcdf_scaled_diff)) <
log(DBL_MAX)) {
40 double x4 =
pow(normal_lcdf_scaled_diff, 4);
41 double x6 =
pow(normal_lcdf_scaled_diff, 6);
42 double x8 =
pow(normal_lcdf_scaled_diff, 8);
43 double x10 =
pow(normal_lcdf_scaled_diff, 10);
45 = 0.000658749161529837803157 + 0.0160837851487422766278 / x2
46 + 0.125781726111229246204 / x4 + 0.360344899949804439429 / x6
47 + 0.305326634961232344035 / x8 + 0.0163153871373020978498 / x10;
48 double temp_q = -0.00233520497626869185443 - 0.0605183413124413191178 / x2
49 - 0.527905102951428412248 / x4
50 - 1.87295284992346047209 / x6
51 - 2.56852019228982242072 / x8 - 1.0 / x10;
52 normal_lcdf_n = -M_LN2 +
log(0.5 * M_2_SQRTPI + (temp_p / temp_q) / x2)
53 -
log(-normal_lcdf_scaled_diff) - x2;
56 normal_lcdf_n = -INFINITY;
60 double normal_ldncdf = 0.0;
double t = 0.0;
double t2 = 0.0;
65 if (normal_lcdf_deriv_scaled_diff > 2.9) {
67 t = 1.0 / (1.0 + 0.3275911 * normal_lcdf_deriv_scaled_diff);
72 / (
exp(x2) - 0.254829592 + 0.284496736 * t - 1.421413741 * t2
73 + 1.453152027 * t2 * t - 1.061405429 * t4);
74 }
else if (normal_lcdf_deriv_scaled_diff > 2.5) {
79 t = normal_lcdf_deriv_scaled_diff - 2.7;
82 normal_ldncdf = 0.0003849882382 - 0.002079084702 * t + 0.005229340880 * t2
83 - 0.008029540137 * t2 * t + 0.008232190507 * t4
84 - 0.005692364250 * t4 * t + 0.002399496363 *
pow(t, 6);
85 }
else if (normal_lcdf_deriv_scaled_diff > 2.1) {
87 t = normal_lcdf_deriv_scaled_diff - 2.3;
90 normal_ldncdf = 0.002846135439 - 0.01310032351 * t + 0.02732189391 * t2
91 - 0.03326906904 * t2 * t + 0.02482478940 * t4
92 - 0.009883071924 * t4 * t - 0.0002771362254 *
pow(t, 6);
93 }
else if (normal_lcdf_deriv_scaled_diff > 1.5) {
95 t = normal_lcdf_deriv_scaled_diff - 1.85;
98 normal_ldncdf = 0.01849212058 - 0.06876280470 * t + 0.1099906382 * t2
99 - 0.09274533184 * t2 * t + 0.03543327418 * t4
100 + 0.005644855518 * t4 * t - 0.01111434424 *
pow(t, 6);
101 }
else if (normal_lcdf_deriv_scaled_diff > 0.8) {
103 t = normal_lcdf_deriv_scaled_diff - 1.15;
106 normal_ldncdf = 0.1585747034 - 0.3898677543 * t + 0.3515963775 * t2
107 - 0.09748053605 * t2 * t - 0.04347986191 * t4
108 + 0.02182506378 * t4 * t + 0.01074751427 *
pow(t, 6);
109 }
else if (normal_lcdf_deriv_scaled_diff > 0.1) {
111 t = normal_lcdf_deriv_scaled_diff - 0.45;
114 normal_ldncdf = 0.6245634904 - 0.9521866949 * t + 0.3986215682 * t2
115 + 0.04700850676 * t2 * t - 0.03478651979 * t4
116 - 0.01772675404 * t4 * t + 0.0006577254811 *
pow(t, 6);
117 }
else if (10.0 *
log(
fabs(normal_lcdf_deriv_scaled_diff)) <
log(DBL_MAX)) {
122 t = 1.0 / (1.0 - 0.3275911 * normal_lcdf_deriv_scaled_diff);
127 / (0.254829592 * t - 0.284496736 * t2 + 1.421413741 * t2 * t
128 - 1.453152027 * t4 + 1.061405429 * t4 * t);
131 if (normal_lcdf_deriv_scaled_diff < -29.0) {
132 normal_ldncdf += 0.0015065154280332 * x2
133 - 0.3993154819705530 * normal_lcdf_deriv_scaled_diff
134 - 4.2919418242931700;
135 }
else if (normal_lcdf_deriv_scaled_diff < -17.0) {
136 normal_ldncdf += 0.0001263257217272 * x2 * normal_lcdf_deriv_scaled_diff
137 + 0.0123586859488623 * x2
138 - 0.0860505264736028 * normal_lcdf_deriv_scaled_diff
140 }
else if (normal_lcdf_deriv_scaled_diff < -7.0) {
142 += 0.000471585349920831 * x2 * normal_lcdf_deriv_scaled_diff
143 + 0.0296839305424034 * x2
144 + 0.207402143352332 * normal_lcdf_deriv_scaled_diff
146 }
else if (normal_lcdf_deriv_scaled_diff < -3.9) {
148 += -0.0006972280656443 * x2 * normal_lcdf_deriv_scaled_diff
149 + 0.0068218494628567 * x2
150 + 0.0585761964460277 * normal_lcdf_deriv_scaled_diff
151 + 0.1034397670201370;
152 }
else if (normal_lcdf_deriv_scaled_diff < -2.1) {
154 += -0.0018742199480885 * x2 * normal_lcdf_deriv_scaled_diff
155 - 0.0097119598291202 * x2
156 - 0.0170137970924080 * normal_lcdf_deriv_scaled_diff
157 - 0.0100428567412041;
159 }
else { normal_ldncdf = INFINITY; });
176 typename T_y_cl,
typename T_loc_cl,
typename T_scale_cl,
178 T_scale_cl>* =
nullptr,
179 require_any_not_stan_scalar_t<T_y_cl, T_loc_cl, T_scale_cl>* =
nullptr>
181 const T_y_cl& y,
const T_loc_cl& mu,
const T_scale_cl& sigma) {
182 static constexpr const char* function =
"normal_lcdf(OpenCL)";
187 mu,
"Scale parameter", sigma);
188 const size_t N =
max_size(y, mu, sigma);
197 const auto& y_val =
value_of(y_col);
198 const auto& mu_val =
value_of(mu_col);
199 const auto& sigma_val =
value_of(sigma_col);
202 =
check_cl(function,
"Random variable", y_val,
"not NaN");
203 auto y_not_nan_expr = !isnan(y_val);
205 =
check_cl(function,
"Location parameter", mu_val,
"finite");
206 auto mu_finite_expr =
isfinite(mu_val);
207 auto check_sigma_positive
208 =
check_cl(function,
"Scale parameter", sigma_val,
"positive");
209 auto sigma_positive_expr = 0 < sigma_val;
213 auto sigma_sqrt2 = sigma_val *
SQRT_TWO;
215 auto lcdf_n = opencl_code<internal::opencl_normal_lcdf_impl>(
216 std::make_tuple(
"normal_lcdf_scaled_diff"), scaled_diff)
217 .template output<double>(
"normal_lcdf_n");
221 = opencl_code<internal::opencl_normal_lcdf_ldncdf_impl>(
222 std::make_tuple(
"normal_lcdf_deriv_scaled_diff"), scaled_diff)
223 .template output<double>(
"normal_ldncdf");
224 auto y_deriv =
elt_divide(ldncdf, sigma_sqrt2);
225 auto mu_deriv = -y_deriv;
233 results(check_y_not_nan, check_mu_finite, check_sigma_positive, lcdf_cl,
234 y_deriv_cl, mu_deriv_cl, sigma_deriv_cl)
235 =
expressions(y_not_nan_expr, mu_finite_expr, sigma_positive_expr,
245 partials<0>(ops_partials) = std::move(y_deriv_cl);
248 partials<1>(ops_partials) = std::move(mu_deriv_cl);
251 partials<2>(ops_partials) = std::move(sigma_deriv_cl);
253 return ops_partials.build(lcdf);
Represents an arithmetic matrix on the OpenCL device.
elt_multiply_< as_operation_cl_t< T_a >, as_operation_cl_t< T_b > > elt_multiply(T_a &&a, T_b &&b)
isfinite_< as_operation_cl_t< T > > isfinite(T &&a)
auto check_cl(const char *function, const char *var_name, T &&y, const char *must_be)
Constructs a check on opencl matrix or expression.
results_cl< T_results... > results(T_results &&... results)
Deduces types for constructing results_cl object.
auto as_column_vector_or_scalar(T &&a)
as_column_vector_or_scalar of a kernel generator expression.
elt_divide_< as_operation_cl_t< T_a >, as_operation_cl_t< T_b > > elt_divide(T_a &&a, T_b &&b)
calc_if_< true, as_operation_cl_t< T > > calc_if(T &&a)
auto colwise_sum(T &&a)
Column wise sum - reduction of a kernel generator expression.
expressions_cl< T_expressions... > expressions(T_expressions &&... expressions)
Deduces types for constructing expressions_cl object.
return_type_t< T_y_cl, T_loc_cl, T_scale_cl > normal_lcdf(const T_y_cl &y, const T_loc_cl &mu, const T_scale_cl &sigma)
Returns the normal log complementary cumulative distribution function for the given location,...
auto from_matrix_cl(const T &src)
Copies the source matrix that is stored on the OpenCL device to the destination Eigen matrix.
require_all_t< is_prim_or_rev_kernel_expression< std::decay_t< Types > >... > require_all_prim_or_rev_kernel_expression_t
Require type satisfies is_prim_or_rev_kernel_expression.
typename return_type< Ts... >::type return_type_t
Convenience type for the return type of the specified template parameters.
const char opencl_normal_lcdf_ldncdf_impl[]
const char opencl_normal_lcdf_impl[]
auto pow(const T1 &x1, const T2 &x2)
T value_of(const fvar< T > &v)
Return the value of the specified variable.
fvar< T > log(const fvar< T > &x)
static constexpr double SQRT_TWO
The value of the square root of 2, .
void check_consistent_sizes(const char *)
Trivial no input case, this function is a no-op.
fvar< T > erfc(const fvar< T > &x)
fvar< T > log1p(const fvar< T > &x)
auto sum(const std::vector< T > &m)
Return the sum of the entries of the specified standard vector.
int64_t max_size(const T1 &x1, const Ts &... xs)
Calculate the size of the largest input.
auto make_partials_propagator(Ops &&... ops)
Construct an partials_propagator.
fvar< T > fabs(const fvar< T > &x)
fvar< T > exp(const fvar< T > &x)
The lgamma implementation in stan-math is based on either the reentrant safe lgamma_r implementation ...
bool isnan(const stan::math::var &a)
Checks if the given number is NaN.
Metaprogramming struct to detect whether a given type is constant in the mathematical sense (not the ...