Loading [MathJax]/extensions/TeX/AMSsymbols.js
Automatic Differentiation
 
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
Loading...
Searching...
No Matches
laplace_marginal_density.hpp
Go to the documentation of this file.
1#ifndef STAN_MATH_MIX_FUNCTOR_LAPLACE_MARGINAL_DENSITY_HPP
2#define STAN_MATH_MIX_FUNCTOR_LAPLACE_MARGINAL_DENSITY_HPP
13#include <unsupported/Eigen/MatrixFunctions>
14#include <cmath>
15#include <optional>
16
24namespace stan {
25namespace math {
26
31 /* Size of the blocks in block diagonal hessian*/
40 int solver{1};
41 /* Maximum number of steps in line search */
43 /* iterations end when difference in objective function is less than tolerance
44 */
45 double tolerance{1e-6};
46 /* Maximum number of steps*/
47 int max_num_steps{100};
48};
49
50template <bool HasInitTheta>
52
53template <>
54struct laplace_options<false> : public laplace_options_base {};
55
56template <>
58 /* Value for user supplied initial theta */
59 Eigen::VectorXd theta_0{0};
60};
61
64namespace internal {
65
66template <typename Covar, typename ThetaVec, typename WR, typename L_t,
67 typename A_vec, typename ThetaGrad, typename LU_t, typename KRoot>
69 /* log marginal density */
70 double lmd{std::numeric_limits<double>::infinity()};
71 /* Evaluated covariance function for the latent gaussian variable */
73 /* ThetaVec at the mode */
74 ThetaVec theta;
75 /* negative hessian or sqrt of negative hessian */
76 WR W_r;
77 /* Lower left of cholesky decomposition of stabilized inverse covariance */
78 L_t L;
79 /* inverse covariance times theta at the mode */
80 A_vec a;
81 /* the gradient of the log density with respect to theta */
82 ThetaGrad theta_grad;
83 /* LU matrix from solver 3 */
84 LU_t LU;
85 /* Cholesky of the covariance matrix */
86 KRoot K_root;
87 laplace_density_estimates(double lmd_, Covar&& covariance_, ThetaVec&& theta_,
88 WR&& W_r_, L_t&& L_, A_vec&& a_,
89 ThetaGrad&& theta_grad_, LU_t&& LU_,
90 KRoot&& K_root_)
91 : lmd(lmd_),
92 covariance(std::move(covariance_)),
93 theta(std::move(theta_)),
94 W_r(std::move(W_r_)),
95 L(std::move(L_)),
96 a(std::move(a_)),
97 theta_grad(std::move(theta_grad_)),
98 LU(std::move(LU_)),
99 K_root(std::move(K_root_)) {}
100};
101
109template <typename WRootMat>
110inline void block_matrix_sqrt(WRootMat& W_root,
111 const Eigen::SparseMatrix<double>& W,
112 const Eigen::Index block_size) {
113 int n_block = W.cols() / block_size;
114 Eigen::MatrixXd local_block(block_size, block_size);
115 Eigen::MatrixXd local_block_sqrt(block_size, block_size);
116 Eigen::MatrixXd sqrt_t_mat = Eigen::MatrixXd::Zero(block_size, block_size);
117 // No block operation available for sparse matrices, so we have to loop
118 // See https://eigen.tuxfamily.org/dox/group__TutorialSparse.html#title7
119 for (int i = 0; i < n_block; i++) {
120 sqrt_t_mat.setZero();
121 local_block
122 = W.block(i * block_size, i * block_size, block_size, block_size);
123 if (Eigen::isnan(local_block.array()).any()) {
124 throw std::domain_error(
125 std::string("Error in block_matrix_sqrt: "
126 "NaNs detected in block diagonal starting at (")
127 + std::to_string(i) + ", " + std::to_string(i) + ")");
128 }
129 // Issue here, sqrt is done over T of the complex schur
130 Eigen::RealSchur<Eigen::MatrixXd> schurOfA(local_block);
131 // Compute Schur decomposition of arg
132 const auto& t_mat = schurOfA.matrixT();
133 const auto& u_mat = schurOfA.matrixU();
134 // Check if diagonal of schur is not positive
135 if ((t_mat.diagonal().array() < 0).any()) {
136 throw std::domain_error(
137 std::string("Error in block_matrix_sqrt: "
138 "values less than 0 detected in block diagonal's schur "
139 "decomposition starting at (")
140 + std::to_string(i) + ", " + std::to_string(i) + ")");
141 }
142 try {
143 // Compute square root of T
144 Eigen::matrix_sqrt_quasi_triangular(t_mat, sqrt_t_mat);
145 // Compute square root of arg
146 local_block_sqrt = u_mat * sqrt_t_mat * u_mat.adjoint();
147 } catch (const std::exception& e) {
148 throw std::domain_error(
149 "Error in block_matrix_sqrt: "
150 "The matrix is not positive definite");
151 }
152 for (int k = 0; k < block_size; k++) {
153 for (int j = 0; j < block_size; j++) {
154 W_root.coeffRef(i * block_size + j, i * block_size + k)
155 = local_block_sqrt(j, k);
156 }
157 }
158 }
159}
160
168template <typename WRootMat>
169inline void block_matrix_chol_L(WRootMat& W_root,
170 const Eigen::SparseMatrix<double>& W,
171 const Eigen::Index block_size) {
172 int n_block = W.cols() / block_size;
173 Eigen::MatrixXd local_block(block_size, block_size);
174 Eigen::MatrixXd local_block_sqrt(block_size, block_size);
175 Eigen::MatrixXd sqrt_t_mat = Eigen::MatrixXd::Zero(block_size, block_size);
176 // No block operation available for sparse matrices, so we have to loop
177 // See https://eigen.tuxfamily.org/dox/group__TutorialSparse.html#title7
178 for (int i = 0; i < n_block; i++) {
179 sqrt_t_mat.setZero();
180 local_block
181 = W.block(i * block_size, i * block_size, block_size, block_size);
182 if (Eigen::isnan(local_block.array()).any()) {
183 throw std::domain_error(
184 std::string("Error in block_matrix_sqrt: "
185 "NaNs detected in block diagonal starting at (")
186 + std::to_string(i) + ", " + std::to_string(i) + ")");
187 }
188 try {
189 // Compute square root of T
190 Eigen::LLT<Eigen::Ref<Eigen::MatrixXd>> llt(local_block);
191 if (llt.info() != Eigen::Success) {
192 throw std::runtime_error("Cholesky failed on block "
193 + std::to_string(i));
194 }
195 const auto Lb = llt.matrixL();
196 for (int k = 0; k < block_size; k++) {
197 for (int j = k; j < block_size; j++) {
198 W_root.coeffRef(i * block_size + j, i * block_size + k) = Lb(j, k);
199 }
200 }
201 } catch (const std::exception& e) {
202 // As a backup do the schur decomposition for this block diagonal
203 local_block
204 = W.block(i * block_size, i * block_size, block_size, block_size);
205 // Issue here, sqrt is done over T of the complex schur
206 Eigen::RealSchur<Eigen::MatrixXd> schurOfA(local_block);
207 // Compute Schur decomposition of arg
208 const auto& t_mat = schurOfA.matrixT();
209 const auto& u_mat = schurOfA.matrixU();
210 // Check if diagonal of schur is not positive
211 if ((t_mat.diagonal().array() < 0).any()) {
212 throw std::domain_error(
213 std::string("Error in block_matrix_sqrt: "
214 "values less than 0 detected in block diagonal's schur "
215 "decomposition starting at (")
216 + std::to_string(i) + ", " + std::to_string(i) + ")");
217 }
218 try {
219 // Compute square root of T
220 Eigen::matrix_sqrt_quasi_triangular(t_mat, sqrt_t_mat);
221 // Compute square root of arg
222 local_block_sqrt.noalias() = u_mat * sqrt_t_mat * u_mat.adjoint();
223 } catch (const std::exception& e) {
224 throw std::domain_error(
225 "Error in block_matrix_sqrt: "
226 "The matrix is not positive definite");
227 }
228 for (int k = 0; k < block_size; k++) {
229 for (int j = 0; j < block_size; j++) {
230 W_root.coeffRef(i * block_size + j, i * block_size + k)
231 = local_block_sqrt(j, k);
232 }
233 }
234 throw std::domain_error(
235 "Error in block_matrix_sqrt: "
236 "The matrix is not positive definite");
237 }
238 }
239}
240
273template <typename AVec, typename APrev, typename ThetaVec, typename LLFun,
274 typename LLArgs, typename Covar, typename Msgs>
275inline void line_search(double& objective_new, AVec& a, ThetaVec& theta,
276 APrev& a_prev, LLFun&& ll_fun, LLArgs&& ll_args,
277 Covar&& covariance, const int max_steps_line_search,
278 const double objective_old, double tolerance,
279 Msgs* msgs) {
280 Eigen::VectorXd a_tmp(a.size());
281 double objective_new_tmp = 0.0;
282 double objective_old_tmp = objective_old;
283 Eigen::VectorXd theta_tmp(covariance.rows());
284 for (int j = 0;
285 j < max_steps_line_search && (objective_new < objective_old_tmp); ++j) {
286 a_tmp.noalias() = a_prev + 0.5 * (a - a_prev);
287 theta_tmp.noalias() = covariance * a_tmp;
288 if (!theta_tmp.allFinite()) {
289 break;
290 } else {
291 objective_new_tmp = -0.5 * a_tmp.dot(theta_tmp)
293 ll_fun, theta_tmp, ll_args, msgs);
294 if (objective_new_tmp < objective_new) {
295 a_prev.swap(a);
296 a.swap(a_tmp);
297 theta.swap(theta_tmp);
298 objective_old_tmp = objective_new;
299 objective_new = objective_new_tmp;
300 } else {
301 break;
302 }
303 }
304 }
305}
306
310template <typename Output>
311inline void set_zero_adjoint(Output&& output) {
312 if constexpr (is_all_arithmetic_scalar_v<Output>) {
313 return;
314 } else {
315 return iter_tuple_nested(
316 [](auto&& output_i) {
317 using output_i_t = std::decay_t<decltype(output_i)>;
318 if constexpr (is_all_arithmetic_scalar_v<output_i_t>) {
319 return;
320 } else if constexpr (is_std_vector<output_i_t>::value) {
321 for (Eigen::Index i = 0; i < output_i.size(); ++i) {
322 output_i[i].adj() = 0;
323 }
324 } else if constexpr (is_eigen_v<output_i_t>) {
325 output_i.adj().setZero();
326 } else if constexpr (is_stan_scalar_v<output_i_t>) {
327 output_i.adj() = 0;
328 } else {
329 static_assert(
330 sizeof(std::decay_t<output_i_t>*) == 0,
331 "INTERNAL ERROR:(laplace_marginal_lpdf) set_zero_adjoints was "
332 "not able to deduce the actions needed for the given type. "
333 "This is an internal error, please report it: "
334 "https://github.com/stan-dev/math/issues");
335 }
336 },
337 std::forward<Output>(output));
338 }
339}
340
349template <bool ZeroInput = false, typename Output, typename Input,
352inline void collect_adjoints(Output& output, Input&& input) {
353 return iter_tuple_nested(
354 [](auto&& output_i, auto&& input_i) {
355 using output_i_t = std::decay_t<decltype(output_i)>;
356 if constexpr (is_std_vector_v<output_i_t>) {
357 Eigen::Map<Eigen::Matrix<double, -1, 1>> output_map(output_i.data(),
358 output_i.size());
359 Eigen::Map<Eigen::Matrix<var, -1, 1>> input_map(input_i.data(),
360 input_i.size());
361 output_map.array() += input_map.adj().array();
362 if constexpr (ZeroInput) {
363 input_map.adj().setZero();
364 }
365 } else if constexpr (is_eigen_v<output_i_t>) {
366 output_i.array() += input_i.adj().array();
367 if constexpr (ZeroInput) {
368 input_i.adj().setZero();
369 }
370 } else if constexpr (is_stan_scalar_v<output_i_t>) {
371 output_i += input_i.adj();
372 if constexpr (ZeroInput) {
373 input_i.adj() = 0;
374 }
375 } else {
376 static_assert(
377 sizeof(std::decay_t<output_i_t>*) == 0,
378 "INTERNAL ERROR:(laplace_marginal_lpdf) set_zero_adjoints was "
379 "not able to deduce the actions needed for the given type. "
380 "This is an internal error, please report it: "
381 "https://github.com/stan-dev/math/issues");
382 }
383 },
384 std::forward<Output>(output), std::forward<Input>(input));
385}
386
396template <typename NameStr, typename ParamStr, typename Param>
397inline STAN_COLD_PATH void throw_nan(NameStr&& name_str, ParamStr&& param_str,
398 Param&& param) {
399 std::string msg = std::string("Error in ") + name_str + ": "
400 + std::string(param_str) + " contains NaN values";
401 if ((Eigen::isnan(param.array()) || Eigen::isinf(param.array())).all()) {
402 msg += " for all values.";
403 throw std::domain_error(msg);
404 }
405 msg += " at indices [";
406 for (int i = 0; i < param.size(); ++i) {
407 if (std::isnan(param(i) || std::isinf(param(i)))) {
408 msg += std::to_string(i) + ", ";
409 }
410 }
411 msg.pop_back();
412 msg.pop_back();
413 msg += "].";
414 throw std::domain_error(msg);
415}
416
465template <typename LLFun, typename LLTupleArgs, typename CovarFun,
466 typename CovarArgs, bool InitTheta,
469 LLFun&& ll_fun, LLTupleArgs&& ll_args, CovarFun&& covariance_function,
470 CovarArgs&& covar_args, const laplace_options<InitTheta>& options,
471 std::ostream* msgs) {
472 using Eigen::MatrixXd;
473 using Eigen::SparseMatrix;
474 using Eigen::VectorXd;
475 if constexpr (InitTheta) {
476 check_nonzero_size("laplace_marginal", "initial guess", options.theta_0);
477 check_finite("laplace_marginal", "initial guess", options.theta_0);
478 }
479 check_nonnegative("laplace_marginal", "tolerance", options.tolerance);
480 check_positive("laplace_marginal", "max_num_steps", options.max_num_steps);
481 check_positive("laplace_marginal", "hessian_block_size",
482 options.hessian_block_size);
483 check_nonnegative("laplace_marginal", "max_steps_line_search",
484 options.max_steps_line_search);
485
486 Eigen::MatrixXd covariance = stan::math::apply(
487 [msgs, &covariance_function](auto&&... args) {
488 return covariance_function(args..., msgs);
489 },
490 covar_args);
491 check_square("laplace_marginal", "covariance", covariance);
492
493 const Eigen::Index theta_size = covariance.rows();
494
495 if (unlikely(theta_size % options.hessian_block_size != 0)) {
496 [&]() STAN_COLD_PATH {
497 std::stringstream msg;
498 msg << "laplace_marginal_density: The hessian size (" << theta_size
499 << ", " << theta_size
500 << ") is not divisible by the hessian block size ("
501 << options.hessian_block_size
502 << ")"
503 ". Try a hessian block size such as [1, ";
504 for (int i = 2; i < 12; ++i) {
505 if (theta_size % i == 0) {
506 msg << i << ", ";
507 }
508 }
509 msg.str().pop_back();
510 msg.str().pop_back();
511 msg << "].";
512 throw std::domain_error(msg.str());
513 }();
514 }
515
516 auto throw_overstep = [](const auto max_num_steps) STAN_COLD_PATH {
517 throw std::domain_error(
518 std::string("laplace_marginal_density: max number of iterations: ")
519 + std::to_string(max_num_steps) + " exceeded.");
520 };
521 auto ll_args_vals = value_of(ll_args);
522 Eigen::VectorXd theta = [theta_size, &options]() {
523 if constexpr (InitTheta) {
524 return options.theta_0;
525 } else {
526 return Eigen::VectorXd::Zero(theta_size);
527 }
528 }();
529 double objective_old = std::numeric_limits<double>::lowest();
530 double objective_new = std::numeric_limits<double>::lowest() + 1;
531 Eigen::VectorXd a_prev = Eigen::VectorXd::Zero(theta_size);
532 Eigen::MatrixXd B(theta_size, theta_size);
533 Eigen::VectorXd a(theta_size);
534 Eigen::VectorXd b(theta_size);
535 if (options.solver == 1) {
536 if (options.hessian_block_size == 1) {
537 for (Eigen::Index i = 0; i <= options.max_num_steps; i++) {
538 auto [theta_grad, W] = laplace_likelihood::diff(
539 ll_fun, theta, options.hessian_block_size, ll_args, msgs);
540 Eigen::VectorXd W_r(W.rows());
541 // Compute matrix square-root of W. If all elements of W are positive,
542 // do an element wise square-root. Else try a matrix square-root
543 for (Eigen::Index i = 0; i < W.rows(); i++) {
544 if (W.coeff(i, i) < 0) {
545 throw std::domain_error(
546 "laplace_marginal_density: Hessian matrix is not positive "
547 "definite");
548 } else {
549 W_r.coeffRef(i) = std::sqrt(W.coeff(i, i));
550 }
551 }
552 B.noalias() = MatrixXd::Identity(theta_size, theta_size)
553 + W_r.asDiagonal() * covariance * W_r.asDiagonal();
554 Eigen::LLT<Eigen::Ref<Eigen::MatrixXd>> llt_B(B);
555 auto L = llt_B.matrixL();
556 auto LT = llt_B.matrixU();
557 b.noalias() = W.diagonal().cwiseProduct(theta) + theta_grad;
558 a.noalias()
559 = b
560 - W_r.asDiagonal()
561 * LT.solve(L.solve(W_r.cwiseProduct(covariance * b)));
562 // Simple Newton step
563 theta.noalias() = covariance * a;
564 objective_old = objective_new;
565 if (unlikely(
566 (Eigen::isinf(theta.array()) || Eigen::isnan(theta.array()))
567 .any())) {
568 throw_nan("laplace_marginal_density", "theta", theta);
569 }
570 objective_new = -0.5 * a.dot(theta)
572 ll_fun, theta, ll_args_vals, msgs);
573 if (options.max_steps_line_search) {
574 line_search(objective_new, a, theta, a_prev, ll_fun, ll_args_vals,
575 covariance, options.max_steps_line_search, objective_old,
576 options.tolerance, msgs);
577 }
578 // Check for convergence
579 if (abs(objective_new - objective_old) < options.tolerance) {
580 const double B_log_determinant
581 = 2.0 * llt_B.matrixLLT().diagonal().array().log().sum();
582 // Overwrite W instead of making a new sparse matrix
583 W.diagonal() = W_r;
585 objective_new - 0.5 * B_log_determinant,
586 std::move(covariance),
587 std::move(theta),
588 std::move(W),
589 Eigen::MatrixXd(L),
590 std::move(a),
591 std::move(theta_grad),
592 Eigen::PartialPivLU<Eigen::MatrixXd>{},
593 Eigen::MatrixXd(0, 0)};
594 } else {
595 a_prev = std::move(a);
596 set_zero_adjoint(ll_args);
597 }
598 }
599 } else {
600 Eigen::SparseMatrix<double> W_r(theta_size, theta_size);
601 Eigen::Index block_size = options.hessian_block_size;
602 W_r.reserve(Eigen::VectorXi::Constant(W_r.cols(), block_size));
603 const Eigen::Index n_block = W_r.cols() / block_size;
604 // Prefill W_r so we only make space once
605 for (Eigen::Index i = 0; i < n_block; i++) {
606 for (Eigen::Index k = 0; k < block_size; k++) {
607 for (Eigen::Index j = 0; j < block_size; j++) {
608 W_r.insert(i * block_size + j, i * block_size + k) = 1.0;
609 }
610 }
611 }
612 W_r.makeCompressed();
613 for (Eigen::Index i = 0; i <= options.max_num_steps; i++) {
614 auto [theta_grad, W] = laplace_likelihood::diff(
615 ll_fun, theta, options.hessian_block_size, ll_args, msgs);
616 for (Eigen::Index i = 0; i < W.rows(); i++) {
617 if (W.coeff(i, i) < 0) {
618 throw std::domain_error(
619 "laplace_marginal_density: Hessian matrix is not positive "
620 "definite");
621 }
622 }
623 block_matrix_chol_L(W_r, W, options.hessian_block_size);
624 B.noalias() = MatrixXd::Identity(theta_size, theta_size)
625 + W_r * (covariance * W_r);
626 Eigen::LLT<Eigen::Ref<Eigen::MatrixXd>> llt_B(B);
627 auto L = llt_B.matrixL();
628 auto LT = llt_B.matrixU();
629 b.noalias() = W * theta + theta_grad;
630 a.noalias() = b - W_r * LT.solve(L.solve(W_r * (covariance * b)));
631 // Simple Newton step
632 theta.noalias() = covariance * a;
633 objective_old = objective_new;
634 if (unlikely(
635 (Eigen::isinf(theta.array()) || Eigen::isnan(theta.array()))
636 .any())) {
637 throw_nan("laplace_marginal_density", "theta", theta);
638 }
639 objective_new = -0.5 * a.dot(value_of(theta))
641 ll_fun, value_of(theta), ll_args_vals, msgs);
642 if (options.max_steps_line_search > 0) {
643 line_search(objective_new, a, theta, a_prev, ll_fun, ll_args_vals,
644 covariance, options.max_steps_line_search, objective_old,
645 options.tolerance, msgs);
646 }
647 // Check for convergence
648 if (abs(objective_new - objective_old) < options.tolerance) {
649 const double B_log_determinant
650 = 2.0 * llt_B.matrixLLT().diagonal().array().log().sum();
652 objective_new - 0.5 * B_log_determinant,
653 std::move(covariance),
654 std::move(theta),
655 std::move(W_r),
656 Eigen::MatrixXd(L),
657 std::move(a),
658 std::move(theta_grad),
659 Eigen::PartialPivLU<Eigen::MatrixXd>{},
660 Eigen::MatrixXd(0, 0)};
661 } else {
662 a_prev = a;
663 set_zero_adjoint(ll_args);
664 }
665 }
666 }
667 throw_overstep(options.max_num_steps);
668 } else if (options.solver == 2) {
669 Eigen::MatrixXd K_root
670 = covariance.template selfadjointView<Eigen::Lower>().llt().matrixL();
671 for (Eigen::Index i = 0; i <= options.max_num_steps; i++) {
672 auto [theta_grad, W] = laplace_likelihood::diff(
673 ll_fun, theta, options.hessian_block_size, ll_args, msgs);
674 B.noalias() = MatrixXd::Identity(theta_size, theta_size)
675 + K_root.transpose() * W * K_root;
676 Eigen::LLT<Eigen::Ref<Eigen::MatrixXd>> llt_B(B);
677 auto L = llt_B.matrixL();
678 auto LT = llt_B.matrixU();
679 b.noalias() = W * theta + theta_grad;
680 a.noalias()
681 = K_root.transpose().template triangularView<Eigen::Upper>().solve(
682 LT.solve(L.solve(K_root.transpose() * b)));
683 // Simple Newton step
684 theta.noalias() = covariance * a;
685 objective_old = objective_new;
686 if (unlikely((Eigen::isinf(theta.array()) || Eigen::isnan(theta.array()))
687 .any())) {
688 throw_nan("laplace_marginal_density", "theta", theta);
689 }
690 objective_new = -0.5 * a.dot(theta)
692 ll_args_vals, msgs);
693 // linesearch
694 if (options.max_steps_line_search > 0) {
695 line_search(objective_new, a, theta, a_prev, ll_fun, ll_args_vals,
696 covariance, options.max_steps_line_search, objective_old,
697 options.tolerance, msgs);
698 }
699 // Check for convergence
700 if (abs(objective_new - objective_old) < options.tolerance) {
701 const double B_log_determinant
702 = 2.0 * llt_B.matrixLLT().diagonal().array().log().sum();
704 objective_new - 0.5 * B_log_determinant,
705 std::move(covariance),
706 std::move(theta),
707 std::move(W),
708 std::move(Eigen::MatrixXd(L)),
709 std::move(a),
710 std::move(theta_grad),
711 Eigen::PartialPivLU<Eigen::MatrixXd>{},
712 std::move(K_root)};
713 } else {
714 a_prev = a;
715 set_zero_adjoint(ll_args);
716 }
717 }
718 throw_overstep(options.max_num_steps);
719 } else if (options.solver == 3) {
720 for (Eigen::Index i = 0; i <= options.max_num_steps; i++) {
721 auto [theta_grad, W] = laplace_likelihood::diff(
722 ll_fun, theta, options.hessian_block_size, ll_args, msgs);
723 Eigen::PartialPivLU<Eigen::MatrixXd> LU(
724 MatrixXd::Identity(theta_size, theta_size) + covariance * W);
725 // L on upper and U on lower triangular
726 b.noalias() = W * theta + theta_grad;
727 a.noalias() = b - W * LU.solve(covariance * b);
728 // Simple Newton step
729 theta.noalias() = covariance * a;
730 objective_old = objective_new;
731 if (((Eigen::isinf(theta.array()) || Eigen::isnan(theta.array()))
732 .any())) {
733 throw_nan("laplace_marginal_density", "theta", theta);
734 }
735 objective_new = -0.5 * a.dot(value_of(theta))
737 ll_fun, value_of(theta), ll_args_vals, msgs);
738
739 if (options.max_steps_line_search > 0) {
740 line_search(objective_new, a, theta, a_prev, ll_fun, ll_args_vals,
741 covariance, options.max_steps_line_search, objective_old,
742 options.tolerance, msgs);
743 }
744 if (abs(objective_new - objective_old) < options.tolerance) {
745 // TODO(Charles): There has to be a simple trick for this
746 const double B_log_determinant = log(LU.determinant());
748 objective_new - 0.5 * B_log_determinant,
749 std::move(covariance),
750 std::move(theta),
751 std::move(W),
752 Eigen::MatrixXd(0, 0),
753 std::move(a),
754 std::move(theta_grad),
755 std::move(LU),
756 Eigen::MatrixXd(0, 0)};
757 } else {
758 a_prev = a;
759 set_zero_adjoint(ll_args);
760 }
761 }
762 throw_overstep(options.max_num_steps);
763 }
764 throw std::domain_error(
765 std::string("You chose a solver (") + std::to_string(options.solver)
766 + ") that is not valid. Please choose either 1, 2, or 3.");
767}
768} // namespace internal
796template <
797 typename LLFun, typename LLTupleArgs, typename CovarFun, typename CovarArgs,
798 bool InitTheta,
801 LLFun&& ll_fun, LLTupleArgs&& ll_args, CovarFun&& covariance_function,
802 CovarArgs&& covar_args, const laplace_options<InitTheta>& options,
803 std::ostream* msgs) {
805 std::forward<LLFun>(ll_fun), std::forward<LLTupleArgs>(ll_args),
806 std::forward<CovarFun>(covariance_function),
807 std::forward<CovarArgs>(covar_args), options, msgs)
808 .lmd;
809}
810
811namespace internal {
812
820template <typename Output, typename Input,
823inline void collect_adjoints(Output&& output, Input&& input) {
824 return iter_tuple_nested(
825 [](auto&& output_i, auto&& input_i) {
826 using output_i_t = std::decay_t<decltype(output_i)>;
827 if constexpr (is_std_vector_v<output_i_t>) {
828 Eigen::Map<Eigen::Matrix<double, -1, 1>> output_map(output_i.data(),
829 output_i.size());
830 Eigen::Map<Eigen::Matrix<double, -1, 1>> input_map(input_i.data(),
831 input_i.size());
832 output_map.array() += input_map.array();
833 } else if constexpr (is_eigen_v<output_i_t>) {
834 output_i.array() += input_i.array();
835 } else if constexpr (is_stan_scalar_v<output_i_t>) {
836 output_i += input_i;
837 } else {
838 static_assert(
839 sizeof(std::decay_t<output_i_t>*) == 0,
840 "INTERNAL ERROR:(laplace_marginal_lpdf) set_zero_adjoints was "
841 "not able to deduce the actions needed for the given type. "
842 "This is an internal error, please report it: "
843 "https://github.com/stan-dev/math/issues");
844 }
845 },
846 std::forward<Output>(output), std::forward<Input>(input));
847}
851template <bool ZeroInput = false>
852inline void constexpr copy_compute_s2(const std::tuple<>& output,
853 const std::tuple<>& input) noexcept {}
854
863template <bool ZeroInput = false, typename Output, typename Input,
866inline void copy_compute_s2(Output&& output, Input&& input) {
867 return iter_tuple_nested(
868 [](auto&& output_i, auto&& input_i) {
869 using output_i_t = std::decay_t<decltype(output_i)>;
870 if constexpr (is_std_vector_v<output_i_t>) {
871 Eigen::Map<Eigen::Matrix<double, -1, 1>> output_map(output_i.data(),
872 output_i.size());
873 Eigen::Map<Eigen::Matrix<var, -1, 1>> input_map(input_i.data(),
874 input_i.size());
875 output_map.array() += 0.5 * input_map.adj().array();
876 if constexpr (ZeroInput) {
877 input_map.adj().setZero();
878 }
879 } else if constexpr (is_eigen_v<output_i_t>) {
880 output_i.array() += 0.5 * input_i.adj().array();
881 if constexpr (ZeroInput) {
882 input_i.adj().setZero();
883 }
884 } else if constexpr (is_stan_scalar_v<output_i_t>) {
885 output_i += (0.5 * input_i.adj());
886 if constexpr (ZeroInput) {
887 input_i.adj() = 0;
888 }
889 } else {
890 static_assert(
891 sizeof(std::decay_t<output_i_t>*) == 0,
892 "INTERNAL ERROR:(laplace_marginal_lpdf) set_zero_adjoints was "
893 "not able to deduce the actions needed for the given type. "
894 "This is an internal error, please report it: "
895 "https://github.com/stan-dev/math/issues");
896 }
897 },
898 std::forward<Output>(output), std::forward<Input>(input));
899}
900
901template <typename T>
902inline constexpr decltype(auto) filter_var_scalar_types(T&& t) {
903 return stan::math::filter_map<is_any_var_scalar>(
904 [](auto&& arg) -> decltype(auto) {
905 return std::forward<decltype(arg)>(arg);
906 },
907 std::forward<T>(t));
908}
914template <typename Input>
915inline constexpr auto make_zeroed_arena(Input&& input) {
916 if constexpr (is_tuple_v<Input>) {
917 return stan::math::filter_map<is_any_var_scalar>(
918 [](auto&& output_i) { return make_zeroed_arena(output_i); }, input);
919 } else if constexpr (is_std_vector_v<Input>) {
920 if constexpr (!is_var_v<value_type_t<Input>>) {
921 const auto output_size = input.size();
922 arena_t<std::vector<decltype(make_zeroed_arena(input[0]))>> ret;
923 ret.reserve(output_size);
924 for (Eigen::Index i = 0; i < output_size; ++i) {
925 ret.push_back(make_zeroed_arena(input[i]));
926 }
927 return ret;
928 } else {
929 return arena_t<std::vector<double>>(input.size(), 0.0);
930 }
931 } else if constexpr (is_eigen_v<Input>) {
934 input.cols()));
935 } else if constexpr (is_var<Input>::value) {
936 return static_cast<double>(0.0);
937 }
938}
939
948template <typename Output, typename Input>
949inline void collect_adjoints(Output&& output, const vari* ret, Input&& input) {
950 if constexpr (is_tuple_v<Output>) {
951 static_assert(sizeof(std::decay_t<Output>*) == 0,
952 "INTERNAL ERROR:(laplace_marginal_lpdf) "
953 "Accumulate Adjoints called on a tuple, but tuples cannot be "
954 "on the reverse mode stack! "
955 "This is an internal error, please report it: "
956 "https://github.com/stan-dev/math/issues");
957 } else if constexpr (is_std_vector_v<Output>) {
958 if constexpr (!is_var_v<value_type_t<Output>>) {
959 const auto output_size = output.size();
960 for (std::size_t i = 0; i < output_size; ++i) {
961 collect_adjoints(output[i], ret, input[i]);
962 }
963 } else {
964 Eigen::Map<Eigen::Matrix<var, -1, 1>> output_map(output.data(),
965 output.size());
966 Eigen::Map<const Eigen::Matrix<double, -1, 1>> input_map(input.data(),
967 input.size());
968 output_map.array().adj() += ret->adj_ * input_map.array();
969 }
970 } else if constexpr (is_eigen_v<Output>) {
971 output.adj().array() += ret->adj_ * input.array();
972 } else if constexpr (is_var_v<Output>) {
973 output.adj() += ret->adj_ * input;
974 }
975}
976
987template <typename Output, typename Input>
988inline void reverse_pass_collect_adjoints(var ret, Output&& output,
989 Input&& input) {
990 if constexpr (is_tuple_v<Output>) {
992 [ret](auto&& inner_arg, auto&& inner_input) mutable {
994 ret, std::forward<decltype(inner_arg)>(inner_arg),
995 std::forward<decltype(inner_input)>(inner_input));
996 },
997 std::forward<Output>(output), std::forward<Input>(input));
998 } else if constexpr (is_std_vector_containing_tuple_v<Output>) {
999 for (std::size_t i = 0; i < output.size(); ++i) {
1000 reverse_pass_collect_adjoints(ret, output[i], input[i]);
1001 }
1002 } else {
1004 [vi = ret.vi_, arg_arena = to_arena(std::forward<Output>(output)),
1005 input_arena = to_arena(std::forward<Input>(input))]() mutable {
1006 collect_adjoints(arg_arena, vi, input_arena);
1007 });
1008 }
1009}
1010} // namespace internal
1038template <typename LLFun, typename LLTupleArgs, typename CovarFun,
1039 typename CovarArgs, bool InitTheta,
1041inline auto laplace_marginal_density(const LLFun& ll_fun, LLTupleArgs&& ll_args,
1042 CovarFun&& covariance_function,
1043 CovarArgs&& covar_args,
1044 const laplace_options<InitTheta>& options,
1045 std::ostream* msgs) {
1046 auto covar_args_refs = to_ref(std::forward<CovarArgs>(covar_args));
1047 auto ll_args_refs = to_ref(std::forward<LLTupleArgs>(ll_args));
1048 // Solver 1, 2, 3
1049 constexpr bool ll_args_contain_var = is_any_var_scalar<LLTupleArgs>::value;
1050 auto partial_parm = internal::make_zeroed_arena(ll_args_refs);
1051 auto covar_args_adj = internal::make_zeroed_arena(covar_args_refs);
1052 double lmd = 0.0;
1053 {
1054 nested_rev_autodiff nested;
1055
1056 // Make one hard copy here
1059 auto ll_args_copy
1060 = conditional_copy_and_promote<is_any_var_scalar, var, COPY_TYPE::DEEP>(
1061 ll_args_refs);
1062
1064 ll_fun, ll_args_copy, covariance_function, value_of(covar_args_refs),
1065 options, msgs);
1066
1067 // Solver 1, 2
1068 arena_t<Eigen::MatrixXd> R(md_est.theta.size(), md_est.theta.size());
1069 // Solver 3
1070 arena_t<Eigen::MatrixXd> LU_solve_covariance;
1071 // Solver 1, 2, 3
1072 arena_t<Eigen::VectorXd> s2(md_est.theta.size());
1073
1074 // Return references to var types
1075 auto ll_args_filter = internal::filter_var_scalar_types(ll_args_copy);
1077 [](auto&& output_i, auto&& ll_arg_i) {
1078 if (is_any_var_scalar_v<decltype(ll_arg_i)>) {
1079 internal::collect_adjoints<true>(output_i, ll_arg_i);
1080 }
1081 },
1082 partial_parm, ll_args_filter);
1083 if (options.solver == 1) {
1084 if (options.hessian_block_size == 1) {
1085 // TODO(Steve): Solve without casting from sparse to dense
1086 Eigen::MatrixXd tmp
1087 = md_est.L.template triangularView<Eigen::Lower>().solve(
1088 md_est.W_r.toDense());
1089 R = tmp.transpose() * tmp;
1091 = md_est.L.template triangularView<Eigen::Lower>().solve(
1092 md_est.W_r * md_est.covariance);
1093 if constexpr (!ll_args_contain_var) {
1094 s2.deep_copy(
1095 (0.5
1096 * (md_est.covariance.diagonal() - (C.transpose() * C).diagonal())
1097 .cwiseProduct(laplace_likelihood::third_diff(
1098 ll_fun, md_est.theta, value_of(ll_args_copy), msgs))));
1099 } else {
1100 arena_t<Eigen::MatrixXd> A = md_est.covariance - C.transpose() * C;
1101 auto s2_tmp = laplace_likelihood::compute_s2(
1102 ll_fun, md_est.theta, A, options.hessian_block_size, ll_args_copy,
1103 msgs);
1104 s2.deep_copy(s2_tmp);
1105 internal::copy_compute_s2<true>(partial_parm, ll_args_filter);
1106 }
1107
1108 } else {
1109 Eigen::MatrixXd tmp
1110 = md_est.L.template triangularView<Eigen::Lower>().solve(
1111 md_est.W_r.toDense());
1112 R = tmp.transpose() * tmp;
1114 = md_est.L.template triangularView<Eigen::Lower>().solve(
1115 md_est.W_r * md_est.covariance);
1116 arena_t<Eigen::MatrixXd> A = md_est.covariance - C.transpose() * C;
1117 auto s2_tmp = laplace_likelihood::compute_s2(ll_fun, md_est.theta, A,
1118 options.hessian_block_size,
1119 ll_args_copy, msgs);
1120 s2.deep_copy(s2_tmp);
1121 internal::copy_compute_s2<true>(partial_parm, ll_args_filter);
1122 }
1123 } else if (options.solver == 2) {
1124 R = md_est.W_r
1125 - md_est.W_r * md_est.K_root
1126 * md_est.L.transpose()
1127 .template triangularView<Eigen::Upper>()
1128 .solve(
1129 md_est.L.template triangularView<Eigen::Lower>()
1130 .solve(md_est.K_root.transpose() * md_est.W_r));
1131
1133 = md_est.L.template triangularView<Eigen::Lower>().solve(
1134 md_est.K_root.transpose());
1135 auto s2_tmp = laplace_likelihood::compute_s2(
1136 ll_fun, md_est.theta, (C.transpose() * C).eval(),
1137 options.hessian_block_size, ll_args_copy, msgs);
1138 s2.deep_copy(s2_tmp);
1139 internal::copy_compute_s2<true>(partial_parm, ll_args_filter);
1140 } else { // options.solver with LU decomposition
1141 LU_solve_covariance = md_est.LU.solve(md_est.covariance);
1142 R = md_est.W_r - md_est.W_r * LU_solve_covariance * md_est.W_r;
1144 = md_est.covariance
1145 - md_est.covariance * md_est.W_r * LU_solve_covariance;
1146 auto s2_tmp = laplace_likelihood::compute_s2(ll_fun, md_est.theta, A,
1147 options.hessian_block_size,
1148 ll_args_copy, msgs);
1149 s2.deep_copy(s2_tmp);
1150 internal::copy_compute_s2<true>(partial_parm, ll_args_filter);
1151 }
1152 lmd = md_est.lmd;
1153 if constexpr (is_any_var_scalar_v<scalar_type_t<CovarArgs>>) {
1154 [&covar_args_refs, &covar_args_adj, &md_est, &R, &s2,
1155 &covariance_function, &msgs]() mutable {
1156 const nested_rev_autodiff nested;
1157 auto covar_args_copy
1161
1163 [&covariance_function, &msgs](auto&&... args) {
1164 return covariance_function(args..., msgs);
1165 },
1166 covar_args_copy));
1167 arena_t<Eigen::MatrixXd> K_adj_arena
1168 = 0.5 * md_est.a * md_est.a.transpose() - 0.5 * R
1169 + s2 * md_est.theta_grad.transpose()
1170 - (R * (K_var.val() * s2)) * md_est.theta_grad.transpose();
1171 var Z = make_callback_var(0.0, [K_var, K_adj_arena](auto&& vi) mutable {
1172 K_var.adj().array() += vi.adj() * K_adj_arena.array();
1173 });
1174 grad(Z.vi_);
1175 auto covar_args_filter
1176 = internal::filter_var_scalar_types(covar_args_copy);
1177 internal::collect_adjoints(covar_args_adj, covar_args_filter);
1178 }();
1179 }
1180 if constexpr (ll_args_contain_var) {
1182 if (options.solver == 1 || options.solver == 2) {
1183 v = md_est.covariance * s2
1184 - md_est.covariance * R * md_est.covariance * s2;
1185 } else {
1186 v = LU_solve_covariance * s2;
1187 }
1188 laplace_likelihood::diff_eta_implicit(ll_fun, v, md_est.theta,
1189 ll_args_copy, msgs);
1190 internal::collect_adjoints<true>(partial_parm, ll_args_filter);
1191 }
1192 }
1193 var ret(lmd);
1194 if constexpr (is_any_var_scalar_v<CovarArgs>) {
1195 auto covar_args_filter = internal::filter_var_scalar_types(covar_args_refs);
1196 internal::reverse_pass_collect_adjoints(ret, covar_args_filter,
1197 covar_args_adj);
1198 }
1199 if constexpr (ll_args_contain_var) {
1200 auto ll_args_filter = internal::filter_var_scalar_types(ll_args_refs);
1201 internal::reverse_pass_collect_adjoints(ret, ll_args_filter, partial_parm);
1202 }
1203 return ret;
1204}
1205
1206} // namespace math
1207} // namespace stan
1208
1209#endif
A class following the RAII idiom to start and recover nested autodiff scopes.
#define STAN_COLD_PATH
#define unlikely(x)
void throw_nan(NameStr &&name_str, ParamStr &&param_str, Param &&param)
Throws an error if the parameter contains NaN or Inf values.
void set_zero_adjoint(Output &&output)
Set all adjoints of the output to zero.
constexpr decltype(auto) filter_var_scalar_types(T &&t)
void reverse_pass_collect_adjoints(var ret, Output &&output, Input &&input)
Collects adjoints from a tuple or std::vector of tuples.
void block_matrix_chol_L(WRootMat &W_root, const Eigen::SparseMatrix< double > &W, const Eigen::Index block_size)
Performs a Cholesky decomposition on a block diagonal matrix.
void line_search(double &objective_new, AVec &a, ThetaVec &theta, APrev &a_prev, LLFun &&ll_fun, LLArgs &&ll_args, Covar &&covariance, const int max_steps_line_search, const double objective_old, double tolerance, Msgs *msgs)
Performs a simple line search.
void constexpr copy_compute_s2(const std::tuple<> &output, const std::tuple<> &input) noexcept
Base case for zero sized tuples.
constexpr auto make_zeroed_arena(Input &&input)
Creates an arena type from the input with initialized with zeros.
auto laplace_marginal_density_est(LLFun &&ll_fun, LLTupleArgs &&ll_args, CovarFun &&covariance_function, CovarArgs &&covar_args, const laplace_options< InitTheta > &options, std::ostream *msgs)
For a latent Gaussian model with hyperparameters phi and latent variables theta, and observations y,...
void collect_adjoints(Output &output, Input &&input)
Collect the adjoints from the input and add them to the output.
void block_matrix_sqrt(WRootMat &W_root, const Eigen::SparseMatrix< double > &W, const Eigen::Index block_size)
Returns the principal square root of a block diagonal matrix.
auto conditional_copy_and_promote(Args &&... args)
Conditional copy and promote a type's scalar type to a PromotedType.
COPY_TYPE
Decide if object should be deep or shallow copied when using conditional_copy_and_promote .
auto compute_s2(F &&f, Theta &&theta, AMat &&A, int hessian_block_size, TupleArgs &&ll_args, Stream *msgs)
A wrapper that accepts a tuple as arguments.
auto diff(F &&f, Theta &&theta, const Eigen::Index hessian_block_size, TupleArgs &&ll_tuple, Stream *msgs)
A wrapper that accepts a tuple as arguments.
auto diff_eta_implicit(F &&f, V_t &&v, Theta &&theta, TupleArgs &&ll_args, Stream *msgs)
A wrapper that accepts a tuple as arguments.
auto log_likelihood(F &&f, Theta &&theta, TupleArgs &&ll_tup, Stream *msgs)
A wrapper that accepts a tuple as arguments.
Eigen::VectorXd third_diff(F &&f, Theta &&theta, TupleArgs &&ll_args, Stream *msgs)
A wrapper that accepts a tuple as arguments.
fvar< T > abs(const fvar< T > &x)
Definition abs.hpp:15
void iter_tuple_nested(F &&f, Types &&... args)
Iterate and nest into a tuple or std::vector to apply f to each matrix or scalar type.
void check_square(const char *function, const char *name, const T_y &y)
Check if the specified matrix is square.
void check_nonnegative(const char *function, const char *name, const T_y &y)
Check if y is non-negative.
static constexpr double e()
Return the base of the natural logarithm.
Definition constants.hpp:20
var_value< plain_type_t< T > > make_callback_var(T &&value, F &&functor)
Creates a new var initialized with a callback_vari with a given value and reverse-pass callback funct...
void reverse_pass_callback(F &&functor)
Puts a callback on the autodiff stack to be called in reverse pass.
fvar< T > arg(const std::complex< fvar< T > > &z)
Return the phase angle of the complex argument.
Definition arg.hpp:19
constexpr void for_each(F &&f, const std::tuple<> &)
Apply a function to each element of a tuple.
Definition for_each.hpp:80
T value_of(const fvar< T > &v)
Return the value of the specified variable.
Definition value_of.hpp:18
fvar< T > log(const fvar< T > &x)
Definition log.hpp:18
arena_t< T > to_arena(const T &a)
Converts given argument into a type that either has any dynamic allocation on AD stack or schedules i...
Definition to_arena.hpp:25
void check_finite(const char *function, const char *name, const T_y &y)
Return true if all values in y are finite.
var_value< Eigen::Matrix< double, T::RowsAtCompileTime, T::ColsAtCompileTime > > to_var_value(const T &a)
Converts an Eigen matrix (or vector or row_vector) or expression of vars into var_value.
var_value< double > var
Definition var.hpp:1187
void check_nonzero_size(const char *function, const char *name, const T_y &y)
Check if the specified matrix/vector is of non-zero size.
void check_positive(const char *function, const char *name, const T_y &y)
Check if y is positive.
ref_type_t< T && > to_ref(T &&a)
This evaluates expensive Eigen expressions.
Definition to_ref.hpp:18
static void grad()
Compute the gradient for all variables starting from the end of the AD tape.
Definition grad.hpp:26
constexpr decltype(auto) apply(F &&f, Tuple &&t, PreArgs &&... pre_args)
Definition apply.hpp:51
double laplace_marginal_density(LLFun &&ll_fun, LLTupleArgs &&ll_args, CovarFun &&covariance_function, CovarArgs &&covar_args, const laplace_options< InitTheta > &options, std::ostream *msgs)
For a latent Gaussian model with global parameters phi, latent variables theta, and observations y,...
typename plain_type< std::decay_t< T > >::type plain_type_t
constexpr bool is_any_var_scalar_v
Definition is_var.hpp:50
typename internal::arena_type_impl< std::decay_t< T > >::type arena_t
Determines a type that can be used in place of T that does any dynamic allocations on the AD stack.
std::enable_if_t< Check::value > require_t
If condition is true, template is enabled.
The lgamma implementation in stan-math is based on either the reentrant safe lgamma_r implementation ...
bool isnan(const stan::math::var &a)
Checks if the given number is NaN.
Definition std_isnan.hpp:18
bool isinf(const stan::math::var &a)
Return 1 if the specified argument is positive infinity or negative infinity and 0 otherwise.
Definition std_isinf.hpp:16
STL namespace.
Base implementation for checking if type is std vector.
Definition is_vector.hpp:19
Defines a static member named value which is defined to be false as the primitive scalar types cannot...
Definition is_var.hpp:14
laplace_density_estimates(double lmd_, Covar &&covariance_, ThetaVec &&theta_, WR &&W_r_, L_t &&L_, A_vec &&a_, ThetaGrad &&theta_grad_, LU_t &&LU_, KRoot &&K_root_)
int solver
Which Newton solver to use: (B matrix in equation 1 of https://arxiv.org/pdf/2306....
Options for the laplace sampler.