Automatic Differentiation
 
Loading...
Searching...
No Matches
laplace_marginal_density_estimator.hpp
Go to the documentation of this file.
1#ifndef STAN_MATH_MIX_FUNCTOR_LAPLACE_MARGINAL_DENSITY_ESTIMATOR_HPP
2#define STAN_MATH_MIX_FUNCTOR_LAPLACE_MARGINAL_DENSITY_ESTIMATOR_HPP
13#include <unsupported/Eigen/MatrixFunctions>
14#include <cmath>
15#include <mutex>
16#include <iomanip>
17
25namespace stan {
26namespace math {
27
32 /* Size of the blocks in block diagonal hessian*/
58 /* Maximum number of steps*/
64 laplace_options_base(int hessian_block_size_, int solver_, double tolerance_,
65 int max_num_steps_, bool allow_fallthrough_,
66 int max_steps_line_search_)
67 : hessian_block_size(hessian_block_size_),
68 solver(solver_),
69 tolerance(tolerance_),
70 max_num_steps(max_num_steps_),
71 allow_fallthrough(allow_fallthrough_),
72 line_search(max_steps_line_search_) {}
73};
74
75template <bool HasInitTheta>
77
78template <>
79struct laplace_options<false> : public laplace_options_base {
80 laplace_options() = default;
81
82 explicit laplace_options(int hessian_block_size_) {
83 hessian_block_size = hessian_block_size_;
84 }
85};
86
87template <>
89 /* Value for user supplied initial theta */
90 Eigen::VectorXd theta_0{0}; // 6
91
92 template <typename ThetaVec>
93 laplace_options(ThetaVec&& theta_0_, double tolerance_, int max_num_steps_,
94 int hessian_block_size_, int solver_,
95 int max_steps_line_search_, bool allow_fallthrough_)
96 : laplace_options_base(hessian_block_size_, solver_, tolerance_,
97 max_num_steps_, allow_fallthrough_,
98 max_steps_line_search_),
99 theta_0(value_of(std::forward<ThetaVec>(theta_0_))) {}
100};
101
104
105namespace internal {
106
107template <typename Options>
108inline constexpr auto tuple_to_laplace_options(Options&& ops) {
109 using Ops = std::decay_t<Options>;
110 if constexpr (is_tuple_v<Ops>) {
111 if constexpr (!is_eigen_v<std::tuple_element_t<0, std::decay_t<Ops>>>) {
112 static_assert(
113 sizeof(std::decay_t<Ops>*) == 0,
114 "ERROR:(laplace_marginal_lpdf) The first laplace argument is "
115 "expected to be an Eigen vector of dynamic size representing the "
116 "initial theta_0.");
117 }
118 if constexpr (!stan::is_inner_tuple_type_v<1, Ops, double>) {
119 static_assert(
120 sizeof(std::decay_t<Ops>*) == 0,
121 "ERROR:(laplace_marginal_lpdf) The second laplace argument is "
122 "expected to be a double representing the tolerance.");
123 }
124 if constexpr (!stan::is_inner_tuple_type_v<2, Ops, int>) {
125 static_assert(
126 sizeof(std::decay_t<Ops>*) == 0,
127 "ERROR:(laplace_marginal_lpdf) The third laplace argument is "
128 "expected to be an int representing the maximum number of steps for "
129 "the laplace approximation.");
130 }
131 if constexpr (!stan::is_inner_tuple_type_v<3, Ops, int>) {
132 static_assert(
133 sizeof(std::decay_t<Ops>*) == 0,
134 "ERROR:(laplace_marginal_lpdf) The fourth laplace argument is "
135 "expected to be an int representing the solver.");
136 }
137 if constexpr (!stan::is_inner_tuple_type_v<4, Ops, int>) {
138 static_assert(
139 sizeof(std::decay_t<Ops>*) == 0,
140 "ERROR:(laplace_marginal_lpdf) The fifth laplace argument is "
141 "expected to be an int representing the max steps for the laplace "
142 "approximaton's wolfe line search.");
143 }
144 constexpr bool is_fallthrough
146 5, Ops, int> || stan::is_inner_tuple_type_v<5, Ops, bool>;
147 if constexpr (!is_fallthrough) {
148 static_assert(
149 sizeof(std::decay_t<Ops>*) == 0,
150 "ERROR:(laplace_marginal_lpdf) The sixth laplace argument is "
151 "expected to be an int representing allow fallthrough (0/1).");
152 }
153 auto defaults = laplace_options_default{};
155 value_of(std::get<0>(std::forward<Options>(ops))),
156 std::get<1>(ops),
157 std::get<2>(ops),
158 defaults.hessian_block_size,
159 std::get<3>(ops),
160 std::get<4>(ops),
161 (std::get<5>(ops) > 0) ? true : false,
162 };
163 } else {
164 return std::forward<Options>(ops);
165 }
166}
167
168template <typename ThetaVec, typename WR, typename L_t, typename A_vec,
169 typename ThetaGrad, typename LU_t, typename KRoot>
171 /* log marginal density */
172 double lmd{std::numeric_limits<double>::infinity()};
173 /* ThetaVec at the mode */
174 ThetaVec theta;
182 WR W_r;
189 L_t L;
194 A_vec a;
196 ThetaGrad theta_grad;
197 /* LU matrix from solver 3 */
198 LU_t LU;
205 KRoot K_root;
207 laplace_density_estimates(double lmd_, ThetaVec&& theta_, WR&& W_r_, L_t&& L_,
208 A_vec&& a_, ThetaGrad&& theta_grad_, LU_t&& LU_,
209 KRoot&& K_root_, int solver_used_)
210 : lmd(lmd_),
211 theta(std::move(theta_)),
212 W_r(std::move(W_r_)),
213 L(std::move(L_)),
214 a(std::move(a_)),
215 theta_grad(std::move(theta_grad_)),
216 LU(std::move(LU_)),
217 K_root(std::move(K_root_)),
218 solver_used(solver_used_) {}
219};
220
228template <typename WRootMat>
229inline void block_matrix_sqrt(WRootMat& W_root,
230 const Eigen::SparseMatrix<double>& W,
231 const Eigen::Index block_size) {
232 int n_block = W.cols() / block_size;
233 Eigen::MatrixXd local_block(block_size, block_size);
234 Eigen::MatrixXd local_block_sqrt(block_size, block_size);
235 Eigen::MatrixXd sqrt_t_mat = Eigen::MatrixXd::Zero(block_size, block_size);
236 // No block operation available for sparse matrices, so we have to loop
237 // See https://eigen.tuxfamily.org/dox/group__TutorialSparse.html#title7
238 for (int i = 0; i < n_block; i++) {
239 sqrt_t_mat.setZero();
240 local_block
241 = W.block(i * block_size, i * block_size, block_size, block_size);
242 if (!local_block.array().isFinite().any()) {
243 throw std::domain_error(
244 std::string("Error in block_matrix_sqrt: "
245 "NaNs detected in block diagonal starting at (")
246 + std::to_string(i) + ", " + std::to_string(i) + ")");
247 }
248 // Issue here, sqrt is done over T of the complex schur
249 Eigen::RealSchur<Eigen::MatrixXd> schurOfA(local_block);
250 // Compute Schur decomposition of arg
251 const auto& t_mat = schurOfA.matrixT();
252 const auto& u_mat = schurOfA.matrixU();
253 // Check if diagonal of schur is not positive
254 if ((t_mat.diagonal().array() < 0).any()) {
255 throw std::domain_error(
256 std::string("Error in block_matrix_sqrt: "
257 "values less than 0 detected in block diagonal's schur "
258 "decomposition starting at (")
259 + std::to_string(i) + ", " + std::to_string(i) + ")");
260 }
261 try {
262 // Compute square root of T
263 Eigen::matrix_sqrt_quasi_triangular(t_mat, sqrt_t_mat);
264 // Compute square root of arg
265 local_block_sqrt = u_mat * sqrt_t_mat * u_mat.adjoint();
266 } catch (const std::exception& e) {
267 throw std::domain_error(
268 "Error in block_matrix_sqrt: "
269 "The matrix is not positive definite");
270 }
271 for (int k = 0; k < block_size; k++) {
272 for (int j = 0; j < block_size; j++) {
273 W_root.coeffRef(i * block_size + j, i * block_size + k)
274 = local_block_sqrt(j, k);
275 }
276 }
277 }
278}
279
289template <bool InitTheta, typename CovarMat>
290inline void validate_laplace_options(const char* frame_name,
291 const laplace_options<InitTheta>& options,
292 const CovarMat& covariance) {
293 if constexpr (InitTheta) {
294 check_nonzero_size(frame_name, "initial guess", options.theta_0);
295 check_finite(frame_name, "initial guess", options.theta_0);
296 if (unlikely(options.theta_0.size() != covariance.rows())) {
297 std::stringstream msg;
298 msg << frame_name << ": The size of the initial theta ("
299 << options.theta_0.size()
300 << ") vector must match the rows and columns of the covariance "
301 "matrix ("
302 << covariance.rows() << ", " << covariance.cols() << ").";
303 throw std::domain_error(msg.str());
304 }
305 }
306 check_nonnegative(frame_name, "tolerance", options.tolerance);
307 check_positive(frame_name, "max_num_steps", options.max_num_steps);
308 check_positive(frame_name, "hessian_block_size", options.hessian_block_size);
309 check_square(frame_name, "covariance", covariance);
310
311 const Eigen::Index theta_size = covariance.rows();
312 if (unlikely(theta_size % options.hessian_block_size != 0
313 || theta_size < options.hessian_block_size)) {
314 throw std::domain_error(
315 "laplace_marginal_density: Hessian block size mismatch.");
316 }
317
318 if (unlikely(options.solver < 1 || options.solver > 3)) {
319 throw std::domain_error(
320 "laplace_marginal_density: solver must be 1, 2, or 3. Got: "
321 + std::to_string(options.solver));
322 }
323}
324
339
342
345
347 Eigen::VectorXd b;
348
350 Eigen::MatrixXd B;
351
353 Eigen::VectorXd prev_g;
361 bool final_loop = false;
362
380 template <typename ObjFun, typename ThetaGradFun, typename CovarianceT,
381 typename ThetaInitializer>
382 NewtonState(int theta_size, ObjFun&& obj_fun, ThetaGradFun&& theta_grad_f,
383 CovarianceT&& covariance, ThetaInitializer&& theta_init)
384 : wolfe_info(std::forward<ObjFun>(obj_fun),
385 covariance.llt().solve(theta_init),
386 std::forward<ThetaInitializer>(theta_init),
387 std::forward<ThetaGradFun>(theta_grad_f)),
388 proposal(theta_size),
389 b(theta_size),
390 B(theta_size, theta_size),
391 prev_g(theta_size) {
392 wolfe_status.num_backtracks_ = -1; // Safe initial value for BB step
393 }
394
399 auto& curr() & { return wolfe_info.curr_; }
400
405 const auto& curr() const& { return wolfe_info.curr_; }
406 auto&& curr() && { return std::move(wolfe_info).curr(); }
411 auto& prev() & { return wolfe_info.prev_; }
412
417 const auto& prev() const& { return wolfe_info.prev_; }
418 auto&& prev() && { return std::move(wolfe_info).prev(); }
419 auto& proposal_step() & { return proposal; }
420 const auto& proposal_step() const& { return proposal; }
421 auto&& proposal_step() && { return std::move(proposal); }
422 template <typename Options>
423 inline void update_next_step(const Options& options) {
424 this->prev().swap(this->curr());
425 this->curr().alpha()
426 = std::clamp(this->curr().alpha(), 0.0, options.line_search.max_alpha);
427 }
428};
429
439template <typename LLT, typename B_t>
440inline void llt_with_jitter(LLT& llt_B, B_t& B, double min_jitter = 1e-10,
441 double max_jitter = 1e-5) {
442 llt_B.compute(B);
443 if (llt_B.info() != Eigen::Success) {
444 double prev_jitter = 0.0;
445 double jitter_try = min_jitter;
446 for (; jitter_try < max_jitter; jitter_try *= 10) {
447 // Remove previously added jitter before adding the new (larger) amount,
448 // so that the total diagonal perturbation is exactly jitter_try.
449 B.diagonal().array() += (jitter_try - prev_jitter);
450 prev_jitter = jitter_try;
451 llt_B.compute(B);
452 if (llt_B.info() == Eigen::Success) {
453 break;
454 }
455 }
456 if (llt_B.info() != Eigen::Success) {
457 throw std::domain_error(
458 "laplace_marginal_density: Cholesky failed after adding jitter up to "
459 + std::to_string(jitter_try));
460 }
461 }
462}
463
479 Eigen::VectorXd W_r_diag;
480
482 Eigen::VectorXd W_diag;
483
485 Eigen::LLT<Eigen::MatrixXd> llt_B;
486
487 template <typename NewtonStateT, typename CovarMat>
488 CholeskyWSolverDiag(const NewtonStateT& state, const CovarMat& covariance)
489 : W_r_diag(Eigen::VectorXd::Zero(state.b.size())), W_diag(0), llt_B() {}
510 template <typename NewtonStateT, typename LLFun, typename LLTupleArgs,
511 typename CovarMat>
512 void solve_step(NewtonStateT& state, const LLFun& ll_fun,
513 const LLTupleArgs& ll_args, const CovarMat& covariance,
514 int /*hessian_block_size*/, std::ostream* msgs) {
515 const Eigen::Index theta_size = state.b.size();
516
517 // 1. Compute diagonal Hessian
518 W_diag = laplace_likelihood::diagonal_hessian(ll_fun, state.prev().theta(),
519 ll_args, msgs);
520 for (Eigen::Index j = 0; j < W_diag.size(); j++) {
521 if (W_diag.coeff(j) < 0 || !std::isfinite(W_diag.coeff(j))) {
522 throw std::domain_error(
523 "laplace_marginal_density: Hessian matrix is not positive "
524 "definite");
525 } else {
526 W_r_diag.coeffRef(j) = std::sqrt(W_diag.coeff(j));
527 }
528 }
529
530 // 2. Formulate B = I + W_r * Sigma * W_r
531 state.B.noalias()
532 = Eigen::MatrixXd::Identity(theta_size, theta_size)
533 + W_r_diag.asDiagonal() * covariance * W_r_diag.asDiagonal();
534
535 // 3. Factorize B with jittering fallback
536 llt_with_jitter(llt_B, state.B);
537 // 4. Solve for the raw Newton proposal in a-space.
538 state.b.noalias() = (W_diag.array() * state.prev().theta().array()).matrix()
539 + state.prev().theta_grad();
540 auto L = llt_B.matrixL();
541 auto LT = llt_B.matrixU();
542 state.proposal_step().a().noalias()
543 = state.b
544 - W_r_diag.asDiagonal()
545 * LT.solve(
546 L.solve(W_r_diag.cwiseProduct(covariance * state.b)));
547 }
548
553 double compute_log_determinant() const {
554 return 2.0 * llt_B.matrixLLT().diagonal().array().log().sum();
555 }
556
565 template <typename NewtonStateT>
566 auto build_result(NewtonStateT& state, double log_det) {
568 state.prev().obj() - 0.5 * log_det,
569 std::move(state).prev().theta(),
570 Eigen::SparseMatrix<double>(W_r_diag.asDiagonal()),
571 Eigen::MatrixXd(llt_B.matrixL()),
572 std::move(state).prev().a(),
573 std::move(state).prev().theta_grad(),
574 Eigen::PartialPivLU<Eigen::MatrixXd>{},
575 Eigen::MatrixXd(0, 0),
576 1};
577 }
578};
579
595 Eigen::SparseMatrix<double> W_r;
596
598 Eigen::SparseMatrix<double> W_block;
599
601 Eigen::LLT<Eigen::MatrixXd> llt_B;
602
603 template <typename NewtonStateT>
604 CholeskyWSolverBlock(const NewtonStateT& state, int hessian_block_size)
605 : W_r(state.b.size(), state.b.size()) {
606 const Eigen::Index theta_size = state.b.size();
607 W_r.reserve(Eigen::VectorXi::Constant(theta_size, hessian_block_size));
608 const Eigen::Index n_block = theta_size / hessian_block_size;
609 for (Eigen::Index ii = 0; ii < n_block; ii++) {
610 for (Eigen::Index k = 0; k < hessian_block_size; k++) {
611 for (Eigen::Index j = 0; j < hessian_block_size; j++) {
612 W_r.insert(ii * hessian_block_size + j, ii * hessian_block_size + k)
613 = 1.0;
614 }
615 }
616 }
617 W_r.makeCompressed();
618 }
619
642 template <typename NewtonStateT, typename LLFun, typename LLTupleArgs,
643 typename CovarMat>
644 void solve_step(NewtonStateT& state, const LLFun& ll_fun,
645 const LLTupleArgs& ll_args, const CovarMat& covariance,
646 int hessian_block_size, std::ostream* msgs) {
647 const Eigen::Index theta_size = state.b.size();
648 // 1. Compute block Hessian
650 ll_fun, state.prev().theta(), hessian_block_size, ll_args, msgs);
651
652 for (Eigen::Index j = 0; j < W_block.rows(); j++) {
653 if (W_block.coeff(j, j) < 0 || !std::isfinite(W_block.coeff(j, j))) {
654 throw std::domain_error(
655 "laplace_marginal_density: Hessian matrix is not positive "
656 "definite");
657 }
658 }
659
660 // 2. Compute W_r = sqrt(W)
661 block_matrix_sqrt(W_r, W_block, hessian_block_size);
662
663 // 3. Formulate B = I + W_r * Sigma * W_r
664 state.B.noalias() = Eigen::MatrixXd::Identity(theta_size, theta_size)
665 + W_r * (covariance * W_r);
666
667 // 4. Factorize B with jittering fallback
668 llt_with_jitter(llt_B, state.B);
669
670 // 5. Solve for the raw Newton proposal in a-space.
671 state.b.noalias()
672 = W_block * state.prev().theta() + state.prev().theta_grad();
673 auto L = llt_B.matrixL();
674 auto LT = llt_B.matrixU();
675 state.proposal_step().a().noalias()
676 = state.b - W_r * LT.solve(L.solve(W_r * (covariance * state.b)));
677 }
678
683 double compute_log_determinant() const {
684 return 2.0 * llt_B.matrixLLT().diagonal().array().log().sum();
685 }
686
695 template <typename NewtonStateT>
696 auto build_result(NewtonStateT& state, double log_det) {
697 return laplace_density_estimates{state.prev().obj() - 0.5 * log_det,
698 std::move(state).prev().theta(),
699 std::move(W_r),
700 Eigen::MatrixXd(llt_B.matrixL()),
701 std::move(state).prev().a(),
702 std::move(state).prev().theta_grad(),
703 Eigen::PartialPivLU<Eigen::MatrixXd>{},
704 Eigen::MatrixXd(0, 0),
705 1};
706 }
707};
708
723 Eigen::MatrixXd K_root;
724
726 Eigen::SparseMatrix<double> W_full;
727
729 Eigen::LLT<Eigen::MatrixXd> llt_B;
730
731 template <typename NewtonStateT, typename CovarMat>
732 CholeskyKSolver(const NewtonStateT& state, const CovarMat& covariance)
733 : K_root(0, 0), W_full(0, 0), llt_B() {
734 auto K_root_llt = covariance.template selfadjointView<Eigen::Lower>().llt();
735 if (K_root_llt.info() != Eigen::Success) {
736 throw std::domain_error(
737 "laplace_marginal_density: Cholesky of covariance failed at start");
738 }
739 K_root = std::move(K_root_llt.matrixL());
740 }
741
762 template <typename NewtonStateT, typename LLFun, typename LLTupleArgs,
763 typename CovarMat>
764 void solve_step(NewtonStateT& state, const LLFun& ll_fun,
765 const LLTupleArgs& ll_args, const CovarMat& covariance,
766 int hessian_block_size, std::ostream* msgs) {
767 const Eigen::Index theta_size = state.b.size();
768
769 // 1. Compute Hessian
771 ll_fun, state.prev().theta(), hessian_block_size, ll_args, msgs);
772
773 // 2. Formulate B = I + K^T * W * K
774 state.B.noalias() = Eigen::MatrixXd::Identity(theta_size, theta_size)
775 + K_root.transpose() * (W_full * K_root);
776
777 // 3. Factorize B with jittering fallback
778 llt_with_jitter(llt_B, state.B);
779
780 // 4. Solve for the raw Newton proposal in a-space.
781 state.b.noalias()
782 = W_full * state.prev().theta() + state.prev().theta_grad();
783 auto L = llt_B.matrixL();
784 auto LT = llt_B.matrixU();
785 state.proposal_step().a().noalias()
786 = K_root.transpose().template triangularView<Eigen::Upper>().solve(
787 LT.solve(L.solve(K_root.transpose() * state.b)));
788 }
789
794 double compute_log_determinant() const {
795 return 2.0 * llt_B.matrixLLT().diagonal().array().log().sum();
796 }
797
806 template <typename NewtonStateT>
807 auto build_result(NewtonStateT& state, double log_det) {
808 return laplace_density_estimates{state.prev().obj() - 0.5 * log_det,
809 std::move(state.prev().theta()),
810 std::move(W_full),
811 Eigen::MatrixXd(llt_B.matrixL()),
812 std::move(state.prev().a()),
813 std::move(state.prev().theta_grad()),
814 Eigen::PartialPivLU<Eigen::MatrixXd>{},
815 std::move(K_root),
816 2};
817 }
818};
819
833struct LUSolver {
835 Eigen::PartialPivLU<Eigen::MatrixXd> lu;
836
838 Eigen::SparseMatrix<double> W_full;
839
857 template <typename NewtonStateT, typename LLFun, typename LLTupleArgs,
858 typename CovarMat>
859 void solve_step(NewtonStateT& state, const LLFun& ll_fun,
860 const LLTupleArgs& ll_args, const CovarMat& covariance,
861 int hessian_block_size, std::ostream* msgs) {
862 const Eigen::Index theta_size = state.b.size();
863
864 // 1. Compute Hessian
866 ll_fun, state.prev().theta(), hessian_block_size, ll_args, msgs);
867
868 // 2. Factorize B = I + Sigma * W
869 lu.compute(Eigen::MatrixXd::Identity(theta_size, theta_size)
870 + covariance * W_full);
871
872 // 3. Solve for the raw Newton proposal in a-space.
873 state.b.noalias()
874 = W_full * state.prev().theta() + state.prev().theta_grad();
875 state.proposal_step().a().noalias()
876 = state.b - W_full * lu.solve(covariance * state.b);
877 }
878
888 double compute_log_determinant() const {
889 return lu.matrixLU().diagonal().array().log().sum();
890 }
891
900 template <typename NewtonStateT>
901 auto build_result(NewtonStateT& state, double log_det) {
902 return laplace_density_estimates{state.prev().obj() - 0.5 * log_det,
903 std::move(state).prev().theta(),
904 std::move(W_full),
905 Eigen::MatrixXd(0, 0),
906 std::move(state).prev().a(),
907 std::move(state).prev().theta_grad(),
908 std::move(lu),
909 Eigen::MatrixXd(0, 0),
910 3};
911 }
912};
913
936template <typename SolverPolicy, typename NewtonStateT, typename OptionsT,
937 typename LLFunT, typename LLTupleArgsT, typename CovarMatT,
938 typename UpdateFun>
939inline auto run_newton_loop(SolverPolicy& solver, NewtonStateT& state,
940 const OptionsT& options, Eigen::Index& step_iter,
941 const LLFunT& ll_fun, const LLTupleArgsT& ll_args,
942 const CovarMatT& covariance, UpdateFun&& update_fun,
943 std::ostream* msgs) {
944 bool finish_update = false;
945 for (; step_iter <= options.max_num_steps; step_iter++) {
946 solver.solve_step(state, ll_fun, ll_args, covariance,
947 options.hessian_block_size, msgs);
948 if (!state.final_loop) {
949 auto&& proposal = state.proposal_step();
950 state.wolfe_info.p_ = proposal.a() - state.prev().a();
951 state.prev_g.noalias() = -covariance * state.prev().a()
952 + covariance * state.prev().theta_grad();
953 state.wolfe_info.init_dir_ = state.prev_g.dot(state.wolfe_info.p_);
954 // Flip direction if not ascending
955 state.wolfe_info.flip_direction();
956 auto&& scratch = state.wolfe_info.scratch_;
957 proposal.eval_.alpha() = 1.0;
958 const bool proposal_valid
959 = update_fun(proposal, state.curr(), state.prev(), proposal.eval_,
960 state.wolfe_info.p_);
961 const bool cached_proposal_ok
962 = proposal_valid && std::isfinite(proposal.obj())
963 && std::isfinite(proposal.dir())
964 && proposal.alpha() > options.line_search.min_alpha;
965 if (!cached_proposal_ok) {
966 state.wolfe_status
968 } else if (options.line_search.max_iterations == 0) {
969 state.curr().update(proposal);
970 state.wolfe_status = WolfeStatus{WolfeReturn::Continue, 1, 0, true};
971 } else {
972 Eigen::VectorXd s = proposal.a() - state.prev().a();
973 auto full_step_grad
974 = (-covariance * proposal.a() + covariance * proposal.theta_grad())
975 .eval();
976 state.curr().alpha() = barzilai_borwein_step_size(
977 s, full_step_grad, state.prev_g, state.prev().alpha(),
978 state.wolfe_status.num_backtracks_, options.line_search.min_alpha,
979 options.line_search.max_alpha);
980 state.wolfe_status = internal::wolfe_line_search(
981 state.wolfe_info, update_fun, options.line_search, msgs);
982 }
983 bool search_failed = !state.wolfe_status.accept_;
984 const bool proposal_armijo_ok
985 = cached_proposal_ok
987 proposal.obj(), state.prev().obj(), proposal.alpha(),
988 state.wolfe_info.init_dir_, options.line_search);
989 if (search_failed && proposal_armijo_ok) {
990 state.curr().update(proposal);
991 state.wolfe_status
992 = WolfeStatus{WolfeReturn::Armijo, state.wolfe_status.num_evals_,
993 state.wolfe_status.num_backtracks_, true};
994 search_failed = false;
995 }
996 bool objective_converged
997 = state.wolfe_status.accept_
998 && std::abs(state.curr().obj() - state.prev().obj())
999 < options.tolerance;
1000 finish_update = objective_converged || search_failed;
1001 }
1002 if (finish_update) {
1003 if (!state.final_loop && state.wolfe_status.accept_) {
1004 // Do one final loop with exact wolfe conditions
1005 state.final_loop = true;
1006 state.update_next_step(options);
1007 continue;
1008 }
1009 return solver.build_result(state, solver.compute_log_determinant());
1010 } else {
1011 state.update_next_step(options);
1012 }
1013 }
1014 if (msgs) {
1015 (*msgs)
1016 << std::string(
1017 "WARNING(laplace_marginal_density): max number of iterations: ")
1018 + std::to_string(options.max_num_steps) + " exceeded.";
1019 }
1020 return solver.build_result(state, solver.compute_log_determinant());
1021}
1022
1033inline void log_solver_fallback(const bool allow_fallthrough,
1034 std::ostream* msgs, std::string_view context,
1035 Eigen::Index iter,
1036 std::string_view failed_solver,
1037 std::string_view next_solver,
1038 const std::exception& e) {
1039 // Build once so we don't interleave with other logs.
1040 std::ostringstream os;
1041 std::string msg_type = allow_fallthrough ? "WARNING" : "ERROR";
1042 os << "[" << context << "] " << msg_type << ": solver fallback\n"
1043 << " " << std::left << std::setw(12) << "iteration:" << iter << "\n"
1044 << " " << std::left << std::setw(12) << "failed:" << failed_solver << "\n"
1045 << " " << std::left << std::setw(12) << "reason:" << e.what() << "\n"
1046 << " " << std::left << std::setw(12) << "action:"
1047 << "trying " << next_solver << "\n"
1048 << "note: this warning message will only be displayed once."
1049 << "\n";
1050 if (allow_fallthrough && msgs) {
1051 (*msgs) << os.str();
1052 } else {
1053 throw std::domain_error(std::string("[") + std::string(context) + "]");
1054 }
1055}
1056
1057template <bool InitTheta, typename Opts>
1058inline decltype(auto) theta_init_impl(Eigen::Index theta_size, Opts&& options) {
1059 if constexpr (InitTheta) {
1060 // If requested, use the prior mean as the initial value
1061 return std::decay_t<decltype(options)>(options).theta_0;
1062 } else {
1063 return Eigen::MatrixXd::Zero(theta_size, 1);
1064 }
1065}
1066
1085template <typename ObjFun, typename ThetaGradFun, typename Covariance,
1086 typename Options>
1087inline auto create_update_fun(ObjFun&& obj_fun, ThetaGradFun&& theta_grad_f,
1088 Covariance&& covariance, Options&& options) {
1089 auto update_step = [&covariance, &obj_fun, &theta_grad_f](
1090 auto& proposal, auto&& /* curr */, auto&& prev,
1091 auto& eval_in, auto&& p) {
1092 try {
1093 proposal.a() = prev.a() + eval_in.alpha() * p;
1094 proposal.theta().noalias() = covariance * proposal.a();
1095 proposal.theta_grad() = theta_grad_f(proposal.theta());
1096 eval_in.obj() = obj_fun(proposal.a(), proposal.theta());
1097 eval_in.dir()
1098 = (-covariance * proposal.a() + covariance * proposal.theta_grad())
1099 .dot(p);
1100 return std::isfinite(eval_in.obj()) && std::isfinite(eval_in.dir());
1101 } catch (const std::exception&) {
1102 return false;
1103 }
1104 };
1105 auto backoff = [&options](auto& eval) {
1106 eval.alpha() *= options.line_search.tau;
1107 return eval.alpha() > options.line_search.min_alpha;
1108 };
1109 return
1110 [update_step_ = std::move(update_step), backoff_ = std::move(backoff)](
1111 auto& proposal, auto&& curr, auto&& prev, auto& eval_in, auto&& p) {
1112 return internal::retry_evaluate(update_step_, proposal, curr, prev,
1113 eval_in, p, backoff_);
1114 };
1115}
1116
1169template <typename LLFun, typename LLTupleArgs, typename CovarMat,
1170 bool InitTheta,
1173 LLFun&& ll_fun, LLTupleArgs&& ll_args, CovarMat&& covariance,
1174 const laplace_options<InitTheta>& options, std::ostream* msgs) {
1175 internal::validate_laplace_options("laplace_marginal_density", options,
1176 covariance);
1177 const Eigen::Index theta_size = covariance.rows();
1178 // Wolfe optimizes over the latent 'a' space
1179 auto obj_fun = [&ll_fun, &ll_args, &msgs](const Eigen::VectorXd& a_val,
1180 auto&& theta_val) -> double {
1181 return -0.5 * a_val.dot(theta_val)
1182 + laplace_likelihood::log_likelihood(ll_fun, theta_val, ll_args,
1183 msgs);
1184 };
1185 auto theta_grad_f = [&ll_fun, &ll_args, &msgs](auto&& theta_val) {
1186 return laplace_likelihood::theta_grad(ll_fun, theta_val, ll_args, msgs);
1187 };
1188 decltype(auto) theta_init = theta_init_impl<InitTheta>(theta_size, options);
1189 // When the user supplies a non-zero theta_init, we must initialise a
1190 // consistently so that the invariant theta = Sigma * a holds. Otherwise
1191 // the prior term -0.5 * a'*theta vanishes (a=0 while theta!=0), inflating
1192 // the initial objective and causing the Wolfe line search to reject the
1193 // first Newton step.
1194 auto state
1195 = NewtonState(theta_size, obj_fun, theta_grad_f, covariance, theta_init);
1196 // Start with safe step size
1197 auto update_fun = create_update_fun(
1198 std::move(obj_fun), std::move(theta_grad_f), covariance, options);
1199 Eigen::Index step_iter = 0;
1200 try {
1201 if (options.solver == 1) {
1202 if (options.hessian_block_size == 1) {
1203 CholeskyWSolverDiag solver(state, covariance);
1204 return run_newton_loop(solver, state, options, step_iter, ll_fun,
1205 ll_args, covariance, update_fun, msgs);
1206 } else {
1207 CholeskyWSolverBlock solver(state, options.hessian_block_size);
1208 return run_newton_loop(solver, state, options, step_iter, ll_fun,
1209 ll_args, covariance, update_fun, msgs);
1210 }
1211 }
1212 } catch (const std::exception& e) {
1213 const std::string solver_type
1214 = (options.hessian_block_size == 1) ? "Diagonal" : "Block";
1215 std::string failed = "solver 1 (" + solver_type + " Hessian-root Cholesky)";
1216 std::call_once(
1218 [](auto&&... args) {
1219 log_solver_fallback(std::forward<decltype(args)>(args)...);
1220 },
1221 options.allow_fallthrough, msgs, "laplace_marginal_density", step_iter,
1222 std::move(failed), "solver 2 (Covariance-root Cholesky)", e);
1223 }
1224 try {
1225 if (options.solver == 2 || options.allow_fallthrough) {
1226 CholeskyKSolver solver(state, covariance);
1227 return run_newton_loop(solver, state, options, step_iter, ll_fun, ll_args,
1228 covariance, update_fun, msgs);
1229 }
1230 } catch (const std::exception& e) {
1231 std::call_once(
1233 [](auto&&... args) {
1234 log_solver_fallback(std::forward<decltype(args)>(args)...);
1235 },
1236 options.allow_fallthrough, msgs, "laplace_marginal_density", step_iter,
1237 "solver 2 (Covariance-root Cholesky)", "solver 3 (General LU solver)",
1238 e);
1239 }
1240 if (options.solver == 3 || options.allow_fallthrough) {
1241 LUSolver solver;
1242 return run_newton_loop(solver, state, options, step_iter, ll_fun, ll_args,
1243 covariance, update_fun, msgs);
1244 }
1245 throw std::domain_error(
1246 std::string("You chose a solver (") + std::to_string(options.solver)
1247 + ") that is not valid. Please choose either 1, 2, or 3.");
1248}
1249} // namespace internal
1250} // namespace math
1251} // namespace stan
1252#endif
#define STAN_THREADS_DEF
#define unlikely(x)
int64_t size(const T &m)
Returns the size (number of the elements) of a matrix_cl or var_value<matrix_cl<T>>.
Definition size.hpp:19
(Expert) Numerical traits for algorithmic differentiation variables.
WolfeStatus wolfe_line_search(Info &wolfe_info, UpdateFun &&update_fun, Options &&opt, Stream *msgs)
Strong Wolfe line search for maximization.
auto create_update_fun(ObjFun &&obj_fun, ThetaGradFun &&theta_grad_f, Covariance &&covariance, Options &&options)
Create the update function for the line search, capturing necessary references.
auto run_newton_loop(SolverPolicy &solver, NewtonStateT &state, const OptionsT &options, Eigen::Index &step_iter, const LLFunT &ll_fun, const LLTupleArgsT &ll_args, const CovarMatT &covariance, UpdateFun &&update_fun, std::ostream *msgs)
Run a Newton loop with a solver policy, updating the shared state.
double barzilai_borwein_step_size(const Eigen::VectorXd &s, const Eigen::VectorXd &g_curr, const Eigen::VectorXd &g_prev, double prev_step, int last_backtracks, double min_alpha, double max_alpha)
Curvature-aware Barzilai–Borwein (BB) step length with robust safeguards.
void log_solver_fallback(const bool allow_fallthrough, std::ostream *msgs, std::string_view context, Eigen::Index iter, std::string_view failed_solver, std::string_view next_solver, const std::exception &e)
Log a solver fallback event to the provided stream.
decltype(auto) theta_init_impl(Eigen::Index theta_size, Opts &&options)
auto check_armijo(double obj_next, double obj_init, double alpha_next, double dir0, Option &&opt)
constexpr double laplace_default_tolerance
constexpr auto tuple_to_laplace_options(Options &&ops)
constexpr int laplace_default_max_steps_line_search
constexpr int laplace_default_hessian_block_size
void validate_laplace_options(const char *frame_name, const laplace_options< InitTheta > &options, const CovarMat &covariance)
Validates the options for the Laplace approximation.
constexpr int laplace_default_allow_fallthrough
auto retry_evaluate(Update &&update, Proposal &&proposal, Curr &&curr, Prev &&prev, Eval &eval, P &&p, Backoff &&backoff)
Retry evaluation of a step until it passes a validity check.
auto laplace_marginal_density_est(LLFun &&ll_fun, LLTupleArgs &&ll_args, CovarMat &&covariance, const laplace_options< InitTheta > &options, std::ostream *msgs)
For a latent Gaussian model with hyperparameters phi and latent variables theta, and observations y,...
void llt_with_jitter(LLT &llt_B, B_t &B, double min_jitter=1e-10, double max_jitter=1e-5)
Factorize B with jittering fallback.
void block_matrix_sqrt(WRootMat &W_root, const Eigen::SparseMatrix< double > &W, const Eigen::Index block_size)
Returns the principal square root of a block diagonal matrix.
static thread_local std::once_flag fallback_warning
auto diagonal_hessian(F &&f, Theta &&theta, TupleArgs &&ll_tuple, Stream *msgs)
auto log_likelihood(F &&f, Theta &&theta, TupleArgs &&ll_tup, Stream *msgs)
A wrapper that accepts a tuple as arguments.
auto block_hessian(F &&f, Theta &&theta, const Eigen::Index hessian_block_size, TupleArgs &&ll_tuple, Stream *msgs)
auto theta_grad(F &&f, Theta &&theta, TupleArgs &&ll_tup, Stream *msgs=nullptr)
A wrapper that accepts a tuple as arguments.
void check_square(const char *function, const char *name, const T_y &y)
Check if the specified matrix is square.
void check_nonnegative(const char *function, const char *name, const T_y &y)
Check if y is non-negative.
static constexpr double e()
Return the base of the natural logarithm.
Definition constants.hpp:20
T eval(T &&arg)
Inputs which have a plain_type equal to the own time are forwarded unmodified (for Eigen expressions ...
Definition eval.hpp:20
T value_of(const fvar< T > &v)
Return the value of the specified variable.
Definition value_of.hpp:18
void check_finite(const char *function, const char *name, const T_y &y)
Return true if all values in y are finite.
void check_nonzero_size(const char *function, const char *name, const T_y &y)
Check if the specified matrix/vector is of non-zero size.
void check_positive(const char *function, const char *name, const T_y &y)
Check if y is positive.
double dot(const std::vector< double > &x, const std::vector< double > &y)
Definition dot.hpp:11
constexpr bool is_inner_tuple_type_v
Checks if the N-th element of a tuple is of the same type as CheckType.
Definition is_tuple.hpp:82
std::enable_if_t< Check::value > require_t
If condition is true, template is enabled.
The lgamma implementation in stan-math is based on either the reentrant safe lgamma_r implementation ...
STL namespace.
void solve_step(NewtonStateT &state, const LLFun &ll_fun, const LLTupleArgs &ll_args, const CovarMat &covariance, int hessian_block_size, std::ostream *msgs)
Perform one Newton step using covariance Cholesky solver.
Eigen::MatrixXd K_root
Lower Cholesky factor of covariance: Sigma = K_root * K_root^T.
Eigen::LLT< Eigen::MatrixXd > llt_B
Cholesky factorization of B = I + K_root^T * W * K_root.
Eigen::SparseMatrix< double > W_full
Full (block) Hessian matrix from likelihood.
double compute_log_determinant() const
Compute log determinant of B from Cholesky factor.
auto build_result(NewtonStateT &state, double log_det)
Build the final result structure.
CholeskyKSolver(const NewtonStateT &state, const CovarMat &covariance)
Solver Policy 2: Cholesky decomposition of K (Covariance).
Eigen::LLT< Eigen::MatrixXd > llt_B
Cholesky factorization of B = I + W_r * Sigma * W_r.
Eigen::SparseMatrix< double > W_block
Sparse block-diagonal Hessian from likelihood.
Eigen::SparseMatrix< double > W_r
Sparse square root of block Hessian.
double compute_log_determinant() const
Compute log determinant of B from Cholesky factor.
void solve_step(NewtonStateT &state, const LLFun &ll_fun, const LLTupleArgs &ll_args, const CovarMat &covariance, int hessian_block_size, std::ostream *msgs)
Perform one Newton step using block-diagonal Hessian solver.
auto build_result(NewtonStateT &state, double log_det)
Build the final result structure.
CholeskyWSolverBlock(const NewtonStateT &state, int hessian_block_size)
Solver Policy 1 (Block): Cholesky decomposition using block W.
void solve_step(NewtonStateT &state, const LLFun &ll_fun, const LLTupleArgs &ll_args, const CovarMat &covariance, int, std::ostream *msgs)
Perform one Newton step using diagonal Hessian solver.
Eigen::LLT< Eigen::MatrixXd > llt_B
Cholesky factorization of B = I + W_r * Sigma * W_r.
CholeskyWSolverDiag(const NewtonStateT &state, const CovarMat &covariance)
Eigen::VectorXd W_r_diag
Square root of diagonal Hessian: W_r[j] = sqrt(W[j])
auto build_result(NewtonStateT &state, double log_det)
Build the final result structure.
Eigen::VectorXd W_diag
Diagonal Hessian values from the likelihood.
double compute_log_determinant() const
Compute log determinant of B from Cholesky factor.
Solver Policy 1 (Diagonal): Cholesky decomposition using W.
auto build_result(NewtonStateT &state, double log_det)
Build the final result structure.
void solve_step(NewtonStateT &state, const LLFun &ll_fun, const LLTupleArgs &ll_args, const CovarMat &covariance, int hessian_block_size, std::ostream *msgs)
Perform one Newton step using LU decomposition solver.
double compute_log_determinant() const
Compute log determinant from LU factorization.
Eigen::SparseMatrix< double > W_full
Full Hessian matrix from likelihood.
Eigen::PartialPivLU< Eigen::MatrixXd > lu
LU factorization of B = I + Sigma * W.
WolfeData proposal
Cached proposal evaluated before the Wolfe line search.
auto & prev() &
Access the previous step state (mutable).
Eigen::MatrixXd B
Workspace matrix: B = I + W_r * Sigma * W_r (or similar)
WolfeStatus wolfe_status
Status of the most recent Wolfe line search.
auto & curr() &
Access the current step state (mutable).
Eigen::VectorXd b
Workspace vector: b = W * theta + grad(log_lik)
WolfeInfo wolfe_info
Wolfe line search state including current/previous steps.
const auto & curr() const &
Access the current step state (const).
NewtonState(int theta_size, ObjFun &&obj_fun, ThetaGradFun &&theta_grad_f, CovarianceT &&covariance, ThetaInitializer &&theta_init)
Constructs Newton state with a consistent (a_init, theta_init) pair.
Eigen::VectorXd prev_g
Previous gradient for Barzilai-Borwein step calculation.
bool final_loop
On the final loop if we found a better wolfe step, but we are going to exit, we want to make sure all...
const auto & prev() const &
Access the previous step state (const).
Holds the state for the Newton-Raphson optimization loop.
Data used in current evaluation of wolfe line search at a particular stepsize.
Data object used in wolfe line search.
Struct to hold the result status of the Wolfe line search.
L_t L
Solver-dependent factorization of the system matrix B.
KRoot K_root
Lower Cholesky factor of the covariance matrix.
A_vec a
Mode in the a parameterization, where theta = covariance * a.
ThetaGrad theta_grad
Gradient of the log-likelihood with respect to theta at the mode.
laplace_density_estimates(double lmd_, ThetaVec &&theta_, WR &&W_r_, L_t &&L_, A_vec &&a_, ThetaGrad &&theta_grad_, LU_t &&LU_, KRoot &&K_root_, int solver_used_)
Options for Wolfe line search during optimization.
laplace_options(ThetaVec &&theta_0_, double tolerance_, int max_num_steps_, int hessian_block_size_, int solver_, int max_steps_line_search_, bool allow_fallthrough_)
double tolerance
Iterations end when the absolute change in the optimization objective is less than this tolerance.
int solver
Which linear solver to use inside the Newton step.
laplace_options_base(int hessian_block_size_, int solver_, double tolerance_, int max_num_steps_, bool allow_fallthrough_, int max_steps_line_search_)
Options for the Laplace approximation.