Automatic Differentiation
 
Loading...
Searching...
No Matches
wolfe_line_search.hpp
Go to the documentation of this file.
1#ifndef STAN_MATH_MIX_FUNCTOR_WOLFE_LINE_SEARCH_HPP
2#define STAN_MATH_MIX_FUNCTOR_WOLFE_LINE_SEARCH_HPP
3
14#include <algorithm>
15#include <cmath>
16#include <limits>
17#include <tuple>
18#include <type_traits>
19
20namespace stan::math {
21
/**
 * Options for Wolfe line search during optimization.
 */
struct laplace_line_search_options {
  /**
   * Construct options with a custom evaluation budget.
   * @param max_iter maximum number of objective/gradient evaluations
   */
  constexpr explicit laplace_line_search_options(int max_iter)
      : max_iterations(max_iter) {}
  constexpr laplace_line_search_options() = default;
  // Maximum number of objective/gradient evaluations allowed.
  int max_iterations{1000};
  // Armijo condition parameter (sufficient decrease).
  double c1{1e-4};
  // Curvature condition parameter.
  double c2{0.9};
  // Backtracking shrinkage factor.
  double tau{0.5};
  // Minimum allowable step size.
  double min_alpha{1e-8};
  // Maximum allowable step size.
  double max_alpha{16.0};
  // Step size expansion factor.
  double scale_up{2.0};
  // Absolute gradient tolerance.
  double abs_grad_threshold{1e-12};
  // Absolute objective tolerance.
  double abs_obj_threshold{1e-12};
  double rel_grad_threshold{1e-3};   // off by default
  double rel_obj_threshold{1e-10};  // off by default
};
104namespace internal {
105
/**
 * Selects a safeguarded trial point for maximizing a scalar function on a
 * line segment [x_left, x_right], given the function values and directional
 * derivatives at both endpoints.
 *
 * A cubic Hermite model of the objective is built on the bracket and
 * evaluated at a small set of analytically-derived candidate points (secant
 * root of the derivative, stationary points of the cubic). The candidate
 * with the largest model value wins; whenever the inputs are malformed or
 * the bracket is numerically degenerate, the midpoint (bisection) is
 * returned instead.
 *
 * @tparam Scalar floating point scalar type
 * @param x_left left endpoint of the bracket
 * @param f_left objective value at x_left
 * @param df_left directional derivative at x_left
 * @param x_right right endpoint of the bracket (must exceed x_left)
 * @param f_right objective value at x_right
 * @param df_right directional derivative at x_right
 * @return trial point strictly inside (x_left, x_right)
 */
template <typename Scalar>
[[nodiscard]] inline Scalar cubic_spline(Scalar x_left, Scalar f_left,
                                         Scalar df_left, Scalar x_right,
                                         Scalar f_right,
                                         Scalar df_right) noexcept {
  const Scalar bisect = (x_left + x_right) / Scalar(2);

  // Fall back to bisection on a malformed bracket or non-finite data.
  if (!(x_right > x_left) || !std::isfinite(f_left) || !std::isfinite(f_right)
      || !std::isfinite(df_left) || !std::isfinite(df_right)) {
    return bisect;
  }

  const Scalar width = x_right - x_left;
  const Scalar eps = std::numeric_limits<Scalar>::epsilon();

  // Bracket too tight relative to the magnitude of x: just bisect.
  {
    const Scalar x_mag
        = std::max(std::max(std::abs(x_left), std::abs(x_right)), Scalar(1));
    if (width <= eps * x_mag) {
      return bisect;
    }
  }

  // Work in the normalized coordinate s, where x = x_left + s * width.
  const Scalar g0 = width * df_left;   // F'(0)
  const Scalar g1 = width * df_right;  // F'(1)

  // Hermite cubic F(s) = c3*s^3 + c2*s^2 + c1*s + c0 matching
  // F(0) = f_left, F'(0) = g0, F(1) = f_right, F'(1) = g1.
  const Scalar c0 = f_left;
  const Scalar c1 = g0;
  const Scalar c2 = Scalar(3) * (f_right - f_left) - Scalar(2) * g0 - g1;
  const Scalar c3 = Scalar(2) * (f_left - f_right) + g0 + g1;

  // Horner evaluation of the model.
  const auto model
      = [&](Scalar s) -> Scalar { return ((c3 * s + c2) * s + c1) * s + c0; };

  // Candidates must land in the trimmed interior (edge_guard, 1 - edge_guard).
  constexpr Scalar edge_guard = Scalar(1e-9);

  Scalar s_best = 0.5;
  Scalar val_best = model(0.5);
  const auto propose = [&](Scalar s) {
    if (!std::isfinite(s)) {
      return;
    }
    if (!(s > edge_guard && s < Scalar(1) - edge_guard)) {
      return;
    }
    const Scalar val = model(s);
    if (val > val_best) {
      s_best = s;
      val_best = val;
    }
  };

  // Candidate 1: secant root of F' between s = 0 and s = 1.
  {
    const Scalar secant_denom = g0 - g1;
    const Scalar g_mag
        = std::max(std::max(std::abs(g0), std::abs(g1)), Scalar(1));
    if (std::abs(secant_denom) > eps * g_mag) {
      // Root of the linear interpolation of F'.
      propose(g0 / secant_denom);
    }
  }

  // Candidate 2: stationary points of the cubic model, F'(s) = 0, i.e.
  // 3*c3*s^2 + 2*c2*s + c1 = 0.
  {
    const Scalar qa = Scalar(3) * c3;
    const Scalar qb = Scalar(2) * c2;
    const Scalar qc = c1;

    const Scalar lin_mag
        = std::max(std::max(std::abs(qb), std::abs(qc)), Scalar(1));
    const Scalar qa_tol = eps * lin_mag;

    if (std::abs(qa) <= qa_tol) {
      // Effectively linear: qb*s + qc = 0.
      if (std::abs(qb) > eps * lin_mag) {
        propose(-qc / qb);
      }
    } else {
      // Genuine quadratic: qa*s^2 + qb*s + qc = 0.
      Scalar disc = std::fma(-Scalar(4) * qa, qc, qb * qb);  // b^2 - 4ac
      const Scalar disc_mag
          = std::max(qb * qb + std::abs(Scalar(4) * qa * qc), Scalar(1));

      // Treat a tiny negative discriminant as an exact double root.
      if (disc < Scalar(0) && -disc <= Scalar(10) * eps * disc_mag) {
        disc = Scalar(0);
      }

      if (disc >= Scalar(0)) {
        const Scalar root = std::sqrt(disc);
        // Numerically stable quadratic formula (avoid cancellation).
        const Scalar q = -Scalar(0.5) * (qb + std::copysign(root, qb));
        const Scalar q_tol = eps * std::max(std::abs(qb) + root, Scalar(1));

        if (std::abs(q) > q_tol) {
          propose(q / qa);
          propose(qc / q);
        } else {
          // Degenerate: use the vertex of the quadratic derivative.
          propose(-qb / (Scalar(2) * qa));
        }
      }
    }
  }

  return x_left + s_best * width;
}
284
/**
 * Bracket-aware wrapper around the scalar cubic_spline: interpolates between
 * two line-search evaluations and clamps the proposal strictly inside the
 * bracket [low.alpha(), high.alpha()] so the bracket always shrinks.
 *
 * Note: the original declaration deduced both evaluations through a single
 * template parameter (`Eval&& low, Eval&& high`), which fails to compile when
 * the two arguments have different value categories. Separate template
 * parameters make the overload robust while accepting all previous calls.
 *
 * @param low evaluation at the lower bracket endpoint
 * @param high evaluation at the upper bracket endpoint
 * @param opt line search options (currently unused; kept for interface)
 * @return clamped trial step size
 */
template <typename EvalLow, typename EvalHigh, typename Options>
inline auto cubic_spline(EvalLow&& low, EvalHigh&& high,
                         [[maybe_unused]] Options&& opt) {
  auto alpha = cubic_spline(low.alpha(), low.obj(), low.dir(), high.alpha(),
                            high.obj(), high.dir());
  const double width = high.alpha() - low.alpha();
  // Guard keeps the proposal away from the endpoints.
  const double guard = 1e-3 * width;  // TODO(review): consider exposing as option
  alpha = std::clamp(alpha, low.alpha() + guard, high.alpha() - guard);
  return alpha;
}
294
/**
 * Armijo (sufficient increase) condition for a maximizing line search.
 *
 * @param obj_next objective at the trial step
 * @param obj_init objective at the starting point
 * @param alpha_next trial step size
 * @param dir0 directional derivative at the starting point
 * @param opt options providing the Armijo parameter c1
 * @return true when the trial point improves the objective and achieves the
 *   c1-scaled expected gain
 */
template <typename Option>
inline auto check_armijo(double obj_next, double obj_init, double alpha_next,
                         double dir0, Option&& opt) {
  const bool improved = obj_next >= obj_init;
  const bool sufficient = obj_next >= obj_init + alpha_next * dir0 * opt.c1;
  return improved && sufficient;
}

/**
 * Convenience overload operating on evaluation records that expose
 * alpha()/obj()/dir().
 */
template <typename Eval, typename WolfeT, typename Option>
inline bool check_armijo(const Eval& eval, const WolfeT& prev,
                         const Option& opt) {
  return check_armijo(eval.obj(), prev.obj(), eval.alpha(), prev.dir(), opt);
}
307
/**
 * Strong Wolfe curvature condition: the magnitude of the directional
 * derivative at the trial point must have shrunk to at most c2 times its
 * initial magnitude.
 *
 * @param dir_deriv_next directional derivative at the trial step
 * @param dir_deriv_init directional derivative at the starting point
 * @param opt options providing the curvature parameter c2
 * @return true when the curvature condition holds
 */
template <typename Option>
inline auto check_wolfe_curve(double dir_deriv_next, double dir_deriv_init,
                              Option&& opt) {
  const double allowed = opt.c2 * std::abs(dir_deriv_init);
  return std::abs(dir_deriv_next) <= allowed;
}

/**
 * Convenience overload operating on evaluation records that expose dir().
 */
template <typename Eval, typename WolfeT, typename Option>
inline bool check_wolfe(const Eval& eval, const WolfeT& prev,
                        const Option& opt) {
  return check_wolfe_curve(eval.dir(), prev.dir(), opt);
}
319
/**
 * Result codes for the Wolfe line search.
 * (Reconstruction note: the enumerator names were lost in extraction; they
 * are restored here from the surviving comments and the string names used
 * by wolfe_status_str.)
 */
enum class WolfeReturn : uint8_t {
  // both conditions true
  Wolfe,
  // Armijo true, curvature false
  Armijo,
  // |phi'| small
  ConvergedGradient,
  // |phi - phi0| small
  ConvergedObjective,
  // |phi - phi0| and |phi'| small
  ConvergedObjectiveAndGradient,
  // high and low became too small
  IntervalTooSmall,
  // alpha < min_alpha
  StepTooSmall,
  // max iters reached
  ReachedMaxStep,
  // failed to find a step
  NumericalIssue,
  // All other failures
  Fail,
  // When a check passes and we want to continue searching
  Continue
};

/**
 * Struct to hold the result status of the Wolfe line search.
 */
struct WolfeStatus {
  // total updates/evaluations
  int num_evals_{0};
  // number of backtracking/zoom steps taken
  int num_backtracks_{0};
  // reason the search stopped (Continue while still searching)
  // NOTE(review): default value reconstructed — confirm against upstream.
  WolfeReturn stop_{WolfeReturn::Continue};
  // Whether a valid new step was found
  bool accept_{false};
  WolfeStatus() = default;
  WolfeStatus(WolfeReturn stop, int evals, int back)
      : num_evals_(evals), num_backtracks_(back), stop_(stop), accept_{false} {}
  WolfeStatus(WolfeReturn stop, int evals, int back, bool success)
      : num_evals_(evals),
        num_backtracks_(back),
        stop_(stop),
        accept_{success} {}
};
364
369 switch (s.stop_) {
371 return "Wolfe";
373 return "Armijo";
375 return "ConvergedGradient";
377 return "ConvergedObjective";
379 return "ConvergedObjectiveAndGradient";
381 return "IntervalTooSmall";
383 return "StepTooSmall";
385 return "ReachedMaxStep";
387 return "NumericalIssue";
389 return "Fail";
391 return "Continue";
392 default:
393 return "UNKNOWN";
394 }
395}
396
/**
 * Evaluation record for the Wolfe line search: the state of the
 * one-dimensional objective at a single trial step size.
 */
struct Eval {
  // Step size along the search direction.
  double alpha_{0.0};
  // Objective value at alpha_.
  double obj_{0.0};
  // Directional derivative (slope of the 1-D objective) at alpha_.
  double dir_{0.0};
  inline auto& alpha() { return alpha_; }
  inline const auto& alpha() const { return alpha_; }
  inline auto& obj() { return obj_; }
  inline const auto& obj() const { return obj_; }
  inline auto& dir() { return dir_; }
  inline const auto& dir() const { return dir_; }
  constexpr Eval(double alpha, double obj, double dir)
      : alpha_(alpha), obj_(obj), dir_(dir) {}
  constexpr Eval() = default;
};
417
421struct WolfeData {
422 // current parameter values
423 Eigen::VectorXd theta_;
424 // parameter gradients
425 Eigen::VectorXd theta_grad_;
426 // latent variable
427 Eigen::VectorXd a_;
428 // evaluation data
430 explicit WolfeData(Eigen::Index n)
431 : theta_(Eigen::VectorXd::Zero(n)),
432 theta_grad_(Eigen::VectorXd::Zero(n)),
433 a_(Eigen::VectorXd::Zero(n)),
434 eval_() {}
435
436 template <typename ObjFun, typename ThetaGradF, typename Theta0>
437 WolfeData(ObjFun&& obj_fun, const Eigen::VectorXd& a, const Theta0& theta0,
438 ThetaGradF&& theta_grad_f)
439 : theta_(theta0),
440 theta_grad_(theta_grad_f(theta_)),
441 a_(a),
442 eval_(1.0, obj_fun(a_, theta_), 0.0) {}
443
444 template <typename ObjFun, typename ThetaGradF, typename Theta0>
445 WolfeData(ObjFun&& obj_fun, Eigen::Index n, const Theta0& theta0,
446 ThetaGradF&& theta_grad_f)
447 : theta_(theta0),
448 theta_grad_(theta_grad_f(theta_)),
449 a_(Eigen::VectorXd::Zero(n)),
450 eval_(1.0, obj_fun(a_, theta_), 0.0) {}
451
452 // Initialize with theta = 0
453 template <typename LLFun, typename LLArgs, typename Msgs>
454 WolfeData(Eigen::Index n, const LLFun& ll_fun, const LLArgs& ll_args,
455 const Msgs& msgs)
456 : WolfeData(n, Eigen::VectorXd::Zero(n), ll_fun, ll_args, msgs) {}
457
458 void update(WolfeData& other) {
459 theta_.swap(other.theta_);
460 theta_grad_.swap(other.theta_grad_);
461 a_.swap(other.a_);
462 eval_ = other.eval_;
463 }
464 void update(WolfeData& other, const Eval& eval) {
465 theta_.swap(other.theta_);
466 a_.swap(other.a_);
467 theta_grad_.swap(other.theta_grad_);
468 eval_ = eval;
469 }
470 inline auto& theta() & { return theta_; }
471 inline auto&& theta() && { return std::move(theta_); }
472 inline const auto& theta() const& { return theta_; }
473
474 inline auto& theta_grad() & { return theta_grad_; }
475 inline const auto& theta_grad() const& { return theta_grad_; }
476 inline auto&& theta_grad() && { return std::move(theta_grad_); }
477 inline auto& a() & { return a_; }
478 inline const auto& a() const& { return a_; }
479 inline auto&& a() && { return std::move(a_); }
480 inline auto& obj() & { return eval_.obj(); }
481 inline const auto& obj() const& { return eval_.obj(); }
482 inline auto& alpha() & { return eval_.alpha(); }
483 inline const auto& alpha() const& { return eval_.alpha(); }
484 inline auto& dir() & { return eval_.dir(); }
485 inline const auto& dir() const& { return eval_.dir(); }
486};
487
491struct WolfeInfo {
492 // Current step data. On output will be the proposal step
494 // Previous step data
496 // Scratch space for evaluations of proposal steps
498 // Search direction
499 Eigen::VectorXd p_;
500 // Initial directional derivative
501 double init_dir_;
502 template <typename ObjFun, typename Theta0, typename ThetaGradF>
503 WolfeInfo(ObjFun&& obj_fun, Eigen::Index n, Theta0&& theta0,
504 ThetaGradF&& theta_grad_f)
505 : curr_(std::forward<ObjFun>(obj_fun), n, std::forward<Theta0>(theta0),
506 std::forward<ThetaGradF>(theta_grad_f)),
507 prev_(curr_),
508 scratch_(n) {
509 if (!std::isfinite(curr_.obj())) {
510 throw std::domain_error(
511 "laplace_marginal_density: log likelihood is not finite at initial "
512 "theta and likelihood arguments.");
513 }
514 }
516 : curr_(std::move(curr)),
517 prev_(std::move(prev)),
518 scratch_(curr_.theta().size()),
519 p_(Eigen::VectorXd::Zero(curr_.theta().size())),
520 init_dir_(0.0) {}
521 explicit WolfeInfo(Eigen::Index n)
522 : curr_(n),
523 prev_(curr_),
524 scratch_(n),
525 p_(Eigen::VectorXd::Zero(n)),
526 init_dir_(0.0) {}
527
528 inline void flip_direction() {
529 if (this->init_dir_ < 0) {
530 this->p_ *= -1;
531 this->init_dir_ *= -1;
532 }
533 }
534
535 inline auto& curr() & { return curr_; }
536 inline const auto& curr() const& { return curr_; }
537 inline auto&& curr() && { return std::move(curr_); }
538 inline auto& prev() & { return prev_; }
539 inline const auto& prev() const& { return prev_; }
540 inline auto&& prev() && { return std::move(prev_); }
541 inline auto& scratch() & { return scratch_; }
542 inline const auto& scratch() const& { return scratch_; }
543 inline auto&& scratch() && { return std::move(scratch_); }
544};
545
/**
 * Retry evaluation of a step until it passes a validity check or the
 * backoff policy gives up.
 *
 * @param update callable (proposal, curr, prev, eval, p) returning a
 *   contextually-convertible-to-bool result; truthy means success
 * @param proposal scratch storage for the trial step
 * @param curr current step data
 * @param prev previous step data
 * @param eval evaluation record mutated by update/backoff
 * @param p search direction
 * @param backoff callable that adjusts eval for another attempt; returning
 *   false aborts the retry loop
 * @return the last result produced by update
 */
template <typename Update, typename Proposal, typename Curr, typename Prev,
          typename Eval, typename P, typename Backoff>
inline auto retry_evaluate(Update&& update, Proposal&& proposal, Curr&& curr,
                           Prev&& prev, Eval& eval, P&& p, Backoff&& backoff) {
  for (;;) {
    auto attempt = update(proposal, curr, prev, eval, p);
    // Stop on success, or when the backoff policy declines another retry.
    if (attempt || !backoff(eval)) {
      return attempt;
    }
  }
}
591
698template <typename Info, typename UpdateFun, typename Options, typename Stream>
699inline WolfeStatus wolfe_line_search(Info& wolfe_info, UpdateFun&& update_fun,
700 Options&& opt, Stream* msgs) {
701 auto& curr = wolfe_info.curr_;
702 auto& prev = wolfe_info.prev_;
703 auto& scratch = wolfe_info.scratch_;
704 auto&& p = wolfe_info.p_;
705 auto&& dir_deriv_init = wolfe_info.init_dir_;
706 Eval low{0.0, prev.obj(), dir_deriv_init};
707 prev.dir() = dir_deriv_init;
708 int total_updates = 0;
709 Eval best = low; // keep the best Armijo-OK in case strong-Wolfe fails
710 auto update_with_tick = [&total_updates, &opt, &best, &update_fun](
711 auto&& proposal, auto&& curr, auto&& prev,
712 Eval& e, auto&& p) {
713 const bool over_budget = total_updates > opt.max_iterations;
714 if (over_budget) {
715 // Soft budget: stop evaluating new trial points once exceeded.
716 if (check_armijo(best, prev, opt)) {
717 update_fun(proposal, curr, prev, best, p);
718 curr.update(proposal, best);
719 if (check_wolfe(best, prev, opt)) {
720 return WolfeStatus{WolfeReturn::Wolfe, total_updates, 0, true};
721 } else {
722 return WolfeStatus{WolfeReturn::Armijo, total_updates, 0, true};
723 }
724 }
725 return WolfeStatus{WolfeReturn::ReachedMaxStep, total_updates, 0, false};
726 } else {
727 update_fun(proposal, curr, prev, e, p);
728 ++total_updates;
729 return WolfeStatus{WolfeReturn::Continue, total_updates, 0, false};
730 }
731 };
732 double alpha_start
733 = std::clamp(curr.alpha() * opt.scale_up, opt.min_alpha, opt.max_alpha);
734 Eval high{alpha_start, curr.obj(), dir_deriv_init};
735 WolfeStatus wolfe_check{WolfeReturn::Continue, 0, 0, false};
736 // Initial check for numerical trouble
737 {
738 wolfe_check = update_with_tick(scratch, curr, prev, high, p);
739 if (wolfe_check.stop_ != WolfeReturn::Continue) {
740 return wolfe_check;
741 }
742 if (high.alpha() < opt.min_alpha) {
743 return WolfeStatus{WolfeReturn::StepTooSmall, total_updates, 0, false};
744 }
745 // Quick accept if Armijo and Wolfe conditions are satisfied
746 if (check_armijo(high, prev, opt)) {
747 if (check_wolfe(high, prev, opt)) {
748 // Try zooming up till we hit a fail
749 best = high;
750 while (check_armijo(high, prev, opt) && check_wolfe(high, prev, opt)) {
751 best = high;
752 high.alpha() *= opt.scale_up;
753 if (high.alpha() > opt.max_alpha) {
754 break;
755 }
756 wolfe_check = update_with_tick(scratch, curr, prev, high, p);
757 if (wolfe_check.stop_ != WolfeReturn::Continue) {
758 return wolfe_check;
759 }
760 }
761 wolfe_check = update_with_tick(scratch, curr, prev, best, p);
762 if (wolfe_check.stop_ != WolfeReturn::Continue) {
763 return wolfe_check;
764 }
765 curr.update(scratch, best);
766 return WolfeStatus{WolfeReturn::Wolfe, total_updates, 0, true};
767 } else {
768 if (best.obj() < high.obj()) {
769 best = high;
770 }
771 }
772 }
773 }
774 int num_backtracks = 0;
785 while (high.alpha() < opt.max_alpha) {
786 num_backtracks++;
787 // 1. Evaluate f(alpha) and g(alpha)
788 wolfe_check = update_with_tick(scratch, curr, prev, high, p);
789 if (wolfe_check.stop_ != WolfeReturn::Continue) {
790 return wolfe_check;
791 }
792 if (check_armijo(high, prev, opt)) {
793 if (check_wolfe(high, prev, opt)) { // [1]
794 curr.update(scratch, high);
795 return WolfeStatus{WolfeReturn::Wolfe, total_updates, num_backtracks,
796 true};
797 }
798 if (best.obj() < high.obj()) {
799 best = high;
800 }
801 if (high.dir() > 0) { // [2]
802 low = high;
803 high.alpha() *= opt.scale_up;
804 continue;
805 }
806 }
807 // [3,4,5]
808 break;
809 }
810 const double grad_tol
811 = std::max(opt.abs_grad_threshold,
812 opt.rel_grad_threshold * std::abs(dir_deriv_init));
813 const double obj_tol
814 = std::max(opt.abs_obj_threshold,
815 opt.rel_obj_threshold * (1.0 + std::abs(prev.obj())));
816 auto check_bounds = [&](auto&& curr_eval) {
817 // Check for grad convergence
818 const bool slope_check = std::abs(curr_eval.dir()) <= grad_tol;
819 // tiny slope or gain
820 const bool obj_check = std::abs(curr_eval.obj() - prev.obj()) <= obj_tol;
821 // alpha too small
822 const bool alpha_check = curr_eval.alpha() < opt.min_alpha;
823 if (slope_check || obj_check || alpha_check) {
824 bool step_ok
825 = curr_eval.obj() != low.obj() && check_armijo(curr_eval, prev, opt);
826 if (slope_check && obj_check) {
828 total_updates, num_backtracks, step_ok};
829 } else if (slope_check) {
830 return WolfeStatus{WolfeReturn::ConvergedGradient, total_updates,
831 num_backtracks, step_ok};
832 } else if (obj_check) {
833 return WolfeStatus{WolfeReturn::ConvergedObjective, total_updates,
834 num_backtracks, step_ok};
835 } else {
836 return WolfeStatus{WolfeReturn::IntervalTooSmall, total_updates,
837 num_backtracks, step_ok};
838 }
839 }
840 return WolfeStatus{WolfeReturn::Continue, total_updates, num_backtracks,
841 false};
842 };
843 auto check_b = check_bounds(high);
844 if (check_b.stop_ != WolfeReturn::Continue) {
845 if (check_b.accept_) {
846 curr.update(scratch, high);
847 }
848 return check_b;
849 }
850 wolfe_check = update_with_tick(scratch, curr, prev, high, p);
851 if (wolfe_check.stop_ != WolfeReturn::Continue) {
852 return wolfe_check;
853 }
864 while ((high.alpha() - low.alpha() > opt.min_alpha)
865 && high.alpha() > opt.min_alpha) {
866 num_backtracks++;
867 const bool have_sign_change = (low.dir() * high.dir() < 0);
868 const bool high_armijo_ok = check_armijo(high, prev, opt);
869 const bool use_cubic = have_sign_change && high_armijo_ok;
870 // Choose trial alpha: cubic when bracket is good, else bisection.
871 double alpha_mid{0};
872 if (use_cubic) {
873 alpha_mid = cubic_spline(low, high, opt);
874 } else {
875 alpha_mid = 0.5 * (low.alpha() + high.alpha());
876 }
877 if (alpha_mid <= opt.min_alpha) {
878 break;
879 }
880 Eval mid{alpha_mid, 0.0, 0.0};
881 auto wolfe_check = update_with_tick(scratch, curr, prev, mid, p);
882 if (wolfe_check.stop_ != WolfeReturn::Continue) {
883 return wolfe_check;
884 }
885 if (mid.alpha() <= opt.min_alpha) {
886 return WolfeStatus{WolfeReturn::StepTooSmall, total_updates,
887 num_backtracks, false};
888 }
889 if (check_armijo(mid, prev, opt)) {
890 if (check_wolfe(mid, prev, opt)) { // [1]
891 curr.update(scratch, mid);
892 return WolfeStatus{WolfeReturn::Wolfe, total_updates, num_backtracks,
893 true};
894 }
895 // Track best Armijo-OK point for fallback.
896 if (mid.obj() > best.obj()) {
897 best = mid;
898 }
899 if (mid.obj() > low.obj()) {
900 if (mid.dir() > 0) { // [2]
901 low = mid;
902 } else { // [3]
903 high = mid;
904 }
905 }
906 // [4]
907 high = mid;
908 } else {
909 // [5]
910 high = mid;
911 }
912 // Convergence/guard-rail checks (uses prev/grad_tol/obj_tol etc.)
913 auto bounds_check = check_bounds(mid);
914 if (bounds_check.stop_ != WolfeReturn::Continue) {
915 if (bounds_check.accept_) {
916 curr.update(scratch, mid);
917 }
918 return bounds_check;
919 }
920 }
921 // On failure, use the best point we have found so far that at least satisfies
922 // armijo
923 const bool armijo_ok_best = check_armijo(best, prev, opt);
924 if (armijo_ok_best) {
925 wolfe_check = update_with_tick(scratch, curr, prev, best, p);
926 curr.update(scratch, best);
927 return WolfeStatus{WolfeReturn::Armijo, total_updates, num_backtracks,
928 true};
929 } else {
930 return WolfeStatus{WolfeReturn::Fail, total_updates, num_backtracks, false};
931 }
932}
933} // namespace internal
934
935} // namespace stan::math
936#endif
int64_t size(const T &m)
Returns the size (number of the elements) of a matrix_cl or var_value<matrix_cl<T>>.
Definition size.hpp:19
(Expert) Numerical traits for algorithmic differentiation variables.
WolfeStatus wolfe_line_search(Info &wolfe_info, UpdateFun &&update_fun, Options &&opt, Stream *msgs)
Strong Wolfe line search for maximization.
auto check_wolfe_curve(double dir_deriv_next, double dir_deriv_init, Option &&opt)
auto wolfe_status_str(WolfeStatus s)
Helper function for pretty printing.
auto check_armijo(double obj_next, double obj_init, double alpha_next, double dir0, Option &&opt)
Scalar cubic_spline(Scalar x_left, Scalar f_left, Scalar df_left, Scalar x_right, Scalar f_right, Scalar df_right) noexcept
Selects a safeguarded trial point for maximizing a scalar function on a line.
bool check_wolfe(const Eval &eval, const WolfeT &prev, const Option &opt)
auto retry_evaluate(Update &&update, Proposal &&proposal, Curr &&curr, Prev &&prev, Eval &eval, P &&p, Backoff &&backoff)
Retry evaluation of a step until it passes a validity check.
static constexpr double e()
Return the base of the natural logarithm.
Definition constants.hpp:20
T eval(T &&arg)
Inputs which have a plain_type equal to the own time are forwarded unmodified (for Eigen expressions ...
Definition eval.hpp:20
Matrices and templated mathematical functions.
STL namespace.
constexpr Eval(double alpha, double obj, double dir)
constexpr Eval()=default
evaluation struct for Wolfe line search
void update(WolfeData &other, const Eval &eval)
WolfeData(ObjFun &&obj_fun, Eigen::Index n, const Theta0 &theta0, ThetaGradF &&theta_grad_f)
WolfeData(Eigen::Index n, const LLFun &ll_fun, const LLArgs &ll_args, const Msgs &msgs)
WolfeData(ObjFun &&obj_fun, const Eigen::VectorXd &a, const Theta0 &theta0, ThetaGradF &&theta_grad_f)
Data used in current evaluation of wolfe line search at a particular stepsize.
WolfeInfo(ObjFun &&obj_fun, Eigen::Index n, Theta0 &&theta0, ThetaGradF &&theta_grad_f)
WolfeInfo(WolfeData &&curr, WolfeData &&prev)
Data object used in wolfe line search.
WolfeStatus(WolfeReturn stop, int evals, int back)
WolfeStatus(WolfeReturn stop, int evals, int back, bool success)
Struct to hold the result status of the Wolfe line search.
constexpr laplace_line_search_options(int max_iter)
double tau
Backtracking shrinkage factor.
double min_alpha
Minimum allowable step size.
double abs_grad_threshold
Absolute gradient tolerance.
double scale_up
Step size expansion factor.
double c2
Curvature condition parameter.
constexpr laplace_line_search_options()=default
double c1
Armijo condition parameter (sufficient decrease).
double max_alpha
Maximum allowable step size.
double abs_obj_threshold
Absolute objective tolerance.
Options for Wolfe line search during optimization.