Automatic Differentiation
 
Loading...
Searching...
No Matches
wolfe_line_search.hpp
Go to the documentation of this file.
1#ifndef STAN_MATH_MIX_FUNCTOR_WOLFE_LINE_SEARCH_HPP
2#define STAN_MATH_MIX_FUNCTOR_WOLFE_LINE_SEARCH_HPP
3
14#include <algorithm>
15#include <cmath>
16#include <limits>
17#include <tuple>
18#include <type_traits>
19
20namespace stan::math {
21
/**
 * Options for Wolfe line search during optimization.
 */
struct laplace_line_search_options {
  /**
   * Construct with a maximum iteration count; every other option keeps
   * its default value.
   * @param max_iter maximum number of line-search iterations
   */
  constexpr explicit laplace_line_search_options(int max_iter)
      : max_iterations(max_iter) {}
  constexpr laplace_line_search_options() = default;
  // Maximum number of line-search iterations.
  int max_iterations{1000};
  // Armijo condition parameter (sufficient decrease).
  double c1{1e-4};
  // Curvature condition parameter.
  double c2{0.9};
  // Backtracking shrinkage factor.
  double tau{0.5};
  // Minimum allowable step size.
  double min_alpha{1e-8};
  // Maximum allowable step size.
  double max_alpha{16.0};
  // Step size expansion factor.
  double scale_up{2.0};
  // Absolute gradient tolerance.
  double abs_grad_threshold{1e-12};
  // Absolute objective tolerance.
  double abs_obj_threshold{1e-12};
  double rel_grad_threshold{1e-3};   // off by default
  double rel_obj_threshold{1e-10};   // off by default
};
104namespace internal {
105
/**
 * Selects a safeguarded trial point for maximizing a scalar function on a
 * line, given a bracket [x_left, x_right] with function values and
 * derivatives at both ends.
 *
 * A cubic Hermite model is built in the unit coordinate s in [0, 1] and
 * candidate maximizers (a secant root of the derivative and the stationary
 * points of the cubic) are compared against the bisection point; the
 * candidate with the largest model value wins. Invalid or degenerate input
 * falls back to bisection.
 *
 * @tparam Scalar floating-point scalar type
 * @param x_left left bracket endpoint
 * @param f_left objective value at x_left
 * @param df_left derivative at x_left
 * @param x_right right bracket endpoint (must exceed x_left)
 * @param f_right objective value at x_right
 * @param df_right derivative at x_right
 * @return trial point strictly inside (x_left, x_right)
 */
template <typename Scalar>
[[nodiscard]] inline Scalar cubic_spline(Scalar x_left, Scalar f_left,
                                         Scalar df_left, Scalar x_right,
                                         Scalar f_right,
                                         Scalar df_right) noexcept {
  // Safe fallback: the bisection point of the bracket.
  const Scalar bisect = (x_left + x_right) / Scalar(2);

  // Reject unusable input (unordered bracket or non-finite data) up front.
  if (!(x_right > x_left) || !std::isfinite(f_left) || !std::isfinite(f_right)
      || !std::isfinite(df_left) || !std::isfinite(df_right)) {
    return bisect;
  }

  const Scalar span = x_right - x_left;
  const Scalar mach_eps = std::numeric_limits<Scalar>::epsilon();

  // A bracket at the resolution limit of Scalar cannot be refined: bisect.
  {
    const Scalar x_mag
        = std::max(std::max(std::abs(x_left), std::abs(x_right)), Scalar(1));
    if (span <= mach_eps * x_mag) {
      return bisect;
    }
  }

  // Work in the unit coordinate s, where x = x_left + s * span; the chain
  // rule scales the endpoint derivatives by span.
  const Scalar dF0 = span * df_left;   // F'(0)
  const Scalar dF1 = span * df_right;  // F'(1)

  // Hermite cubic F(s) = c3*s^3 + c2*s^2 + c1*s + c0 matching
  // F(0) = f_left, F'(0) = dF0, F(1) = f_right, F'(1) = dF1.
  const Scalar c0 = f_left;
  const Scalar c1 = dF0;
  const Scalar c2 = Scalar(3) * (f_right - f_left) - Scalar(2) * dF0 - dF1;
  const Scalar c3 = Scalar(2) * (f_left - f_right) + dF0 + dF1;

  // Horner evaluation of the model polynomial.
  auto model = [&](Scalar s) -> Scalar {
    return ((c3 * s + c2) * s + c1) * s + c0;
  };

  // Candidates must stay strictly interior to a trimmed unit interval so the
  // returned point is never (numerically) a bracket endpoint.
  constexpr Scalar edge_guard = Scalar(1e-9);

  // Seed with the bisection point; a candidate must strictly beat it.
  Scalar s_best = 0.5;
  Scalar val_best = model(0.5);
  auto try_candidate = [&](Scalar s) {
    if (!std::isfinite(s)) {
      return;
    }
    if (!(s > edge_guard && s < Scalar(1) - edge_guard)) {
      return;
    }
    const Scalar val = model(s);
    if (val > val_best) {  // maximization: keep the larger model value
      s_best = s;
      val_best = val;
    }
  };

  // Candidate 1: secant root of the derivative between s = 0 and s = 1.
  {
    const Scalar denom = dF0 - dF1;
    const Scalar deriv_mag
        = std::max(std::max(std::abs(dF0), std::abs(dF1)), Scalar(1));
    if (std::abs(denom) > mach_eps * deriv_mag) {
      // Root of the linear interpolation of F'.
      try_candidate(dF0 / denom);
    }
  }

  // Candidate 2: stationary points of the cubic, i.e. roots of
  // F'(s) = 3*c3*s^2 + 2*c2*s + c1 = 0.
  {
    const Scalar qa = Scalar(3) * c3;
    const Scalar qb = Scalar(2) * c2;
    const Scalar qc = c1;

    const Scalar coeff_mag
        = std::max(std::max(std::abs(qb), std::abs(qc)), Scalar(1));
    const Scalar qa_tol = mach_eps * coeff_mag;

    if (std::abs(qa) <= qa_tol) {
      // Leading coefficient is negligible: solve qb*s + qc = 0 instead.
      const Scalar qb_tol = mach_eps * coeff_mag;
      if (std::abs(qb) > qb_tol) {
        try_candidate(-qc / qb);
      }
    } else {
      // Genuine quadratic qa*s^2 + qb*s + qc = 0.
      Scalar disc = std::fma(-Scalar(4) * qa, qc, qb * qb);  // qb^2 - 4*qa*qc
      const Scalar disc_mag
          = std::max(qb * qb + std::abs(Scalar(4) * qa * qc), Scalar(1));
      const Scalar disc_tol = Scalar(10) * mach_eps * disc_mag;

      // A slightly negative discriminant is a rounding artifact of a double
      // root; clamp it to zero.
      if (disc < Scalar(0) && -disc <= disc_tol) {
        disc = Scalar(0);
      }

      if (disc >= Scalar(0)) {
        const Scalar root = std::sqrt(disc);
        // Cancellation-free "q" formulation of the quadratic formula.
        const Scalar q = -Scalar(0.5) * (qb + std::copysign(root, qb));
        const Scalar q_mag = std::max(std::abs(qb) + root, Scalar(1));
        const Scalar q_tol = mach_eps * q_mag;

        if (std::abs(q) > q_tol) {
          // Both roots via the stable pair (q/qa, qc/q).
          try_candidate(q / qa);
          try_candidate(qc / q);
        } else {
          // q vanished to noise: fall back to the parabola vertex.
          try_candidate(-qb / (Scalar(2) * qa));
        }
      }
    }
  }

  // Map the winning unit coordinate back onto the original axis.
  return x_left + s_best * span;
}
284
/**
 * Bracket-level wrapper: interpolate a trial step size between two line
 * search evaluations, then clamp it strictly inside the bracket.
 *
 * @param low evaluation at the lower step size (alpha/obj/dir accessors)
 * @param high evaluation at the upper step size
 * @param opt line-search options (unused here; kept for interface symmetry)
 * @return clamped trial step size
 */
template <typename Eval, typename Options>
inline auto cubic_spline(Eval&& low, Eval&& high, Options&& opt) {
  // Delegate to the scalar routine on the [low, high] alpha bracket.
  auto alpha = cubic_spline(low.alpha(), low.obj(), low.dir(), high.alpha(),
                            high.obj(), high.dir());
  // Keep the proposal a small margin away from either endpoint.
  const double width = high.alpha() - low.alpha();
  const double guard = 1e-3 * width;  // or make this an option
  alpha = std::clamp(alpha, low.alpha() + guard, high.alpha() - guard);
  return alpha;
}
294
/**
 * Armijo sufficient-increase test (maximization form).
 *
 * @param obj_next objective at the trial step
 * @param obj_init objective at the starting point
 * @param alpha_next trial step size
 * @param dir0 initial directional derivative
 * @param opt options providing the Armijo parameter c1
 * @return true when the trial objective does not fall below the initial
 *   value and clears the linear bound obj_init + alpha * dir0 * c1
 */
template <typename Option>
inline auto check_armijo(double obj_next, double obj_init, double alpha_next,
                         double dir0, Option&& opt) {
  const bool no_decrease = obj_next >= obj_init;
  const bool sufficient_gain
      = obj_next >= obj_init + alpha_next * dir0 * opt.c1;
  return no_decrease && sufficient_gain;
}
301
/**
 * Convenience overload: unpack two evaluation objects and delegate to the
 * scalar Armijo test. The slope is taken from `prev` (the accepted point).
 */
template <typename Eval, typename WolfeT, typename Option>
inline bool check_armijo(const Eval& eval, const WolfeT& prev,
                         const Option& opt) {
  const double obj_trial = eval.obj();
  const double obj_base = prev.obj();
  const double step = eval.alpha();
  const double slope0 = prev.dir();
  return check_armijo(obj_trial, obj_base, step, slope0, opt);
}
307
/**
 * Strong-Wolfe curvature condition: the magnitude of the directional
 * derivative at the trial point must shrink to at most c2 times its initial
 * magnitude.
 *
 * @param dir_deriv_next directional derivative at the trial point
 * @param dir_deriv_init initial directional derivative
 * @param opt options providing the curvature parameter c2
 */
template <typename Option>
inline auto check_wolfe_curve(double dir_deriv_next, double dir_deriv_init,
                              Option&& opt) {
  const double lhs = std::abs(dir_deriv_next);
  const double rhs = opt.c2 * std::abs(dir_deriv_init);
  return lhs <= rhs;
}
313
/**
 * Convenience overload: curvature check of a trial evaluation against the
 * previously accepted point's slope.
 */
template <typename Eval, typename WolfeT, typename Option>
inline bool check_wolfe(const Eval& eval, const WolfeT& prev,
                        const Option& opt) {
  const double slope_trial = eval.dir();
  const double slope_prev = prev.dir();
  return check_wolfe_curve(slope_trial, slope_prev, opt);
}
319
// Stop/continue codes for the Wolfe line search. Enumerator names were
// reconstructed from wolfe_status_str() below; the order matches its cases.
enum class WolfeReturn : uint8_t {
  // both conditions true
  Wolfe,
  // Armijo true, curvature false
  Armijo,
  // |phi'| small
  ConvergedGradient,
  // |phi - phi0| small
  ConvergedObjective,
  // |phi - phi0| and |phi'| small
  ConvergedObjectiveAndGradient,
  // high and low became too small
  IntervalTooSmall,
  // alpha < min_alpha
  StepTooSmall,
  // max iters reached
  ReachedMaxStep,
  // failed to find a step
  NumericalIssue,
  // All other failures
  Fail,
  // When a check passes and we want to continue searching
  Continue
};
344
349 // total updates/evaluations
353 // Whether a valid new step was found
354 bool accept_{false};
355 WolfeStatus() = default;
356 WolfeStatus(WolfeReturn stop, int evals, int back)
357 : num_evals_(evals), num_backtracks_(back), stop_(stop), accept_{false} {}
358 WolfeStatus(WolfeReturn stop, int evals, int back, bool success)
359 : num_evals_(evals),
360 num_backtracks_(back),
361 stop_(stop),
362 accept_{success} {}
363};
364
369 switch (s.stop_) {
371 return "Wolfe";
373 return "Armijo";
375 return "ConvergedGradient";
377 return "ConvergedObjective";
379 return "ConvergedObjectiveAndGradient";
381 return "IntervalTooSmall";
383 return "StepTooSmall";
385 return "ReachedMaxStep";
387 return "NumericalIssue";
389 return "Fail";
391 return "Continue";
392 default:
393 return "UNKNOWN";
394 }
395}
396
/**
 * evaluation struct for Wolfe line search: one step size together with the
 * objective value and directional derivative observed at that step.
 */
struct Eval {
  // Step size along the search direction.
  double alpha_ = 0.0;
  // Objective value at alpha_.
  double obj_ = 0.0;
  // Directional derivative (slope along the search direction) at alpha_.
  double dir_ = 0.0;
  constexpr Eval() = default;
  constexpr Eval(double alpha, double obj, double dir)
      : alpha_(alpha), obj_(obj), dir_(dir) {}
  // Mutable / read-only accessors.
  inline double& alpha() { return alpha_; }
  inline const double& alpha() const { return alpha_; }
  inline double& obj() { return obj_; }
  inline const double& obj() const { return obj_; }
  inline double& dir() { return dir_; }
  inline const double& dir() const { return dir_; }
};
417
421struct WolfeData {
422 // current parameter values
423 Eigen::VectorXd theta_;
424 // parameter gradients
425 Eigen::VectorXd theta_grad_;
426 // latent variable
427 Eigen::VectorXd a_;
428 // evaluation data
430 explicit WolfeData(Eigen::Index n)
431 : theta_(Eigen::VectorXd::Zero(n)),
432 theta_grad_(Eigen::VectorXd::Zero(n)),
433 a_(Eigen::VectorXd::Zero(n)),
434 eval_() {}
435
436 template <typename ObjFun, typename ThetaGradF, typename Theta0>
437 WolfeData(ObjFun&& obj_fun, const Eigen::VectorXd& a, const Theta0& theta0,
438 ThetaGradF&& theta_grad_f)
439 : theta_(theta0),
440 theta_grad_(theta_grad_f(theta_)),
441 a_(a),
442 eval_(1.0, obj_fun(a_, theta_), 0.0) {}
443
444 template <typename ObjFun, typename ThetaGradF, typename Theta0>
445 WolfeData(ObjFun&& obj_fun, Eigen::Index n, const Theta0& theta0,
446 ThetaGradF&& theta_grad_f)
447 : theta_(theta0),
448 theta_grad_(theta_grad_f(theta_)),
449 a_(Eigen::VectorXd::Zero(n)),
450 eval_(1.0, obj_fun(a_, theta_), 0.0) {}
451
452 // Initialize with theta = 0
453 template <typename LLFun, typename LLArgs, typename Msgs>
454 WolfeData(Eigen::Index n, const LLFun& ll_fun, const LLArgs& ll_args,
455 const Msgs& msgs)
456 : WolfeData(n, Eigen::VectorXd::Zero(n), ll_fun, ll_args, msgs) {}
457
458 void update(WolfeData& other) {
459 theta_.swap(other.theta_);
460 theta_grad_.swap(other.theta_grad_);
461 a_.swap(other.a_);
462 eval_ = other.eval_;
463 }
464 void swap(WolfeData& other) {
465 theta_.swap(other.theta_);
466 theta_grad_.swap(other.theta_grad_);
467 a_.swap(other.a_);
468 std::swap(eval_, other.eval_);
469 }
470 void update(WolfeData& other, const Eval& eval) {
471 theta_.swap(other.theta_);
472 a_.swap(other.a_);
473 theta_grad_.swap(other.theta_grad_);
474 eval_ = eval;
475 }
476 inline auto& theta() & { return theta_; }
477 inline auto&& theta() && { return std::move(theta_); }
478 inline const auto& theta() const& { return theta_; }
479
480 inline auto& theta_grad() & { return theta_grad_; }
481 inline const auto& theta_grad() const& { return theta_grad_; }
482 inline auto&& theta_grad() && { return std::move(theta_grad_); }
483 inline auto& a() & { return a_; }
484 inline const auto& a() const& { return a_; }
485 inline auto&& a() && { return std::move(a_); }
486 inline auto& obj() & { return eval_.obj(); }
487 inline const auto& obj() const& { return eval_.obj(); }
488 inline auto& alpha() & { return eval_.alpha(); }
489 inline const auto& alpha() const& { return eval_.alpha(); }
490 inline auto& dir() & { return eval_.dir(); }
491 inline const auto& dir() const& { return eval_.dir(); }
492};
493
497struct WolfeInfo {
498 // Current step data. On output will be the proposal step
500 // Previous step data
502 // Scratch space for evaluations of proposal steps
504 // Search direction
505 Eigen::VectorXd p_;
506 // Initial directional derivative
507 double init_dir_;
508
518 template <typename ObjFun, typename Theta0, typename AInit,
519 typename ThetaGradF>
520 WolfeInfo(ObjFun&& obj_fun, AInit&& a_init, Theta0&& theta0,
521 ThetaGradF&& theta_grad_f)
522 : curr_(std::forward<ObjFun>(obj_fun), std::forward<AInit>(a_init),
523 std::forward<Theta0>(theta0),
524 std::forward<ThetaGradF>(theta_grad_f)),
525 prev_(curr_),
526 scratch_(a_init.size()) {
527 if (!std::isfinite(curr_.obj())) {
528 throw std::domain_error(
529 "laplace_marginal_density: log likelihood is not finite at initial "
530 "theta and likelihood arguments.");
531 }
532 }
534 : curr_(std::move(curr)),
535 prev_(std::move(prev)),
536 scratch_(curr_.theta().size()),
537 p_(Eigen::VectorXd::Zero(curr_.theta().size())),
538 init_dir_(0.0) {}
539 explicit WolfeInfo(Eigen::Index n)
540 : curr_(n),
541 prev_(curr_),
542 scratch_(n),
543 p_(Eigen::VectorXd::Zero(n)),
544 init_dir_(0.0) {}
545
546 inline void flip_direction() {
547 if (this->init_dir_ < 0) {
548 this->p_ *= -1;
549 this->init_dir_ *= -1;
550 }
551 }
552
553 inline auto& curr() & { return curr_; }
554 inline const auto& curr() const& { return curr_; }
555 inline auto&& curr() && { return std::move(curr_); }
556 inline auto& prev() & { return prev_; }
557 inline const auto& prev() const& { return prev_; }
558 inline auto&& prev() && { return std::move(prev_); }
559 inline auto& scratch() & { return scratch_; }
560 inline const auto& scratch() const& { return scratch_; }
561 inline auto&& scratch() && { return std::move(scratch_); }
562};
563
/**
 * Retry evaluation of a step until it passes a validity check.
 *
 * Calls `update(proposal, curr, prev, eval, p)` repeatedly. A truthy result
 * is returned immediately; on a falsy result `backoff(eval)` is invoked to
 * adjust the evaluation and decide whether another attempt is worthwhile.
 *
 * @param update evaluation functor; its (truthy/falsy) result drives the loop
 * @param proposal scratch proposal passed through to `update`
 * @param curr current step data
 * @param prev previous step data
 * @param eval evaluation record mutated by `update` and `backoff`
 * @param p search direction
 * @param backoff returns false when no further retries should be made
 * @return the last result produced by `update`
 */
template <typename Update, typename Proposal, typename Curr, typename Prev,
          typename Eval, typename P, typename Backoff>
inline auto retry_evaluate(Update&& update, Proposal&& proposal, Curr&& curr,
                           Prev&& prev, Eval& eval, P&& p, Backoff&& backoff) {
  for (;;) {
    auto res = update(proposal, curr, prev, eval, p);
    if (res) {
      return res;  // valid step found
    }
    const bool retry = backoff(eval);
    if (!retry) {
      return res;  // give up: propagate the failed result
    }
  }
}
609
716template <typename Info, typename UpdateFun, typename Options, typename Stream>
717inline WolfeStatus wolfe_line_search(Info& wolfe_info, UpdateFun&& update_fun,
718 Options&& opt, Stream* msgs) {
719 auto& curr = wolfe_info.curr_;
720 auto& prev = wolfe_info.prev_;
721 auto& scratch = wolfe_info.scratch_;
722 auto&& p = wolfe_info.p_;
723 auto&& dir_deriv_init = wolfe_info.init_dir_;
724 Eval low{0.0, prev.obj(), dir_deriv_init};
725 prev.dir() = dir_deriv_init;
726 int total_updates = 0;
727 Eval best = low; // keep the best Armijo-OK in case strong-Wolfe fails
728 auto update_with_tick = [&total_updates, &opt, &best, &update_fun](
729 auto&& proposal, auto&& curr, auto&& prev,
730 Eval& e, auto&& p) {
731 const bool over_budget = total_updates > opt.max_iterations;
732 if (over_budget) {
733 // Soft budget: stop evaluating new trial points once exceeded.
734 if (check_armijo(best, prev, opt)) {
735 update_fun(proposal, curr, prev, best, p);
736 curr.update(proposal, best);
737 if (check_wolfe(best, prev, opt)) {
738 return WolfeStatus{WolfeReturn::Wolfe, total_updates, 0, true};
739 } else {
740 return WolfeStatus{WolfeReturn::Armijo, total_updates, 0, true};
741 }
742 }
743 return WolfeStatus{WolfeReturn::ReachedMaxStep, total_updates, 0, false};
744 } else {
745 update_fun(proposal, curr, prev, e, p);
746 ++total_updates;
747 return WolfeStatus{WolfeReturn::Continue, total_updates, 0, false};
748 }
749 };
750 double alpha_start
751 = std::clamp(curr.alpha() * opt.scale_up, opt.min_alpha, opt.max_alpha);
752 Eval high{alpha_start, curr.obj(), dir_deriv_init};
753 WolfeStatus wolfe_check{WolfeReturn::Continue, 0, 0, false};
754 // Initial check for numerical trouble
755 {
756 wolfe_check = update_with_tick(scratch, curr, prev, high, p);
757 if (wolfe_check.stop_ != WolfeReturn::Continue) {
758 return wolfe_check;
759 }
760 if (high.alpha() < opt.min_alpha) {
761 return WolfeStatus{WolfeReturn::StepTooSmall, total_updates, 0, false};
762 }
763 // Quick accept if Armijo and Wolfe conditions are satisfied
764 if (check_armijo(high, prev, opt)) {
765 if (check_wolfe(high, prev, opt)) {
766 // Try zooming up till we hit a fail
767 best = high;
768 while (check_armijo(high, prev, opt) && check_wolfe(high, prev, opt)) {
769 best = high;
770 high.alpha() *= opt.scale_up;
771 if (high.alpha() > opt.max_alpha) {
772 break;
773 }
774 wolfe_check = update_with_tick(scratch, curr, prev, high, p);
775 if (wolfe_check.stop_ != WolfeReturn::Continue) {
776 return wolfe_check;
777 }
778 }
779 wolfe_check = update_with_tick(scratch, curr, prev, best, p);
780 if (wolfe_check.stop_ != WolfeReturn::Continue) {
781 return wolfe_check;
782 }
783 curr.update(scratch, best);
784 return WolfeStatus{WolfeReturn::Wolfe, total_updates, 0, true};
785 } else {
786 if (best.obj() < high.obj()) {
787 best = high;
788 }
789 }
790 }
791 }
792 int num_backtracks = 0;
803 while (high.alpha() < opt.max_alpha) {
804 num_backtracks++;
805 // 1. Evaluate f(alpha) and g(alpha)
806 wolfe_check = update_with_tick(scratch, curr, prev, high, p);
807 if (wolfe_check.stop_ != WolfeReturn::Continue) {
808 return wolfe_check;
809 }
810 if (check_armijo(high, prev, opt)) {
811 if (check_wolfe(high, prev, opt)) { // [1]
812 curr.update(scratch, high);
813 return WolfeStatus{WolfeReturn::Wolfe, total_updates, num_backtracks,
814 true};
815 }
816 if (best.obj() < high.obj()) {
817 best = high;
818 }
819 if (high.dir() > 0) { // [2]
820 low = high;
821 high.alpha() *= opt.scale_up;
822 continue;
823 }
824 }
825 // [3,4,5]
826 break;
827 }
828 const double grad_tol
829 = std::max(opt.abs_grad_threshold,
830 opt.rel_grad_threshold * std::abs(dir_deriv_init));
831 const double obj_tol
832 = std::max(opt.abs_obj_threshold,
833 opt.rel_obj_threshold * (1.0 + std::abs(prev.obj())));
834 auto check_bounds = [&](auto&& curr_eval) {
835 // Check for grad convergence
836 const bool slope_check = std::abs(curr_eval.dir()) <= grad_tol;
837 // tiny slope or gain
838 const bool obj_check = std::abs(curr_eval.obj() - prev.obj()) <= obj_tol;
839 // alpha too small
840 const bool alpha_check = curr_eval.alpha() < opt.min_alpha;
841 if (slope_check || obj_check || alpha_check) {
842 bool step_ok
843 = curr_eval.obj() != low.obj() && check_armijo(curr_eval, prev, opt);
844 if (slope_check && obj_check) {
846 total_updates, num_backtracks, step_ok};
847 } else if (slope_check) {
848 return WolfeStatus{WolfeReturn::ConvergedGradient, total_updates,
849 num_backtracks, step_ok};
850 } else if (obj_check) {
851 return WolfeStatus{WolfeReturn::ConvergedObjective, total_updates,
852 num_backtracks, step_ok};
853 } else {
854 return WolfeStatus{WolfeReturn::IntervalTooSmall, total_updates,
855 num_backtracks, step_ok};
856 }
857 }
858 return WolfeStatus{WolfeReturn::Continue, total_updates, num_backtracks,
859 false};
860 };
861 auto check_b = check_bounds(high);
862 if (check_b.stop_ != WolfeReturn::Continue) {
863 if (check_b.accept_) {
864 curr.update(scratch, high);
865 }
866 return check_b;
867 }
868 wolfe_check = update_with_tick(scratch, curr, prev, high, p);
869 if (wolfe_check.stop_ != WolfeReturn::Continue) {
870 return wolfe_check;
871 }
882 while ((high.alpha() - low.alpha() > opt.min_alpha)
883 && high.alpha() > opt.min_alpha) {
884 num_backtracks++;
885 const bool have_sign_change = (low.dir() * high.dir() < 0);
886 const bool high_armijo_ok = check_armijo(high, prev, opt);
887 const bool use_cubic = have_sign_change && high_armijo_ok;
888 // Choose trial alpha: cubic when bracket is good, else bisection.
889 double alpha_mid{0};
890 if (use_cubic) {
891 alpha_mid = cubic_spline(low, high, opt);
892 } else {
893 alpha_mid = 0.5 * (low.alpha() + high.alpha());
894 }
895 if (alpha_mid <= opt.min_alpha) {
896 break;
897 }
898 Eval mid{alpha_mid, 0.0, 0.0};
899 auto wolfe_check = update_with_tick(scratch, curr, prev, mid, p);
900 if (wolfe_check.stop_ != WolfeReturn::Continue) {
901 return wolfe_check;
902 }
903 if (mid.alpha() <= opt.min_alpha) {
904 return WolfeStatus{WolfeReturn::StepTooSmall, total_updates,
905 num_backtracks, false};
906 }
907 if (check_armijo(mid, prev, opt)) {
908 if (check_wolfe(mid, prev, opt)) { // [1]
909 curr.update(scratch, mid);
910 return WolfeStatus{WolfeReturn::Wolfe, total_updates, num_backtracks,
911 true};
912 }
913 // Track best Armijo-OK point for fallback.
914 if (mid.obj() > best.obj()) {
915 best = mid;
916 }
917 if (mid.obj() > low.obj()) {
918 if (mid.dir() > 0) { // [2]
919 low = mid;
920 } else { // [3]
921 high = mid;
922 }
923 } else {
924 // [4]
925 high = mid;
926 }
927 } else {
928 // [5]
929 high = mid;
930 }
931 // Convergence/guard-rail checks (uses prev/grad_tol/obj_tol etc.)
932 auto bounds_check = check_bounds(mid);
933 if (bounds_check.stop_ != WolfeReturn::Continue) {
934 if (bounds_check.accept_) {
935 curr.update(scratch, mid);
936 }
937 return bounds_check;
938 }
939 }
940 // On failure, use the best point we have found so far that at least satisfies
941 // armijo
942 const bool armijo_ok_best = check_armijo(best, prev, opt);
943 if (armijo_ok_best) {
944 wolfe_check = update_with_tick(scratch, curr, prev, best, p);
945 curr.update(scratch, best);
946 return WolfeStatus{WolfeReturn::Armijo, total_updates, num_backtracks,
947 true};
948 } else {
949 return WolfeStatus{WolfeReturn::Fail, total_updates, num_backtracks, false};
950 }
951}
952} // namespace internal
953
954} // namespace stan::math
955#endif
int64_t size(const T &m)
Returns the size (number of the elements) of a matrix_cl or var_value<matrix_cl<T>>.
Definition size.hpp:19
(Expert) Numerical traits for algorithmic differentiation variables.
WolfeStatus wolfe_line_search(Info &wolfe_info, UpdateFun &&update_fun, Options &&opt, Stream *msgs)
Strong Wolfe line search for maximization.
auto check_wolfe_curve(double dir_deriv_next, double dir_deriv_init, Option &&opt)
auto wolfe_status_str(WolfeStatus s)
Helper function for pretty printing.
auto check_armijo(double obj_next, double obj_init, double alpha_next, double dir0, Option &&opt)
Scalar cubic_spline(Scalar x_left, Scalar f_left, Scalar df_left, Scalar x_right, Scalar f_right, Scalar df_right) noexcept
Selects a safeguarded trial point for maximizing a scalar function on a line.
bool check_wolfe(const Eval &eval, const WolfeT &prev, const Option &opt)
auto retry_evaluate(Update &&update, Proposal &&proposal, Curr &&curr, Prev &&prev, Eval &eval, P &&p, Backoff &&backoff)
Retry evaluation of a step until it passes a validity check.
static constexpr double e()
Return the base of the natural logarithm.
Definition constants.hpp:20
T eval(T &&arg)
Inputs which have a plain_type equal to their own type are forwarded unmodified (for Eigen expressions ...
Definition eval.hpp:20
Matrices and templated mathematical functions.
STL namespace.
constexpr Eval(double alpha, double obj, double dir)
constexpr Eval()=default
evaluation struct for Wolfe line search
void update(WolfeData &other, const Eval &eval)
WolfeData(ObjFun &&obj_fun, Eigen::Index n, const Theta0 &theta0, ThetaGradF &&theta_grad_f)
WolfeData(Eigen::Index n, const LLFun &ll_fun, const LLArgs &ll_args, const Msgs &msgs)
WolfeData(ObjFun &&obj_fun, const Eigen::VectorXd &a, const Theta0 &theta0, ThetaGradF &&theta_grad_f)
Data used in current evaluation of wolfe line search at a particular stepsize.
WolfeInfo(WolfeData &&curr, WolfeData &&prev)
WolfeInfo(ObjFun &&obj_fun, AInit &&a_init, Theta0 &&theta0, ThetaGradF &&theta_grad_f)
Construct WolfeInfo with a consistent (a_init, theta_init) pair.
Data object used in wolfe line search.
WolfeStatus(WolfeReturn stop, int evals, int back)
WolfeStatus(WolfeReturn stop, int evals, int back, bool success)
Struct to hold the result status of the Wolfe line search.
constexpr laplace_line_search_options(int max_iter)
double tau
Backtracking shrinkage factor.
double min_alpha
Minimum allowable step size.
double abs_grad_threshold
Absolute gradient tolerance.
double scale_up
Step size expansion factor.
double c2
Curvature condition parameter.
constexpr laplace_line_search_options()=default
double c1
Armijo condition parameter (sufficient decrease).
double max_alpha
Maximum allowable step size.
double abs_obj_threshold
Absolute objective tolerance.
Options for Wolfe line search during optimization.