Automatic Differentiation
 
Loading...
Searching...
No Matches
wolfe_line_search.hpp
Go to the documentation of this file.
1#ifndef STAN_MATH_MIX_FUNCTOR_WOLFE_LINE_SEARCH_HPP
2#define STAN_MATH_MIX_FUNCTOR_WOLFE_LINE_SEARCH_HPP
3
11#include <algorithm>
12#include <cmath>
13#include <limits>
14#include <tuple>
15#include <type_traits>
16
17namespace stan::math {
18
27 constexpr explicit laplace_line_search_options(int max_iter)
28 : max_iterations(max_iter) {}
29 constexpr laplace_line_search_options() = default;
30 int max_iterations{1000};
38 double c1{1e-4};
39
47 double c2{0.9};
48
55 double tau{0.5};
56
63 double min_alpha{1e-8};
64
70 double max_alpha{16.0};
71
79 double scale_up{2.0};
80
87 double abs_grad_threshold{1e-12};
88
96 double abs_obj_threshold{1e-12};
97
98 double rel_grad_threshold{1e-3}; // off by default
99 double rel_obj_threshold{1e-10}; // off by default
100};
101namespace internal {
102
155template <typename Scalar>
156[[nodiscard]] inline Scalar cubic_spline(Scalar x_left, Scalar f_left,
157 Scalar df_left, Scalar x_right,
158 Scalar f_right,
159 Scalar df_right) noexcept {
160 const Scalar midpoint = (x_left + x_right) / Scalar(2);
161
162 // Basic validation: ordering + finiteness.
163 if (!(x_right > x_left) || !std::isfinite(f_left) || !std::isfinite(f_right)
164 || !std::isfinite(df_left) || !std::isfinite(df_right)) {
165 return midpoint;
166 }
167
168 const Scalar width = x_right - x_left;
169 const Scalar eps = std::numeric_limits<Scalar>::epsilon();
170
171 // If the bracket is extremely tight, just bisect.
172 {
173 const Scalar x_scale
174 = std::max(std::max(std::abs(x_left), std::abs(x_right)), Scalar(1));
175 if (width <= eps * x_scale) {
176 return midpoint;
177 }
178 }
179
180 // Derivatives with respect to s, where x = x_left + s * width.
181 const Scalar df_left_s = width * df_left; // F'(0)
182 const Scalar df_right_s = width * df_right; // F'(1)
183
184 // Cubic Hermite coefficients in $s \in [0,1]$:
185 // F(s) = a3*s^3 + a2*s^2 + a1*s + a0
186 // with F(0) = f_left, F'(0) = df_left_s, F(1) = f_right, F'(1) = df_right_s.
187 const Scalar a0 = f_left;
188 const Scalar a1 = df_left_s;
189 const Scalar a2
190 = Scalar(3) * (f_right - f_left) - Scalar(2) * df_left_s - df_right_s;
191 const Scalar a3 = Scalar(2) * (f_left - f_right) + df_left_s + df_right_s;
192
193 auto eval = [&](Scalar s) -> Scalar {
194 // Horner evaluation of F(s).
195 return ((a3 * s + a2) * s + a1) * s + a0;
196 };
197
198 // Candidates are restricted to a trimmed interior [edge_guard, 1 -
199 // edge_guard].
200 constexpr Scalar edge_guard = Scalar(1e-9);
201
202 Scalar best_s = 0.5;
203 Scalar best_val = eval(0.5);
204 auto consider = [&](Scalar s) {
205 if (!std::isfinite(s)) {
206 return;
207 }
208 if (!(s > edge_guard && s < Scalar(1) - edge_guard)) {
209 return;
210 }
211 const Scalar value = eval(s);
212 if (value > best_val) {
213 best_s = s;
214 best_val = value;
215 }
216 };
217
218 // 1) Secant estimate for the derivative root between s = 0 and s = 1.
219 {
220 const Scalar denom = df_left_s - df_right_s;
221 const Scalar deriv_scale = std::max(
222 std::max(std::abs(df_left_s), std::abs(df_right_s)), Scalar(1));
223 if (std::abs(denom) > eps * deriv_scale) {
224 const Scalar s_secant
225 = df_left_s / denom; // Root of linear interpolation of F'.
226 consider(s_secant);
227 }
228 }
229
230 // 2) Stationary points of the cubic model (F'(s) = 0).
231 {
232 // F'(s) = 3*a3*s^2 + 2*a2*s + a1 = 0.
233 const Scalar A = Scalar(3) * a3;
234 const Scalar B = Scalar(2) * a2;
235 const Scalar C = a1;
236
237 const Scalar scale
238 = std::max(std::max(std::abs(B), std::abs(C)), Scalar(1));
239 const Scalar A_tol = eps * scale;
240
241 if (std::abs(A) <= A_tol) {
242 // Degenerate to (approximately) linear: B*s + C = 0.
243 const Scalar B_tol = eps * scale;
244 if (std::abs(B) > B_tol) {
245 consider(-C / B);
246 }
247 } else {
248 // Proper quadratic: A*s^2 + B*s + C = 0.
249 Scalar disc = std::fma(-Scalar(4) * A, C, B * B); // B^2 - 4AC
250 const Scalar disc_scale
251 = std::max(B * B + std::abs(Scalar(4) * A * C), Scalar(1));
252 const Scalar disc_tol = Scalar(10) * eps * disc_scale;
253
254 // Treat tiny negative discriminants as zero.
255 if (disc < Scalar(0) && -disc <= disc_tol) {
256 disc = Scalar(0);
257 }
258
259 if (disc >= Scalar(0)) {
260 const Scalar r = std::sqrt(disc);
261 const Scalar q = -Scalar(0.5) * (B + std::copysign(r, B));
262 const Scalar q_scale = std::max(std::abs(B) + r, Scalar(1));
263 const Scalar q_tol = eps * q_scale;
264
265 if (std::abs(q) > q_tol) {
266 const Scalar s1 = q / A;
267 const Scalar s2 = C / q;
268 consider(s1);
269 consider(s2);
270 } else {
271 // Fallback: vertex of the quadratic derivative.
272 const Scalar s_vertex = -B / (Scalar(2) * A);
273 consider(s_vertex);
274 }
275 }
276 }
277 }
278
279 return x_left + best_s * width;
280}
281
282template <typename Eval, typename Options>
283inline auto cubic_spline(Eval&& low, Eval&& high, Options&& opt) {
284 auto alpha = cubic_spline(low.alpha(), low.obj(), low.dir(), high.alpha(),
285 high.obj(), high.dir());
286 const double width = high.alpha() - low.alpha();
287 const double guard = 1e-3 * width; // or make this an option
288 alpha = std::clamp(alpha, low.alpha() + guard, high.alpha() - guard);
289 return alpha;
290}
291
292template <typename Option>
293inline auto check_armijo(double obj_next, double obj_init, double alpha_next,
294 double dir0, Option&& opt) {
295 return (obj_next >= obj_init)
296 && (obj_next >= obj_init + alpha_next * dir0 * opt.c1);
297}
298
299template <typename Eval, typename WolfeT, typename Option>
300inline bool check_armijo(const Eval& eval, const WolfeT& prev,
301 const Option& opt) {
302 return check_armijo(eval.obj(), prev.obj(), eval.alpha(), prev.dir(), opt);
303}
304
305template <typename Option>
306inline auto check_wolfe_curve(double dir_deriv_next, double dir_deriv_init,
307 Option&& opt) {
308 return std::abs(dir_deriv_next) <= (opt.c2 * std::abs(dir_deriv_init));
309}
310
311template <typename Eval, typename WolfeT, typename Option>
312inline bool check_wolfe(const Eval& eval, const WolfeT& prev,
313 const Option& opt) {
314 return check_wolfe_curve(eval.dir(), prev.dir(), opt);
315}
316
317enum class WolfeReturn : uint8_t {
318 // both conditions true
319 Wolfe,
320 // Armijo true, curvature false
321 Armijo,
322 // |phi'| small
324 // |phi - phi0| small
326 // |phi - phi0| and |phi'| small
328 // high and low became too small
330 // alpha < min_alpha
332 // max iters reached
334 // failed to find a step
336 // All other failures
337 Fail,
338 // When a check passes and we want to continue searching
340};
341
346 // total updates/evaluations
350 // Whether a valid new step was found
351 bool accept_{false};
352 WolfeStatus() = default;
353 WolfeStatus(WolfeReturn stop, int evals, int back)
354 : num_evals_(evals), num_backtracks_(back), stop_(stop), accept_{false} {}
355 WolfeStatus(WolfeReturn stop, int evals, int back, bool success)
356 : num_evals_(evals),
357 num_backtracks_(back),
358 stop_(stop),
359 accept_{success} {}
360};
361
366 switch (s.stop_) {
368 return "Wolfe";
370 return "Armijo";
372 return "ConvergedGradient";
374 return "ConvergedObjective";
376 return "ConvergedObjectiveAndGradient";
378 return "IntervalTooSmall";
380 return "StepTooSmall";
382 return "ReachedMaxStep";
384 return "NumericalIssue";
386 return "Fail";
388 return "Continue";
389 default:
390 return "UNKNOWN";
391 }
392}
393
397struct Eval {
398 // alpha
399 double alpha_{0.0};
400 // obj
401 double obj_{0.0};
402 // directional derivative
403 double dir_{0.0};
404 inline auto& alpha() { return alpha_; }
405 inline const auto& alpha() const { return alpha_; }
406 inline auto& obj() { return obj_; }
407 inline const auto& obj() const { return obj_; }
408 inline auto& dir() { return dir_; }
409 inline const auto& dir() const { return dir_; }
410 constexpr Eval(double alpha, double obj, double dir)
411 : alpha_(alpha), obj_(obj), dir_(dir) {}
412 constexpr Eval() = default;
413};
414
418struct WolfeData {
419 // current parameter values
420 Eigen::VectorXd theta_;
421 // parameter gradients
422 Eigen::VectorXd theta_grad_;
423 // latent variable
424 Eigen::VectorXd a_;
425 // evaluation data
427 explicit WolfeData(Eigen::Index n)
428 : theta_(Eigen::VectorXd::Zero(n)),
429 theta_grad_(Eigen::VectorXd::Zero(n)),
430 a_(Eigen::VectorXd::Zero(n)),
431 eval_() {}
432
433 template <typename ObjFun, typename ThetaGradF, typename Theta0>
434 WolfeData(ObjFun&& obj_fun, const Eigen::VectorXd& a, const Theta0& theta0,
435 ThetaGradF&& theta_grad_f)
436 : theta_(theta0),
437 theta_grad_(theta_grad_f(theta_)),
438 a_(a),
439 eval_(1.0, obj_fun(a_, theta_), 0.0) {}
440
441 template <typename ObjFun, typename ThetaGradF, typename Theta0>
442 WolfeData(ObjFun&& obj_fun, Eigen::Index n, const Theta0& theta0,
443 ThetaGradF&& theta_grad_f)
444 : theta_(theta0),
445 theta_grad_(theta_grad_f(theta_)),
446 a_(Eigen::VectorXd::Zero(n)),
447 eval_(1.0, obj_fun(a_, theta_), 0.0) {}
448
449 // Initialize with theta = 0
450 template <typename LLFun, typename LLArgs, typename Msgs>
451 WolfeData(Eigen::Index n, const LLFun& ll_fun, const LLArgs& ll_args,
452 const Msgs& msgs)
453 : WolfeData(n, Eigen::VectorXd::Zero(n), ll_fun, ll_args, msgs) {}
454
455 void update(WolfeData& other) {
456 theta_.swap(other.theta_);
457 theta_grad_.swap(other.theta_grad_);
458 a_.swap(other.a_);
459 eval_ = other.eval_;
460 }
461 void swap(WolfeData& other) {
462 theta_.swap(other.theta_);
463 theta_grad_.swap(other.theta_grad_);
464 a_.swap(other.a_);
465 std::swap(eval_, other.eval_);
466 }
467 void update(WolfeData& other, const Eval& eval) {
468 theta_.swap(other.theta_);
469 a_.swap(other.a_);
470 theta_grad_.swap(other.theta_grad_);
471 eval_ = eval;
472 }
473 inline auto& theta() & { return theta_; }
474 inline auto&& theta() && { return std::move(theta_); }
475 inline const auto& theta() const& { return theta_; }
476
477 inline auto& theta_grad() & { return theta_grad_; }
478 inline const auto& theta_grad() const& { return theta_grad_; }
479 inline auto&& theta_grad() && { return std::move(theta_grad_); }
480 inline auto& a() & { return a_; }
481 inline const auto& a() const& { return a_; }
482 inline auto&& a() && { return std::move(a_); }
483 inline auto& obj() & { return eval_.obj(); }
484 inline const auto& obj() const& { return eval_.obj(); }
485 inline auto& alpha() & { return eval_.alpha(); }
486 inline const auto& alpha() const& { return eval_.alpha(); }
487 inline auto& dir() & { return eval_.dir(); }
488 inline const auto& dir() const& { return eval_.dir(); }
489};
490
494struct WolfeInfo {
495 // Current step data. On output will be the proposal step
497 // Previous step data
499 // Scratch space for evaluations of proposal steps
501 // Search direction
502 Eigen::VectorXd p_;
503 // Initial directional derivative
504 double init_dir_;
505
515 template <typename ObjFun, typename Theta0, typename AInit,
516 typename ThetaGradF>
517 WolfeInfo(ObjFun&& obj_fun, AInit&& a_init, Theta0&& theta0,
518 ThetaGradF&& theta_grad_f)
519 : curr_(std::forward<ObjFun>(obj_fun), std::forward<AInit>(a_init),
520 std::forward<Theta0>(theta0),
521 std::forward<ThetaGradF>(theta_grad_f)),
522 prev_(curr_),
523 scratch_(a_init.size()) {
524 if (!std::isfinite(curr_.obj())) {
525 throw std::domain_error(
526 "laplace_marginal_density: log likelihood is not finite at initial "
527 "theta and likelihood arguments.");
528 }
529 }
531 : curr_(std::move(curr)),
532 prev_(std::move(prev)),
533 scratch_(curr_.theta().size()),
534 p_(Eigen::VectorXd::Zero(curr_.theta().size())),
535 init_dir_(0.0) {}
536 explicit WolfeInfo(Eigen::Index n)
537 : curr_(n),
538 prev_(curr_),
539 scratch_(n),
540 p_(Eigen::VectorXd::Zero(n)),
541 init_dir_(0.0) {}
542
543 inline void flip_direction() {
544 if (this->init_dir_ < 0) {
545 this->p_ *= -1;
546 this->init_dir_ *= -1;
547 }
548 }
549
550 inline auto& curr() & { return curr_; }
551 inline const auto& curr() const& { return curr_; }
552 inline auto&& curr() && { return std::move(curr_); }
553 inline auto& prev() & { return prev_; }
554 inline const auto& prev() const& { return prev_; }
555 inline auto&& prev() && { return std::move(prev_); }
556 inline auto& scratch() & { return scratch_; }
557 inline const auto& scratch() const& { return scratch_; }
558 inline auto&& scratch() && { return std::move(scratch_); }
559};
560
592template <typename Update, typename Proposal, typename Curr, typename Prev,
593 typename Eval, typename P, typename Backoff>
594inline auto retry_evaluate(Update&& update, Proposal&& proposal, Curr&& curr,
595 Prev&& prev, Eval& eval, P&& p, Backoff&& backoff) {
596 while (true) {
597 auto res = update(proposal, curr, prev, eval, p);
598 if (res) {
599 return res;
600 }
601 if (!backoff(eval)) {
602 return res;
603 }
604 }
605}
606
713template <typename Info, typename UpdateFun, typename Options, typename Stream>
714inline WolfeStatus wolfe_line_search(Info& wolfe_info, UpdateFun&& update_fun,
715 Options&& opt, Stream* msgs) {
716 auto& curr = wolfe_info.curr_;
717 auto& prev = wolfe_info.prev_;
718 auto& scratch = wolfe_info.scratch_;
719 auto&& p = wolfe_info.p_;
720 auto&& dir_deriv_init = wolfe_info.init_dir_;
721 Eval low{0.0, prev.obj(), dir_deriv_init};
722 prev.dir() = dir_deriv_init;
723 int total_updates = 0;
724 Eval best = low; // keep the best Armijo-OK in case strong-Wolfe fails
725 auto update_with_tick = [&total_updates, &opt, &best, &update_fun](
726 auto&& proposal, auto&& curr, auto&& prev,
727 Eval& e, auto&& p) {
728 const bool over_budget = total_updates > opt.max_iterations;
729 if (over_budget) {
730 // Soft budget: stop evaluating new trial points once exceeded.
731 if (check_armijo(best, prev, opt)) {
732 update_fun(proposal, curr, prev, best, p);
733 curr.update(proposal, best);
734 if (check_wolfe(best, prev, opt)) {
735 return WolfeStatus{WolfeReturn::Wolfe, total_updates, 0, true};
736 } else {
737 return WolfeStatus{WolfeReturn::Armijo, total_updates, 0, true};
738 }
739 }
740 return WolfeStatus{WolfeReturn::ReachedMaxStep, total_updates, 0, false};
741 } else {
742 update_fun(proposal, curr, prev, e, p);
743 ++total_updates;
744 return WolfeStatus{WolfeReturn::Continue, total_updates, 0, false};
745 }
746 };
747 double alpha_start
748 = std::clamp(curr.alpha() * opt.scale_up, opt.min_alpha, opt.max_alpha);
749 Eval high{alpha_start, curr.obj(), dir_deriv_init};
750 WolfeStatus wolfe_check{WolfeReturn::Continue, 0, 0, false};
751 // Initial check for numerical trouble
752 {
753 wolfe_check = update_with_tick(scratch, curr, prev, high, p);
754 if (wolfe_check.stop_ != WolfeReturn::Continue) {
755 return wolfe_check;
756 }
757 if (high.alpha() < opt.min_alpha) {
758 return WolfeStatus{WolfeReturn::StepTooSmall, total_updates, 0, false};
759 }
760 // Quick accept if Armijo and Wolfe conditions are satisfied
761 if (check_armijo(high, prev, opt)) {
762 if (check_wolfe(high, prev, opt)) {
763 // Try zooming up till we hit a fail
764 best = high;
765 while (check_armijo(high, prev, opt) && check_wolfe(high, prev, opt)) {
766 best = high;
767 high.alpha() *= opt.scale_up;
768 if (high.alpha() > opt.max_alpha) {
769 break;
770 }
771 wolfe_check = update_with_tick(scratch, curr, prev, high, p);
772 if (wolfe_check.stop_ != WolfeReturn::Continue) {
773 return wolfe_check;
774 }
775 }
776 wolfe_check = update_with_tick(scratch, curr, prev, best, p);
777 if (wolfe_check.stop_ != WolfeReturn::Continue) {
778 return wolfe_check;
779 }
780 curr.update(scratch, best);
781 return WolfeStatus{WolfeReturn::Wolfe, total_updates, 0, true};
782 } else {
783 if (best.obj() < high.obj()) {
784 best = high;
785 }
786 }
787 }
788 }
789 int num_backtracks = 0;
800 while (high.alpha() < opt.max_alpha) {
801 num_backtracks++;
802 // 1. Evaluate f(alpha) and g(alpha)
803 wolfe_check = update_with_tick(scratch, curr, prev, high, p);
804 if (wolfe_check.stop_ != WolfeReturn::Continue) {
805 return wolfe_check;
806 }
807 if (check_armijo(high, prev, opt)) {
808 if (check_wolfe(high, prev, opt)) { // [1]
809 curr.update(scratch, high);
810 return WolfeStatus{WolfeReturn::Wolfe, total_updates, num_backtracks,
811 true};
812 }
813 if (best.obj() < high.obj()) {
814 best = high;
815 }
816 if (high.dir() > 0) { // [2]
817 low = high;
818 high.alpha() *= opt.scale_up;
819 continue;
820 }
821 }
822 // [3,4,5]
823 break;
824 }
825 const double grad_tol
826 = std::max(opt.abs_grad_threshold,
827 opt.rel_grad_threshold * std::abs(dir_deriv_init));
828 const double obj_tol
829 = std::max(opt.abs_obj_threshold,
830 opt.rel_obj_threshold * (1.0 + std::abs(prev.obj())));
831 auto check_bounds = [&](auto&& curr_eval) {
832 // Check for grad convergence
833 const bool slope_check = std::abs(curr_eval.dir()) <= grad_tol;
834 // tiny slope or gain
835 const bool obj_check = std::abs(curr_eval.obj() - prev.obj()) <= obj_tol;
836 // alpha too small
837 const bool alpha_check = curr_eval.alpha() < opt.min_alpha;
838 if (slope_check || obj_check || alpha_check) {
839 bool step_ok
840 = curr_eval.obj() != low.obj() && check_armijo(curr_eval, prev, opt);
841 if (slope_check && obj_check) {
843 total_updates, num_backtracks, step_ok};
844 } else if (slope_check) {
845 return WolfeStatus{WolfeReturn::ConvergedGradient, total_updates,
846 num_backtracks, step_ok};
847 } else if (obj_check) {
848 return WolfeStatus{WolfeReturn::ConvergedObjective, total_updates,
849 num_backtracks, step_ok};
850 } else {
851 return WolfeStatus{WolfeReturn::IntervalTooSmall, total_updates,
852 num_backtracks, step_ok};
853 }
854 }
855 return WolfeStatus{WolfeReturn::Continue, total_updates, num_backtracks,
856 false};
857 };
858 auto check_b = check_bounds(high);
859 if (check_b.stop_ != WolfeReturn::Continue) {
860 if (check_b.accept_) {
861 curr.update(scratch, high);
862 }
863 return check_b;
864 }
865 wolfe_check = update_with_tick(scratch, curr, prev, high, p);
866 if (wolfe_check.stop_ != WolfeReturn::Continue) {
867 return wolfe_check;
868 }
879 while ((high.alpha() - low.alpha() > opt.min_alpha)
880 && high.alpha() > opt.min_alpha) {
881 num_backtracks++;
882 const bool have_sign_change = (low.dir() * high.dir() < 0);
883 const bool high_armijo_ok = check_armijo(high, prev, opt);
884 const bool use_cubic = have_sign_change && high_armijo_ok;
885 // Choose trial alpha: cubic when bracket is good, else bisection.
886 double alpha_mid{0};
887 if (use_cubic) {
888 alpha_mid = cubic_spline(low, high, opt);
889 } else {
890 alpha_mid = 0.5 * (low.alpha() + high.alpha());
891 }
892 if (alpha_mid <= opt.min_alpha) {
893 break;
894 }
895 Eval mid{alpha_mid, 0.0, 0.0};
896 auto wolfe_check = update_with_tick(scratch, curr, prev, mid, p);
897 if (wolfe_check.stop_ != WolfeReturn::Continue) {
898 return wolfe_check;
899 }
900 if (mid.alpha() <= opt.min_alpha) {
901 return WolfeStatus{WolfeReturn::StepTooSmall, total_updates,
902 num_backtracks, false};
903 }
904 if (check_armijo(mid, prev, opt)) {
905 if (check_wolfe(mid, prev, opt)) { // [1]
906 curr.update(scratch, mid);
907 return WolfeStatus{WolfeReturn::Wolfe, total_updates, num_backtracks,
908 true};
909 }
910 // Track best Armijo-OK point for fallback.
911 if (mid.obj() > best.obj()) {
912 best = mid;
913 }
914 if (mid.obj() > low.obj()) {
915 if (mid.dir() > 0) { // [2]
916 low = mid;
917 } else { // [3]
918 high = mid;
919 }
920 } else {
921 // [4]
922 high = mid;
923 }
924 } else {
925 // [5]
926 high = mid;
927 }
928 // Convergence/guard-rail checks (uses prev/grad_tol/obj_tol etc.)
929 auto bounds_check = check_bounds(mid);
930 if (bounds_check.stop_ != WolfeReturn::Continue) {
931 if (bounds_check.accept_) {
932 curr.update(scratch, mid);
933 }
934 return bounds_check;
935 }
936 }
937 // On failure, use the best point we have found so far that at least satisfies
938 // armijo
939 const bool armijo_ok_best = check_armijo(best, prev, opt);
940 if (armijo_ok_best) {
941 wolfe_check = update_with_tick(scratch, curr, prev, best, p);
942 curr.update(scratch, best);
943 return WolfeStatus{WolfeReturn::Armijo, total_updates, num_backtracks,
944 true};
945 } else {
946 return WolfeStatus{WolfeReturn::Fail, total_updates, num_backtracks, false};
947 }
948}
949} // namespace internal
950
951} // namespace stan::math
952#endif
int64_t size(const T &m)
Returns the size (number of the elements) of a matrix_cl or var_value<matrix_cl<T>>.
Definition size.hpp:19
(Expert) Numerical traits for algorithmic differentiation variables.
WolfeStatus wolfe_line_search(Info &wolfe_info, UpdateFun &&update_fun, Options &&opt, Stream *msgs)
Strong Wolfe line search for maximization.
auto check_wolfe_curve(double dir_deriv_next, double dir_deriv_init, Option &&opt)
auto wolfe_status_str(WolfeStatus s)
Helper function for pretty printing.
auto check_armijo(double obj_next, double obj_init, double alpha_next, double dir0, Option &&opt)
Scalar cubic_spline(Scalar x_left, Scalar f_left, Scalar df_left, Scalar x_right, Scalar f_right, Scalar df_right) noexcept
Selects a safeguarded trial point for maximizing a scalar function on a line.
bool check_wolfe(const Eval &eval, const WolfeT &prev, const Option &opt)
auto retry_evaluate(Update &&update, Proposal &&proposal, Curr &&curr, Prev &&prev, Eval &eval, P &&p, Backoff &&backoff)
Retry evaluation of a step until it passes a validity check.
static constexpr double e()
Return the base of the natural logarithm.
Definition constants.hpp:20
T eval(T &&arg)
Inputs which have a plain_type equal to the own time are forwarded unmodified (for Eigen expressions ...
Definition eval.hpp:20
Matrices and templated mathematical functions.
STL namespace.
constexpr Eval(double alpha, double obj, double dir)
constexpr Eval()=default
evaluation struct for Wolfe line search
void update(WolfeData &other, const Eval &eval)
WolfeData(ObjFun &&obj_fun, Eigen::Index n, const Theta0 &theta0, ThetaGradF &&theta_grad_f)
WolfeData(Eigen::Index n, const LLFun &ll_fun, const LLArgs &ll_args, const Msgs &msgs)
WolfeData(ObjFun &&obj_fun, const Eigen::VectorXd &a, const Theta0 &theta0, ThetaGradF &&theta_grad_f)
Data used in current evaluation of wolfe line search at a particular stepsize.
WolfeInfo(WolfeData &&curr, WolfeData &&prev)
WolfeInfo(ObjFun &&obj_fun, AInit &&a_init, Theta0 &&theta0, ThetaGradF &&theta_grad_f)
Construct WolfeInfo with a consistent (a_init, theta_init) pair.
Data object used in wolfe line search.
WolfeStatus(WolfeReturn stop, int evals, int back)
WolfeStatus(WolfeReturn stop, int evals, int back, bool success)
Struct to hold the result status of the Wolfe line search.
constexpr laplace_line_search_options(int max_iter)
double tau
Backtracking shrinkage factor.
double min_alpha
Minimum allowable step size.
double abs_grad_threshold
Absolute gradient tolerance.
double scale_up
Step size expansion factor.
double c2
Curvature condition parameter.
constexpr laplace_line_search_options()=default
double c1
Armijo condition parameter (sufficient decrease).
double max_alpha
Maximum allowable step size.
double abs_obj_threshold
Absolute objective tolerance.
Options for Wolfe line search during optimization.