1#ifndef STAN_MATH_MIX_FUNCTOR_WOLFE_LINE_SEARCH_HPP
2#define STAN_MATH_MIX_FUNCTOR_WOLFE_LINE_SEARCH_HPP
158template <
typename Scalar>
160 Scalar df_left, Scalar x_right,
162 Scalar df_right)
noexcept {
163 const Scalar midpoint = (x_left + x_right) / Scalar(2);
166 if (!(x_right > x_left) || !std::isfinite(f_left) || !std::isfinite(f_right)
167 || !std::isfinite(df_left) || !std::isfinite(df_right)) {
171 const Scalar width = x_right - x_left;
172 const Scalar eps = std::numeric_limits<Scalar>::epsilon();
177 = std::max(std::max(std::abs(x_left), std::abs(x_right)), Scalar(1));
178 if (width <= eps * x_scale) {
184 const Scalar df_left_s = width * df_left;
185 const Scalar df_right_s = width * df_right;
190 const Scalar a0 = f_left;
191 const Scalar a1 = df_left_s;
193 = Scalar(3) * (f_right - f_left) - Scalar(2) * df_left_s - df_right_s;
194 const Scalar a3 = Scalar(2) * (f_left - f_right) + df_left_s + df_right_s;
196 auto eval = [&](Scalar s) -> Scalar {
198 return ((a3 * s + a2) * s + a1) * s + a0;
203 constexpr Scalar edge_guard = Scalar(1
e-9);
206 Scalar best_val =
eval(0.5);
207 auto consider = [&](Scalar s) {
208 if (!std::isfinite(s)) {
211 if (!(s > edge_guard && s < Scalar(1) - edge_guard)) {
214 const Scalar value =
eval(s);
215 if (value > best_val) {
223 const Scalar denom = df_left_s - df_right_s;
224 const Scalar deriv_scale = std::max(
225 std::max(std::abs(df_left_s), std::abs(df_right_s)), Scalar(1));
226 if (std::abs(denom) > eps * deriv_scale) {
227 const Scalar s_secant
236 const Scalar A = Scalar(3) * a3;
237 const Scalar B = Scalar(2) * a2;
241 = std::max(std::max(std::abs(B), std::abs(C)), Scalar(1));
242 const Scalar A_tol = eps * scale;
244 if (std::abs(A) <= A_tol) {
246 const Scalar B_tol = eps * scale;
247 if (std::abs(B) > B_tol) {
252 Scalar disc = std::fma(-Scalar(4) * A, C, B * B);
253 const Scalar disc_scale
254 = std::max(B * B + std::abs(Scalar(4) * A * C), Scalar(1));
255 const Scalar disc_tol = Scalar(10) * eps * disc_scale;
258 if (disc < Scalar(0) && -disc <= disc_tol) {
262 if (disc >= Scalar(0)) {
263 const Scalar r = std::sqrt(disc);
264 const Scalar q = -Scalar(0.5) * (B + std::copysign(r, B));
265 const Scalar q_scale = std::max(std::abs(B) + r, Scalar(1));
266 const Scalar q_tol = eps * q_scale;
268 if (std::abs(q) > q_tol) {
269 const Scalar s1 = q / A;
270 const Scalar s2 = C / q;
275 const Scalar s_vertex = -B / (Scalar(2) * A);
282 return x_left + best_s * width;
285template <
typename Eval,
typename Options>
287 auto alpha =
cubic_spline(low.alpha(), low.obj(), low.dir(), high.alpha(),
288 high.obj(), high.dir());
289 const double width = high.alpha() - low.alpha();
290 const double guard = 1
e-3 * width;
291 alpha = std::clamp(alpha, low.alpha() + guard, high.alpha() - guard);
295template <
typename Option>
296inline auto check_armijo(
double obj_next,
double obj_init,
double alpha_next,
297 double dir0, Option&& opt) {
298 return (obj_next >= obj_init)
299 && (obj_next >= obj_init + alpha_next * dir0 * opt.c1);
302template <
typename Eval,
typename WolfeT,
typename Option>
308template <
typename Option>
311 return std::abs(dir_deriv_next) <= (opt.c2 * std::abs(dir_deriv_init));
314template <
typename Eval,
typename WolfeT,
typename Option>
375 return "ConvergedGradient";
377 return "ConvergedObjective";
379 return "ConvergedObjectiveAndGradient";
381 return "IntervalTooSmall";
383 return "StepTooSmall";
385 return "ReachedMaxStep";
387 return "NumericalIssue";
410 inline const auto&
obj()
const {
return obj_; }
412 inline const auto&
dir()
const {
return dir_; }
436 template <
typename ObjFun,
typename ThetaGradF,
typename Theta0>
437 WolfeData(ObjFun&& obj_fun,
const Eigen::VectorXd&
a,
const Theta0& theta0,
438 ThetaGradF&& theta_grad_f)
444 template <
typename ObjFun,
typename ThetaGradF,
typename Theta0>
445 WolfeData(ObjFun&& obj_fun, Eigen::Index n,
const Theta0& theta0,
446 ThetaGradF&& theta_grad_f)
453 template <
typename LLFun,
typename LLArgs,
typename Msgs>
454 WolfeData(Eigen::Index n,
const LLFun& ll_fun,
const LLArgs& ll_args,
456 :
WolfeData(n,
Eigen::VectorXd::Zero(n), ll_fun, ll_args, msgs) {}
477 inline auto&
a() & {
return a_; }
478 inline const auto&
a() const& {
return a_; }
479 inline auto&&
a() && {
return std::move(
a_); }
502 template <
typename ObjFun,
typename Theta0,
typename ThetaGradF>
503 WolfeInfo(ObjFun&& obj_fun, Eigen::Index n, Theta0&& theta0,
504 ThetaGradF&& theta_grad_f)
505 :
curr_(
std::forward<ObjFun>(obj_fun), n,
std::forward<Theta0>(theta0),
506 std::forward<ThetaGradF>(theta_grad_f)),
510 throw std::domain_error(
511 "laplace_marginal_density: log likelihood is not finite at initial "
512 "theta and likelihood arguments.");
529 if (this->init_dir_ < 0) {
531 this->init_dir_ *= -1;
577template <
typename Update,
typename Proposal,
typename Curr,
typename Prev,
578 typename Eval,
typename P,
typename Backoff>
580 Prev&& prev,
Eval&
eval, P&& p, Backoff&& backoff) {
582 auto res = update(proposal, curr, prev,
eval, p);
586 if (!backoff(
eval)) {
698template <
typename Info,
typename UpdateFun,
typename Options,
typename Stream>
700 Options&& opt, Stream* msgs) {
701 auto& curr = wolfe_info.curr_;
702 auto& prev = wolfe_info.prev_;
703 auto& scratch = wolfe_info.scratch_;
704 auto&& p = wolfe_info.p_;
705 auto&& dir_deriv_init = wolfe_info.init_dir_;
706 Eval low{0.0, prev.
obj(), dir_deriv_init};
707 prev.dir() = dir_deriv_init;
708 int total_updates = 0;
710 auto update_with_tick = [&total_updates, &opt, &best, &update_fun](
711 auto&& proposal,
auto&& curr,
auto&& prev,
713 const bool over_budget = total_updates > opt.max_iterations;
717 update_fun(proposal, curr, prev, best, p);
718 curr.update(proposal, best);
727 update_fun(proposal, curr, prev,
e, p);
733 = std::clamp(curr.alpha() * opt.scale_up, opt.min_alpha, opt.max_alpha);
734 Eval high{alpha_start, curr.
obj(), dir_deriv_init};
738 wolfe_check = update_with_tick(scratch, curr, prev, high, p);
742 if (high.alpha() < opt.min_alpha) {
752 high.
alpha() *= opt.scale_up;
753 if (high.alpha() > opt.max_alpha) {
756 wolfe_check = update_with_tick(scratch, curr, prev, high, p);
761 wolfe_check = update_with_tick(scratch, curr, prev, best, p);
765 curr.update(scratch, best);
768 if (best.
obj() < high.obj()) {
774 int num_backtracks = 0;
785 while (high.alpha() < opt.max_alpha) {
788 wolfe_check = update_with_tick(scratch, curr, prev, high, p);
794 curr.update(scratch, high);
798 if (best.
obj() < high.obj()) {
801 if (high.dir() > 0) {
803 high.
alpha() *= opt.scale_up;
810 const double grad_tol
811 = std::max(opt.abs_grad_threshold,
812 opt.rel_grad_threshold * std::abs(dir_deriv_init));
814 = std::max(opt.abs_obj_threshold,
815 opt.rel_obj_threshold * (1.0 + std::abs(prev.obj())));
816 auto check_bounds = [&](
auto&& curr_eval) {
818 const bool slope_check = std::abs(curr_eval.dir()) <= grad_tol;
820 const bool obj_check = std::abs(curr_eval.obj() - prev.obj()) <= obj_tol;
822 const bool alpha_check = curr_eval.alpha() < opt.min_alpha;
823 if (slope_check || obj_check || alpha_check) {
825 = curr_eval.obj() != low.obj() &&
check_armijo(curr_eval, prev, opt);
826 if (slope_check && obj_check) {
828 total_updates, num_backtracks, step_ok};
829 }
else if (slope_check) {
831 num_backtracks, step_ok};
832 }
else if (obj_check) {
834 num_backtracks, step_ok};
837 num_backtracks, step_ok};
843 auto check_b = check_bounds(high);
845 if (check_b.accept_) {
846 curr.update(scratch, high);
850 wolfe_check = update_with_tick(scratch, curr, prev, high, p);
864 while ((high.alpha() - low.alpha() > opt.min_alpha)
865 && high.alpha() > opt.min_alpha) {
867 const bool have_sign_change = (low.dir() * high.dir() < 0);
868 const bool high_armijo_ok =
check_armijo(high, prev, opt);
869 const bool use_cubic = have_sign_change && high_armijo_ok;
875 alpha_mid = 0.5 * (low.alpha() + high.alpha());
877 if (alpha_mid <= opt.min_alpha) {
880 Eval mid{alpha_mid, 0.0, 0.0};
881 auto wolfe_check = update_with_tick(scratch, curr, prev, mid, p);
885 if (mid.alpha() <= opt.min_alpha) {
887 num_backtracks,
false};
891 curr.update(scratch, mid);
896 if (mid.obj() > best.
obj()) {
899 if (mid.obj() > low.obj()) {
913 auto bounds_check = check_bounds(mid);
915 if (bounds_check.accept_) {
916 curr.update(scratch, mid);
923 const bool armijo_ok_best =
check_armijo(best, prev, opt);
924 if (armijo_ok_best) {
925 wolfe_check = update_with_tick(scratch, curr, prev, best, p);
926 curr.update(scratch, best);
int64_t size(const T &m)
Returns the size (number of elements) of a matrix_cl or var_value<matrix_cl<T>>.
(Expert) Numerical traits for algorithmic differentiation variables.
WolfeStatus wolfe_line_search(Info &wolfe_info, UpdateFun &&update_fun, Options &&opt, Stream *msgs)
Strong Wolfe line search for maximization.
@ ConvergedObjectiveAndGradient
auto check_wolfe_curve(double dir_deriv_next, double dir_deriv_init, Option &&opt)
auto wolfe_status_str(WolfeStatus s)
Helper function for pretty printing.
auto check_armijo(double obj_next, double obj_init, double alpha_next, double dir0, Option &&opt)
Scalar cubic_spline(Scalar x_left, Scalar f_left, Scalar df_left, Scalar x_right, Scalar f_right, Scalar df_right) noexcept
Selects a safeguarded trial point for maximizing a scalar function on a line.
bool check_wolfe(const Eval &eval, const WolfeT &prev, const Option &opt)
auto retry_evaluate(Update &&update, Proposal &&proposal, Curr &&curr, Prev &&prev, Eval &eval, P &&p, Backoff &&backoff)
Retry evaluation of a step until it passes a validity check.
static constexpr double e()
Return the base of the natural logarithm.
T eval(T &&arg)
Inputs which have a plain_type equal to their own type are forwarded unmodified (for Eigen expressions ...
Matrices and templated mathematical functions.
const auto & alpha() const
constexpr Eval(double alpha, double obj, double dir)
evaluation struct for Wolfe line search
void update(WolfeData &other, const Eval &eval)
void update(WolfeData &other)
const auto & dir() const &
const auto & theta() const &
WolfeData(Eigen::Index n)
WolfeData(ObjFun &&obj_fun, Eigen::Index n, const Theta0 &theta0, ThetaGradF &&theta_grad_f)
WolfeData(Eigen::Index n, const LLFun &ll_fun, const LLArgs &ll_args, const Msgs &msgs)
const auto & alpha() const &
WolfeData(ObjFun &&obj_fun, const Eigen::VectorXd &a, const Theta0 &theta0, ThetaGradF &&theta_grad_f)
const auto & obj() const &
Eigen::VectorXd theta_grad_
const auto & theta_grad() const &
Data used in current evaluation of wolfe line search at a particular stepsize.
const auto & scratch() const &
const auto & prev() const &
WolfeInfo(Eigen::Index n)
const auto & curr() const &
WolfeInfo(ObjFun &&obj_fun, Eigen::Index n, Theta0 &&theta0, ThetaGradF &&theta_grad_f)
WolfeInfo(WolfeData &&curr, WolfeData &&prev)
Data object used in wolfe line search.
WolfeStatus(WolfeReturn stop, int evals, int back)
WolfeStatus(WolfeReturn stop, int evals, int back, bool success)
Struct to hold the result status of the Wolfe line search.
double rel_grad_threshold
constexpr laplace_line_search_options(int max_iter)
double tau
Backtracking shrinkage factor.
double min_alpha
Minimum allowable step size.
double abs_grad_threshold
Absolute gradient tolerance.
double scale_up
Step size expansion factor.
double c2
Curvature condition parameter.
constexpr laplace_line_search_options()=default
double c1
Armijo condition parameter (sufficient decrease).
double max_alpha
Maximum allowable step size.
double abs_obj_threshold
Absolute objective tolerance.
Options for Wolfe line search during optimization.