1#ifndef STAN_MATH_MIX_FUNCTOR_WOLFE_LINE_SEARCH_HPP
2#define STAN_MATH_MIX_FUNCTOR_WOLFE_LINE_SEARCH_HPP
// NOTE(review): extraction-mangled fragment of cubic_spline(...) — many
// interior lines are missing; the leading digits on each line are original
// line numbers fused into the text by the extraction, not code.
// Per the generated docs, this selects a safeguarded trial point for
// maximizing a scalar function on a line segment [x_left, x_right].
158template <
typename Scalar>
160 Scalar df_left, Scalar x_right,
162 Scalar df_right)
noexcept {
// Fallback candidate: bisection midpoint of the bracket.
163 const Scalar midpoint = (x_left + x_right) / Scalar(2);
// Guard: degenerate bracket or any non-finite input — presumably returns
// the midpoint here (body of this branch is missing from the fragment).
166 if (!(x_right > x_left) || !std::isfinite(f_left) || !std::isfinite(f_right)
167 || !std::isfinite(df_left) || !std::isfinite(df_right)) {
171 const Scalar width = x_right - x_left;
172 const Scalar eps = std::numeric_limits<Scalar>::epsilon();
// Relative width check: bail out when the bracket is at rounding scale.
177 = std::max(std::max(std::abs(x_left), std::abs(x_right)), Scalar(1));
178 if (width <= eps * x_scale) {
// Work in normalized coordinate s in [0, 1]; derivatives are rescaled by
// the bracket width so the cubic below is expressed on the unit interval.
184 const Scalar df_left_s = width * df_left;
185 const Scalar df_right_s = width * df_right;
// Hermite cubic p(s) = a0 + a1 s + a2 s^2 + a3 s^3 matching the endpoint
// values and (scaled) slopes.
190 const Scalar a0 = f_left;
191 const Scalar a1 = df_left_s;
193 = Scalar(3) * (f_right - f_left) - Scalar(2) * df_left_s - df_right_s;
194 const Scalar a3 = Scalar(2) * (f_left - f_right) + df_left_s + df_right_s;
// Horner evaluation of the cubic.
196 auto eval = [&](Scalar s) -> Scalar {
198 return ((a3 * s + a2) * s + a1) * s + a0;
// Candidates must stay strictly inside (edge_guard, 1 - edge_guard).
203 constexpr Scalar edge_guard = Scalar(1
e-9);
// Start from the midpoint (s = 0.5) as the incumbent best candidate;
// `consider` keeps the candidate with the largest cubic value (maximize).
206 Scalar best_val =
eval(0.5);
207 auto consider = [&](Scalar s) {
208 if (!std::isfinite(s)) {
211 if (!(s > edge_guard && s < Scalar(1) - edge_guard)) {
214 const Scalar value =
eval(s);
215 if (value > best_val) {
// Secant candidate from the two endpoint slopes, guarded against a
// near-zero denominator at derivative scale.
223 const Scalar denom = df_left_s - df_right_s;
224 const Scalar deriv_scale = std::max(
225 std::max(std::abs(df_left_s), std::abs(df_right_s)), Scalar(1));
226 if (std::abs(denom) > eps * deriv_scale) {
227 const Scalar s_secant
// Stationary points of the cubic: roots of p'(s) = 3 a3 s^2 + 2 a2 s + a1,
// i.e. A s^2 + B s + C with the coefficients below.
236 const Scalar A = Scalar(3) * a3;
237 const Scalar B = Scalar(2) * a2;
241 = std::max(std::max(std::abs(B), std::abs(C)), Scalar(1));
242 const Scalar A_tol = eps * scale;
// Nearly-linear derivative: treat as a single root -C/B (branch body
// partially missing in this fragment).
244 if (std::abs(A) <= A_tol) {
246 const Scalar B_tol = eps * scale;
247 if (std::abs(B) > B_tol) {
// fma gives one rounding for B^2 - 4AC; a tiny negative discriminant
// within disc_tol is presumably clamped to zero (clamp line missing).
252 Scalar disc = std::fma(-Scalar(4) * A, C, B * B);
253 const Scalar disc_scale
254 = std::max(B * B + std::abs(Scalar(4) * A * C), Scalar(1));
255 const Scalar disc_tol = Scalar(10) * eps * disc_scale;
258 if (disc < Scalar(0) && -disc <= disc_tol) {
// Numerically stable quadratic roots: q = -(B + sign(B) sqrt(disc)) / 2,
// then s1 = q / A and s2 = C / q avoid cancellation.
262 if (disc >= Scalar(0)) {
263 const Scalar r = std::sqrt(disc);
264 const Scalar q = -Scalar(0.5) * (B + std::copysign(r, B));
265 const Scalar q_scale = std::max(std::abs(B) + r, Scalar(1));
266 const Scalar q_tol = eps * q_scale;
268 if (std::abs(q) > q_tol) {
269 const Scalar s1 = q / A;
270 const Scalar s2 = C / q;
// Degenerate quadratic (q ~ 0): fall back to the parabola vertex.
275 const Scalar s_vertex = -B / (Scalar(2) * A);
// Map the best normalized candidate back to the original coordinate.
282 return x_left + best_s * width;
// NOTE(review): fragment — the signature line is missing from this
// extraction. Wraps cubic_spline for the (low, high) Eval bracket and
// guards the result away from the bracket edges.
285template <
typename Eval,
typename Options>
// Interpolated trial stepsize from the cubic fit on [low.alpha, high.alpha].
287 auto alpha =
cubic_spline(low.alpha(), low.obj(), low.dir(), high.alpha(),
288 high.obj(), high.dir());
289 const double width = high.alpha() - low.alpha();
// Keep the trial at least 0.1% of the bracket width away from either edge.
290 const double guard = 1
e-3 * width;
291 alpha = std::clamp(alpha, low.alpha() + guard, high.alpha() - guard);
/**
 * Armijo (sufficient-increase) condition for a line search that
 * *maximizes* the objective.
 *
 * Accepts the trial point only if the objective both did not decrease and
 * achieved at least the fraction `opt.c1` of the increase predicted by the
 * initial directional derivative.
 *
 * NOTE(review): reconstructed from a truncated fragment — the original body
 * was cut before its closing brace; the logic below matches the visible
 * return expression exactly.
 *
 * @tparam Option options type exposing the Armijo parameter `c1`
 * @param obj_next objective value at the trial step
 * @param obj_init objective value at the start of the line search
 * @param alpha_next trial stepsize
 * @param dir0 directional derivative at the start of the line search
 * @param opt line-search options (reads `opt.c1`)
 * @return true if the sufficient-increase condition holds
 */
template <typename Option>
inline auto check_armijo(double obj_next, double obj_init, double alpha_next,
                         double dir0, Option&& opt) {
  return (obj_next >= obj_init)
         && (obj_next >= obj_init + alpha_next * dir0 * opt.c1);
}
// NOTE(review): template header for check_wolfe(const Eval&, const WolfeT&,
// const Option&) — the function body is missing from this extraction
// fragment (per the generated docs it returns bool).
302template <
typename Eval,
typename WolfeT,
typename Option>
/**
 * Strong Wolfe curvature condition.
 *
 * Accepts the trial point when the magnitude of the directional derivative
 * has shrunk to at most `opt.c2` times the magnitude of the initial
 * directional derivative.
 *
 * NOTE(review): the signature line was lost in this extraction; it is
 * reconstructed from the generated documentation
 * (`check_wolfe_curve(double dir_deriv_next, double dir_deriv_init,
 * Option&& opt)`). The return expression is byte-identical to the original.
 *
 * @tparam Option options type exposing the curvature parameter `c2`
 * @param dir_deriv_next directional derivative at the trial step
 * @param dir_deriv_init directional derivative at the start of the search
 * @param opt line-search options (reads `opt.c2`)
 * @return true if the strong curvature condition holds
 */
template <typename Option>
inline auto check_wolfe_curve(double dir_deriv_next, double dir_deriv_init,
                              Option&& opt) {
  return std::abs(dir_deriv_next) <= (opt.c2 * std::abs(dir_deriv_init));
}
// NOTE(review): two unrelated fragments fused by the extraction.
// First, a template header (original line 314) whose function body is
// missing from this view.
314template <
typename Eval,
typename WolfeT,
typename Option>
// Second, the return strings of wolfe_status_str(WolfeStatus s) — per the
// generated docs a pretty-printing helper; the switch/if structure that
// selects between these cases is missing from this fragment.
375 return "ConvergedGradient";
377 return "ConvergedObjective";
379 return "ConvergedObjectiveAndGradient";
381 return "IntervalTooSmall";
383 return "StepTooSmall";
385 return "ReachedMaxStep";
387 return "NumericalIssue";
// Const accessors of the Eval struct (evaluation record for the Wolfe line
// search): objective value and directional derivative at a trial stepsize.
// NOTE(review): each accessor is split across several lines by the
// extraction; tokens are preserved verbatim.
410 inline const auto&
obj()
const {
return obj_; }
412 inline const auto&
dir()
const {
return dir_; }
// WolfeData constructors and a() accessors (fragment; bodies of the first
// two constructors are missing from this extraction).
// Ctor 1: explicit initial offset vector `a` plus theta0 and gradient
// functor.
436 template <
typename ObjFun,
typename ThetaGradF,
typename Theta0>
437 WolfeData(ObjFun&& obj_fun,
const Eigen::VectorXd&
a,
const Theta0& theta0,
438 ThetaGradF&& theta_grad_f)
// Ctor 2: size-only variant — presumably default-initializes the offset
// vector of length n (body missing; confirm against the full header).
444 template <
typename ObjFun,
typename ThetaGradF,
typename Theta0>
445 WolfeData(ObjFun&& obj_fun, Eigen::Index n,
const Theta0& theta0,
446 ThetaGradF&& theta_grad_f)
// Ctor 3: log-likelihood variant; delegates with a zero vector of length n
// as the initial `a`.
453 template <
typename LLFun,
typename LLArgs,
typename Msgs>
454 WolfeData(Eigen::Index n,
const LLFun& ll_fun,
const LLArgs& ll_args,
456 :
WolfeData(n,
Eigen::VectorXd::Zero(n), ll_fun, ll_args, msgs) {}
// Ref-qualified accessors for the a_ member: mutable lvalue, const lvalue,
// and rvalue (moves a_ out of an expiring object).
483 inline auto&
a() & {
return a_; }
484 inline const auto&
a() const& {
return a_; }
485 inline auto&&
a() && {
return std::move(
a_); }
// WolfeInfo forwarding constructor (fragment): builds curr_ by perfectly
// forwarding the objective functor, initial offset, theta0, and gradient
// functor. Per the generated docs it establishes a consistent
// (a_init, theta_init) pair.
518 template <
typename ObjFun,
typename Theta0,
typename AInit,
520 WolfeInfo(ObjFun&& obj_fun, AInit&& a_init, Theta0&& theta0,
521 ThetaGradF&& theta_grad_f)
522 :
curr_(
std::forward<ObjFun>(obj_fun),
std::forward<AInit>(a_init),
523 std::forward<Theta0>(theta0),
524 std::forward<ThetaGradF>(theta_grad_f)),
// Reject a non-finite initial objective (the guarding condition line is
// missing from this fragment).
528 throw std::domain_error(
529 "laplace_marginal_density: log likelihood is not finite at initial "
530 "theta and likelihood arguments.");
// Flip the initial directional derivative to be non-negative — presumably
// so the search direction is always an ascent direction; confirm against
// the full header.
547 if (this->init_dir_ < 0) {
549 this->init_dir_ *= -1;
// retry_evaluate (fragment): per the generated docs, retries evaluation of
// a step until it passes a validity check. Runs `update` once, then — if
// `backoff(eval)` reports the evaluation invalid — presumably shrinks the
// step and retries (the loop body is missing from this extraction).
595template <
typename Update,
typename Proposal,
typename Curr,
typename Prev,
596 typename Eval,
typename P,
typename Backoff>
598 Prev&& prev,
Eval&
eval, P&& p, Backoff&& backoff) {
600 auto res = update(proposal, curr, prev,
eval, p);
604 if (!backoff(
eval)) {
// wolfe_line_search (fragment): strong Wolfe line search for maximization,
// per the generated docs. Large portions of the body are missing from this
// extraction; comments below describe only the visible phases and hedge
// where control flow is cut.
716template <
typename Info,
typename UpdateFun,
typename Options,
typename Stream>
718 Options&& opt, Stream* msgs) {
// Aliases into the shared WolfeInfo workspace.
719 auto& curr = wolfe_info.curr_;
720 auto& prev = wolfe_info.prev_;
721 auto& scratch = wolfe_info.scratch_;
722 auto&& p = wolfe_info.p_;
723 auto&& dir_deriv_init = wolfe_info.init_dir_;
// `low` anchors the bracket at alpha = 0 with the starting objective/slope.
724 Eval low{0.0, prev.
obj(), dir_deriv_init};
725 prev.dir() = dir_deriv_init;
726 int total_updates = 0;
// Wrapper around update_fun that counts evaluations and enforces the
// iteration budget (truncated: the over-budget handling is not visible).
728 auto update_with_tick = [&total_updates, &opt, &best, &update_fun](
729 auto&& proposal,
auto&& curr,
auto&& prev,
731 const bool over_budget = total_updates > opt.max_iterations;
735 update_fun(proposal, curr, prev, best, p);
736 curr.update(proposal, best);
745 update_fun(proposal, curr, prev,
e, p);
// Initial trial step: scaled-up previous alpha, clamped to [min, max].
751 = std::clamp(curr.alpha() * opt.scale_up, opt.min_alpha, opt.max_alpha);
752 Eval high{alpha_start, curr.
obj(), dir_deriv_init};
756 wolfe_check = update_with_tick(scratch, curr, prev, high, p);
760 if (high.alpha() < opt.min_alpha) {
// Expansion: grow the trial step until it exceeds max_alpha or a bracket
// is found (interleaving logic partially missing).
770 high.
alpha() *= opt.scale_up;
771 if (high.alpha() > opt.max_alpha) {
774 wolfe_check = update_with_tick(scratch, curr, prev, high, p);
779 wolfe_check = update_with_tick(scratch, curr, prev, best, p);
783 curr.update(scratch, best);
786 if (best.
obj() < high.obj()) {
792 int num_backtracks = 0;
// Forward-tracking loop: keep expanding while alpha < max_alpha and the
// slope at `high` is still positive (objective still rising).
803 while (high.alpha() < opt.max_alpha) {
806 wolfe_check = update_with_tick(scratch, curr, prev, high, p);
812 curr.update(scratch, high);
816 if (best.
obj() < high.obj()) {
819 if (high.dir() > 0) {
821 high.
alpha() *= opt.scale_up;
// Convergence tolerances: absolute floors with relative components scaled
// by the initial slope and the starting objective, respectively.
828 const double grad_tol
829 = std::max(opt.abs_grad_threshold,
830 opt.rel_grad_threshold * std::abs(dir_deriv_init));
832 = std::max(opt.abs_obj_threshold,
833 opt.rel_obj_threshold * (1.0 + std::abs(prev.obj())));
// Classifies an evaluation: gradient-converged, objective-converged, both,
// or step-too-small; step_ok additionally requires progress past `low`
// plus the Armijo condition.
834 auto check_bounds = [&](
auto&& curr_eval) {
836 const bool slope_check = std::abs(curr_eval.dir()) <= grad_tol;
838 const bool obj_check = std::abs(curr_eval.obj() - prev.obj()) <= obj_tol;
840 const bool alpha_check = curr_eval.alpha() < opt.min_alpha;
841 if (slope_check || obj_check || alpha_check) {
843 = curr_eval.obj() != low.obj() &&
check_armijo(curr_eval, prev, opt);
844 if (slope_check && obj_check) {
846 total_updates, num_backtracks, step_ok};
847 }
else if (slope_check) {
849 num_backtracks, step_ok};
850 }
else if (obj_check) {
852 num_backtracks, step_ok};
855 num_backtracks, step_ok};
861 auto check_b = check_bounds(high);
863 if (check_b.accept_) {
864 curr.update(scratch, high);
868 wolfe_check = update_with_tick(scratch, curr, prev, high, p);
// Zoom phase: shrink the [low, high] bracket until it collapses below
// min_alpha, choosing trial points by cubic interpolation when the bracket
// has a derivative sign change and `high` satisfies Armijo, otherwise by
// bisection.
882 while ((high.alpha() - low.alpha() > opt.min_alpha)
883 && high.alpha() > opt.min_alpha) {
885 const bool have_sign_change = (low.dir() * high.dir() < 0);
886 const bool high_armijo_ok =
check_armijo(high, prev, opt);
887 const bool use_cubic = have_sign_change && high_armijo_ok;
893 alpha_mid = 0.5 * (low.alpha() + high.alpha());
895 if (alpha_mid <= opt.min_alpha) {
898 Eval mid{alpha_mid, 0.0, 0.0};
899 auto wolfe_check = update_with_tick(scratch, curr, prev, mid, p);
903 if (mid.alpha() <= opt.min_alpha) {
905 num_backtracks,
false};
909 curr.update(scratch, mid);
// Track the best evaluation seen and narrow the bracket based on how the
// midpoint compares to `low` (bracket-update branches partially missing).
914 if (mid.obj() > best.
obj()) {
917 if (mid.obj() > low.obj()) {
932 auto bounds_check = check_bounds(mid);
934 if (bounds_check.accept_) {
935 curr.update(scratch, mid);
// Fallback on exit: if the best-seen point satisfies Armijo, re-evaluate
// and commit it as the accepted step.
942 const bool armijo_ok_best =
check_armijo(best, prev, opt);
943 if (armijo_ok_best) {
944 wolfe_check = update_with_tick(scratch, curr, prev, best, p);
945 curr.update(scratch, best);
int64_t size(const T &m)
Returns the size (number of elements) of a matrix_cl or var_value<matrix_cl<T>>.
(Expert) Numerical traits for algorithmic differentiation variables.
WolfeStatus wolfe_line_search(Info &wolfe_info, UpdateFun &&update_fun, Options &&opt, Stream *msgs)
Strong Wolfe line search for maximization.
@ ConvergedObjectiveAndGradient
auto check_wolfe_curve(double dir_deriv_next, double dir_deriv_init, Option &&opt)
auto wolfe_status_str(WolfeStatus s)
Helper function for pretty printing.
auto check_armijo(double obj_next, double obj_init, double alpha_next, double dir0, Option &&opt)
Scalar cubic_spline(Scalar x_left, Scalar f_left, Scalar df_left, Scalar x_right, Scalar f_right, Scalar df_right) noexcept
Selects a safeguarded trial point for maximizing a scalar function on a line.
bool check_wolfe(const Eval &eval, const WolfeT &prev, const Option &opt)
auto retry_evaluate(Update &&update, Proposal &&proposal, Curr &&curr, Prev &&prev, Eval &eval, P &&p, Backoff &&backoff)
Retry evaluation of a step until it passes a validity check.
static constexpr double e()
Return the base of the natural logarithm.
T eval(T &&arg)
Inputs which have a plain_type equal to their own type are forwarded unmodified (for Eigen expressions ...
Matrices and templated mathematical functions.
const auto & alpha() const
constexpr Eval(double alpha, double obj, double dir)
evaluation struct for Wolfe line search
void update(WolfeData &other, const Eval &eval)
void update(WolfeData &other)
const auto & dir() const &
const auto & theta() const &
WolfeData(Eigen::Index n)
WolfeData(ObjFun &&obj_fun, Eigen::Index n, const Theta0 &theta0, ThetaGradF &&theta_grad_f)
WolfeData(Eigen::Index n, const LLFun &ll_fun, const LLArgs &ll_args, const Msgs &msgs)
const auto & alpha() const &
WolfeData(ObjFun &&obj_fun, const Eigen::VectorXd &a, const Theta0 &theta0, ThetaGradF &&theta_grad_f)
const auto & obj() const &
void swap(WolfeData &other)
Eigen::VectorXd theta_grad_
const auto & theta_grad() const &
Data used in current evaluation of wolfe line search at a particular stepsize.
const auto & scratch() const &
const auto & prev() const &
WolfeInfo(Eigen::Index n)
const auto & curr() const &
WolfeInfo(WolfeData &&curr, WolfeData &&prev)
WolfeInfo(ObjFun &&obj_fun, AInit &&a_init, Theta0 &&theta0, ThetaGradF &&theta_grad_f)
Construct WolfeInfo with a consistent (a_init, theta_init) pair.
Data object used in wolfe line search.
WolfeStatus(WolfeReturn stop, int evals, int back)
WolfeStatus(WolfeReturn stop, int evals, int back, bool success)
Struct to hold the result status of the Wolfe line search.
double rel_grad_threshold
constexpr laplace_line_search_options(int max_iter)
double tau
Backtracking shrinkage factor.
double min_alpha
Minimum allowable step size.
double abs_grad_threshold
Absolute gradient tolerance.
double scale_up
Step size expansion factor.
double c2
Curvature condition parameter.
constexpr laplace_line_search_options()=default
double c1
Armijo condition parameter (sufficient decrease).
double max_alpha
Maximum allowable step size.
double abs_obj_threshold
Absolute objective tolerance.
Options for Wolfe line search during optimization.