1#ifndef STAN_MATH_MIX_FUNCTOR_WOLFE_LINE_SEARCH_HPP
2#define STAN_MATH_MIX_FUNCTOR_WOLFE_LINE_SEARCH_HPP
155template <
typename Scalar>
157 Scalar df_left, Scalar x_right,
159 Scalar df_right)
noexcept {
160 const Scalar midpoint = (x_left + x_right) / Scalar(2);
163 if (!(x_right > x_left) || !std::isfinite(f_left) || !std::isfinite(f_right)
164 || !std::isfinite(df_left) || !std::isfinite(df_right)) {
168 const Scalar width = x_right - x_left;
169 const Scalar eps = std::numeric_limits<Scalar>::epsilon();
174 = std::max(std::max(std::abs(x_left), std::abs(x_right)), Scalar(1));
175 if (width <= eps * x_scale) {
181 const Scalar df_left_s = width * df_left;
182 const Scalar df_right_s = width * df_right;
187 const Scalar a0 = f_left;
188 const Scalar a1 = df_left_s;
190 = Scalar(3) * (f_right - f_left) - Scalar(2) * df_left_s - df_right_s;
191 const Scalar a3 = Scalar(2) * (f_left - f_right) + df_left_s + df_right_s;
193 auto eval = [&](Scalar s) -> Scalar {
195 return ((a3 * s + a2) * s + a1) * s + a0;
200 constexpr Scalar edge_guard = Scalar(1
e-9);
203 Scalar best_val =
eval(0.5);
204 auto consider = [&](Scalar s) {
205 if (!std::isfinite(s)) {
208 if (!(s > edge_guard && s < Scalar(1) - edge_guard)) {
211 const Scalar value =
eval(s);
212 if (value > best_val) {
220 const Scalar denom = df_left_s - df_right_s;
221 const Scalar deriv_scale = std::max(
222 std::max(std::abs(df_left_s), std::abs(df_right_s)), Scalar(1));
223 if (std::abs(denom) > eps * deriv_scale) {
224 const Scalar s_secant
233 const Scalar A = Scalar(3) * a3;
234 const Scalar B = Scalar(2) * a2;
238 = std::max(std::max(std::abs(B), std::abs(C)), Scalar(1));
239 const Scalar A_tol = eps * scale;
241 if (std::abs(A) <= A_tol) {
243 const Scalar B_tol = eps * scale;
244 if (std::abs(B) > B_tol) {
249 Scalar disc = std::fma(-Scalar(4) * A, C, B * B);
250 const Scalar disc_scale
251 = std::max(B * B + std::abs(Scalar(4) * A * C), Scalar(1));
252 const Scalar disc_tol = Scalar(10) * eps * disc_scale;
255 if (disc < Scalar(0) && -disc <= disc_tol) {
259 if (disc >= Scalar(0)) {
260 const Scalar r = std::sqrt(disc);
261 const Scalar q = -Scalar(0.5) * (B + std::copysign(r, B));
262 const Scalar q_scale = std::max(std::abs(B) + r, Scalar(1));
263 const Scalar q_tol = eps * q_scale;
265 if (std::abs(q) > q_tol) {
266 const Scalar s1 = q / A;
267 const Scalar s2 = C / q;
272 const Scalar s_vertex = -B / (Scalar(2) * A);
279 return x_left + best_s * width;
282template <
typename Eval,
typename Options>
284 auto alpha =
cubic_spline(low.alpha(), low.obj(), low.dir(), high.alpha(),
285 high.obj(), high.dir());
286 const double width = high.alpha() - low.alpha();
287 const double guard = 1
e-3 * width;
288 alpha = std::clamp(alpha, low.alpha() + guard, high.alpha() - guard);
292template <
typename Option>
293inline auto check_armijo(
double obj_next,
double obj_init,
double alpha_next,
294 double dir0, Option&& opt) {
295 return (obj_next >= obj_init)
296 && (obj_next >= obj_init + alpha_next * dir0 * opt.c1);
299template <
typename Eval,
typename WolfeT,
typename Option>
305template <
typename Option>
308 return std::abs(dir_deriv_next) <= (opt.c2 * std::abs(dir_deriv_init));
311template <
typename Eval,
typename WolfeT,
typename Option>
372 return "ConvergedGradient";
374 return "ConvergedObjective";
376 return "ConvergedObjectiveAndGradient";
378 return "IntervalTooSmall";
380 return "StepTooSmall";
382 return "ReachedMaxStep";
384 return "NumericalIssue";
407 inline const auto&
obj()
const {
return obj_; }
409 inline const auto&
dir()
const {
return dir_; }
433 template <
typename ObjFun,
typename ThetaGradF,
typename Theta0>
434 WolfeData(ObjFun&& obj_fun,
const Eigen::VectorXd&
a,
const Theta0& theta0,
435 ThetaGradF&& theta_grad_f)
441 template <
typename ObjFun,
typename ThetaGradF,
typename Theta0>
442 WolfeData(ObjFun&& obj_fun, Eigen::Index n,
const Theta0& theta0,
443 ThetaGradF&& theta_grad_f)
450 template <
typename LLFun,
typename LLArgs,
typename Msgs>
451 WolfeData(Eigen::Index n,
const LLFun& ll_fun,
const LLArgs& ll_args,
453 :
WolfeData(n,
Eigen::VectorXd::Zero(n), ll_fun, ll_args, msgs) {}
480 inline auto&
a() & {
return a_; }
481 inline const auto&
a() const& {
return a_; }
482 inline auto&&
a() && {
return std::move(
a_); }
515 template <
typename ObjFun,
typename Theta0,
typename AInit,
517 WolfeInfo(ObjFun&& obj_fun, AInit&& a_init, Theta0&& theta0,
518 ThetaGradF&& theta_grad_f)
519 :
curr_(
std::forward<ObjFun>(obj_fun),
std::forward<AInit>(a_init),
520 std::forward<Theta0>(theta0),
521 std::forward<ThetaGradF>(theta_grad_f)),
525 throw std::domain_error(
526 "laplace_marginal_density: log likelihood is not finite at initial "
527 "theta and likelihood arguments.");
544 if (this->init_dir_ < 0) {
546 this->init_dir_ *= -1;
592template <
typename Update,
typename Proposal,
typename Curr,
typename Prev,
593 typename Eval,
typename P,
typename Backoff>
595 Prev&& prev,
Eval&
eval, P&& p, Backoff&& backoff) {
597 auto res = update(proposal, curr, prev,
eval, p);
601 if (!backoff(
eval)) {
713template <
typename Info,
typename UpdateFun,
typename Options,
typename Stream>
715 Options&& opt, Stream* msgs) {
716 auto& curr = wolfe_info.curr_;
717 auto& prev = wolfe_info.prev_;
718 auto& scratch = wolfe_info.scratch_;
719 auto&& p = wolfe_info.p_;
720 auto&& dir_deriv_init = wolfe_info.init_dir_;
721 Eval low{0.0, prev.
obj(), dir_deriv_init};
722 prev.dir() = dir_deriv_init;
723 int total_updates = 0;
725 auto update_with_tick = [&total_updates, &opt, &best, &update_fun](
726 auto&& proposal,
auto&& curr,
auto&& prev,
728 const bool over_budget = total_updates > opt.max_iterations;
732 update_fun(proposal, curr, prev, best, p);
733 curr.update(proposal, best);
742 update_fun(proposal, curr, prev,
e, p);
748 = std::clamp(curr.alpha() * opt.scale_up, opt.min_alpha, opt.max_alpha);
749 Eval high{alpha_start, curr.
obj(), dir_deriv_init};
753 wolfe_check = update_with_tick(scratch, curr, prev, high, p);
757 if (high.alpha() < opt.min_alpha) {
767 high.
alpha() *= opt.scale_up;
768 if (high.alpha() > opt.max_alpha) {
771 wolfe_check = update_with_tick(scratch, curr, prev, high, p);
776 wolfe_check = update_with_tick(scratch, curr, prev, best, p);
780 curr.update(scratch, best);
783 if (best.
obj() < high.obj()) {
789 int num_backtracks = 0;
800 while (high.alpha() < opt.max_alpha) {
803 wolfe_check = update_with_tick(scratch, curr, prev, high, p);
809 curr.update(scratch, high);
813 if (best.
obj() < high.obj()) {
816 if (high.dir() > 0) {
818 high.
alpha() *= opt.scale_up;
825 const double grad_tol
826 = std::max(opt.abs_grad_threshold,
827 opt.rel_grad_threshold * std::abs(dir_deriv_init));
829 = std::max(opt.abs_obj_threshold,
830 opt.rel_obj_threshold * (1.0 + std::abs(prev.obj())));
831 auto check_bounds = [&](
auto&& curr_eval) {
833 const bool slope_check = std::abs(curr_eval.dir()) <= grad_tol;
835 const bool obj_check = std::abs(curr_eval.obj() - prev.obj()) <= obj_tol;
837 const bool alpha_check = curr_eval.alpha() < opt.min_alpha;
838 if (slope_check || obj_check || alpha_check) {
840 = curr_eval.obj() != low.obj() &&
check_armijo(curr_eval, prev, opt);
841 if (slope_check && obj_check) {
843 total_updates, num_backtracks, step_ok};
844 }
else if (slope_check) {
846 num_backtracks, step_ok};
847 }
else if (obj_check) {
849 num_backtracks, step_ok};
852 num_backtracks, step_ok};
858 auto check_b = check_bounds(high);
860 if (check_b.accept_) {
861 curr.update(scratch, high);
865 wolfe_check = update_with_tick(scratch, curr, prev, high, p);
879 while ((high.alpha() - low.alpha() > opt.min_alpha)
880 && high.alpha() > opt.min_alpha) {
882 const bool have_sign_change = (low.dir() * high.dir() < 0);
883 const bool high_armijo_ok =
check_armijo(high, prev, opt);
884 const bool use_cubic = have_sign_change && high_armijo_ok;
890 alpha_mid = 0.5 * (low.alpha() + high.alpha());
892 if (alpha_mid <= opt.min_alpha) {
895 Eval mid{alpha_mid, 0.0, 0.0};
896 auto wolfe_check = update_with_tick(scratch, curr, prev, mid, p);
900 if (mid.alpha() <= opt.min_alpha) {
902 num_backtracks,
false};
906 curr.update(scratch, mid);
911 if (mid.obj() > best.
obj()) {
914 if (mid.obj() > low.obj()) {
929 auto bounds_check = check_bounds(mid);
931 if (bounds_check.accept_) {
932 curr.update(scratch, mid);
939 const bool armijo_ok_best =
check_armijo(best, prev, opt);
940 if (armijo_ok_best) {
941 wolfe_check = update_with_tick(scratch, curr, prev, best, p);
942 curr.update(scratch, best);
int64_t size(const T &m)
Returns the size (number of the elements) of a matrix_cl or var_value<matrix_cl<T>>.
(Expert) Numerical traits for algorithmic differentiation variables.
WolfeStatus wolfe_line_search(Info &wolfe_info, UpdateFun &&update_fun, Options &&opt, Stream *msgs)
Strong Wolfe line search for maximization.
@ ConvergedObjectiveAndGradient
auto check_wolfe_curve(double dir_deriv_next, double dir_deriv_init, Option &&opt)
auto wolfe_status_str(WolfeStatus s)
Helper function for pretty printing.
auto check_armijo(double obj_next, double obj_init, double alpha_next, double dir0, Option &&opt)
Scalar cubic_spline(Scalar x_left, Scalar f_left, Scalar df_left, Scalar x_right, Scalar f_right, Scalar df_right) noexcept
Selects a safeguarded trial point for maximizing a scalar function on a line.
bool check_wolfe(const Eval &eval, const WolfeT &prev, const Option &opt)
auto retry_evaluate(Update &&update, Proposal &&proposal, Curr &&curr, Prev &&prev, Eval &eval, P &&p, Backoff &&backoff)
Retry evaluation of a step until it passes a validity check.
static constexpr double e()
Return the base of the natural logarithm.
T eval(T &&arg)
Inputs which have a plain_type equal to the own time are forwarded unmodified (for Eigen expressions ...
Matrices and templated mathematical functions.
const auto & alpha() const
constexpr Eval(double alpha, double obj, double dir)
evaluation struct for Wolfe line search
void update(WolfeData &other, const Eval &eval)
void update(WolfeData &other)
const auto & dir() const &
const auto & theta() const &
WolfeData(Eigen::Index n)
WolfeData(ObjFun &&obj_fun, Eigen::Index n, const Theta0 &theta0, ThetaGradF &&theta_grad_f)
WolfeData(Eigen::Index n, const LLFun &ll_fun, const LLArgs &ll_args, const Msgs &msgs)
const auto & alpha() const &
WolfeData(ObjFun &&obj_fun, const Eigen::VectorXd &a, const Theta0 &theta0, ThetaGradF &&theta_grad_f)
const auto & obj() const &
void swap(WolfeData &other)
Eigen::VectorXd theta_grad_
const auto & theta_grad() const &
Data used in current evaluation of wolfe line search at a particular stepsize.
const auto & scratch() const &
const auto & prev() const &
WolfeInfo(Eigen::Index n)
const auto & curr() const &
WolfeInfo(WolfeData &&curr, WolfeData &&prev)
WolfeInfo(ObjFun &&obj_fun, AInit &&a_init, Theta0 &&theta0, ThetaGradF &&theta_grad_f)
Construct WolfeInfo with a consistent (a_init, theta_init) pair.
Data object used in wolfe line search.
WolfeStatus(WolfeReturn stop, int evals, int back)
WolfeStatus(WolfeReturn stop, int evals, int back, bool success)
Struct to hold the result status of the Wolfe line search.
double rel_grad_threshold
constexpr laplace_line_search_options(int max_iter)
double tau
Backtracking shrinkage factor.
double min_alpha
Minimum allowable step size.
double abs_grad_threshold
Absolute gradient tolerance.
double scale_up
Step size expansion factor.
double c2
Curvature condition parameter.
constexpr laplace_line_search_options()=default
double c1
Armijo condition parameter (sufficient decrease).
double max_alpha
Maximum allowable step size.
double abs_obj_threshold
Absolute objective tolerance.
Options for Wolfe line search during optimization.