#ifndef LBFGS_HPP
#define LBFGS_HPP

#include <Eigen/Eigen>
#include <cmath>
#include <algorithm>
#include <time.h>
#include <chrono>

namespace lbfgs
{
// ----------------------- Data Type Part -----------------------

/**
 * L-BFGS optimization parameters.
 */
struct lbfgs_parameter_t
{
    /**
     * The number of corrections to approximate the inverse hessian matrix.
     * The L-BFGS routine stores the computation results of previous m
     * iterations to approximate the inverse hessian matrix of the current
     * iteration. This parameter controls the size of the limited memories
     * (corrections). The default value is 8. Values less than 3 are
     * not recommended. Large values will result in excessive computing time.
     */
    int mem_size = 8;

    /**
     * Epsilon for grad convergence test. DO NOT USE IT in nonsmooth cases!
     * Set it to 0.0 and use the past-delta-based test for nonsmooth functions.
     * This parameter determines the accuracy with which the solution is to
     * be found. A minimization terminates when
     *     ||g(x)||_inf / max(1, ||x||_inf) < g_epsilon,
     * where ||.||_inf is the infinity norm. The default value is 0.0.
     * This should be greater than 1.0e-6 in practice because L-BFGS does
     * not directly reduce the first-order residual. It still needs the
     * function value, which can be corrupted by machine_prec when ||g|| is small.
     */
    double g_epsilon = 0.0;

    /**
     * Distance for delta-based convergence test.
     * This parameter determines the distance, in iterations, to compute
     * the rate of decrease of the cost function. If the value of this
     * parameter is zero, the library does not perform the delta-based
     * convergence test. The default value is 3.
     */
    int past = 3;

    /**
     * Delta for convergence test.
     * This parameter determines the minimum rate of decrease of the
     * cost function. The library stops iterations when the following
     * condition is met:
     *     |f' - f| / max(1, |f|) < delta,
     * where f' is the cost value of past iterations ago, and f is
     * the cost value of the current iteration.
     * The default value is 1.0e-6.
     */
    double delta = 1.0e-6;
    /**
     * The maximum number of iterations.
     * The lbfgs_optimize() function terminates a minimization process with
     * the ::LBFGSERR_MAXIMUMITERATION status code when the iteration count
     * exceeds this parameter. Setting this parameter to zero continues a
     * minimization process until convergence or an error. The default value
     * is 0.
     */
    int max_iterations = 0;
    /**
     * The maximum number of trials for the line search.
     * This parameter controls the number of function and gradient evaluations
     * per iteration for the line search routine. The default value is 64.
     */
    int max_linesearch = 64;

    /**
     * The minimum step of the line search routine.
     * The default value is 1.0e-20. This value need not be modified unless
     * the exponents are too large for the machine being used, or unless the
     * problem is extremely badly scaled (in which case the exponents should
     * be increased).
     */
    double min_step = 1.0e-20;

    /**
     * The maximum step of the line search.
     * The default value is 1.0e+20. This value need not be modified unless
     * the exponents are too large for the machine being used, or unless the
     * problem is extremely badly scaled (in which case the exponents should
     * be increased).
     */
    double max_step = 1.0e+20;

    /**
     * A parameter to control the accuracy of the line search routine.
     * The default value is 1.0e-4. This parameter should be greater
     * than zero and smaller than 1.0.
     */
    double f_dec_coeff = 1.0e-4;

    /**
     * A parameter to control the accuracy of the line search routine.
     * The default value is 0.9. If the function and gradient
     * evaluations are inexpensive with respect to the cost of the
     * iteration (which is sometimes the case when solving very large
     * problems) it may be advantageous to set this parameter to a small
     * value. A typical small value is 0.1. This parameter should be
     * greater than the f_dec_coeff parameter and smaller than 1.0.
     */
    double s_curv_coeff = 0.9;
    /**
     * A parameter to ensure the global convergence for nonconvex functions.
     * The default value is 1.0e-6. The parameter enables the so-called
     * cautious update for L-BFGS, especially when the convergence is
     * not sufficient. The parameter must be positive but is typically
     * less than 1.0e-3 in practice.
     */
    double cautious_factor = 1.0e-6;
    /**
     * The machine precision for floating-point values. The default is 1.0e-16.
     * This parameter must be a positive value set by a client program to
     * estimate the machine precision.
     */
    double machine_prec = 1.0e-16;
};
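
/*
    A minimal tuning sketch (not part of the library; the values below are
    illustrative assumptions, not recommendations from this header): for a
    nonsmooth objective, the notes above suggest disabling the gradient-based
    test and relying on the past/delta test instead.

    lbfgs::lbfgs_parameter_t nonsmooth_param;
    nonsmooth_param.g_epsilon = 0.0;  // disable the gradient test (nonsmooth case)
    nonsmooth_param.past = 3;         // compare against the cost 3 iterations ago
    nonsmooth_param.delta = 1.0e-6;   // stop once the relative decrease drops below delta
    nonsmooth_param.mem_size = 8;     // default limited-memory size
*/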
/**
 * Return values of lbfgs_optimize().
 * Roughly speaking, a negative value indicates an error.
 */
enum
{
    /** L-BFGS reaches convergence. */
    LBFGS_CONVERGENCE = 0,
    /** L-BFGS satisfies stopping criteria. */
    LBFGS_STOP,
    /** The iteration has been canceled by the monitor callback. */
    LBFGS_CANCELED,
    /** Unknown error. */
    LBFGSERR_UNKNOWNERROR = -1024,
    /** Invalid number of variables specified. */
    LBFGSERR_INVALID_N,
    /** Invalid parameter lbfgs_parameter_t::mem_size specified. */
    LBFGSERR_INVALID_MEMSIZE,
    /** Invalid parameter lbfgs_parameter_t::g_epsilon specified. */
    LBFGSERR_INVALID_GEPSILON,
    /** Invalid parameter lbfgs_parameter_t::past specified. */
    LBFGSERR_INVALID_TESTPERIOD,
    /** Invalid parameter lbfgs_parameter_t::delta specified. */
    LBFGSERR_INVALID_DELTA,
    /** Invalid parameter lbfgs_parameter_t::min_step specified. */
    LBFGSERR_INVALID_MINSTEP,
    /** Invalid parameter lbfgs_parameter_t::max_step specified. */
    LBFGSERR_INVALID_MAXSTEP,
    /** Invalid parameter lbfgs_parameter_t::f_dec_coeff specified. */
    LBFGSERR_INVALID_FDECCOEFF,
    /** Invalid parameter lbfgs_parameter_t::s_curv_coeff specified. */
    LBFGSERR_INVALID_SCURVCOEFF,
    /** Invalid parameter lbfgs_parameter_t::machine_prec specified. */
    LBFGSERR_INVALID_MACHINEPREC,
    /** Invalid parameter lbfgs_parameter_t::max_linesearch specified. */
    LBFGSERR_INVALID_MAXLINESEARCH,
    /** The function value became NaN or Inf. */
    LBFGSERR_INVALID_FUNCVAL,
    /** The line-search step became smaller than lbfgs_parameter_t::min_step. */
    LBFGSERR_MINIMUMSTEP,
    /** The line-search step became larger than lbfgs_parameter_t::max_step. */
    LBFGSERR_MAXIMUMSTEP,
    /** The line search reached the maximum number of trials; assumptions not satisfied or precision not achievable. */
    LBFGSERR_MAXIMUMLINESEARCH,
    /** The algorithm routine reaches the maximum number of iterations. */
    LBFGSERR_MAXIMUMITERATION,
    /** Relative search interval width is at least lbfgs_parameter_t::machine_prec. */
    LBFGSERR_WIDTHTOOSMALL,
    /** A logic error (negative line-search step) occurred. */
    LBFGSERR_INVALIDPARAMETERS,
    /** The current search direction increases the cost function value. */
    LBFGSERR_INCREASEGRADIENT,
};
/**
 * Callback interface to provide cost function and gradient evaluations.
 *
 * The lbfgs_optimize() function calls this function to obtain the values of the
 * cost function and its gradients when needed. A client program must implement
 * this function to evaluate the values of the cost function and its
 * gradients, given the current values of the variables.
 *
 * @param instance The user data sent for the lbfgs_optimize() function by the client.
 * @param x The current values of variables.
 * @param g The gradient vector. The callback function must compute
 *          the gradient values for the current variables.
 * @retval double The value of the cost function for the current variables.
 */
typedef double (*lbfgs_evaluate_t)(void *instance,
                                   const Eigen::VectorXd &x,
                                   Eigen::VectorXd &g);
/**
 * Callback interface to monitor the progress of the minimization process.
 *
 * The lbfgs_optimize() function calls this function for each iteration. By
 * implementing this function, a client program can store or display the current
 * progress of the minimization process. If it is not needed, just pass nullptr.
 *
 * @param instance The user data sent for the lbfgs_optimize() function by the client.
 * @param x The current values of variables.
 * @param g The current gradient values of variables.
 * @param fx The current value of the cost function.
 * @param step The line-search step used for this iteration.
 * @param k The iteration count.
 * @param ls The number of evaluations called for this iteration.
 * @retval int Zero to continue the minimization process. Returning a
 *             non-zero value will cancel the minimization process.
 */
typedef int (*lbfgs_progress_t)(void *instance,
                                const Eigen::VectorXd &x,
                                const Eigen::VectorXd &g,
                                const double fx,
                                const double step,
                                const int k,
                                const int ls);
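
/*
    A minimal monitor sketch compatible with lbfgs_progress_t (an illustrative
    assumption, not part of the library; requires <cstdio>): it prints the
    iteration count, cost, and step, and never cancels.

    int monitor(void *instance, const Eigen::VectorXd &x,
                const Eigen::VectorXd &g, const double fx,
                const double step, const int k, const int ls)
    {
        std::printf("iter %d: fx = %g, step = %g, evals = %d\n", k, fx, step, ls);
        return 0; // return non-zero here to cancel the minimization
    }
*/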
/**
 * Callback data struct
 */
struct callback_data_t
{
    void *instance = nullptr;
    lbfgs_evaluate_t proc_evaluate = nullptr;
    lbfgs_progress_t proc_progress = nullptr;
};
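
/*
    A minimal end-to-end usage sketch (illustrative assumptions only;
    `quad_eval` and `demo` are hypothetical names, not part of this header).
    The callback matches lbfgs_evaluate_t, and the call matches the
    lbfgs_optimize() signature declared below.

    double quad_eval(void *instance, const Eigen::VectorXd &x, Eigen::VectorXd &g)
    {
        g = x;                        // gradient of f(x) = 0.5 * ||x||^2
        return 0.5 * x.squaredNorm();
    }

    int demo()
    {
        Eigen::VectorXd x = Eigen::VectorXd::Constant(4, 10.0); // initial guess x0
        double fmin = 0.0;
        lbfgs::lbfgs_parameter_t param;                         // defaults declared above
        const int ret = lbfgs::lbfgs_optimize(x, fmin, quad_eval, nullptr, nullptr, param);
        // ret >= 0 on success; lbfgs_strerror(ret) gives a description.
        return ret;
    }
*/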
// ----------------------- L-BFGS Part -----------------------

/**
 * Line search method for smooth or nonsmooth functions.
 * This function performs a line search to find a point that satisfies
 * both the Armijo condition and the weak Wolfe condition. It is
 * as robust as the backtracking line search but further applies
 * to continuous and piecewise smooth functions where the strong
 * Wolfe condition usually does not hold.
 *
 * @see
 * Adrian S. Lewis and Michael L. Overton. Nonsmooth optimization
 * via quasi-Newton methods. Mathematical Programming, Vol 141,
 * No 1, pp. 135-163, 2013.
 */
inline int line_search_lewisoverton(Eigen::VectorXd &x,
                                    double &f,
                                    Eigen::VectorXd &g,
                                    double &stp,
                                    const Eigen::VectorXd &s,
                                    const Eigen::VectorXd &xp,
                                    const Eigen::VectorXd &gp,
                                    const double stpmin,
                                    const double stpmax,
                                    const callback_data_t &cd,
                                    const lbfgs_parameter_t &param)
{
    // x is the decision variable vector
    // f is the function value at x
    // g is the gradient value at x
    // stp is the initial stepsize for the line search
    // s is the search direction vector
    // xp is the decision variable vector at the current iteration
    // gp is the gradient vector at the current iteration
    // stpmin is the minimum allowable stepsize
    // stpmax is the maximum allowable stepsize
    // the struct param contains all necessary parameters
    // cd contains all necessary callback functions
    // e.g. x = xp; f = cd.proc_evaluate(cd.instance, x, g);
    // the above line assigns x with xp and computes the function and gradient at x
    // note that the output x, f, and g must satisfy the weak Wolfe condition when the function returns
    //////////////////////////// HOMEWORK 1 START ////////////////////////////
    // Lewis-Overton line search: bracket and bisect on the Armijo
    // (sufficient decrease) and weak Wolfe (curvature) conditions.
    int linesearch_cnt = 0;          // line-search trial counter
    double step_l = 0.0;             // lower bound l of the step bracket
    double step_u = 0.0;             // upper bound u (0 means not bracketed yet)
    const double fp = f;             // f(xp): the caller passes the current cost in f
    const double dginit = gp.dot(s); // directional derivative g(xp)^T s
    if (dginit > 0.0)
    {
        /* s is not a descent direction; report the logic error. */
        x = xp;
        g = gp;
        return LBFGSERR_INCREASEGRADIENT;
    }
    while (linesearch_cnt < param.max_linesearch)
    {
        ++linesearch_cnt;
        x = xp + stp * s; // candidate iterate along direction s with step stp
        f = cd.proc_evaluate(cd.instance, x, g); // cost and gradient at the candidate
        if (std::isnan(f) || std::isinf(f))
        {
            return LBFGSERR_INVALID_FUNCVAL;
        }
        if (f > fp + param.f_dec_coeff * stp * dginit)
        {
            step_u = stp; // Armijo violated: shrink from above
        }
        else if (g.dot(s) < param.s_curv_coeff * dginit)
        {
            step_l = stp; // weak Wolfe violated: grow from below
        }
        else
        {
            return linesearch_cnt; // both conditions hold: return the trial count
        }
        /* Bisect once the step is bracketed; otherwise keep expanding. */
        stp = step_u > 0.0 ? 0.5 * (step_l + step_u) : 2.0 * stp;
        if (stp < stpmin)
        {
            return LBFGSERR_MINIMUMSTEP;
        }
        if (stp > stpmax)
        {
            return LBFGSERR_MAXIMUMSTEP;
        }
    }
    return LBFGSERR_MAXIMUMLINESEARCH; // the -1009 return code originates here
    //////////////////////////// HOMEWORK 1 END ////////////////////////////
}
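
/*
    A minimal standalone check of the line search above (illustrative
    assumptions; `quad` and `demo_ls` are hypothetical names). On entry,
    f must hold f(xp), matching how lbfgs_optimize() calls this routine.

    double quad(void *, const Eigen::VectorXd &x, Eigen::VectorXd &g)
    {
        g = 2.0 * x;
        return x.squaredNorm();
    }

    int demo_ls()
    {
        lbfgs::lbfgs_parameter_t param;
        lbfgs::callback_data_t cd;
        cd.proc_evaluate = quad;
        Eigen::VectorXd xp = Eigen::VectorXd::Constant(1, 4.0);
        Eigen::VectorXd x = xp, gp(1), g(1);
        double f = quad(nullptr, xp, gp);  // f(xp) and g(xp)
        const Eigen::VectorXd s = -gp;     // descent direction
        double stp = 1.0 / s.norm();
        return lbfgs::line_search_lewisoverton(x, f, g, stp, s, xp, gp,
                                               param.min_step, param.max_step,
                                               cd, param); // > 0 on success
    }
*/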
/**
 * Start an L-BFGS optimization.
 * Assumptions: 1. f(x) is either C2 or C0 but piecewise C2;
 *              2. f(x) is lower bounded;
 *              3. f(x) has bounded level sets;
 *              4. g(x) is either the gradient or subgradient;
 *              5. The gradient exists at the initial guess x0.
 * A user must implement a function compatible with ::lbfgs_evaluate_t (evaluation
 * callback) and pass the pointer to the callback function to the lbfgs_optimize()
 * arguments. Similarly, a user can implement a function compatible with
 * ::lbfgs_progress_t (progress callback) to obtain the current progress
 * (e.g., variables, function, and gradient, etc.) and to cancel the iteration
 * process if necessary. Implementation of the progress callback is optional:
 * a user can pass nullptr if progress notification is not necessary.
 *
 * @param x             The vector of decision variables.
 *                      THE INITIAL GUESS x0 SHOULD BE SET BEFORE THE CALL!
 *                      A client program can receive decision variables
 *                      through this vector, at which the cost and its
 *                      gradient are queried during minimization.
 * @param f             The ref to the variable that receives the final
 *                      value of the cost function for the variables.
 * @param proc_evaluate The callback function to provide function f(x) and
 *                      gradient g(x) evaluations given the current values of
 *                      the variables x. A client program must implement a
 *                      callback function compatible with lbfgs_evaluate_t
 *                      and pass the pointer to the callback function.
 * @param proc_progress The callback function to receive the progress
 *                      (the number of iterations, the current value of
 *                      the cost function) of the minimization
 *                      process. This argument can be set to nullptr if
 *                      a progress report is unnecessary.
 * @param instance      A user data pointer for client programs. The callback
 *                      functions will receive the value of this argument.
 * @param param         The parameters for L-BFGS optimization.
 * @retval int          The status code. This function returns a nonnegative
 *                      integer if the minimization process terminates without
 *                      an error. A negative integer indicates an error.
 */
inline int lbfgs_optimize(Eigen::VectorXd &x,             // decision variables
                          double &f,                      // receives the final cost value
                          lbfgs_evaluate_t proc_evaluate, // cost function callback
                          lbfgs_progress_t proc_progress, // may be nullptr
                          void *instance,                 // user data (e.g., this)
                          const lbfgs_parameter_t &param) // parameters
{
    int ret, i, j, k, ls, end, bound;
    double step, step_min, step_max, fx, ys, yy;
    double gnorm_inf, xnorm_inf, beta, rate, cau;
    const int n = x.size();
    const int m = param.mem_size;

    /* Check the input parameters for errors. */
    if (n <= 0)
    {
        return LBFGSERR_INVALID_N;
    }
    if (m <= 0)
    {
        return LBFGSERR_INVALID_MEMSIZE;
    }
    if (param.g_epsilon < 0.0)
    {
        return LBFGSERR_INVALID_GEPSILON;
    }
    if (param.past < 0)
    {
        return LBFGSERR_INVALID_TESTPERIOD;
    }
    if (param.delta < 0.0)
    {
        return LBFGSERR_INVALID_DELTA;
    }
    if (param.min_step < 0.0)
    {
        return LBFGSERR_INVALID_MINSTEP;
    }
    if (param.max_step < param.min_step)
    {
        return LBFGSERR_INVALID_MAXSTEP;
    }
    if (!(param.f_dec_coeff > 0.0 &&
          param.f_dec_coeff < 1.0))
    {
        return LBFGSERR_INVALID_FDECCOEFF;
    }
    if (!(param.s_curv_coeff < 1.0 &&
          param.s_curv_coeff > param.f_dec_coeff))
    {
        return LBFGSERR_INVALID_SCURVCOEFF;
    }
    if (!(param.machine_prec > 0.0))
    {
        return LBFGSERR_INVALID_MACHINEPREC;
    }
    if (param.max_linesearch <= 0)
    {
        return LBFGSERR_INVALID_MAXLINESEARCH;
    }
    /* Prepare intermediate variables. */
    Eigen::VectorXd xp(n);
    Eigen::VectorXd g(n);
    Eigen::VectorXd gp(n);
    Eigen::VectorXd d(n);
    Eigen::VectorXd pf(std::max(1, param.past));

    /* Initialize the limited memory. */
    Eigen::VectorXd lm_alpha = Eigen::VectorXd::Zero(m);
    Eigen::MatrixXd lm_s = Eigen::MatrixXd::Zero(n, m);
    Eigen::MatrixXd lm_y = Eigen::MatrixXd::Zero(n, m);
    Eigen::VectorXd lm_ys = Eigen::VectorXd::Zero(m);

    /* Construct a callback data. */
    callback_data_t cd;
    cd.instance = instance;
    cd.proc_evaluate = proc_evaluate;
    cd.proc_progress = proc_progress;

    /* Evaluate the function value and its gradient. */
    fx = cd.proc_evaluate(cd.instance, x, g);

    /* Store the initial value of the cost function. */
    pf(0) = fx;

    /*
        Compute the direction;
        we assume the initial hessian matrix H_0 as the identity matrix.
    */
    d = -g;

    /*
        Make sure that the initial variables are not a stationary point.
    */
    gnorm_inf = g.cwiseAbs().maxCoeff();
    xnorm_inf = x.cwiseAbs().maxCoeff();
    if (gnorm_inf / std::max(1.0, xnorm_inf) < param.g_epsilon)
    {
        /* The initial guess is already a stationary point. */
        ret = LBFGS_CONVERGENCE;
    }
    else
    {
        /*
            Compute the initial step:
        */
        step = 1.0 / d.norm();
        k = 1;
        end = 0;
        bound = 0;
        auto LBFGS_start = std::chrono::high_resolution_clock::now();
        auto LBFGS_NOW = std::chrono::high_resolution_clock::now();
        std::chrono::duration<double, std::milli> LBFGS_duration;
        while (true)
        {
            LBFGS_NOW = std::chrono::high_resolution_clock::now();
            LBFGS_duration = LBFGS_NOW - LBFGS_start;
            if (LBFGS_duration.count() > 30)
            {
                // Added by the author: a 30 ms wall-clock budget. Seems to work well;
                // not worth writing up in the short paper, but worth mentioning in the thesis.
                ret = 95432; // ad-hoc status code for the timeout
                break;       // brute-force fix for the occasional extremely long solve
            }
            /* Store the current position and gradient vectors. */
            xp = x;
            gp = g;

            /* If the step bound can be provided dynamically, then apply it. */
            step_min = param.min_step;
            step_max = param.max_step;

            /* Search for an optimal step. */
            ls = line_search_lewisoverton(x, fx, g, step, d, xp, gp, step_min, step_max, cd, param);
            if (ls < 0)
            {
                /* Revert to the previous point. */
                x = xp;
                g = gp;
                ret = ls;
                break;
            }
            /* Report the progress. */
            if (cd.proc_progress)
            {
                if (cd.proc_progress(cd.instance, x, g, fx, step, k, ls))
                {
                    ret = LBFGS_CANCELED;
                    break;
                }
            }

            /*
                Convergence test.
                The criterion is given by the following formula:
                ||g(x)||_inf / max(1, ||x||_inf) < g_epsilon
            */
            gnorm_inf = g.cwiseAbs().maxCoeff();
            xnorm_inf = x.cwiseAbs().maxCoeff();
            if (gnorm_inf / std::max(1.0, xnorm_inf) < param.g_epsilon)
            {
                /* Convergence. */
                ret = LBFGS_CONVERGENCE;
                break;
            }

            /*
                Test for stopping criterion.
                The criterion is given by the following formula:
                |f(past_x) - f(x)| / max(1, |f(x)|) < \delta.
            */
            if (0 < param.past)
            {
                /* We don't test the stopping criterion while k < past. */
                if (param.past <= k)
                {
                    /* The stopping criterion. */
                    rate = std::fabs(pf(k % param.past) - fx) / std::max(1.0, std::fabs(fx));
                    if (rate < param.delta)
                    {
                        ret = LBFGS_STOP;
                        break;
                    }
                }
                /* Store the current value of the cost function. */
                pf(k % param.past) = fx;
            }

            if (param.max_iterations != 0 && param.max_iterations <= k)
            {
                /* Maximum number of iterations. */
                ret = LBFGSERR_MAXIMUMITERATION;
                break;
            }

            /* Count the iteration number. */
            ++k;
            /*
                Update vectors s and y:
                s_{k+1} = x_{k+1} - x_{k} = \step * d_{k}.
                y_{k+1} = g_{k+1} - g_{k}.
            */
            lm_s.col(end) = x - xp;
            lm_y.col(end) = g - gp;

            /*
                Compute scalars ys and yy:
                ys = y^t \cdot s = 1 / \rho.
                yy = y^t \cdot y.
                Notice that yy is used for scaling the hessian matrix H_0 (Cholesky factor).
            */
            ys = lm_y.col(end).dot(lm_s.col(end));
            yy = lm_y.col(end).squaredNorm();
            lm_ys(end) = ys;

            /* Compute the negative of gradients. */
            d = -g;

            /*
                Only cautious update is performed here as long as
                (y^t \cdot s) / ||s_{k+1}||^2 > \epsilon * ||g_{k}||^\alpha,
                where \epsilon is the cautious factor and a proposed value
                for \alpha is 1.
                This is not for enforcing the PD of the approximated Hessian
                since ys > 0 is already ensured by the weak Wolfe condition.
                This is to ensure the global convergence as described in:
                Dong-Hui Li and Masao Fukushima. On the global convergence of
                the BFGS method for nonconvex unconstrained optimization problems.
                SIAM Journal on Optimization, Vol 11, No 4, pp. 1054-1064, 2001.
            */
            cau = lm_s.col(end).squaredNorm() * gp.norm() * param.cautious_factor;
            if (ys > cau)
            {
                /*
                    Recursive formula to compute dir = -(H \cdot g).
                    This is described in page 779 of:
                    Jorge Nocedal.
                    Updating Quasi-Newton Matrices with Limited Storage.
                    Mathematics of Computation, Vol. 35, No. 151,
                    pp. 773--782, 1980.
                */
                ++bound;
                bound = m < bound ? m : bound;
                end = (end + 1) % m;
                j = end;
                for (i = 0; i < bound; ++i)
                {
                    j = (j + m - 1) % m; /* if (--j == -1) j = m-1; */
                    /* \alpha_{j} = \rho_{j} s^{t}_{j} \cdot q_{k+1}. */
                    lm_alpha(j) = lm_s.col(j).dot(d) / lm_ys(j);
                    /* q_{i} = q_{i+1} - \alpha_{i} y_{i}. */
                    d += (-lm_alpha(j)) * lm_y.col(j);
                }
                d *= ys / yy;
                for (i = 0; i < bound; ++i)
                {
                    /* \beta_{j} = \rho_{j} y^t_{j} \cdot \gamma_{i}. */
                    beta = lm_y.col(j).dot(d) / lm_ys(j);
                    /* \gamma_{i+1} = \gamma_{i} + (\alpha_{j} - \beta_{j}) s_{j}. */
                    d += (lm_alpha(j) - beta) * lm_s.col(j);
                    j = (j + 1) % m; /* if (++j == m) j = 0; */
                }
            }

            /* The search direction d is ready. We try step = 1 first. */
            step = 1.0;
        }
    }

    /* Return the final value of the cost function. */
    f = fx;
    return ret;
}
/**
 * Get string description of an lbfgs_optimize() return code.
 *
 * @param err A value returned by lbfgs_optimize().
 */
inline const char *lbfgs_strerror(const int err)
{
    switch (err)
    {
    case LBFGS_CONVERGENCE:
        return "Success: reached convergence (g_epsilon).";
    case LBFGS_STOP:
        return "Success: met stopping criteria (past f decrease less than delta).";
    case LBFGS_CANCELED:
        return "The iteration has been canceled by the monitor callback.";
    case LBFGSERR_UNKNOWNERROR:
        return "Unknown error.";
    case LBFGSERR_INVALID_N:
        return "Invalid number of variables specified.";
    case LBFGSERR_INVALID_MEMSIZE:
        return "Invalid parameter lbfgs_parameter_t::mem_size specified.";
    case LBFGSERR_INVALID_GEPSILON:
        return "Invalid parameter lbfgs_parameter_t::g_epsilon specified.";
    case LBFGSERR_INVALID_TESTPERIOD:
        return "Invalid parameter lbfgs_parameter_t::past specified.";
    case LBFGSERR_INVALID_DELTA:
        return "Invalid parameter lbfgs_parameter_t::delta specified.";
    case LBFGSERR_INVALID_MINSTEP:
        return "Invalid parameter lbfgs_parameter_t::min_step specified.";
    case LBFGSERR_INVALID_MAXSTEP:
        return "Invalid parameter lbfgs_parameter_t::max_step specified.";
    case LBFGSERR_INVALID_FDECCOEFF:
        return "Invalid parameter lbfgs_parameter_t::f_dec_coeff specified.";
    case LBFGSERR_INVALID_SCURVCOEFF:
        return "Invalid parameter lbfgs_parameter_t::s_curv_coeff specified.";
    case LBFGSERR_INVALID_MACHINEPREC:
        return "Invalid parameter lbfgs_parameter_t::machine_prec specified.";
    case LBFGSERR_INVALID_MAXLINESEARCH:
        return "Invalid parameter lbfgs_parameter_t::max_linesearch specified.";
    case LBFGSERR_INVALID_FUNCVAL:
        return "The function value became NaN or Inf.";
    case LBFGSERR_MINIMUMSTEP:
        return "The line-search step became smaller than lbfgs_parameter_t::min_step.";
    case LBFGSERR_MAXIMUMSTEP:
        return "The line-search step became larger than lbfgs_parameter_t::max_step.";
    case LBFGSERR_MAXIMUMLINESEARCH:
        return "The line search reached the maximum number of trials; assumptions not satisfied or precision not achievable.";
    case LBFGSERR_MAXIMUMITERATION:
        return "The algorithm routine reaches the maximum number of iterations.";
    case LBFGSERR_WIDTHTOOSMALL:
        return "Relative search interval width is at least lbfgs_parameter_t::machine_prec.";
    case LBFGSERR_INVALIDPARAMETERS:
        return "A logic error (negative line-search step) occurred.";
    case LBFGSERR_INCREASEGRADIENT:
        return "The current search direction increases the cost function value.";
    default:
        return "(unknown)";
    }
}
} // namespace lbfgs
#endif