parser.hpp 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629
  1. /*
  2. * Copyright (c) 2014, Peter Thorson. All rights reserved.
  3. *
  4. * Redistribution and use in source and binary forms, with or without
  5. * modification, are permitted provided that the following conditions are met:
  6. * * Redistributions of source code must retain the above copyright
  7. * notice, this list of conditions and the following disclaimer.
  8. * * Redistributions in binary form must reproduce the above copyright
  9. * notice, this list of conditions and the following disclaimer in the
  10. * documentation and/or other materials provided with the distribution.
  11. * * Neither the name of the WebSocket++ Project nor the
  12. * names of its contributors may be used to endorse or promote products
  13. * derived from this software without specific prior written permission.
  14. *
  15. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  16. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  17. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  18. * ARE DISCLAIMED. IN NO EVENT SHALL PETER THORSON BE LIABLE FOR ANY
  19. * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  20. * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  21. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  22. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  23. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  24. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  25. *
  26. */
  27. #ifndef HTTP_PARSER_HPP
  28. #define HTTP_PARSER_HPP
  29. #include <algorithm>
  30. #include <map>
  31. #include <string>
  32. #include <utility>
  33. #include <websocketpp/utilities.hpp>
  34. #include <websocketpp/http/constants.hpp>
  35. namespace websocketpp {
  36. namespace http {
  37. namespace parser {
  /// Parser state identifiers
  /**
   * Enumerators are numbered consecutively from zero in declaration order.
   */
  namespace state {
      enum value {
          method   = 0,
          resource = 1,
          version  = 2,
          headers  = 3
      };
  } // namespace state
  /// Message body encoding identifiers
  /**
   * Enumerators are numbered consecutively from zero in declaration order;
   * `unknown` is therefore the zero value.
   */
  namespace body_encoding {
      enum value {
          unknown = 0,
          plain   = 1,
          chunked = 2
      };
  } // namespace body_encoding
  /// Container for HTTP headers: maps a header name to its value, ordering keys
  /// with utility::ci_less (presumably a case-insensitive comparison - confirm
  /// against websocketpp/utilities.hpp).
  53. typedef std::map<std::string, std::string, utility::ci_less > header_list;
  54. /// Read and return the next token in the stream
  55. /**
  56. * Read until a non-token character is found and then return the token and
  57. * iterator to the next character to read
  58. *
  59. * @param begin An iterator to the beginning of the sequence
  60. * @param end An iterator to the end of the sequence
  61. * @return A pair containing the token and an iterator to the next character in
  62. * the stream
  63. */
  64. template <typename InputIterator>
  65. std::pair<std::string,InputIterator> extract_token(InputIterator begin,
  66. InputIterator end)
  67. {
  68. InputIterator it = std::find_if(begin,end,&is_not_token_char);
  69. return std::make_pair(std::string(begin,it),it);
  70. }
  /// Read and return the next quoted string in the stream
  /**
   * Reads a double quoted string starting at `begin`. The surrounding quotes
   * are stripped and quoted-pairs are unescaped: a backslash followed by any
   * character contributes just that character to the result. In particular
   * `\"` yields a literal quote and `\\` yields a literal backslash, so a
   * string whose last character is an escaped backslash (`"a\\"`) terminates
   * at the correct closing quote.
   *
   * The range is walked strictly forwards using only `++`, `*` and `!=`.
   *
   * @param begin An iterator to the beginning of the sequence
   * @param end An iterator to the end of the sequence
   * @return A pair containing the unescaped string and an iterator to the
   * character after the closing quote. If the range is empty, does not start
   * with a double quote, or has no closing quote, the pair holds an empty string
   * and `begin`.
   */
  template <typename InputIterator>
  std::pair<std::string,InputIterator> extract_quoted_string(InputIterator begin,
      InputIterator end)
  {
      std::string s;

      // Not a quoted string at all: report "nothing consumed".
      if (begin == end || *begin != '"') {
          return std::make_pair(s,begin);
      }

      InputIterator cursor = begin;
      ++cursor; // step over the opening quote

      while (cursor != end) {
          char const c = static_cast<char>(*cursor);
          ++cursor;

          if (c == '"') {
              // Unescaped quote: this is the closing delimiter.
              return std::make_pair(s,cursor);
          }

          if (c == '\\') {
              // quoted-pair: the next character is taken literally. A
              // backslash with nothing after it means the string is
              // unterminated.
              if (cursor == end) {
                  break;
              }
              s.append(1,static_cast<char>(*cursor));
              ++cursor;
          } else {
              s.append(1,c);
          }
      }

      // Ran out of input before finding the closing quote.
      return std::make_pair(std::string(),begin);
  }
  112. /// Read and discard one unit of linear whitespace
  113. /**
  114. * Read one unit of linear white space and return the iterator to the character
  115. * afterwards. If `begin` is returned, no whitespace was extracted.
  116. *
  117. * @param begin An iterator to the beginning of the sequence
  118. * @param end An iterator to the end of the sequence
  119. * @return An iterator to the character after the linear whitespace read
  120. */
  121. template <typename InputIterator>
  122. InputIterator extract_lws(InputIterator begin, InputIterator end) {
  123. InputIterator it = begin;
  124. // strip leading CRLF
  125. if (end-begin > 2 && *begin == '\r' && *(begin+1) == '\n' &&
  126. is_whitespace_char(static_cast<unsigned char>(*(begin+2))))
  127. {
  128. it+=3;
  129. }
  130. it = std::find_if(it,end,&is_not_whitespace_char);
  131. return it;
  132. }
  /// Skip every consecutive unit of linear whitespace
  /**
   * Calls `extract_lws` repeatedly, each time starting where the previous call
   * stopped, until a call makes no progress or the end of the range is
   * reached. A return value equal to `begin` means nothing was skipped.
   *
   * @param begin An iterator to the beginning of the sequence
   * @param end An iterator to the end of the sequence
   * @return An iterator to the first character after the skipped whitespace
   */
  template <typename InputIterator>
  InputIterator extract_all_lws(InputIterator begin, InputIterator end) {
      InputIterator cursor = begin;

      for (;;) {
          InputIterator const next = extract_lws(cursor,end);
          bool const progressed = (next != cursor);
          cursor = next;

          // stop once the input is exhausted or the last pass consumed nothing
          if (cursor == end || !progressed) {
              break;
          }
      }

      return cursor;
  }
  155. /// Extract HTTP attributes
  156. /**
  157. * An http attributes list is a semicolon delimited list of key value pairs in
  158. * the format: *( ";" attribute "=" value ) where attribute is a token and value
  159. * is a token or quoted string.
  160. *
  161. * Attributes extracted are appended to the supplied attributes list
  162. * `attributes`.
  163. *
  164. * @param [in] begin An iterator to the beginning of the sequence
  165. * @param [in] end An iterator to the end of the sequence
  166. * @param [out] attributes A reference to the attributes list to append
  167. * attribute/value pairs extracted to
  168. * @return An iterator to the character after the last atribute read
  169. */
  170. template <typename InputIterator>
  171. InputIterator extract_attributes(InputIterator begin, InputIterator end,
  172. attribute_list & attributes)
  173. {
  174. InputIterator cursor;
  175. bool first = true;
  176. if (begin == end) {
  177. return begin;
  178. }
  179. cursor = begin;
  180. std::pair<std::string,InputIterator> ret;
  181. while (cursor != end) {
  182. std::string name;
  183. cursor = http::parser::extract_all_lws(cursor,end);
  184. if (cursor == end) {
  185. break;
  186. }
  187. if (first) {
  188. // ignore this check for the very first pass
  189. first = false;
  190. } else {
  191. if (*cursor == ';') {
  192. // advance past the ';'
  193. ++cursor;
  194. } else {
  195. // non-semicolon in this position indicates end end of the
  196. // attribute list, break and return.
  197. break;
  198. }
  199. }
  200. cursor = http::parser::extract_all_lws(cursor,end);
  201. ret = http::parser::extract_token(cursor,end);
  202. if (ret.first.empty()) {
  203. // error: expected a token
  204. return begin;
  205. } else {
  206. name = ret.first;
  207. cursor = ret.second;
  208. }
  209. cursor = http::parser::extract_all_lws(cursor,end);
  210. if (cursor == end || *cursor != '=') {
  211. // if there is an equals sign, read the attribute value. Otherwise
  212. // record a blank value and continue
  213. attributes[name].clear();
  214. continue;
  215. }
  216. // advance past the '='
  217. ++cursor;
  218. cursor = http::parser::extract_all_lws(cursor,end);
  219. if (cursor == end) {
  220. // error: expected a token or quoted string
  221. return begin;
  222. }
  223. ret = http::parser::extract_quoted_string(cursor,end);
  224. if (ret.second != cursor) {
  225. attributes[name] = ret.first;
  226. cursor = ret.second;
  227. continue;
  228. }
  229. ret = http::parser::extract_token(cursor,end);
  230. if (ret.first.empty()) {
  231. // error : expected token or quoted string
  232. return begin;
  233. } else {
  234. attributes[name] = ret.first;
  235. cursor = ret.second;
  236. }
  237. }
  238. return cursor;
  239. }
  240. /// Extract HTTP parameters
  241. /**
  242. * An http parameters list is a comma delimited list of tokens followed by
  243. * optional semicolon delimited attributes lists.
  244. *
  245. * Parameters extracted are appended to the supplied parameters list
  246. * `parameters`.
  247. *
  248. * @param [in] begin An iterator to the beginning of the sequence
  249. * @param [in] end An iterator to the end of the sequence
  250. * @param [out] parameters A reference to the parameters list to append
  251. * paramter values extracted to
  252. * @return An iterator to the character after the last parameter read
  253. */
  254. template <typename InputIterator>
  255. InputIterator extract_parameters(InputIterator begin, InputIterator end,
  256. parameter_list &parameters)
  257. {
  258. InputIterator cursor;
  259. if (begin == end) {
  260. // error: expected non-zero length range
  261. return begin;
  262. }
  263. cursor = begin;
  264. std::pair<std::string,InputIterator> ret;
  265. /**
  266. * LWS
  267. * token
  268. * LWS
  269. * *(";" method-param)
  270. * LWS
  271. * ,=loop again
  272. */
  273. while (cursor != end) {
  274. std::string parameter_name;
  275. attribute_list attributes;
  276. // extract any stray whitespace
  277. cursor = http::parser::extract_all_lws(cursor,end);
  278. if (cursor == end) {break;}
  279. ret = http::parser::extract_token(cursor,end);
  280. if (ret.first.empty()) {
  281. // error: expected a token
  282. return begin;
  283. } else {
  284. parameter_name = ret.first;
  285. cursor = ret.second;
  286. }
  287. // Safe break point, insert parameter with blank attributes and exit
  288. cursor = http::parser::extract_all_lws(cursor,end);
  289. if (cursor == end) {
  290. //parameters[parameter_name] = attributes;
  291. parameters.push_back(std::make_pair(parameter_name,attributes));
  292. break;
  293. }
  294. // If there is an attribute list, read it in
  295. if (*cursor == ';') {
  296. InputIterator acursor;
  297. ++cursor;
  298. acursor = http::parser::extract_attributes(cursor,end,attributes);
  299. if (acursor == cursor) {
  300. // attribute extraction ended in syntax error
  301. return begin;
  302. }
  303. cursor = acursor;
  304. }
  305. // insert parameter into output list
  306. //parameters[parameter_name] = attributes;
  307. parameters.push_back(std::make_pair(parameter_name,attributes));
  308. cursor = http::parser::extract_all_lws(cursor,end);
  309. if (cursor == end) {break;}
  310. // if next char is ',' then read another parameter, else stop
  311. if (*cursor != ',') {
  312. break;
  313. }
  314. // advance past comma
  315. ++cursor;
  316. if (cursor == end) {
  317. // expected more bytes after a comma
  318. return begin;
  319. }
  320. }
  321. return cursor;
  322. }
  323. inline std::string strip_lws(std::string const & input) {
  324. std::string::const_iterator begin = extract_all_lws(input.begin(),input.end());
  325. if (begin == input.end()) {
  326. return std::string();
  327. }
  328. std::string::const_reverse_iterator rbegin = extract_all_lws(input.rbegin(),input.rend());
  329. if (rbegin == input.rend()) {
  330. return std::string();
  331. }
  332. return std::string(begin,rbegin.base());
  333. }
  334. /// Base HTTP parser
  335. /**
  336. * Includes methods and data elements common to all types of HTTP messages such
  337. * as headers, versions, bodies, etc.
  338. */
  339. class parser {
  340. public:
  341. parser()
  342. : m_header_bytes(0)
  343. , m_body_bytes_needed(0)
  344. , m_body_bytes_max(max_body_size)
  345. , m_body_encoding(body_encoding::unknown) {}
  346. /// Get the HTTP version string
  347. /**
  348. * @return The version string for this parser
  349. */
  350. std::string const & get_version() const {
  351. return m_version;
  352. }
  353. /// Set HTTP parser Version
  354. /**
  355. * Input should be in format: HTTP/x.y where x and y are positive integers.
  356. * @todo Does this method need any validation?
  357. *
  358. * @param [in] version The value to set the HTTP version to.
  359. */
  360. void set_version(std::string const & version);
  361. /// Get the value of an HTTP header
  362. /**
  363. * @todo Make this method case insensitive.
  364. *
  365. * @param [in] key The name/key of the header to get.
  366. * @return The value associated with the given HTTP header key.
  367. */
  368. std::string const & get_header(std::string const & key) const;
  369. /// Extract an HTTP parameter list from a parser header.
  370. /**
  371. * If the header requested doesn't exist or exists and is empty the
  372. * parameter list is valid (but empty).
  373. *
  374. * @param [in] key The name/key of the HTTP header to use as input.
  375. * @param [out] out The parameter list to store extracted parameters in.
  376. * @return Whether or not the input was a valid parameter list.
  377. */
  378. bool get_header_as_plist(std::string const & key, parameter_list & out)
  379. const;
  380. /// Return a list of all HTTP headers
  381. /**
  382. * Return a list of all HTTP headers
  383. *
  384. * @since 0.8.0
  385. *
  386. * @return A list of all HTTP headers
  387. */
  388. header_list const & get_headers() const;
  389. /// Append a value to an existing HTTP header
  390. /**
  391. * This method will set the value of the HTTP header `key` with the
  392. * indicated value. If a header with the name `key` already exists, `val`
  393. * will be appended to the existing value.
  394. *
  395. * @todo Make this method case insensitive.
  396. * @todo Should there be any restrictions on which keys are allowed?
  397. * @todo Exception free varient
  398. *
  399. * @see replace_header
  400. *
  401. * @param [in] key The name/key of the header to append to.
  402. * @param [in] val The value to append.
  403. */
  404. void append_header(std::string const & key, std::string const & val);
  405. /// Set a value for an HTTP header, replacing an existing value
  406. /**
  407. * This method will set the value of the HTTP header `key` with the
  408. * indicated value. If a header with the name `key` already exists, `val`
  409. * will replace the existing value.
  410. *
  411. * @todo Make this method case insensitive.
  412. * @todo Should there be any restrictions on which keys are allowed?
  413. * @todo Exception free varient
  414. *
  415. * @see append_header
  416. *
  417. * @param [in] key The name/key of the header to append to.
  418. * @param [in] val The value to append.
  419. */
  420. void replace_header(std::string const & key, std::string const & val);
  421. /// Remove a header from the parser
  422. /**
  423. * Removes the header entirely from the parser. This is different than
  424. * setting the value of the header to blank.
  425. *
  426. * @todo Make this method case insensitive.
  427. *
  428. * @param [in] key The name/key of the header to remove.
  429. */
  430. void remove_header(std::string const & key);
  431. /// Get HTTP body
  432. /**
  433. * Gets the body of the HTTP object
  434. *
  435. * @return The body of the HTTP message.
  436. */
  437. std::string const & get_body() const {
  438. return m_body;
  439. }
  440. /// Set body content
  441. /**
  442. * Set the body content of the HTTP response to the parameter string. Note
  443. * set_body will also set the Content-Length HTTP header to the appropriate
  444. * value. If you want the Content-Length header to be something else, do so
  445. * via replace_header("Content-Length") after calling set_body()
  446. *
  447. * @param value String data to include as the body content.
  448. */
  449. void set_body(std::string const & value);
  450. /// Get body size limit
  451. /**
  452. * Retrieves the maximum number of bytes to parse & buffer before canceling
  453. * a request.
  454. *
  455. * @since 0.5.0
  456. *
  457. * @return The maximum length of a message body.
  458. */
  459. size_t get_max_body_size() const {
  460. return m_body_bytes_max;
  461. }
  462. /// Set body size limit
  463. /**
  464. * Set the maximum number of bytes to parse and buffer before canceling a
  465. * request.
  466. *
  467. * @since 0.5.0
  468. *
  469. * @param value The size to set the max body length to.
  470. */
  471. void set_max_body_size(size_t value) {
  472. m_body_bytes_max = value;
  473. }
  474. /// Extract an HTTP parameter list from a string.
  475. /**
  476. * @param [in] in The input string.
  477. * @param [out] out The parameter list to store extracted parameters in.
  478. * @return Whether or not the input was a valid parameter list.
  479. */
  480. bool parse_parameter_list(std::string const & in, parameter_list & out)
  481. const;
  482. protected:
  483. /// Process a header line
  484. /**
  485. * @todo Update this method to be exception free.
  486. *
  487. * @param [in] begin An iterator to the beginning of the sequence.
  488. * @param [in] end An iterator to the end of the sequence.
  489. */
  490. void process_header(std::string::iterator begin, std::string::iterator end);
  491. /// Prepare the parser to begin parsing body data
  492. /**
  493. * Inspects headers to determine if the message has a body that needs to be
  494. * read. If so, sets up the necessary state, otherwise returns false. If
  495. * this method returns true and loading the message body is desired call
  496. * `process_body` until it returns zero bytes or an error.
  497. *
  498. * Must not be called until after all headers have been processed.
  499. *
  500. * @since 0.5.0
  501. *
  502. * @return True if more bytes are needed to load the body, false otherwise.
  503. */
  504. bool prepare_body();
  505. /// Process body data
  506. /**
  507. * Parses body data.
  508. *
  509. * @since 0.5.0
  510. *
  511. * @param [in] begin An iterator to the beginning of the sequence.
  512. * @param [in] end An iterator to the end of the sequence.
  513. * @return The number of bytes processed
  514. */
  515. size_t process_body(char const * buf, size_t len);
  516. /// Check if the parser is done parsing the body
  517. /**
  518. * Behavior before a call to `prepare_body` is undefined.
  519. *
  520. * @since 0.5.0
  521. *
  522. * @return True if the message body has been completed loaded.
  523. */
  524. bool body_ready() const {
  525. return (m_body_bytes_needed == 0);
  526. }
  527. /// Generate and return the HTTP headers as a string
  528. /**
  529. * Each headers will be followed by the \r\n sequence including the last one.
  530. * A second \r\n sequence (blank header) is not appended by this method
  531. *
  532. * @return The HTTP headers as a string.
  533. */
  534. std::string raw_headers() const;
  535. std::string m_version;
  536. header_list m_headers;
  537. size_t m_header_bytes;
  538. std::string m_body;
  539. size_t m_body_bytes_needed;
  540. size_t m_body_bytes_max;
  541. body_encoding::value m_body_encoding;
  542. };
  543. } // namespace parser
  544. } // namespace http
  545. } // namespace websocketpp
  546. #include <websocketpp/http/impl/parser.hpp>
  547. #endif // HTTP_PARSER_HPP