123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629 |
- /*
- * Copyright (c) 2014, Peter Thorson. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of the WebSocket++ Project nor the
- * names of its contributors may be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL PETER THORSON BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
- #ifndef HTTP_PARSER_HPP
- #define HTTP_PARSER_HPP
- #include <algorithm>
- #include <map>
- #include <string>
- #include <utility>
- #include <websocketpp/utilities.hpp>
- #include <websocketpp/http/constants.hpp>
- namespace websocketpp {
- namespace http {
- namespace parser {
/// Parser state identifiers
/**
 * Names the values `method`, `resource`, `version`, and `headers`. These
 * presumably correspond to the successive pieces of an HTTP message head being
 * read (confirm against the code that consumes `state::value`).
 */
namespace state {
    enum value {
        method = 0,
        resource = 1,
        version = 2,
        headers = 3
    };
}
/// Message body encoding identifiers
/**
 * `unknown` is the value a freshly constructed parser starts with. `plain` and
 * `chunked` presumably distinguish a fixed-length body from a chunked
 * transfer-encoded one (confirm against the body processing code).
 */
namespace body_encoding {
    enum value {
        unknown = 0,
        plain = 1,
        chunked = 2
    };
}
- typedef std::map<std::string, std::string, utility::ci_less > header_list;
- /// Read and return the next token in the stream
- /**
- * Read until a non-token character is found and then return the token and
- * iterator to the next character to read
- *
- * @param begin An iterator to the beginning of the sequence
- * @param end An iterator to the end of the sequence
- * @return A pair containing the token and an iterator to the next character in
- * the stream
- */
- template <typename InputIterator>
- std::pair<std::string,InputIterator> extract_token(InputIterator begin,
- InputIterator end)
- {
- InputIterator it = std::find_if(begin,end,&is_not_token_char);
- return std::make_pair(std::string(begin,it),it);
- }
/// Read and return the next quoted string in the stream
/**
 * Read a double quoted string starting at `begin`. The surrounding quotes are
 * stripped and every quoted-pair (a backslash followed by one character) is
 * replaced by the character after the backslash. In particular `\"` yields a
 * literal quote and `\\` yields a literal backslash, so a string whose last
 * character is an escaped backslash is terminated correctly.
 *
 * Failure is reported by returning an empty string together with `begin`. This
 * happens when the range is empty, when it does not start with a double quote,
 * or when no unescaped closing quote is found before `end`.
 *
 * Only increment and dereference are applied to the iterators. The iterator
 * must be multi-pass, because `begin` is returned unchanged on failure.
 *
 * @param begin An iterator to the beginning of the sequence
 * @param end An iterator to the end of the sequence
 * @return A pair containing the string read and an iterator to the next
 * character in the stream
 */
template <typename InputIterator>
std::pair<std::string,InputIterator> extract_quoted_string(InputIterator begin,
    InputIterator end)
{
    std::string s;

    // a quoted string must open with a double quote
    if (begin == end || *begin != '"') {
        return std::make_pair(s,begin);
    }

    InputIterator cursor = begin;
    ++cursor;

    while (cursor != end) {
        if (*cursor == '\\') {
            // quoted-pair: take the following character literally
            ++cursor;
            if (cursor == end) {
                // dangling backslash, the string was never closed
                break;
            }
            s.push_back(*cursor);
        } else if (*cursor == '"') {
            // unescaped quote closes the string; step past it
            ++cursor;
            return std::make_pair(s,cursor);
        } else {
            s.push_back(*cursor);
        }
        ++cursor;
    }

    // ran out of input before the closing quote
    return std::make_pair(std::string(),begin);
}
- /// Read and discard one unit of linear whitespace
- /**
- * Read one unit of linear white space and return the iterator to the character
- * afterwards. If `begin` is returned, no whitespace was extracted.
- *
- * @param begin An iterator to the beginning of the sequence
- * @param end An iterator to the end of the sequence
- * @return An iterator to the character after the linear whitespace read
- */
- template <typename InputIterator>
- InputIterator extract_lws(InputIterator begin, InputIterator end) {
- InputIterator it = begin;
- // strip leading CRLF
- if (end-begin > 2 && *begin == '\r' && *(begin+1) == '\n' &&
- is_whitespace_char(static_cast<unsigned char>(*(begin+2))))
- {
- it+=3;
- }
- it = std::find_if(it,end,&is_not_whitespace_char);
- return it;
- }
/// Read and discard linear whitespace
/**
 * Applies `extract_lws` repeatedly until a pass consumes nothing or the end of
 * the range is reached. If `begin` is returned, no whitespace was extracted.
 *
 * @param begin An iterator to the beginning of the sequence
 * @param end An iterator to the end of the sequence
 * @return An iterator to the character after the linear whitespace read
 */
template <typename InputIterator>
InputIterator extract_all_lws(InputIterator begin, InputIterator end) {
    InputIterator cursor = begin;

    for (;;) {
        InputIterator const previous = cursor;
        cursor = extract_lws(previous,end);

        // stop once the input is exhausted or a pass made no progress
        if (cursor == end || cursor == previous) {
            return cursor;
        }
    }
}
- /// Extract HTTP attributes
- /**
- * An http attributes list is a semicolon delimited list of key value pairs in
- * the format: *( ";" attribute "=" value ) where attribute is a token and value
- * is a token or quoted string.
- *
- * Attributes extracted are appended to the supplied attributes list
- * `attributes`.
- *
- * @param [in] begin An iterator to the beginning of the sequence
- * @param [in] end An iterator to the end of the sequence
- * @param [out] attributes A reference to the attributes list to append
- * attribute/value pairs extracted to
- * @return An iterator to the character after the last atribute read
- */
- template <typename InputIterator>
- InputIterator extract_attributes(InputIterator begin, InputIterator end,
- attribute_list & attributes)
- {
- InputIterator cursor;
- bool first = true;
- if (begin == end) {
- return begin;
- }
- cursor = begin;
- std::pair<std::string,InputIterator> ret;
- while (cursor != end) {
- std::string name;
- cursor = http::parser::extract_all_lws(cursor,end);
- if (cursor == end) {
- break;
- }
- if (first) {
- // ignore this check for the very first pass
- first = false;
- } else {
- if (*cursor == ';') {
- // advance past the ';'
- ++cursor;
- } else {
- // non-semicolon in this position indicates end end of the
- // attribute list, break and return.
- break;
- }
- }
- cursor = http::parser::extract_all_lws(cursor,end);
- ret = http::parser::extract_token(cursor,end);
- if (ret.first.empty()) {
- // error: expected a token
- return begin;
- } else {
- name = ret.first;
- cursor = ret.second;
- }
- cursor = http::parser::extract_all_lws(cursor,end);
- if (cursor == end || *cursor != '=') {
- // if there is an equals sign, read the attribute value. Otherwise
- // record a blank value and continue
- attributes[name].clear();
- continue;
- }
- // advance past the '='
- ++cursor;
- cursor = http::parser::extract_all_lws(cursor,end);
- if (cursor == end) {
- // error: expected a token or quoted string
- return begin;
- }
- ret = http::parser::extract_quoted_string(cursor,end);
- if (ret.second != cursor) {
- attributes[name] = ret.first;
- cursor = ret.second;
- continue;
- }
- ret = http::parser::extract_token(cursor,end);
- if (ret.first.empty()) {
- // error : expected token or quoted string
- return begin;
- } else {
- attributes[name] = ret.first;
- cursor = ret.second;
- }
- }
- return cursor;
- }
- /// Extract HTTP parameters
- /**
- * An http parameters list is a comma delimited list of tokens followed by
- * optional semicolon delimited attributes lists.
- *
- * Parameters extracted are appended to the supplied parameters list
- * `parameters`.
- *
- * @param [in] begin An iterator to the beginning of the sequence
- * @param [in] end An iterator to the end of the sequence
- * @param [out] parameters A reference to the parameters list to append
- * paramter values extracted to
- * @return An iterator to the character after the last parameter read
- */
- template <typename InputIterator>
- InputIterator extract_parameters(InputIterator begin, InputIterator end,
- parameter_list ¶meters)
- {
- InputIterator cursor;
- if (begin == end) {
- // error: expected non-zero length range
- return begin;
- }
- cursor = begin;
- std::pair<std::string,InputIterator> ret;
- /**
- * LWS
- * token
- * LWS
- * *(";" method-param)
- * LWS
- * ,=loop again
- */
- while (cursor != end) {
- std::string parameter_name;
- attribute_list attributes;
- // extract any stray whitespace
- cursor = http::parser::extract_all_lws(cursor,end);
- if (cursor == end) {break;}
- ret = http::parser::extract_token(cursor,end);
- if (ret.first.empty()) {
- // error: expected a token
- return begin;
- } else {
- parameter_name = ret.first;
- cursor = ret.second;
- }
- // Safe break point, insert parameter with blank attributes and exit
- cursor = http::parser::extract_all_lws(cursor,end);
- if (cursor == end) {
- //parameters[parameter_name] = attributes;
- parameters.push_back(std::make_pair(parameter_name,attributes));
- break;
- }
- // If there is an attribute list, read it in
- if (*cursor == ';') {
- InputIterator acursor;
- ++cursor;
- acursor = http::parser::extract_attributes(cursor,end,attributes);
- if (acursor == cursor) {
- // attribute extraction ended in syntax error
- return begin;
- }
- cursor = acursor;
- }
- // insert parameter into output list
- //parameters[parameter_name] = attributes;
- parameters.push_back(std::make_pair(parameter_name,attributes));
- cursor = http::parser::extract_all_lws(cursor,end);
- if (cursor == end) {break;}
- // if next char is ',' then read another parameter, else stop
- if (*cursor != ',') {
- break;
- }
- // advance past comma
- ++cursor;
- if (cursor == end) {
- // expected more bytes after a comma
- return begin;
- }
- }
- return cursor;
- }
- inline std::string strip_lws(std::string const & input) {
- std::string::const_iterator begin = extract_all_lws(input.begin(),input.end());
- if (begin == input.end()) {
- return std::string();
- }
- std::string::const_reverse_iterator rbegin = extract_all_lws(input.rbegin(),input.rend());
- if (rbegin == input.rend()) {
- return std::string();
- }
- return std::string(begin,rbegin.base());
- }
- /// Base HTTP parser
- /**
- * Includes methods and data elements common to all types of HTTP messages such
- * as headers, versions, bodies, etc.
- */
- class parser {
- public:
- parser()
- : m_header_bytes(0)
- , m_body_bytes_needed(0)
- , m_body_bytes_max(max_body_size)
- , m_body_encoding(body_encoding::unknown) {}
-
- /// Get the HTTP version string
- /**
- * @return The version string for this parser
- */
- std::string const & get_version() const {
- return m_version;
- }
- /// Set HTTP parser Version
- /**
- * Input should be in format: HTTP/x.y where x and y are positive integers.
- * @todo Does this method need any validation?
- *
- * @param [in] version The value to set the HTTP version to.
- */
- void set_version(std::string const & version);
- /// Get the value of an HTTP header
- /**
- * @todo Make this method case insensitive.
- *
- * @param [in] key The name/key of the header to get.
- * @return The value associated with the given HTTP header key.
- */
- std::string const & get_header(std::string const & key) const;
- /// Extract an HTTP parameter list from a parser header.
- /**
- * If the header requested doesn't exist or exists and is empty the
- * parameter list is valid (but empty).
- *
- * @param [in] key The name/key of the HTTP header to use as input.
- * @param [out] out The parameter list to store extracted parameters in.
- * @return Whether or not the input was a valid parameter list.
- */
- bool get_header_as_plist(std::string const & key, parameter_list & out)
- const;
- /// Return a list of all HTTP headers
- /**
- * Return a list of all HTTP headers
- *
- * @since 0.8.0
- *
- * @return A list of all HTTP headers
- */
- header_list const & get_headers() const;
- /// Append a value to an existing HTTP header
- /**
- * This method will set the value of the HTTP header `key` with the
- * indicated value. If a header with the name `key` already exists, `val`
- * will be appended to the existing value.
- *
- * @todo Make this method case insensitive.
- * @todo Should there be any restrictions on which keys are allowed?
- * @todo Exception free varient
- *
- * @see replace_header
- *
- * @param [in] key The name/key of the header to append to.
- * @param [in] val The value to append.
- */
- void append_header(std::string const & key, std::string const & val);
- /// Set a value for an HTTP header, replacing an existing value
- /**
- * This method will set the value of the HTTP header `key` with the
- * indicated value. If a header with the name `key` already exists, `val`
- * will replace the existing value.
- *
- * @todo Make this method case insensitive.
- * @todo Should there be any restrictions on which keys are allowed?
- * @todo Exception free varient
- *
- * @see append_header
- *
- * @param [in] key The name/key of the header to append to.
- * @param [in] val The value to append.
- */
- void replace_header(std::string const & key, std::string const & val);
- /// Remove a header from the parser
- /**
- * Removes the header entirely from the parser. This is different than
- * setting the value of the header to blank.
- *
- * @todo Make this method case insensitive.
- *
- * @param [in] key The name/key of the header to remove.
- */
- void remove_header(std::string const & key);
- /// Get HTTP body
- /**
- * Gets the body of the HTTP object
- *
- * @return The body of the HTTP message.
- */
- std::string const & get_body() const {
- return m_body;
- }
- /// Set body content
- /**
- * Set the body content of the HTTP response to the parameter string. Note
- * set_body will also set the Content-Length HTTP header to the appropriate
- * value. If you want the Content-Length header to be something else, do so
- * via replace_header("Content-Length") after calling set_body()
- *
- * @param value String data to include as the body content.
- */
- void set_body(std::string const & value);
- /// Get body size limit
- /**
- * Retrieves the maximum number of bytes to parse & buffer before canceling
- * a request.
- *
- * @since 0.5.0
- *
- * @return The maximum length of a message body.
- */
- size_t get_max_body_size() const {
- return m_body_bytes_max;
- }
- /// Set body size limit
- /**
- * Set the maximum number of bytes to parse and buffer before canceling a
- * request.
- *
- * @since 0.5.0
- *
- * @param value The size to set the max body length to.
- */
- void set_max_body_size(size_t value) {
- m_body_bytes_max = value;
- }
- /// Extract an HTTP parameter list from a string.
- /**
- * @param [in] in The input string.
- * @param [out] out The parameter list to store extracted parameters in.
- * @return Whether or not the input was a valid parameter list.
- */
- bool parse_parameter_list(std::string const & in, parameter_list & out)
- const;
- protected:
- /// Process a header line
- /**
- * @todo Update this method to be exception free.
- *
- * @param [in] begin An iterator to the beginning of the sequence.
- * @param [in] end An iterator to the end of the sequence.
- */
- void process_header(std::string::iterator begin, std::string::iterator end);
- /// Prepare the parser to begin parsing body data
- /**
- * Inspects headers to determine if the message has a body that needs to be
- * read. If so, sets up the necessary state, otherwise returns false. If
- * this method returns true and loading the message body is desired call
- * `process_body` until it returns zero bytes or an error.
- *
- * Must not be called until after all headers have been processed.
- *
- * @since 0.5.0
- *
- * @return True if more bytes are needed to load the body, false otherwise.
- */
- bool prepare_body();
- /// Process body data
- /**
- * Parses body data.
- *
- * @since 0.5.0
- *
- * @param [in] begin An iterator to the beginning of the sequence.
- * @param [in] end An iterator to the end of the sequence.
- * @return The number of bytes processed
- */
- size_t process_body(char const * buf, size_t len);
- /// Check if the parser is done parsing the body
- /**
- * Behavior before a call to `prepare_body` is undefined.
- *
- * @since 0.5.0
- *
- * @return True if the message body has been completed loaded.
- */
- bool body_ready() const {
- return (m_body_bytes_needed == 0);
- }
- /// Generate and return the HTTP headers as a string
- /**
- * Each headers will be followed by the \r\n sequence including the last one.
- * A second \r\n sequence (blank header) is not appended by this method
- *
- * @return The HTTP headers as a string.
- */
- std::string raw_headers() const;
- std::string m_version;
- header_list m_headers;
-
- size_t m_header_bytes;
-
- std::string m_body;
- size_t m_body_bytes_needed;
- size_t m_body_bytes_max;
- body_encoding::value m_body_encoding;
- };
- } // namespace parser
- } // namespace http
- } // namespace websocketpp
- #include <websocketpp/http/impl/parser.hpp>
- #endif // HTTP_PARSER_HPP
|