NS-3 based Named Data Networking (NDN) simulator
ndnSIM 2.3: NDN, CCN, CCNx, content centric networks
API Documentation
parser.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2014, Peter Thorson. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright
7  * notice, this list of conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright
9  * notice, this list of conditions and the following disclaimer in the
10  * documentation and/or other materials provided with the distribution.
11  * * Neither the name of the WebSocket++ Project nor the
12  * names of its contributors may be used to endorse or promote products
13  * derived from this software without specific prior written permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED. IN NO EVENT SHALL PETER THORSON BE LIABLE FOR ANY
19  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *
26  */
27 
28 #ifndef HTTP_PARSER_HPP
29 #define HTTP_PARSER_HPP
30 
31 #include <algorithm>
32 #include <map>
33 #include <string>
34 #include <utility>
35 
38 
39 namespace websocketpp {
40 namespace http {
41 namespace parser {
42 
// Namespace wrapping the `state::value` enumeration.
// NOTE(review): the enumerator lines (45-48) are missing from this listing,
// so the individual values cannot be documented from here.
43 namespace state {
44  enum value {
49  };
50 }
51 
// Namespace wrapping the `body_encoding::value` enumeration.
// NOTE(review): the enumerator lines (54-56) are missing from this listing.
// The parser constructor below initializes m_body_encoding with
// body_encoding::unknown, so at least that enumerator exists.
52 namespace body_encoding {
53  enum value {
57  };
58 }
59 
// Map of string keys to string values, ordered by utility::ci_less
// (presumably a case-insensitive comparison -- confirm in the utility header).
60 typedef std::map<std::string, std::string, utility::ci_less > header_list;
61 
63 
72 template <typename InputIterator>
73 std::pair<std::string,InputIterator> extract_token(InputIterator begin,
74  InputIterator end)
75 {
76  InputIterator it = std::find_if(begin,end,&is_not_token_char);
77  return std::make_pair(std::string(begin,it),it);
78 }
79 
81 
/// Read and return the next quoted string in the stream
/**
 * Reads a double-quoted string starting at `begin`. Backslash escapes
 * (quoted-pairs) are decoded as required by RFC 7230 section 3.2.6: a
 * backslash is dropped and the character following it is taken literally.
 * This means `\"` yields `"` and `\\` yields `\`, so a closing quote that
 * follows an escaped backslash correctly terminates the string.
 *
 * The range is scanned once, front to back, so only forward iteration is
 * required of InputIterator.
 *
 * @param begin Start of the range to read from
 * @param end End of the range to read from
 * @return On success, a pair of the decoded string contents (without the
 *         surrounding quotes) and an iterator to the character after the
 *         closing quote. On failure (empty range, no opening quote, missing
 *         closing quote, or a trailing unpaired backslash) a pair of the
 *         empty string and `begin`.
 */
template <typename InputIterator>
std::pair<std::string,InputIterator> extract_quoted_string(InputIterator begin,
    InputIterator end)
{
    std::string s;

    // A quoted string must start with a double quote
    if (begin == end || *begin != '"') {
        return std::make_pair(s,begin);
    }

    InputIterator cursor = begin;
    ++cursor;

    while (cursor != end) {
        if (*cursor == '"') {
            // unescaped quote: end of the quoted string
            ++cursor;
            return std::make_pair(s,cursor);
        }

        if (*cursor == '\\') {
            // quoted-pair: skip the backslash, keep the next character as is
            ++cursor;
            if (cursor == end) {
                // backslash with nothing after it, string is unterminated
                break;
            }
        }

        s.append(1,*cursor);
        ++cursor;
    }

    // ran out of input before finding the closing quote
    return std::make_pair(std::string(),begin);
}
128 
130 
138 template <typename InputIterator>
139 InputIterator extract_lws(InputIterator begin, InputIterator end) {
140  InputIterator it = begin;
141 
142  // strip leading CRLF
143  if (end-begin > 2 && *begin == '\r' && *(begin+1) == '\n' &&
144  is_whitespace_char(static_cast<unsigned char>(*(begin+2))))
145  {
146  it+=3;
147  }
148 
149  it = std::find_if(it,end,&is_not_whitespace_char);
150  return it;
151 }
152 
154 
/// Read and discard linear whitespace
/**
 * Applies `extract_lws` repeatedly until it either reaches `end` or stops
 * making progress.
 *
 * @param begin Start of the range to read from
 * @param end End of the range to read from
 * @return An iterator to the position where repeated `extract_lws` calls
 *         stopped advancing, or `end`.
 */
template <typename InputIterator>
InputIterator extract_all_lws(InputIterator begin, InputIterator end) {
    InputIterator pos = begin;

    for (;;) {
        InputIterator next = extract_lws(pos,end);
        bool const advanced = (next != pos);
        pos = next;

        // stop once the input is exhausted or nothing more was consumed
        if (pos == end || !advanced) {
            return pos;
        }
    }
}
178 
180 
194 template <typename InputIterator>
195 InputIterator extract_attributes(InputIterator begin, InputIterator end,
196  attribute_list & attributes)
197 {
198  InputIterator cursor;
199  bool first = true;
200 
201  if (begin == end) {
202  return begin;
203  }
204 
205  cursor = begin;
206  std::pair<std::string,InputIterator> ret;
207 
208  while (cursor != end) {
209  std::string name;
210 
211  cursor = http::parser::extract_all_lws(cursor,end);
212  if (cursor == end) {
213  break;
214  }
215 
216  if (first) {
217  // ignore this check for the very first pass
218  first = false;
219  } else {
220  if (*cursor == ';') {
221  // advance past the ';'
222  ++cursor;
223  } else {
224  // non-semicolon in this position indicates end end of the
225  // attribute list, break and return.
226  break;
227  }
228  }
229 
230  cursor = http::parser::extract_all_lws(cursor,end);
231  ret = http::parser::extract_token(cursor,end);
232 
233  if (ret.first.empty()) {
234  // error: expected a token
235  return begin;
236  } else {
237  name = ret.first;
238  cursor = ret.second;
239  }
240 
241  cursor = http::parser::extract_all_lws(cursor,end);
242  if (cursor == end || *cursor != '=') {
243  // if there is an equals sign, read the attribute value. Otherwise
244  // record a blank value and continue
245  attributes[name].clear();
246  continue;
247  }
248 
249  // advance past the '='
250  ++cursor;
251 
252  cursor = http::parser::extract_all_lws(cursor,end);
253  if (cursor == end) {
254  // error: expected a token or quoted string
255  return begin;
256  }
257 
258  ret = http::parser::extract_quoted_string(cursor,end);
259  if (ret.second != cursor) {
260  attributes[name] = ret.first;
261  cursor = ret.second;
262  continue;
263  }
264 
265  ret = http::parser::extract_token(cursor,end);
266  if (ret.first.empty()) {
267  // error : expected token or quoted string
268  return begin;
269  } else {
270  attributes[name] = ret.first;
271  cursor = ret.second;
272  }
273  }
274 
275  return cursor;
276 }
277 
279 
292 template <typename InputIterator>
293 InputIterator extract_parameters(InputIterator begin, InputIterator end,
294  parameter_list &parameters)
295 {
296  InputIterator cursor;
297 
298  if (begin == end) {
299  // error: expected non-zero length range
300  return begin;
301  }
302 
303  cursor = begin;
304  std::pair<std::string,InputIterator> ret;
305 
314  while (cursor != end) {
315  std::string parameter_name;
316  attribute_list attributes;
317 
318  // extract any stray whitespace
319  cursor = http::parser::extract_all_lws(cursor,end);
320  if (cursor == end) {break;}
321 
322  ret = http::parser::extract_token(cursor,end);
323 
324  if (ret.first.empty()) {
325  // error: expected a token
326  return begin;
327  } else {
328  parameter_name = ret.first;
329  cursor = ret.second;
330  }
331 
332  // Safe break point, insert parameter with blank attributes and exit
333  cursor = http::parser::extract_all_lws(cursor,end);
334  if (cursor == end) {
335  //parameters[parameter_name] = attributes;
336  parameters.push_back(std::make_pair(parameter_name,attributes));
337  break;
338  }
339 
340  // If there is an attribute list, read it in
341  if (*cursor == ';') {
342  InputIterator acursor;
343 
344  ++cursor;
345  acursor = http::parser::extract_attributes(cursor,end,attributes);
346 
347  if (acursor == cursor) {
348  // attribute extraction ended in syntax error
349  return begin;
350  }
351 
352  cursor = acursor;
353  }
354 
355  // insert parameter into output list
356  //parameters[parameter_name] = attributes;
357  parameters.push_back(std::make_pair(parameter_name,attributes));
358 
359  cursor = http::parser::extract_all_lws(cursor,end);
360  if (cursor == end) {break;}
361 
362  // if next char is ',' then read another parameter, else stop
363  if (*cursor != ',') {
364  break;
365  }
366 
367  // advance past comma
368  ++cursor;
369 
370  if (cursor == end) {
371  // expected more bytes after a comma
372  return begin;
373  }
374  }
375 
376  return cursor;
377 }
378 
379 inline std::string strip_lws(std::string const & input) {
380  std::string::const_iterator begin = extract_all_lws(input.begin(),input.end());
381  if (begin == input.end()) {
382  return std::string();
383  }
384 
385  std::string::const_reverse_iterator rbegin = extract_all_lws(input.rbegin(),input.rend());
386  if (rbegin == input.rend()) {
387  return std::string();
388  }
389 
390  return std::string(begin,rbegin.base());
391 }
392 
394 
// HTTP parser class holding a version string, a header map and a body.
// NOTE(review): this listing has gaps. The constructor declaration (line 400)
// and several data member declarations (lines 605, 608-610) are missing, and
// the original doc comments were stripped. Member functions that are only
// declared here are defined elsewhere; their notes below are hedged.
398 class parser {
399 public:
// Initializer list of the constructor whose declaration line is missing.
// Byte counters start at 0, the body size limit starts at max_body_size and
// the body encoding starts as body_encoding::unknown.
401  : m_header_bytes(0)
402  , m_body_bytes_needed(0)
403  , m_body_bytes_max(max_body_size)
404  , m_body_encoding(body_encoding::unknown) {}
405 
407 
// Get the HTTP version string (returns a reference to m_version).
410  std::string const & get_version() const {
411  return m_version;
412  }
413 
415 
// Set the version string. Definition not visible here; presumably stores
// into m_version -- confirm.
421  void set_version(std::string const & version);
422 
424 
// Look up a header value by key. Definition not visible here; behavior for
// a missing key should be confirmed against the implementation.
430  std::string const & get_header(std::string const & key) const;
431 
433 
// Presumably parses the header named `key` into a parameter list written to
// `out`; the meaning of the bool result is not visible here -- confirm.
441  bool get_header_as_plist(std::string const & key, parameter_list & out)
442  const;
443 
445 
// Header mutators; definitions not visible here. Names suggest append adds
// to an existing value, replace overwrites it, remove deletes it -- confirm.
459  void append_header(std::string const & key, std::string const & val);
460 
462 
476  void replace_header(std::string const & key, std::string const & val);
477 
479 
487  void remove_header(std::string const & key);
488 
490 
// Get HTTP body (returns a reference to m_body).
495  std::string const & get_body() const {
496  return m_body;
497  }
498 
500 
// Set the body. Definition not visible here; it may also update headers
// such as Content-Length -- TODO confirm.
508  void set_body(std::string const & value);
509 
511 
// Get body size limit (returns m_body_bytes_max).
519  size_t get_max_body_size() const {
520  return m_body_bytes_max;
521  }
522 
524 
// Set body size limit (assigns m_body_bytes_max).
532  void set_max_body_size(size_t value) {
533  m_body_bytes_max = value;
534  }
535 
537 
// Presumably parses `in` as a parameter list into `out`; the meaning of the
// bool result is not visible here -- confirm.
542  bool parse_parameter_list(std::string const & in, parameter_list & out)
543  const;
544 protected:
546 
// Process one header line given as an iterator range. Definition not
// visible here.
552  void process_header(std::string::iterator begin, std::string::iterator end);
553 
555 
// Presumably sets up body reading state after headers are complete; the
// meaning of the bool result is not visible here -- confirm.
567  bool prepare_body();
568 
570 
// Process `len` bytes of body data from `buf`. Presumably returns the number
// of bytes consumed -- confirm.
579  size_t process_body(char const * buf, size_t len);
580 
582 
// Check if the parser is done parsing the body: true exactly when
// m_body_bytes_needed is 0.
589  bool body_ready() const {
590  return (m_body_bytes_needed == 0);
591  }
592 
594 
// Presumably serializes the stored headers to a string -- confirm. Definition
// not visible here.
600  std::string raw_headers() const;
601 
// Version string returned by get_version().
602  std::string m_version;
// Header storage; header_list is a std::map ordered by utility::ci_less.
603  header_list m_headers;
604 
606 
// Body returned by get_body().
607  std::string m_body;
// NOTE(review): m_header_bytes, m_body_bytes_needed, m_body_bytes_max and
// m_body_encoding are used above but their declarations are missing from
// this listing.
611 };
612 
613 } // namespace parser
614 } // namespace http
615 } // namespace websocketpp
616 
618 
619 #endif // HTTP_PARSER_HPP
size_t get_max_body_size() const
Get body size limit.
Definition: parser.hpp:519
bool is_not_whitespace_char(unsigned char c)
Is the character non-whitespace.
Definition: constants.hpp:116
std::string const & get_body() const
Get HTTP body.
Definition: parser.hpp:495
std::vector< std::pair< std::string, attribute_list > > parameter_list
The type of an HTTP parameter list.
Definition: constants.hpp:53
InputIterator extract_lws(InputIterator begin, InputIterator end)
Read and discard one unit of linear whitespace.
Definition: parser.hpp:139
bool is_not_token_char(unsigned char c)
Is the character a non-token character.
Definition: constants.hpp:103
std::map< std::string, std::string, utility::ci_less > header_list
Definition: parser.hpp:60
Table::const_iterator iterator
Definition: cs-internal.hpp:41
std::string const & get_version() const
Get the HTTP version string.
Definition: parser.hpp:410
InputIterator extract_parameters(InputIterator begin, InputIterator end, parameter_list &parameters)
Extract HTTP parameters.
Definition: parser.hpp:293
InputIterator extract_attributes(InputIterator begin, InputIterator end, attribute_list &attributes)
Extract HTTP attributes.
Definition: parser.hpp:195
std::pair< std::string, InputIterator > extract_quoted_string(InputIterator begin, InputIterator end)
Read and return the next quoted string in the stream.
Definition: parser.hpp:92
Namespace for the WebSocket++ project.
Definition: base64.hpp:41
std::map< std::string, std::string > attribute_list
The type of an HTTP attribute list.
Definition: constants.hpp:45
std::string strip_lws(std::string const &input)
Definition: parser.hpp:379
bool body_ready() const
Check if the parser is done parsing the body.
Definition: parser.hpp:589
body_encoding::value m_body_encoding
Definition: parser.hpp:610
InputIterator extract_all_lws(InputIterator begin, InputIterator end)
Read and discard linear whitespace.
Definition: parser.hpp:164
std::pair< std::string, InputIterator > extract_token(InputIterator begin, InputIterator end)
Read and return the next token in the stream.
Definition: parser.hpp:73
bool is_whitespace_char(unsigned char c)
Is the character whitespace.
Definition: constants.hpp:111
size_t const max_body_size
Default Maximum size in bytes for HTTP message bodies.
Definition: constants.hpp:68
void set_max_body_size(size_t value)
Set body size limit.
Definition: parser.hpp:532