Line data Source code
1 : /*
2 : START OF LICENSE STUB
3 : DeDOS: Declarative Dispersion-Oriented Software
4 : Copyright (C) 2017 University of Pennsylvania, Georgetown University
5 :
6 : This program is free software: you can redistribute it and/or modify
7 : it under the terms of the GNU General Public License as published by
8 : the Free Software Foundation, either version 3 of the License, or
9 : (at your option) any later version.
10 :
11 : This program is distributed in the hope that it will be useful,
12 : but WITHOUT ANY WARRANTY; without even the implied warranty of
13 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 : GNU General Public License for more details.
15 :
16 : You should have received a copy of the GNU General Public License
17 : along with this program. If not, see <http://www.gnu.org/licenses/>.
18 : END OF LICENSE STUB
19 : */
20 : /* Based on src/http/ngx_http_parse.c from NGINX copyright Igor Sysoev
21 : *
22 : * Additional changes are licensed under the same terms as NGINX and
23 : * copyright Joyent, Inc. and other Node contributors. All rights reserved.
24 : *
25 : * Permission is hereby granted, free of charge, to any person obtaining a copy
26 : * of this software and associated documentation files (the "Software"), to
27 : * deal in the Software without restriction, including without limitation the
28 : * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
29 : * sell copies of the Software, and to permit persons to whom the Software is
30 : * furnished to do so, subject to the following conditions:
31 : *
32 : * The above copyright notice and this permission notice shall be included in
33 : * all copies or substantial portions of the Software.
34 : *
35 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
36 : * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
37 : * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
38 : * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
39 : * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
40 : * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
41 : * IN THE SOFTWARE.
42 : */
43 : #include "webserver/http_parser.h"
44 : #include <assert.h>
45 : #include <stddef.h>
46 : #include <ctype.h>
47 : #include <stdlib.h>
48 : #include <string.h>
49 : #include <limits.h>
50 :
/* Fallback for toolchains whose <limits.h> does not define ULLONG_MAX. */
#ifndef ULLONG_MAX
# define ULLONG_MAX ((uint64_t) -1) /* 2^64-1 */
#endif

/* Smaller of two values. The chosen argument is evaluated twice, so do not
 * pass expressions with side effects.
 */
#ifndef MIN
# define MIN(a,b) ((a) < (b) ? (a) : (b))
#endif

/* Number of elements in a true array (not a pointer). */
#ifndef ARRAY_SIZE
# define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
#endif

/* Test bit `i` of the byte array `a`: the bit lives in byte (i >> 3) at
 * position (i & 7). Evaluates to 0 or 1.
 */
#ifndef BIT_AT
# define BIT_AT(a, i) \
  (!!((unsigned int) (a)[(unsigned int) (i) >> 3] & \
   (1 << ((unsigned int) (i) & 7))))
#endif

/* Bounds-checked lookup: yields a[i] when `i` is a valid index of the true
 * array `a`, otherwise the fallback value `v`.
 */
#ifndef ELEM_AT
# define ELEM_AT(a, i, v) ((unsigned int) (i) < ARRAY_SIZE(a) ? (a)[(i)] : (v))
#endif
72 :
/* Record error code `e` on the parser. Expects a variable named `parser`
 * to be in scope at the expansion site.
 */
#define SET_ERRNO(e) \
do { \
  parser->http_errno = (e); \
} while(0)

/* The state machine works on a local copy of the state (`p_state`) instead
 * of parser->state; these macros read it, write it, and publish it back.
 *
 * NOTE(review): UPDATE_STATE and RETURN expand with their own trailing
 * semicolon, and REEXECUTE ends in a line continuation that swallows the
 * following (blank) line. They are therefore not safe as the sole body of
 * an unbraced `if` that is followed by `else`.
 */
#define CURRENT_STATE() p_state
#define UPDATE_STATE(V) p_state = (enum state) (V);
/* Store the local state into the parser and return V from the enclosing
 * function.
 */
#define RETURN(V) \
do { \
  parser->state = CURRENT_STATE(); \
  return (V); \
} while (0);
/* Jump to the `reexecute` label so the current byte is dispatched again
 * under the (possibly updated) state.
 */
#define REEXECUTE() \
  goto reexecute; \


/* Branch-prediction hints; plain pass-through when not building with a
 * GNU-compatible compiler.
 */
#ifdef __GNUC__
# define LIKELY(X) __builtin_expect(!!(X), 1)
# define UNLIKELY(X) __builtin_expect(!!(X), 0)
#else
# define LIKELY(X) (X)
# define UNLIKELY(X) (X)
#endif
96 :
97 :
/* Run the notify callback FOR, returning ER if it fails.
 *
 * Expects `parser`, `settings` and `p_state` in scope. If settings->on_FOR
 * is set, the local state is first published to parser->state, the callback
 * is invoked, and a non-zero result sets HPE_CB_FOR. The local state is then
 * reloaded from parser->state, so a change made to parser->state inside the
 * callback is picked up. If the parser errno is no longer HPE_OK afterwards,
 * the enclosing function returns ER.
 */
#define CALLBACK_NOTIFY_(FOR, ER) \
do { \
  assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \
 \
  if (LIKELY(settings->on_##FOR)) { \
    parser->state = CURRENT_STATE(); \
    if (UNLIKELY(0 != settings->on_##FOR(parser))) { \
      SET_ERRNO(HPE_CB_##FOR); \
    } \
    UPDATE_STATE(parser->state); \
 \
    /* We either errored above or got paused; get out */ \
    if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) { \
      return (ER); \
    } \
  } \
} while (0)

/* Run the notify callback FOR and consume the current byte
 * (on failure the reported byte count includes the byte at `p`).
 */
#define CALLBACK_NOTIFY(FOR) CALLBACK_NOTIFY_(FOR, p - data + 1)

/* Run the notify callback FOR and don't consume the current byte
 * (on failure the reported byte count stops just before `p`).
 */
#define CALLBACK_NOTIFY_NOADVANCE(FOR) CALLBACK_NOTIFY_(FOR, p - data)

/* Run data callback FOR with LEN bytes, returning ER if it fails.
 *
 * Does nothing unless FOR_mark is non-NULL. The callback receives the mark
 * pointer and LEN. On success (or when no callback is installed) the mark is
 * reset to NULL; on the early-return error path the mark is left as it was.
 */
#define CALLBACK_DATA_(FOR, LEN, ER) \
do { \
  assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \
 \
  if (FOR##_mark) { \
    if (LIKELY(settings->on_##FOR)) { \
      parser->state = CURRENT_STATE(); \
      if (UNLIKELY(0 != \
                   settings->on_##FOR(parser, FOR##_mark, (LEN)))) { \
        SET_ERRNO(HPE_CB_##FOR); \
      } \
      UPDATE_STATE(parser->state); \
 \
      /* We either errored above or got paused; get out */ \
      if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) { \
        return (ER); \
      } \
    } \
    FOR##_mark = NULL; \
  } \
} while (0)

/* Run the data callback FOR over [FOR_mark, p) and consume the current byte */
#define CALLBACK_DATA(FOR) \
    CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1)

/* Run the data callback FOR over [FOR_mark, p) and don't consume the
 * current byte
 */
#define CALLBACK_DATA_NOADVANCE(FOR) \
    CALLBACK_DATA_(FOR, p - FOR##_mark, p - data)

/* Set the mark FOR to the current position `p`; non-destructive if the mark
 * is already set
 */
#define MARK(FOR) \
do { \
  if (!FOR##_mark) { \
    FOR##_mark = p; \
  } \
} while (0)
161 :
/* Don't allow the total size of the HTTP headers (including the status
 * line) to exceed HTTP_MAX_HEADER_SIZE.  This check is here to protect
 * embedders against denial-of-service attacks where the attacker feeds
 * us a never-ending header that the embedder keeps buffering.
 *
 * This check is arguably the responsibility of embedders but we're doing
 * it on the embedder's behalf because most won't bother and this way we
 * make the web a little safer.  HTTP_MAX_HEADER_SIZE is still far bigger
 * than any reasonable request or response so this should never affect
 * day-to-day operation.
 *
 * Mechanics: adds V to parser->nread; once the running total exceeds the
 * limit, sets HPE_HEADER_OVERFLOW and jumps to the `error` label, which the
 * expansion site must therefore provide.
 */
#define COUNT_HEADER_SIZE(V) \
do { \
  parser->nread += (V); \
  if (UNLIKELY(parser->nread > (HTTP_MAX_HEADER_SIZE))) { \
    SET_ERRNO(HPE_HEADER_OVERFLOW); \
    goto error; \
  } \
} while (0)
181 :
182 :
/* Lower-case spellings of header names and header values, kept as string
 * literals.
 * NOTE(review): presumably compared against lower-cased input by the
 * header-matching states (h_matching_*); the matching code is outside this
 * view -- confirm.
 */
#define PROXY_CONNECTION "proxy-connection"
#define CONNECTION "connection"
#define CONTENT_LENGTH "content-length"
#define TRANSFER_ENCODING "transfer-encoding"
#define UPGRADE "upgrade"
#define CHUNKED "chunked"
#define KEEP_ALIVE "keep-alive"
#define CLOSE "close"
191 :
192 :
193 : static const char *method_strings[] =
194 : {
195 : #define XX(num, name, string) #string,
196 : HTTP_METHOD_MAP(XX)
197 : #undef XX
198 : };
199 :
200 :
/* Tokens as defined by rfc 2616. Also lowercases them.
 *        token       = 1*<any CHAR except CTLs or separators>
 *     separators     = "(" | ")" | "<" | ">" | "@"
 *                    | "," | ";" | ":" | "\" | <">
 *                    | "/" | "[" | "]" | "?" | "="
 *                    | "{" | "}" | SP | HT
 *
 * Lookup table indexed by unsigned byte value. A non-zero entry is the
 * lower-cased form of a valid token character; 0 means the byte is not a
 * token character. Only the first 128 entries are spelled out; entries
 * 128..255 have no initializer and are therefore 0 as well.
 */
static const char tokens[256] = {
/*   0 nul    1 soh    2 stx    3 etx    4 eot    5 enq    6 ack    7 bel  */
        0,       0,       0,       0,       0,       0,       0,       0,
/*   8 bs     9 ht    10 nl    11 vt    12 np    13 cr    14 so    15 si   */
        0,       0,       0,       0,       0,       0,       0,       0,
/*  16 dle   17 dc1   18 dc2   19 dc3   20 dc4   21 nak   22 syn   23 etb */
        0,       0,       0,       0,       0,       0,       0,       0,
/*  24 can   25 em    26 sub   27 esc   28 fs    29 gs    30 rs    31 us  */
        0,       0,       0,       0,       0,       0,       0,       0,
/*  32 sp    33  !    34  "    35  #    36  $    37  %    38  &    39  '  */
        0,      '!',      0,      '#',     '$',     '%',     '&',    '\'',
/*  40  (    41  )    42  *    43  +    44  ,    45  -    46  .    47  /  */
        0,       0,      '*',     '+',      0,      '-',     '.',      0,
/*  48  0    49  1    50  2    51  3    52  4    53  5    54  6    55  7  */
       '0',     '1',     '2',     '3',     '4',     '5',     '6',     '7',
/*  56  8    57  9    58  :    59  ;    60  <    61  =    62  >    63  ?  */
       '8',     '9',      0,       0,       0,       0,       0,       0,
/*  64  @    65  A    66  B    67  C    68  D    69  E    70  F    71  G  */
        0,      'a',     'b',     'c',     'd',     'e',     'f',     'g',
/*  72  H    73  I    74  J    75  K    76  L    77  M    78  N    79  O  */
       'h',     'i',     'j',     'k',     'l',     'm',     'n',     'o',
/*  80  P    81  Q    82  R    83  S    84  T    85  U    86  V    87  W  */
       'p',     'q',     'r',     's',     't',     'u',     'v',     'w',
/*  88  X    89  Y    90  Z    91  [    92  \    93  ]    94  ^    95  _  */
       'x',     'y',     'z',      0,       0,       0,      '^',     '_',
/*  96  `    97  a    98  b    99  c   100  d   101  e   102  f   103  g  */
       '`',     'a',     'b',     'c',     'd',     'e',     'f',     'g',
/* 104  h   105  i   106  j   107  k   108  l   109  m   110  n   111  o  */
       'h',     'i',     'j',     'k',     'l',     'm',     'n',     'o',
/* 112  p   113  q   114  r   115  s   116  t   117  u   118  v   119  w  */
       'p',     'q',     'r',     's',     't',     'u',     'v',     'w',
/* 120  x   121  y   122  z   123  {   124  |   125  }   126  ~   127 del */
       'x',     'y',     'z',      0,      '|',      0,      '~',       0 };
241 :
242 :
/* Value of a hexadecimal digit, indexed by unsigned byte: '0'-'9' map to
 * 0-9, 'A'-'F' and 'a'-'f' map to 10-15, and every other byte maps to -1.
 *
 * All 256 entries are spelled out on purpose. With only the ASCII half
 * initialised, bytes 0x80-0xFF would default to 0 and be indistinguishable
 * from the digit '0' for any caller that tests the result against -1.
 */
static const int8_t unhex[256] =
  {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1
  ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  /* 0x80-0xFF: never hexadecimal digits */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  };
253 :
254 :
/* T(v) marks bits that are only set in non-strict builds: it yields v when
 * HTTP_PARSER_STRICT is off and 0 when it is on. Below it is applied to
 * horizontal tab (9) and form feed (12).
 */
#if HTTP_PARSER_STRICT
# define T(v) 0
#else
# define T(v) v
#endif


/* Bitmap of bytes accepted as ordinary URL characters, laid out for BIT_AT:
 * bit (c & 7) of byte (c >> 3) describes character c, so each initializer
 * row below covers eight consecutive characters. Cleared bits in the ASCII
 * range: the control characters (except as noted for T), space, '#', '?'
 * and DEL. Only 16 of the 32 bytes are initialised; the remaining bytes
 * (characters 128..255) are zero.
 */
static const uint8_t normal_url_char[32] = {
/*   0 nul    1 soh    2 stx    3 etx    4 eot    5 enq    6 ack    7 bel  */
        0    |   0    |   0    |   0    |   0    |   0    |   0    |   0,
/*   8 bs     9 ht    10 nl    11 vt    12 np    13 cr    14 so    15 si   */
        0    | T(2)   |   0    |   0    | T(16)  |   0    |   0    |   0,
/*  16 dle   17 dc1   18 dc2   19 dc3   20 dc4   21 nak   22 syn   23 etb */
        0    |   0    |   0    |   0    |   0    |   0    |   0    |   0,
/*  24 can   25 em    26 sub   27 esc   28 fs    29 gs    30 rs    31 us  */
        0    |   0    |   0    |   0    |   0    |   0    |   0    |   0,
/*  32 sp    33  !    34  "    35  #    36  $    37  %    38  &    39  '  */
        0    |   2    |   4    |   0    |   16   |   32   |   64   |  128,
/*  40  (    41  )    42  *    43  +    44  ,    45  -    46  .    47  /  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/*  48  0    49  1    50  2    51  3    52  4    53  5    54  6    55  7  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/*  56  8    57  9    58  :    59  ;    60  <    61  =    62  >    63  ?  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |   0,
/*  64  @    65  A    66  B    67  C    68  D    69  E    70  F    71  G  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/*  72  H    73  I    74  J    75  K    76  L    77  M    78  N    79  O  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/*  80  P    81  Q    82  R    83  S    84  T    85  U    86  V    87  W  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/*  88  X    89  Y    90  Z    91  [    92  \    93  ]    94  ^    95  _  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/*  96  `    97  a    98  b    99  c   100  d   101  e   102  f   103  g  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/* 104  h   105  i   106  j   107  k   108  l   109  m   110  n   111  o  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/* 112  p   113  q   114  r   115  s   116  t   117  u   118  v   119  w  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/* 120  x   121  y   122  z   123  {   124  |   125  }   126  ~   127 del */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |   0, };

#undef T
297 :
/* States of the request/response parser.
 *
 * The numeric order is significant: every state up to and including
 * s_headers_done counts as a "header" state for PARSING_HEADER(), and every
 * state after it is a "body" state. Do not reorder without checking that
 * macro and its users.
 */
enum state
  { s_dead = 1 /* important that this is > 0 */

  /* response status line */
  , s_start_req_or_res
  , s_res_or_resp_H
  , s_start_res
  , s_res_H
  , s_res_HT
  , s_res_HTT
  , s_res_HTTP
  , s_res_http_major
  , s_res_http_dot
  , s_res_http_minor
  , s_res_http_end
  , s_res_first_status_code
  , s_res_status_code
  , s_res_status_start
  , s_res_status
  , s_res_line_almost_done

  /* request line */
  , s_start_req

  , s_req_method
  , s_req_spaces_before_url
  , s_req_schema
  , s_req_schema_slash
  , s_req_schema_slash_slash
  , s_req_server_start
  , s_req_server
  , s_req_server_with_at
  , s_req_path
  , s_req_query_string_start
  , s_req_query_string
  , s_req_fragment_start
  , s_req_fragment
  , s_req_http_start
  , s_req_http_H
  , s_req_http_HT
  , s_req_http_HTT
  , s_req_http_HTTP
  , s_req_http_major
  , s_req_http_dot
  , s_req_http_minor
  , s_req_http_end
  , s_req_line_almost_done

  /* header fields and values */
  , s_header_field_start
  , s_header_field
  , s_header_value_discard_ws
  , s_header_value_discard_ws_almost_done
  , s_header_value_discard_lws
  , s_header_value_start
  , s_header_value
  , s_header_value_lws

  , s_header_almost_done

  /* chunk-size line; deliberately placed before s_headers_done so that it
   * falls inside the PARSING_HEADER() range
   */
  , s_chunk_size_start
  , s_chunk_size
  , s_chunk_parameters
  , s_chunk_size_almost_done

  , s_headers_almost_done
  , s_headers_done

  /* Important: 's_headers_done' must be the last 'header' state. All
   * states beyond this must be 'body' states. It is used for overflow
   * checking. See the PARSING_HEADER() macro.
   */

  , s_chunk_data
  , s_chunk_data_almost_done
  , s_chunk_data_done

  , s_body_identity
  , s_body_identity_eof

  , s_message_done
  };


/* Non-zero while `state` is one of the header states (<= s_headers_done).
 * The argument is parenthesized so that expression arguments such as a
 * conditional are compared as a whole.
 */
#define PARSING_HEADER(state) ((state) <= s_headers_done)
380 :
381 :
/* States used while recognising header names and header values.
 * Numbering starts at 0 with h_general and increases by one per entry.
 */
enum header_states {
  h_general = 0,

  /* leading characters shared by several header names */
  h_C,
  h_CO,
  h_CON,

  /* in the middle of matching a header name */
  h_matching_connection,
  h_matching_proxy_connection,
  h_matching_content_length,
  h_matching_transfer_encoding,
  h_matching_upgrade,

  /* header name fully matched */
  h_connection,
  h_content_length,
  h_transfer_encoding,
  h_upgrade,

  /* in the middle of matching a header value */
  h_matching_transfer_encoding_chunked,
  h_matching_connection_token_start,
  h_matching_connection_keep_alive,
  h_matching_connection_close,
  h_matching_connection_upgrade,
  h_matching_connection_token,

  /* header value fully matched */
  h_transfer_encoding_chunked,
  h_connection_keep_alive,
  h_connection_close,
  h_connection_upgrade
};
411 :
/* States for walking the authority part of a URL (userinfo, host, IPv6
 * literal with optional zone, port). Numbering starts at 1.
 */
enum http_host_state {
  s_http_host_dead = 1,
  s_http_userinfo_start,
  s_http_userinfo,
  s_http_host_start,
  s_http_host_v6_start,
  s_http_host,
  s_http_host_v6,
  s_http_host_v6_end,
  s_http_host_v6_zone_start,
  s_http_host_v6_zone,
  s_http_host_port_start,
  s_http_host_port
};
427 :
/* Macros for character classes; depends on strict-mode.
 *
 * Every macro parameter is parenthesized at each use so that expression
 * arguments (conditionals, arithmetic) are classified as a whole. Several
 * macros still evaluate their argument more than once, so do not pass
 * expressions with side effects.
 */
#define CR                  '\r'
#define LF                  '\n'
/* ASCII lower-casing by setting bit 0x20; only meaningful for letters. */
#define LOWER(c)            ((unsigned char)((c) | 0x20))
#define IS_ALPHA(c)         (LOWER(c) >= 'a' && LOWER(c) <= 'z')
#define IS_NUM(c)           ((c) >= '0' && (c) <= '9')
#define IS_ALPHANUM(c)      (IS_ALPHA(c) || IS_NUM(c))
#define IS_HEX(c)           (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
#define IS_MARK(c)          ((c) == '-' || (c) == '_' || (c) == '.' || \
  (c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \
  (c) == ')')
#define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \
  (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
  (c) == '$' || (c) == ',')

/* Table lookup only: never treats ' ' as a token character. */
#define STRICT_TOKEN(c)     (tokens[(unsigned char)(c)])

#if HTTP_PARSER_STRICT
#define TOKEN(c)            (tokens[(unsigned char)(c)])
#define IS_URL_CHAR(c)      (BIT_AT(normal_url_char, (unsigned char)(c)))
#define IS_HOST_CHAR(c)     (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
#else
/* Non-strict mode additionally accepts ' ' in tokens, bytes with the high
 * bit set in URLs, and '_' in host names.
 */
#define TOKEN(c)            (((c) == ' ') ? ' ' : tokens[(unsigned char)(c)])
#define IS_URL_CHAR(c) \
  (BIT_AT(normal_url_char, (unsigned char)(c)) || ((c) & 0x80))
#define IS_HOST_CHAR(c) \
  (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
#endif

/**
 * Verify that a char is a valid visible (printable) US-ASCII
 * character or %x80-FF. CR, LF and horizontal tab (9) are accepted as well;
 * DEL (127) and the remaining control characters are rejected.
 **/
#define IS_HEADER_CHAR(ch) \
  ((ch) == CR || (ch) == LF || (ch) == 9 || \
   ((unsigned char)(ch) > 31 && (ch) != 127))
463 :
/* Initial state for the next message, chosen from parser->type: requests
 * start in s_start_req, everything else in s_start_res. Expects `parser`
 * in scope.
 */
#define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res)


/* STRICT_CHECK(cond): in strict builds, when `cond` is true, set HPE_STRICT
 * and jump to the `error` label; in non-strict builds it expands to nothing,
 * so `cond` is not evaluated at all.
 *
 * NEW_MESSAGE(): state to enter once a message is finished. Strict builds
 * go to s_dead unless http_should_keep_alive(parser) is true; non-strict
 * builds always restart at start_state.
 */
#if HTTP_PARSER_STRICT
# define STRICT_CHECK(cond) \
do { \
  if (cond) { \
    SET_ERRNO(HPE_STRICT); \
    goto error; \
  } \
} while (0)
# define NEW_MESSAGE() (http_should_keep_alive(parser) ? start_state : s_dead)
#else
# define STRICT_CHECK(cond)
# define NEW_MESSAGE() start_state
#endif
480 :
481 :
482 : /* Map errno values to strings for human-readable output */
483 : #define HTTP_STRERROR_GEN(n, s) { "HPE_" #n, s },
484 : static struct {
485 : const char *name;
486 : const char *description;
487 : } http_strerror_tab[] = {
488 : HTTP_ERRNO_MAP(HTTP_STRERROR_GEN)
489 : };
490 : #undef HTTP_STRERROR_GEN
491 :
492 : int http_message_needs_eof(const http_parser *parser);
493 :
494 : /* Our URL parser.
495 : *
496 : * This is designed to be shared by http_parser_execute() for URL validation,
497 : * hence it has a state transition + byte-for-byte interface. In addition, it
498 : * is meant to be embedded in http_parser_parse_url(), which does the dirty
499 : * work of turning state transitions URL components for its API.
500 : *
501 : * This function should only be invoked with non-space characters. It is
502 : * assumed that the caller cares about (and can detect) the transition between
503 : * URL and non-URL states by looking for these.
504 : */
505 : static enum state
506 1674 : parse_url_char(enum state s, const char ch)
507 : {
508 1674 : if (ch == ' ' || ch == '\r' || ch == '\n') {
509 0 : return s_dead;
510 : }
511 :
512 : #if HTTP_PARSER_STRICT
513 : if (ch == '\t' || ch == '\f') {
514 : return s_dead;
515 : }
516 : #endif
517 :
518 1674 : switch (s) {
519 : case s_req_spaces_before_url:
520 : /* Proxied requests are followed by scheme of an absolute URI (alpha).
521 : * All methods except CONNECT are followed by '/' or '*'.
522 : */
523 :
524 144 : if (ch == '/' || ch == '*') {
525 144 : return s_req_path;
526 : }
527 :
528 0 : if (IS_ALPHA(ch)) {
529 0 : return s_req_schema;
530 : }
531 :
532 0 : break;
533 :
534 : case s_req_schema:
535 0 : if (IS_ALPHA(ch)) {
536 0 : return s;
537 : }
538 :
539 0 : if (ch == ':') {
540 0 : return s_req_schema_slash;
541 : }
542 :
543 0 : break;
544 :
545 : case s_req_schema_slash:
546 0 : if (ch == '/') {
547 0 : return s_req_schema_slash_slash;
548 : }
549 :
550 0 : break;
551 :
552 : case s_req_schema_slash_slash:
553 0 : if (ch == '/') {
554 0 : return s_req_server_start;
555 : }
556 :
557 0 : break;
558 :
559 : case s_req_server_with_at:
560 0 : if (ch == '@') {
561 0 : return s_dead;
562 : }
563 :
564 : /* FALLTHROUGH */
565 : case s_req_server_start:
566 : case s_req_server:
567 0 : if (ch == '/') {
568 0 : return s_req_path;
569 : }
570 :
571 0 : if (ch == '?') {
572 0 : return s_req_query_string_start;
573 : }
574 :
575 0 : if (ch == '@') {
576 0 : return s_req_server_with_at;
577 : }
578 :
579 0 : if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') {
580 0 : return s_req_server;
581 : }
582 :
583 0 : break;
584 :
585 : case s_req_path:
586 1512 : if (IS_URL_CHAR(ch)) {
587 1510 : return s;
588 : }
589 :
590 2 : switch (ch) {
591 : case '?':
592 2 : return s_req_query_string_start;
593 :
594 : case '#':
595 0 : return s_req_fragment_start;
596 : }
597 :
598 0 : break;
599 :
600 : case s_req_query_string_start:
601 : case s_req_query_string:
602 18 : if (IS_URL_CHAR(ch)) {
603 18 : return s_req_query_string;
604 : }
605 :
606 0 : switch (ch) {
607 : case '?':
608 : /* allow extra '?' in query string */
609 0 : return s_req_query_string;
610 :
611 : case '#':
612 0 : return s_req_fragment_start;
613 : }
614 :
615 0 : break;
616 :
617 : case s_req_fragment_start:
618 0 : if (IS_URL_CHAR(ch)) {
619 0 : return s_req_fragment;
620 : }
621 :
622 0 : switch (ch) {
623 : case '?':
624 0 : return s_req_fragment;
625 :
626 : case '#':
627 0 : return s;
628 : }
629 :
630 0 : break;
631 :
632 : case s_req_fragment:
633 0 : if (IS_URL_CHAR(ch)) {
634 0 : return s;
635 : }
636 :
637 0 : switch (ch) {
638 : case '?':
639 : case '#':
640 0 : return s;
641 : }
642 :
643 0 : break;
644 :
645 : default:
646 0 : break;
647 : }
648 :
649 : /* We should never fall out of the switch above unless there's an error */
650 0 : return s_dead;
651 : }
652 :
653 144 : size_t http_parser_execute (http_parser *parser,
654 : const http_parser_settings *settings,
655 : const char *data,
656 : size_t len)
657 : {
658 : char c, ch;
659 : int8_t unhex_val;
660 144 : const char *p = data;
661 144 : const char *header_field_mark = 0;
662 144 : const char *header_value_mark = 0;
663 144 : const char *url_mark = 0;
664 144 : const char *body_mark = 0;
665 144 : const char *status_mark = 0;
666 144 : enum state p_state = (enum state) parser->state;
667 144 : const unsigned int lenient = parser->lenient_http_headers;
668 :
669 : /* We're in an error state. Don't bother doing anything. */
670 144 : if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
671 0 : return 0;
672 : }
673 :
674 144 : if (len == 0) {
675 0 : switch (CURRENT_STATE()) {
676 : case s_body_identity_eof:
677 : /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if
678 : * we got paused.
679 : */
680 0 : CALLBACK_NOTIFY_NOADVANCE(message_complete);
681 0 : return 0;
682 :
683 : case s_dead:
684 : case s_start_req_or_res:
685 : case s_start_res:
686 : case s_start_req:
687 0 : return 0;
688 :
689 : default:
690 0 : SET_ERRNO(HPE_INVALID_EOF_STATE);
691 0 : return 1;
692 : }
693 : }
694 :
695 :
696 144 : if (CURRENT_STATE() == s_header_field)
697 0 : header_field_mark = data;
698 144 : if (CURRENT_STATE() == s_header_value)
699 0 : header_value_mark = data;
700 144 : switch (CURRENT_STATE()) {
701 : case s_req_path:
702 : case s_req_schema:
703 : case s_req_schema_slash:
704 : case s_req_schema_slash_slash:
705 : case s_req_server_start:
706 : case s_req_server:
707 : case s_req_server_with_at:
708 : case s_req_query_string_start:
709 : case s_req_query_string:
710 : case s_req_fragment_start:
711 : case s_req_fragment:
712 0 : url_mark = data;
713 0 : break;
714 : case s_res_status:
715 0 : status_mark = data;
716 0 : break;
717 : default:
718 144 : break;
719 : }
720 :
721 6827 : for (p=data; p != data + len; p++) {
722 6684 : ch = *p;
723 :
724 6684 : if (PARSING_HEADER(CURRENT_STATE()))
725 6684 : COUNT_HEADER_SIZE(1);
726 :
727 : reexecute:
728 7250 : switch (CURRENT_STATE()) {
729 :
730 : case s_dead:
731 : /* this state is used after a 'Connection: close' message
732 : * the parser will error out if it reads another message
733 : */
734 0 : if (LIKELY(ch == CR || ch == LF))
735 : break;
736 :
737 0 : SET_ERRNO(HPE_CLOSED_CONNECTION);
738 0 : goto error;
739 :
740 : case s_start_req_or_res:
741 : {
742 0 : if (ch == CR || ch == LF)
743 : break;
744 0 : parser->flags = 0;
745 0 : parser->content_length = ULLONG_MAX;
746 :
747 0 : if (ch == 'H') {
748 0 : UPDATE_STATE(s_res_or_resp_H);
749 :
750 0 : CALLBACK_NOTIFY(message_begin);
751 : } else {
752 0 : parser->type = HTTP_REQUEST;
753 0 : UPDATE_STATE(s_start_req);
754 0 : REEXECUTE();
755 : }
756 :
757 0 : break;
758 : }
759 :
760 : case s_res_or_resp_H:
761 0 : if (ch == 'T') {
762 0 : parser->type = HTTP_RESPONSE;
763 0 : UPDATE_STATE(s_res_HT);
764 : } else {
765 0 : if (UNLIKELY(ch != 'E')) {
766 0 : SET_ERRNO(HPE_INVALID_CONSTANT);
767 0 : goto error;
768 : }
769 :
770 0 : parser->type = HTTP_REQUEST;
771 0 : parser->method = HTTP_HEAD;
772 0 : parser->index = 2;
773 0 : UPDATE_STATE(s_req_method);
774 : }
775 0 : break;
776 :
777 : case s_start_res:
778 : {
779 0 : parser->flags = 0;
780 0 : parser->content_length = ULLONG_MAX;
781 :
782 0 : switch (ch) {
783 : case 'H':
784 0 : UPDATE_STATE(s_res_H);
785 0 : break;
786 :
787 : case CR:
788 : case LF:
789 0 : break;
790 :
791 : default:
792 0 : SET_ERRNO(HPE_INVALID_CONSTANT);
793 0 : goto error;
794 : }
795 :
796 0 : CALLBACK_NOTIFY(message_begin);
797 0 : break;
798 : }
799 :
800 : case s_res_H:
801 : STRICT_CHECK(ch != 'T');
802 0 : UPDATE_STATE(s_res_HT);
803 0 : break;
804 :
805 : case s_res_HT:
806 : STRICT_CHECK(ch != 'T');
807 0 : UPDATE_STATE(s_res_HTT);
808 0 : break;
809 :
810 : case s_res_HTT:
811 : STRICT_CHECK(ch != 'P');
812 0 : UPDATE_STATE(s_res_HTTP);
813 0 : break;
814 :
815 : case s_res_HTTP:
816 : STRICT_CHECK(ch != '/');
817 0 : UPDATE_STATE(s_res_http_major);
818 0 : break;
819 :
820 : case s_res_http_major:
821 0 : if (UNLIKELY(!IS_NUM(ch))) {
822 0 : SET_ERRNO(HPE_INVALID_VERSION);
823 0 : goto error;
824 : }
825 :
826 0 : parser->http_major = ch - '0';
827 0 : UPDATE_STATE(s_res_http_dot);
828 0 : break;
829 :
830 : case s_res_http_dot:
831 : {
832 0 : if (UNLIKELY(ch != '.')) {
833 0 : SET_ERRNO(HPE_INVALID_VERSION);
834 0 : goto error;
835 : }
836 :
837 0 : UPDATE_STATE(s_res_http_minor);
838 0 : break;
839 : }
840 :
841 : case s_res_http_minor:
842 0 : if (UNLIKELY(!IS_NUM(ch))) {
843 0 : SET_ERRNO(HPE_INVALID_VERSION);
844 0 : goto error;
845 : }
846 :
847 0 : parser->http_minor = ch - '0';
848 0 : UPDATE_STATE(s_res_http_end);
849 0 : break;
850 :
851 : case s_res_http_end:
852 : {
853 0 : if (UNLIKELY(ch != ' ')) {
854 0 : SET_ERRNO(HPE_INVALID_VERSION);
855 0 : goto error;
856 : }
857 :
858 0 : UPDATE_STATE(s_res_first_status_code);
859 0 : break;
860 : }
861 :
862 : case s_res_first_status_code:
863 : {
864 0 : if (!IS_NUM(ch)) {
865 0 : if (ch == ' ') {
866 0 : break;
867 : }
868 :
869 0 : SET_ERRNO(HPE_INVALID_STATUS);
870 0 : goto error;
871 : }
872 0 : parser->status_code = ch - '0';
873 0 : UPDATE_STATE(s_res_status_code);
874 0 : break;
875 : }
876 :
877 : case s_res_status_code:
878 : {
879 0 : if (!IS_NUM(ch)) {
880 0 : switch (ch) {
881 : case ' ':
882 0 : UPDATE_STATE(s_res_status_start);
883 0 : break;
884 : case CR:
885 : case LF:
886 0 : UPDATE_STATE(s_res_status_start);
887 0 : REEXECUTE();
888 : break;
889 : default:
890 0 : SET_ERRNO(HPE_INVALID_STATUS);
891 0 : goto error;
892 : }
893 0 : break;
894 : }
895 :
896 0 : parser->status_code *= 10;
897 0 : parser->status_code += ch - '0';
898 :
899 0 : if (UNLIKELY(parser->status_code > 999)) {
900 0 : SET_ERRNO(HPE_INVALID_STATUS);
901 0 : goto error;
902 : }
903 :
904 0 : break;
905 : }
906 :
907 : case s_res_status_start:
908 : {
909 0 : MARK(status);
910 0 : UPDATE_STATE(s_res_status);
911 0 : parser->index = 0;
912 :
913 0 : if (ch == CR || ch == LF)
914 : REEXECUTE();
915 :
916 0 : break;
917 : }
918 :
919 : case s_res_status:
920 0 : if (ch == CR) {
921 0 : UPDATE_STATE(s_res_line_almost_done);
922 0 : CALLBACK_DATA(status);
923 0 : break;
924 : }
925 :
926 0 : if (ch == LF) {
927 0 : UPDATE_STATE(s_header_field_start);
928 0 : CALLBACK_DATA(status);
929 0 : break;
930 : }
931 :
932 0 : break;
933 :
934 : case s_res_line_almost_done:
935 : STRICT_CHECK(ch != LF);
936 0 : UPDATE_STATE(s_header_field_start);
937 0 : break;
938 :
939 : case s_start_req:
940 : {
941 144 : if (ch == CR || ch == LF)
942 : break;
943 144 : parser->flags = 0;
944 144 : parser->content_length = ULLONG_MAX;
945 :
946 144 : if (UNLIKELY(!IS_ALPHA(ch))) {
947 0 : SET_ERRNO(HPE_INVALID_METHOD);
948 0 : goto error;
949 : }
950 :
951 144 : parser->method = (enum http_method) 0;
952 144 : parser->index = 1;
953 144 : switch (ch) {
954 0 : case 'A': parser->method = HTTP_ACL; break;
955 0 : case 'B': parser->method = HTTP_BIND; break;
956 0 : case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break;
957 0 : case 'D': parser->method = HTTP_DELETE; break;
958 144 : case 'G': parser->method = HTTP_GET; break;
959 0 : case 'H': parser->method = HTTP_HEAD; break;
960 0 : case 'L': parser->method = HTTP_LOCK; /* or LINK */ break;
961 0 : case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH, MKCALENDAR */ break;
962 0 : case 'N': parser->method = HTTP_NOTIFY; break;
963 0 : case 'O': parser->method = HTTP_OPTIONS; break;
964 0 : case 'P': parser->method = HTTP_POST;
965 : /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */
966 0 : break;
967 0 : case 'R': parser->method = HTTP_REPORT; /* or REBIND */ break;
968 0 : case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH */ break;
969 0 : case 'T': parser->method = HTTP_TRACE; break;
970 0 : case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE, UNBIND, UNLINK */ break;
971 : default:
972 0 : SET_ERRNO(HPE_INVALID_METHOD);
973 0 : goto error;
974 : }
975 144 : UPDATE_STATE(s_req_method);
976 :
977 144 : CALLBACK_NOTIFY(message_begin);
978 :
979 144 : break;
980 : }
981 :
982 : case s_req_method:
983 : {
984 : const char *matcher;
985 432 : if (UNLIKELY(ch == '\0')) {
986 0 : SET_ERRNO(HPE_INVALID_METHOD);
987 0 : goto error;
988 : }
989 :
990 432 : matcher = method_strings[parser->method];
991 432 : if (ch == ' ' && matcher[parser->index] == '\0') {
992 144 : UPDATE_STATE(s_req_spaces_before_url);
993 288 : } else if (ch == matcher[parser->index]) {
994 : ; /* nada */
995 0 : } else if ((ch >= 'A' && ch <= 'Z') || ch == '-') {
996 :
997 0 : switch (parser->method << 16 | parser->index << 8 | ch) {
998 : #define XX(meth, pos, ch, new_meth) \
999 : case (HTTP_##meth << 16 | pos << 8 | ch): \
1000 : parser->method = HTTP_##new_meth; break;
1001 :
1002 0 : XX(POST, 1, 'U', PUT)
1003 0 : XX(POST, 1, 'A', PATCH)
1004 0 : XX(POST, 1, 'R', PROPFIND)
1005 0 : XX(PUT, 2, 'R', PURGE)
1006 0 : XX(CONNECT, 1, 'H', CHECKOUT)
1007 0 : XX(CONNECT, 2, 'P', COPY)
1008 0 : XX(MKCOL, 1, 'O', MOVE)
1009 0 : XX(MKCOL, 1, 'E', MERGE)
1010 0 : XX(MKCOL, 1, '-', MSEARCH)
1011 0 : XX(MKCOL, 2, 'A', MKACTIVITY)
1012 0 : XX(MKCOL, 3, 'A', MKCALENDAR)
1013 0 : XX(SUBSCRIBE, 1, 'E', SEARCH)
1014 0 : XX(REPORT, 2, 'B', REBIND)
1015 0 : XX(PROPFIND, 4, 'P', PROPPATCH)
1016 0 : XX(LOCK, 1, 'I', LINK)
1017 0 : XX(UNLOCK, 2, 'S', UNSUBSCRIBE)
1018 0 : XX(UNLOCK, 2, 'B', UNBIND)
1019 0 : XX(UNLOCK, 3, 'I', UNLINK)
1020 : #undef XX
1021 : default:
1022 0 : SET_ERRNO(HPE_INVALID_METHOD);
1023 0 : goto error;
1024 : }
1025 0 : } else {
1026 0 : SET_ERRNO(HPE_INVALID_METHOD);
1027 0 : goto error;
1028 : }
1029 :
1030 432 : ++parser->index;
1031 432 : break;
1032 : }
1033 :
1034 : case s_req_spaces_before_url:
1035 : {
1036 144 : if (ch == ' ') break;
1037 :
1038 144 : MARK(url);
1039 144 : if (parser->method == HTTP_CONNECT) {
1040 0 : UPDATE_STATE(s_req_server_start);
1041 : }
1042 :
1043 144 : UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1044 144 : if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1045 0 : SET_ERRNO(HPE_INVALID_URL);
1046 0 : goto error;
1047 : }
1048 :
1049 144 : break;
1050 : }
1051 :
1052 : case s_req_schema:
1053 : case s_req_schema_slash:
1054 : case s_req_schema_slash_slash:
1055 : case s_req_server_start:
1056 : {
1057 0 : switch (ch) {
1058 : /* No whitespace allowed here */
1059 : case ' ':
1060 : case CR:
1061 : case LF:
1062 0 : SET_ERRNO(HPE_INVALID_URL);
1063 0 : goto error;
1064 : default:
1065 0 : UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1066 0 : if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1067 0 : SET_ERRNO(HPE_INVALID_URL);
1068 0 : goto error;
1069 : }
1070 : }
1071 :
1072 0 : break;
1073 : }
1074 :
1075 : case s_req_server:
1076 : case s_req_server_with_at:
1077 : case s_req_path:
1078 : case s_req_query_string_start:
1079 : case s_req_query_string:
1080 : case s_req_fragment_start:
1081 : case s_req_fragment:
1082 : {
1083 1674 : switch (ch) {
1084 : case ' ':
1085 144 : UPDATE_STATE(s_req_http_start);
1086 144 : CALLBACK_DATA(url);
1087 144 : break;
1088 : case CR:
1089 : case LF:
1090 0 : parser->http_major = 0;
1091 0 : parser->http_minor = 9;
1092 0 : UPDATE_STATE((ch == CR) ?
1093 : s_req_line_almost_done :
1094 : s_header_field_start);
1095 0 : CALLBACK_DATA(url);
1096 0 : break;
1097 : default:
1098 1530 : UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1099 1530 : if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1100 0 : SET_ERRNO(HPE_INVALID_URL);
1101 0 : goto error;
1102 : }
1103 : }
1104 1674 : break;
1105 : }
1106 :
1107 : case s_req_http_start:
1108 144 : switch (ch) {
1109 : case 'H':
1110 144 : UPDATE_STATE(s_req_http_H);
1111 144 : break;
1112 : case ' ':
1113 0 : break;
1114 : default:
1115 0 : SET_ERRNO(HPE_INVALID_CONSTANT);
1116 0 : goto error;
1117 : }
1118 144 : break;
1119 :
1120 : case s_req_http_H:
1121 : STRICT_CHECK(ch != 'T');
1122 144 : UPDATE_STATE(s_req_http_HT);
1123 144 : break;
1124 :
1125 : case s_req_http_HT:
1126 : STRICT_CHECK(ch != 'T');
1127 144 : UPDATE_STATE(s_req_http_HTT);
1128 144 : break;
1129 :
1130 : case s_req_http_HTT:
1131 : STRICT_CHECK(ch != 'P');
1132 144 : UPDATE_STATE(s_req_http_HTTP);
1133 144 : break;
1134 :
1135 : case s_req_http_HTTP:
1136 : STRICT_CHECK(ch != '/');
1137 144 : UPDATE_STATE(s_req_http_major);
1138 144 : break;
1139 :
1140 : case s_req_http_major:
1141 144 : if (UNLIKELY(!IS_NUM(ch))) {
1142 1 : SET_ERRNO(HPE_INVALID_VERSION);
1143 1 : goto error;
1144 : }
1145 :
1146 143 : parser->http_major = ch - '0';
1147 143 : UPDATE_STATE(s_req_http_dot);
1148 143 : break;
1149 :
1150 : case s_req_http_dot:
1151 : {
1152 143 : if (UNLIKELY(ch != '.')) {
1153 0 : SET_ERRNO(HPE_INVALID_VERSION);
1154 0 : goto error;
1155 : }
1156 :
1157 143 : UPDATE_STATE(s_req_http_minor);
1158 143 : break;
1159 : }
1160 :
1161 : case s_req_http_minor:
1162 143 : if (UNLIKELY(!IS_NUM(ch))) {
1163 0 : SET_ERRNO(HPE_INVALID_VERSION);
1164 0 : goto error;
1165 : }
1166 :
1167 143 : parser->http_minor = ch - '0';
1168 143 : UPDATE_STATE(s_req_http_end);
1169 143 : break;
1170 :
1171 : case s_req_http_end:
1172 : {
1173 143 : if (ch == CR) {
1174 143 : UPDATE_STATE(s_req_line_almost_done);
1175 143 : break;
1176 : }
1177 :
1178 0 : if (ch == LF) {
1179 0 : UPDATE_STATE(s_header_field_start);
1180 0 : break;
1181 : }
1182 :
1183 0 : SET_ERRNO(HPE_INVALID_VERSION);
1184 0 : goto error;
1185 : break;
1186 : }
1187 :
1188 : /* end of request line */
1189 : case s_req_line_almost_done:
1190 : {
1191 143 : if (UNLIKELY(ch != LF)) {
1192 0 : SET_ERRNO(HPE_LF_EXPECTED);
1193 0 : goto error;
1194 : }
1195 :
1196 143 : UPDATE_STATE(s_header_field_start);
1197 143 : break;
1198 : }
1199 :
1200 : case s_header_field_start:
1201 : {
1202 569 : if (ch == CR) {
1203 140 : UPDATE_STATE(s_headers_almost_done);
1204 140 : break;
1205 : }
1206 :
1207 429 : if (ch == LF) {
1208 : /* they might be just sending \n instead of \r\n so this would be
1209 : * the second \n to denote the end of headers*/
1210 0 : UPDATE_STATE(s_headers_almost_done);
1211 0 : REEXECUTE();
1212 : }
1213 :
1214 429 : c = TOKEN(ch);
1215 :
1216 429 : if (UNLIKELY(!c)) {
1217 0 : SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1218 0 : goto error;
1219 : }
1220 :
1221 429 : MARK(header_field);
1222 :
1223 429 : parser->index = 0;
1224 429 : UPDATE_STATE(s_header_field);
1225 :
1226 429 : switch (c) {
1227 : case 'c':
1228 143 : parser->header_state = h_C;
1229 143 : break;
1230 :
1231 : case 'p':
1232 0 : parser->header_state = h_matching_proxy_connection;
1233 0 : break;
1234 :
1235 : case 't':
1236 0 : parser->header_state = h_matching_transfer_encoding;
1237 0 : break;
1238 :
1239 : case 'u':
1240 143 : parser->header_state = h_matching_upgrade;
1241 143 : break;
1242 :
1243 : default:
1244 143 : parser->header_state = h_general;
1245 143 : break;
1246 : }
1247 429 : break;
1248 : }
1249 :
1250 : case s_header_field:
1251 : {
1252 429 : const char* start = p;
1253 3432 : for (; p != data + len; p++) {
1254 3432 : ch = *p;
1255 3432 : c = TOKEN(ch);
1256 :
1257 3432 : if (!c)
1258 429 : break;
1259 :
1260 3003 : switch (parser->header_state) {
1261 : case h_general:
1262 1573 : break;
1263 :
1264 : case h_C:
1265 143 : parser->index++;
1266 143 : parser->header_state = (c == 'o' ? h_CO : h_general);
1267 143 : break;
1268 :
1269 : case h_CO:
1270 143 : parser->index++;
1271 143 : parser->header_state = (c == 'n' ? h_CON : h_general);
1272 143 : break;
1273 :
1274 : case h_CON:
1275 143 : parser->index++;
1276 143 : switch (c) {
1277 : case 'n':
1278 143 : parser->header_state = h_matching_connection;
1279 143 : break;
1280 : case 't':
1281 0 : parser->header_state = h_matching_content_length;
1282 0 : break;
1283 : default:
1284 0 : parser->header_state = h_general;
1285 0 : break;
1286 : }
1287 143 : break;
1288 :
1289 : /* connection */
1290 :
1291 : case h_matching_connection:
1292 858 : parser->index++;
1293 858 : if (parser->index > sizeof(CONNECTION)-1
1294 858 : || c != CONNECTION[parser->index]) {
1295 0 : parser->header_state = h_general;
1296 858 : } else if (parser->index == sizeof(CONNECTION)-2) {
1297 143 : parser->header_state = h_connection;
1298 : }
1299 858 : break;
1300 :
1301 : /* proxy-connection */
1302 :
1303 : case h_matching_proxy_connection:
1304 0 : parser->index++;
1305 0 : if (parser->index > sizeof(PROXY_CONNECTION)-1
1306 0 : || c != PROXY_CONNECTION[parser->index]) {
1307 0 : parser->header_state = h_general;
1308 0 : } else if (parser->index == sizeof(PROXY_CONNECTION)-2) {
1309 0 : parser->header_state = h_connection;
1310 : }
1311 0 : break;
1312 :
1313 : /* content-length */
1314 :
1315 : case h_matching_content_length:
1316 0 : parser->index++;
1317 0 : if (parser->index > sizeof(CONTENT_LENGTH)-1
1318 0 : || c != CONTENT_LENGTH[parser->index]) {
1319 0 : parser->header_state = h_general;
1320 0 : } else if (parser->index == sizeof(CONTENT_LENGTH)-2) {
1321 0 : parser->header_state = h_content_length;
1322 : }
1323 0 : break;
1324 :
1325 : /* transfer-encoding */
1326 :
1327 : case h_matching_transfer_encoding:
1328 0 : parser->index++;
1329 0 : if (parser->index > sizeof(TRANSFER_ENCODING)-1
1330 0 : || c != TRANSFER_ENCODING[parser->index]) {
1331 0 : parser->header_state = h_general;
1332 0 : } else if (parser->index == sizeof(TRANSFER_ENCODING)-2) {
1333 0 : parser->header_state = h_transfer_encoding;
1334 : }
1335 0 : break;
1336 :
1337 : /* upgrade */
1338 :
1339 : case h_matching_upgrade:
1340 143 : parser->index++;
1341 143 : if (parser->index > sizeof(UPGRADE)-1
1342 143 : || c != UPGRADE[parser->index]) {
1343 143 : parser->header_state = h_general;
1344 0 : } else if (parser->index == sizeof(UPGRADE)-2) {
1345 0 : parser->header_state = h_upgrade;
1346 : }
1347 143 : break;
1348 :
1349 : case h_connection:
1350 : case h_content_length:
1351 : case h_transfer_encoding:
1352 : case h_upgrade:
1353 0 : if (ch != ' ') parser->header_state = h_general;
1354 0 : break;
1355 :
1356 : default:
1357 0 : assert(0 && "Unknown header_state");
1358 : break;
1359 : }
1360 : }
1361 :
1362 429 : COUNT_HEADER_SIZE(p - start);
1363 :
1364 429 : if (p == data + len) {
1365 0 : --p;
1366 0 : break;
1367 : }
1368 :
1369 429 : if (ch == ':') {
1370 429 : UPDATE_STATE(s_header_value_discard_ws);
1371 429 : CALLBACK_DATA(header_field);
1372 429 : break;
1373 : }
1374 :
1375 0 : SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1376 0 : goto error;
1377 : }
1378 :
1379 : case s_header_value_discard_ws:
1380 858 : if (ch == ' ' || ch == '\t') break;
1381 :
1382 429 : if (ch == CR) {
1383 0 : UPDATE_STATE(s_header_value_discard_ws_almost_done);
1384 0 : break;
1385 : }
1386 :
1387 429 : if (ch == LF) {
1388 0 : UPDATE_STATE(s_header_value_discard_lws);
1389 0 : break;
1390 : }
1391 :
1392 : /* FALLTHROUGH */
1393 :
1394 : case s_header_value_start:
1395 : {
1396 429 : MARK(header_value);
1397 :
1398 429 : UPDATE_STATE(s_header_value);
1399 429 : parser->index = 0;
1400 :
1401 429 : c = LOWER(ch);
1402 :
1403 429 : switch (parser->header_state) {
1404 : case h_upgrade:
1405 0 : parser->flags |= F_UPGRADE;
1406 0 : parser->header_state = h_general;
1407 0 : break;
1408 :
1409 : case h_transfer_encoding:
1410 : /* looking for 'Transfer-Encoding: chunked' */
1411 0 : if ('c' == c) {
1412 0 : parser->header_state = h_matching_transfer_encoding_chunked;
1413 : } else {
1414 0 : parser->header_state = h_general;
1415 : }
1416 0 : break;
1417 :
1418 : case h_content_length:
1419 0 : if (UNLIKELY(!IS_NUM(ch))) {
1420 0 : SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1421 0 : goto error;
1422 : }
1423 :
1424 0 : if (parser->flags & F_CONTENTLENGTH) {
1425 0 : SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
1426 0 : goto error;
1427 : }
1428 :
1429 0 : parser->flags |= F_CONTENTLENGTH;
1430 0 : parser->content_length = ch - '0';
1431 0 : break;
1432 :
1433 : case h_connection:
1434 : /* looking for 'Connection: keep-alive' */
1435 143 : if (c == 'k') {
1436 0 : parser->header_state = h_matching_connection_keep_alive;
1437 : /* looking for 'Connection: close' */
1438 143 : } else if (c == 'c') {
1439 143 : parser->header_state = h_matching_connection_close;
1440 0 : } else if (c == 'u') {
1441 0 : parser->header_state = h_matching_connection_upgrade;
1442 : } else {
1443 0 : parser->header_state = h_matching_connection_token;
1444 : }
1445 143 : break;
1446 :
1447 : /* Multi-value `Connection` header */
1448 : case h_matching_connection_token_start:
1449 0 : break;
1450 :
1451 : default:
1452 286 : parser->header_state = h_general;
1453 286 : break;
1454 : }
1455 429 : break;
1456 : }
1457 :
1458 : case s_header_value:
1459 : {
1460 429 : const char* start = p;
1461 429 : enum header_states h_state = (enum header_states) parser->header_state;
1462 1287 : for (; p != data + len; p++) {
1463 1287 : ch = *p;
1464 1287 : if (ch == CR) {
1465 429 : UPDATE_STATE(s_header_almost_done);
1466 429 : parser->header_state = h_state;
1467 429 : CALLBACK_DATA(header_value);
1468 429 : break;
1469 : }
1470 :
1471 858 : if (ch == LF) {
1472 0 : UPDATE_STATE(s_header_almost_done);
1473 0 : COUNT_HEADER_SIZE(p - start);
1474 0 : parser->header_state = h_state;
1475 0 : CALLBACK_DATA_NOADVANCE(header_value);
1476 0 : REEXECUTE();
1477 : }
1478 :
1479 858 : if (!lenient && !IS_HEADER_CHAR(ch)) {
1480 0 : SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1481 0 : goto error;
1482 : }
1483 :
1484 858 : c = LOWER(ch);
1485 :
1486 858 : switch (h_state) {
1487 : case h_general:
1488 : {
1489 : const char* p_cr;
1490 : const char* p_lf;
1491 286 : size_t limit = data + len - p;
1492 :
1493 286 : limit = MIN(limit, HTTP_MAX_HEADER_SIZE);
1494 :
1495 286 : p_cr = (const char*) memchr(p, CR, limit);
1496 286 : p_lf = (const char*) memchr(p, LF, limit);
1497 286 : if (p_cr != NULL) {
1498 286 : if (p_lf != NULL && p_cr >= p_lf)
1499 0 : p = p_lf;
1500 : else
1501 286 : p = p_cr;
1502 0 : } else if (UNLIKELY(p_lf != NULL)) {
1503 0 : p = p_lf;
1504 : } else {
1505 0 : p = data + len;
1506 : }
1507 286 : --p;
1508 :
1509 286 : break;
1510 : }
1511 :
1512 : case h_connection:
1513 : case h_transfer_encoding:
1514 0 : assert(0 && "Shouldn't get here.");
1515 : break;
1516 :
1517 : case h_content_length:
1518 : {
1519 : uint64_t t;
1520 :
1521 0 : if (ch == ' ') break;
1522 :
1523 0 : if (UNLIKELY(!IS_NUM(ch))) {
1524 0 : SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1525 0 : parser->header_state = h_state;
1526 0 : goto error;
1527 : }
1528 :
1529 0 : t = parser->content_length;
1530 0 : t *= 10;
1531 0 : t += ch - '0';
1532 :
1533 : /* Overflow? Test against a conservative limit for simplicity. */
1534 0 : if (UNLIKELY((ULLONG_MAX - 10) / 10 < parser->content_length)) {
1535 0 : SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1536 0 : parser->header_state = h_state;
1537 0 : goto error;
1538 : }
1539 :
1540 0 : parser->content_length = t;
1541 0 : break;
1542 : }
1543 :
1544 : /* Transfer-Encoding: chunked */
1545 : case h_matching_transfer_encoding_chunked:
1546 0 : parser->index++;
1547 0 : if (parser->index > sizeof(CHUNKED)-1
1548 0 : || c != CHUNKED[parser->index]) {
1549 0 : h_state = h_general;
1550 0 : } else if (parser->index == sizeof(CHUNKED)-2) {
1551 0 : h_state = h_transfer_encoding_chunked;
1552 : }
1553 0 : break;
1554 :
1555 : case h_matching_connection_token_start:
1556 : /* looking for 'Connection: keep-alive' */
1557 0 : if (c == 'k') {
1558 0 : h_state = h_matching_connection_keep_alive;
1559 : /* looking for 'Connection: close' */
1560 0 : } else if (c == 'c') {
1561 0 : h_state = h_matching_connection_close;
1562 0 : } else if (c == 'u') {
1563 0 : h_state = h_matching_connection_upgrade;
1564 0 : } else if (STRICT_TOKEN(c)) {
1565 0 : h_state = h_matching_connection_token;
1566 0 : } else if (c == ' ' || c == '\t') {
1567 : /* Skip lws */
1568 : } else {
1569 0 : h_state = h_general;
1570 : }
1571 0 : break;
1572 :
1573 : /* looking for 'Connection: keep-alive' */
1574 : case h_matching_connection_keep_alive:
1575 0 : parser->index++;
1576 0 : if (parser->index > sizeof(KEEP_ALIVE)-1
1577 0 : || c != KEEP_ALIVE[parser->index]) {
1578 0 : h_state = h_matching_connection_token;
1579 0 : } else if (parser->index == sizeof(KEEP_ALIVE)-2) {
1580 0 : h_state = h_connection_keep_alive;
1581 : }
1582 0 : break;
1583 :
1584 : /* looking for 'Connection: close' */
1585 : case h_matching_connection_close:
1586 572 : parser->index++;
1587 572 : if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) {
1588 0 : h_state = h_matching_connection_token;
1589 572 : } else if (parser->index == sizeof(CLOSE)-2) {
1590 143 : h_state = h_connection_close;
1591 : }
1592 572 : break;
1593 :
1594 : /* looking for 'Connection: upgrade' */
1595 : case h_matching_connection_upgrade:
1596 0 : parser->index++;
1597 0 : if (parser->index > sizeof(UPGRADE) - 1 ||
1598 0 : c != UPGRADE[parser->index]) {
1599 0 : h_state = h_matching_connection_token;
1600 0 : } else if (parser->index == sizeof(UPGRADE)-2) {
1601 0 : h_state = h_connection_upgrade;
1602 : }
1603 0 : break;
1604 :
1605 : case h_matching_connection_token:
1606 0 : if (ch == ',') {
1607 0 : h_state = h_matching_connection_token_start;
1608 0 : parser->index = 0;
1609 : }
1610 0 : break;
1611 :
1612 : case h_transfer_encoding_chunked:
1613 0 : if (ch != ' ') h_state = h_general;
1614 0 : break;
1615 :
1616 : case h_connection_keep_alive:
1617 : case h_connection_close:
1618 : case h_connection_upgrade:
1619 0 : if (ch == ',') {
1620 0 : if (h_state == h_connection_keep_alive) {
1621 0 : parser->flags |= F_CONNECTION_KEEP_ALIVE;
1622 0 : } else if (h_state == h_connection_close) {
1623 0 : parser->flags |= F_CONNECTION_CLOSE;
1624 0 : } else if (h_state == h_connection_upgrade) {
1625 0 : parser->flags |= F_CONNECTION_UPGRADE;
1626 : }
1627 0 : h_state = h_matching_connection_token_start;
1628 0 : parser->index = 0;
1629 0 : } else if (ch != ' ') {
1630 0 : h_state = h_matching_connection_token;
1631 : }
1632 0 : break;
1633 :
1634 : default:
1635 0 : UPDATE_STATE(s_header_value);
1636 0 : h_state = h_general;
1637 0 : break;
1638 : }
1639 : }
1640 429 : parser->header_state = h_state;
1641 :
1642 429 : COUNT_HEADER_SIZE(p - start);
1643 :
1644 429 : if (p == data + len)
1645 0 : --p;
1646 429 : break;
1647 : }
1648 :
1649 : case s_header_almost_done:
1650 : {
1651 429 : if (UNLIKELY(ch != LF)) {
1652 0 : SET_ERRNO(HPE_LF_EXPECTED);
1653 0 : goto error;
1654 : }
1655 :
1656 429 : UPDATE_STATE(s_header_value_lws);
1657 429 : break;
1658 : }
1659 :
1660 : case s_header_value_lws:
1661 : {
1662 426 : if (ch == ' ' || ch == '\t') {
1663 0 : UPDATE_STATE(s_header_value_start);
1664 0 : REEXECUTE();
1665 : }
1666 :
1667 : /* finished the header */
1668 426 : switch (parser->header_state) {
1669 : case h_connection_keep_alive:
1670 0 : parser->flags |= F_CONNECTION_KEEP_ALIVE;
1671 0 : break;
1672 : case h_connection_close:
1673 143 : parser->flags |= F_CONNECTION_CLOSE;
1674 143 : break;
1675 : case h_transfer_encoding_chunked:
1676 0 : parser->flags |= F_CHUNKED;
1677 0 : break;
1678 : case h_connection_upgrade:
1679 0 : parser->flags |= F_CONNECTION_UPGRADE;
1680 0 : break;
1681 : default:
1682 283 : break;
1683 : }
1684 :
1685 426 : UPDATE_STATE(s_header_field_start);
1686 426 : REEXECUTE();
1687 : }
1688 :
1689 : case s_header_value_discard_ws_almost_done:
1690 : {
1691 : STRICT_CHECK(ch != LF);
1692 0 : UPDATE_STATE(s_header_value_discard_lws);
1693 0 : break;
1694 : }
1695 :
1696 : case s_header_value_discard_lws:
1697 : {
1698 0 : if (ch == ' ' || ch == '\t') {
1699 0 : UPDATE_STATE(s_header_value_discard_ws);
1700 0 : break;
1701 : } else {
1702 0 : switch (parser->header_state) {
1703 : case h_connection_keep_alive:
1704 0 : parser->flags |= F_CONNECTION_KEEP_ALIVE;
1705 0 : break;
1706 : case h_connection_close:
1707 0 : parser->flags |= F_CONNECTION_CLOSE;
1708 0 : break;
1709 : case h_connection_upgrade:
1710 0 : parser->flags |= F_CONNECTION_UPGRADE;
1711 0 : break;
1712 : case h_transfer_encoding_chunked:
1713 0 : parser->flags |= F_CHUNKED;
1714 0 : break;
1715 : default:
1716 0 : break;
1717 : }
1718 :
1719 : /* header value was empty */
1720 0 : MARK(header_value);
1721 0 : UPDATE_STATE(s_header_field_start);
1722 0 : CALLBACK_DATA_NOADVANCE(header_value);
1723 0 : REEXECUTE();
1724 : }
1725 : }
1726 :
1727 : case s_headers_almost_done:
1728 : {
1729 : STRICT_CHECK(ch != LF);
1730 :
1731 140 : if (parser->flags & F_TRAILING) {
1732 : /* End of a chunked request */
1733 0 : UPDATE_STATE(s_message_done);
1734 0 : CALLBACK_NOTIFY_NOADVANCE(chunk_complete);
1735 0 : REEXECUTE();
1736 : }
1737 :
1738 : /* Cannot use chunked encoding and a content-length header together
1739 : per the HTTP specification. */
1740 140 : if ((parser->flags & F_CHUNKED) &&
1741 0 : (parser->flags & F_CONTENTLENGTH)) {
1742 0 : SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
1743 0 : goto error;
1744 : }
1745 :
1746 140 : UPDATE_STATE(s_headers_done);
1747 :
1748 : /* Set this here so that on_headers_complete() callbacks can see it */
1749 140 : if ((parser->flags & F_UPGRADE) &&
1750 0 : (parser->flags & F_CONNECTION_UPGRADE)) {
1751 : /* For responses, "Upgrade: foo" and "Connection: upgrade" are
1752 : * mandatory only when it is a 101 Switching Protocols response,
1753 : * otherwise it is purely informational, to announce support.
1754 : */
1755 0 : parser->upgrade =
1756 0 : (parser->type == HTTP_REQUEST || parser->status_code == 101);
1757 : } else {
1758 140 : parser->upgrade = (parser->method == HTTP_CONNECT);
1759 : }
1760 :
1761 : /* Here we call the headers_complete callback. This is somewhat
1762 : * different than other callbacks because if the user returns 1, we
1763 : * will interpret that as saying that this message has no body. This
1764 : * is needed for the annoying case of receiving a response to a HEAD
1765 : * request.
1766 : *
1767 : * We'd like to use CALLBACK_NOTIFY_NOADVANCE() here but we cannot, so
1768 : * we have to simulate it by handling a change in errno below.
1769 : */
1770 140 : if (settings->on_headers_complete) {
1771 140 : switch (settings->on_headers_complete(parser)) {
1772 : case 0:
1773 140 : break;
1774 :
1775 : case 2:
1776 0 : parser->upgrade = 1;
1777 :
1778 : /* FALLTHROUGH */
1779 : case 1:
1780 0 : parser->flags |= F_SKIPBODY;
1781 0 : break;
1782 :
1783 : default:
1784 0 : SET_ERRNO(HPE_CB_headers_complete);
1785 0 : RETURN(p - data); /* Error */
1786 : }
1787 : }
1788 :
1789 140 : if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
1790 0 : RETURN(p - data);
1791 : }
1792 :
1793 140 : REEXECUTE();
1794 : }
1795 :
1796 : case s_headers_done:
1797 : {
1798 : int hasBody;
1799 : STRICT_CHECK(ch != LF);
1800 :
1801 140 : parser->nread = 0;
1802 :
1803 280 : hasBody = parser->flags & F_CHUNKED ||
1804 280 : (parser->content_length > 0 && parser->content_length != ULLONG_MAX);
1805 140 : if (parser->upgrade && (parser->method == HTTP_CONNECT ||
1806 0 : (parser->flags & F_SKIPBODY) || !hasBody)) {
1807 : /* Exit, the rest of the message is in a different protocol. */
1808 0 : UPDATE_STATE(NEW_MESSAGE());
1809 0 : CALLBACK_NOTIFY(message_complete);
1810 0 : RETURN((p - data) + 1);
1811 : }
1812 :
1813 140 : if (parser->flags & F_SKIPBODY) {
1814 0 : UPDATE_STATE(NEW_MESSAGE());
1815 0 : CALLBACK_NOTIFY(message_complete);
1816 140 : } else if (parser->flags & F_CHUNKED) {
1817 : /* chunked encoding - ignore Content-Length header */
1818 0 : UPDATE_STATE(s_chunk_size_start);
1819 : } else {
1820 140 : if (parser->content_length == 0) {
1821 : /* Content-Length header given but zero: Content-Length: 0\r\n */
1822 0 : UPDATE_STATE(NEW_MESSAGE());
1823 0 : CALLBACK_NOTIFY(message_complete);
1824 140 : } else if (parser->content_length != ULLONG_MAX) {
1825 : /* Content-Length header given and non-zero */
1826 0 : UPDATE_STATE(s_body_identity);
1827 : } else {
1828 140 : if (!http_message_needs_eof(parser)) {
1829 : /* Assume content-length 0 - read the next */
1830 140 : UPDATE_STATE(NEW_MESSAGE());
1831 140 : CALLBACK_NOTIFY(message_complete);
1832 : } else {
1833 : /* Read body until EOF */
1834 0 : UPDATE_STATE(s_body_identity_eof);
1835 : }
1836 : }
1837 : }
1838 :
1839 140 : break;
1840 : }
1841 :
1842 : case s_body_identity:
1843 : {
1844 0 : uint64_t to_read = MIN(parser->content_length,
1845 : (uint64_t) ((data + len) - p));
1846 :
1847 0 : assert(parser->content_length != 0
1848 : && parser->content_length != ULLONG_MAX);
1849 :
1850 : /* The difference between advancing content_length and p is because
1851 : * the latter will automatically advance on the next loop iteration.
1852 : * Further, if content_length ends up at 0, we want to see the last
1853 : * byte again for our message complete callback.
1854 : */
1855 0 : MARK(body);
1856 0 : parser->content_length -= to_read;
1857 0 : p += to_read - 1;
1858 :
1859 0 : if (parser->content_length == 0) {
1860 0 : UPDATE_STATE(s_message_done);
1861 :
1862 : /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte.
1863 : *
1864 : * The alternative to doing this is to wait for the next byte to
1865 : * trigger the data callback, just as in every other case. The
1866 : * problem with this is that this makes it difficult for the test
1867 : * harness to distinguish between complete-on-EOF and
1868 : * complete-on-length. It's not clear that this distinction is
1869 : * important for applications, but let's keep it for now.
1870 : */
1871 0 : CALLBACK_DATA_(body, p - body_mark + 1, p - data);
1872 0 : REEXECUTE();
1873 : }
1874 :
1875 0 : break;
1876 : }
1877 :
1878 : /* read until EOF */
1879 : case s_body_identity_eof:
1880 0 : MARK(body);
1881 0 : p = data + len - 1;
1882 :
1883 0 : break;
1884 :
1885 : case s_message_done:
1886 0 : UPDATE_STATE(NEW_MESSAGE());
1887 0 : CALLBACK_NOTIFY(message_complete);
1888 0 : if (parser->upgrade) {
1889 : /* Exit, the rest of the message is in a different protocol. */
1890 0 : RETURN((p - data) + 1);
1891 : }
1892 0 : break;
1893 :
1894 : case s_chunk_size_start:
1895 : {
1896 0 : assert(parser->nread == 1);
1897 0 : assert(parser->flags & F_CHUNKED);
1898 :
1899 0 : unhex_val = unhex[(unsigned char)ch];
1900 0 : if (UNLIKELY(unhex_val == -1)) {
1901 0 : SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1902 0 : goto error;
1903 : }
1904 :
1905 0 : parser->content_length = unhex_val;
1906 0 : UPDATE_STATE(s_chunk_size);
1907 0 : break;
1908 : }
1909 :
1910 : case s_chunk_size:
1911 : {
1912 : uint64_t t;
1913 :
1914 0 : assert(parser->flags & F_CHUNKED);
1915 :
1916 0 : if (ch == CR) {
1917 0 : UPDATE_STATE(s_chunk_size_almost_done);
1918 0 : break;
1919 : }
1920 :
1921 0 : unhex_val = unhex[(unsigned char)ch];
1922 :
1923 0 : if (unhex_val == -1) {
1924 0 : if (ch == ';' || ch == ' ') {
1925 0 : UPDATE_STATE(s_chunk_parameters);
1926 0 : break;
1927 : }
1928 :
1929 0 : SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1930 0 : goto error;
1931 : }
1932 :
1933 0 : t = parser->content_length;
1934 0 : t *= 16;
1935 0 : t += unhex_val;
1936 :
1937 : /* Overflow? Test against a conservative limit for simplicity. */
1938 0 : if (UNLIKELY((ULLONG_MAX - 16) / 16 < parser->content_length)) {
1939 0 : SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1940 0 : goto error;
1941 : }
1942 :
1943 0 : parser->content_length = t;
1944 0 : break;
1945 : }
1946 :
1947 : case s_chunk_parameters:
1948 : {
1949 0 : assert(parser->flags & F_CHUNKED);
1950 : /* just ignore this shit. TODO check for overflow */
1951 0 : if (ch == CR) {
1952 0 : UPDATE_STATE(s_chunk_size_almost_done);
1953 0 : break;
1954 : }
1955 0 : break;
1956 : }
1957 :
1958 : case s_chunk_size_almost_done:
1959 : {
1960 0 : assert(parser->flags & F_CHUNKED);
1961 : STRICT_CHECK(ch != LF);
1962 :
1963 0 : parser->nread = 0;
1964 :
1965 0 : if (parser->content_length == 0) {
1966 0 : parser->flags |= F_TRAILING;
1967 0 : UPDATE_STATE(s_header_field_start);
1968 : } else {
1969 0 : UPDATE_STATE(s_chunk_data);
1970 : }
1971 0 : CALLBACK_NOTIFY(chunk_header);
1972 0 : break;
1973 : }
1974 :
1975 : case s_chunk_data:
1976 : {
1977 0 : uint64_t to_read = MIN(parser->content_length,
1978 : (uint64_t) ((data + len) - p));
1979 :
1980 0 : assert(parser->flags & F_CHUNKED);
1981 0 : assert(parser->content_length != 0
1982 : && parser->content_length != ULLONG_MAX);
1983 :
1984 : /* See the explanation in s_body_identity for why the content
1985 : * length and data pointers are managed this way.
1986 : */
1987 0 : MARK(body);
1988 0 : parser->content_length -= to_read;
1989 0 : p += to_read - 1;
1990 :
1991 0 : if (parser->content_length == 0) {
1992 0 : UPDATE_STATE(s_chunk_data_almost_done);
1993 : }
1994 :
1995 0 : break;
1996 : }
1997 :
1998 : case s_chunk_data_almost_done:
1999 0 : assert(parser->flags & F_CHUNKED);
2000 0 : assert(parser->content_length == 0);
2001 : STRICT_CHECK(ch != CR);
2002 0 : UPDATE_STATE(s_chunk_data_done);
2003 0 : CALLBACK_DATA(body);
2004 0 : break;
2005 :
2006 : case s_chunk_data_done:
2007 0 : assert(parser->flags & F_CHUNKED);
2008 : STRICT_CHECK(ch != LF);
2009 0 : parser->nread = 0;
2010 0 : UPDATE_STATE(s_chunk_size_start);
2011 0 : CALLBACK_NOTIFY(chunk_complete);
2012 0 : break;
2013 :
2014 : default:
2015 0 : assert(0 && "unhandled state");
2016 : SET_ERRNO(HPE_INVALID_INTERNAL_STATE);
2017 : goto error;
2018 : }
2019 : }
2020 :
2021 : /* Run callbacks for any marks that we have leftover after we ran out of
2022 : * bytes. There should be at most one of these set, so it's OK to invoke
2023 : * them in series (unset marks will not result in callbacks).
2024 : *
2025 : * We use the NOADVANCE() variety of callbacks here because 'p' has already
2026 : * overflowed 'data' and this allows us to correct for the off-by-one that
2027 : * we'd otherwise have (since CALLBACK_DATA() is meant to be run with a 'p'
2028 : * value that's in-bounds).
2029 : */
2030 :
2031 143 : assert(((header_field_mark ? 1 : 0) +
2032 : (header_value_mark ? 1 : 0) +
2033 : (url_mark ? 1 : 0) +
2034 : (body_mark ? 1 : 0) +
2035 : (status_mark ? 1 : 0)) <= 1);
2036 :
2037 143 : CALLBACK_DATA_NOADVANCE(header_field);
2038 143 : CALLBACK_DATA_NOADVANCE(header_value);
2039 143 : CALLBACK_DATA_NOADVANCE(url);
2040 143 : CALLBACK_DATA_NOADVANCE(body);
2041 143 : CALLBACK_DATA_NOADVANCE(status);
2042 :
2043 143 : RETURN(len);
2044 :
2045 : error:
2046 1 : if (HTTP_PARSER_ERRNO(parser) == HPE_OK) {
2047 0 : SET_ERRNO(HPE_UNKNOWN);
2048 : }
2049 :
2050 1 : RETURN(p - data);
2051 : }
2052 :
2053 :
2054 : /* Does the parser need to see an EOF to find the end of the message? Returns 1 if only EOF can end the body, 0 otherwise. */
2055 : int
2056 140 : http_message_needs_eof (const http_parser *parser)
2057 : {
2058 140 : if (parser->type == HTTP_REQUEST) { /* requests are never treated as EOF-delimited */
2059 140 : return 0;
2060 : }
2061 :
2062 : /* See RFC 2616 section 4.4 */
2063 0 : if (parser->status_code / 100 == 1 || /* 1xx e.g. Continue */
2064 0 : parser->status_code == 204 || /* No Content */
2065 0 : parser->status_code == 304 || /* Not Modified */
2066 0 : parser->flags & F_SKIPBODY) { /* response to a HEAD request */
2067 0 : return 0;
2068 : }
2069 : /* Explicit framing (chunked, or a content_length other than the ULLONG_MAX "not given" value) also rules out EOF. */
2070 0 : if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) {
2071 0 : return 0;
2072 : }
2073 : /* Response that may carry a body but has no framing information: only EOF terminates it. */
2074 0 : return 1;
2075 : }
2076 :
2077 : /* Returns non-zero if the connection may be reused after this message, 0 if it must be closed. */
2078 : int
2079 0 : http_should_keep_alive (const http_parser *parser)
2080 : {
2081 0 : if (parser->http_major > 0 && parser->http_minor > 0) { /* NOTE: any version with minor == 0 takes the else branch */
2082 : /* HTTP/1.1 */
2083 0 : if (parser->flags & F_CONNECTION_CLOSE) { /* persistent by default; an explicit "close" opts out */
2084 0 : return 0;
2085 : }
2086 : } else {
2087 : /* HTTP/1.0 or earlier */
2088 0 : if (!(parser->flags & F_CONNECTION_KEEP_ALIVE)) { /* not persistent unless "keep-alive" was seen */
2089 0 : return 0;
2090 : }
2091 : }
2092 : /* Even when the headers allow reuse, a message that only ends at EOF forces the connection to close. */
2093 0 : return !http_message_needs_eof(parser);
2094 : }
2095 :
2096 : /* Returns the entry of method_strings for method m, with "<unknown>" passed to ELEM_AT as the fallback value. */
2097 : const char *
2098 0 : http_method_str (enum http_method m)
2099 : {
2100 0 : return ELEM_AT(method_strings, m, "<unknown>"); /* presumably ELEM_AT bounds-checks m -- confirm against the macro definition */
2101 : }
2102 :
2103 : /* Resets *parser to a fresh state for messages of kind t; only the caller's parser->data pointer survives the reset. */
2104 : void
2105 144 : http_parser_init (http_parser *parser, enum http_parser_type t)
2106 : {
2107 144 : void *data = parser->data; /* preserve application data */
2108 144 : memset(parser, 0, sizeof(*parser)); /* clears every field, including flags and any previous error */
2109 144 : parser->data = data;
2110 144 : parser->type = t;
2111 144 : parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? s_start_res : s_start_req_or_res)); /* any other type starts in the request-or-response state */
2112 144 : parser->http_errno = HPE_OK;
2113 144 : }
2114 : /* Zero-fills *settings, which leaves every member (e.g. on_headers_complete) null/unset. */
2115 : void
2116 0 : http_parser_settings_init(http_parser_settings *settings)
2117 : {
2118 0 : memset(settings, 0, sizeof(*settings));
2119 0 : }
2120 : /* Returns the .name string of the http_strerror_tab entry for err. */
2121 : const char *
2122 0 : http_errno_name(enum http_errno err) {
2123 0 : assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab)); /* range check is assert-only: with NDEBUG an out-of-range err indexes past the table */
2124 0 : return http_strerror_tab[err].name;
2125 : }
2126 : /* Returns the .description string of the http_strerror_tab entry for err. */
2127 : const char *
2128 0 : http_errno_description(enum http_errno err) {
2129 0 : assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab)); /* range check is assert-only: with NDEBUG an out-of-range err indexes past the table */
2130 0 : return http_strerror_tab[err].description;
2131 : }
2132 : /* One step of the host state machine: returns the state reached from s on input ch, or s_http_host_dead if ch is not accepted in s. */
2133 : static enum http_host_state
2134 0 : http_parse_host_char(enum http_host_state s, const char ch) {
2135 0 : switch(s) {
2136 : case s_http_userinfo:
2137 : case s_http_userinfo_start:
2138 0 : if (ch == '@') { /* '@' ends the userinfo part; the host follows */
2139 0 : return s_http_host_start;
2140 : }
2141 :
2142 0 : if (IS_USERINFO_CHAR(ch)) {
2143 0 : return s_http_userinfo;
2144 : }
2145 0 : break;
2146 :
2147 : case s_http_host_start:
2148 0 : if (ch == '[') { /* '[' opens a bracketed literal, closed by ']' in the v6 states below */
2149 0 : return s_http_host_v6_start;
2150 : }
2151 :
2152 0 : if (IS_HOST_CHAR(ch)) {
2153 0 : return s_http_host;
2154 : }
2155 :
2156 0 : break;
2157 :
2158 : case s_http_host:
2159 0 : if (IS_HOST_CHAR(ch)) {
2160 0 : return s_http_host;
2161 : }
2162 :
2163 : /* FALLTHROUGH */
2164 : case s_http_host_v6_end:
2165 0 : if (ch == ':') { /* after a host name or a closing ']', ':' introduces the port */
2166 0 : return s_http_host_port_start;
2167 : }
2168 :
2169 0 : break;
2170 :
2171 : case s_http_host_v6:
2172 0 : if (ch == ']') {
2173 0 : return s_http_host_v6_end;
2174 : }
2175 :
2176 : /* FALLTHROUGH */
2177 : case s_http_host_v6_start:
2178 0 : if (IS_HEX(ch) || ch == ':' || ch == '.') {
2179 0 : return s_http_host_v6;
2180 : }
2181 :
2182 0 : if (s == s_http_host_v6 && ch == '%') { /* '%' starts a zone id, but only after at least one address character (not right after '[') */
2183 0 : return s_http_host_v6_zone_start;
2184 : }
2185 0 : break;
2186 :
2187 : case s_http_host_v6_zone:
2188 0 : if (ch == ']') {
2189 0 : return s_http_host_v6_end;
2190 : }
2191 :
2192 : /* FALLTHROUGH */
2193 : case s_http_host_v6_zone_start:
2194 : /* RFC 6874 Zone ID consists of 1*( unreserved / pct-encoded) */
2195 0 : if (IS_ALPHANUM(ch) || ch == '%' || ch == '.' || ch == '-' || ch == '_' ||
2196 : ch == '~') {
2197 0 : return s_http_host_v6_zone;
2198 : }
2199 0 : break;
2200 :
2201 : case s_http_host_port:
2202 : case s_http_host_port_start:
2203 0 : if (IS_NUM(ch)) { /* a port accepts decimal digits only */
2204 0 : return s_http_host_port;
2205 : }
2206 :
2207 0 : break;
2208 :
2209 : default:
2210 0 : break;
2211 : }
2212 0 : return s_http_host_dead; /* every rejected character and every unlisted state ends up here */
2213 : }
2214 : /* Re-scans the UF_HOST span recorded in u, splitting it into UF_USERINFO / UF_HOST / UF_PORT. Returns 0 on success, 1 if the span is invalid. */
2215 : static int
2216 0 : http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
2217 : enum http_host_state s;
2218 :
2219 : const char *p;
2220 0 : size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len; /* end offset of the host span; must be read before len is reset below */
2221 :
2222 0 : assert(u->field_set & (1 << UF_HOST));
2223 :
2224 0 : u->field_data[UF_HOST].len = 0; /* recomputed character by character in the loop */
2225 :
2226 0 : s = found_at ? s_http_userinfo_start : s_http_host_start; /* non-zero found_at: the span starts with a userinfo part */
2227 :
2228 0 : for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) {
2229 0 : enum http_host_state new_s = http_parse_host_char(s, *p);
2230 :
2231 0 : if (new_s == s_http_host_dead) {
2232 0 : return 1;
2233 : }
2234 : /* On first entry into a component record its offset; each character in it extends its length. */
2235 0 : switch(new_s) {
2236 : case s_http_host:
2237 0 : if (s != s_http_host) {
2238 0 : u->field_data[UF_HOST].off = p - buf;
2239 : }
2240 0 : u->field_data[UF_HOST].len++;
2241 0 : break;
2242 :
2243 : case s_http_host_v6:
2244 0 : if (s != s_http_host_v6) {
2245 0 : u->field_data[UF_HOST].off = p - buf;
2246 : }
2247 0 : u->field_data[UF_HOST].len++;
2248 0 : break;
2249 :
2250 : case s_http_host_v6_zone_start:
2251 : case s_http_host_v6_zone:
2252 0 : u->field_data[UF_HOST].len++; /* the '%' and the zone id are counted as part of the host */
2253 0 : break;
2254 :
2255 : case s_http_host_port:
2256 0 : if (s != s_http_host_port) {
2257 0 : u->field_data[UF_PORT].off = p - buf;
2258 0 : u->field_data[UF_PORT].len = 0;
2259 0 : u->field_set |= (1 << UF_PORT);
2260 : }
2261 0 : u->field_data[UF_PORT].len++;
2262 0 : break;
2263 :
2264 : case s_http_userinfo:
2265 0 : if (s != s_http_userinfo) {
2266 0 : u->field_data[UF_USERINFO].off = p - buf ;
2267 0 : u->field_data[UF_USERINFO].len = 0;
2268 0 : u->field_set |= (1 << UF_USERINFO);
2269 : }
2270 0 : u->field_data[UF_USERINFO].len++;
2271 0 : break;
2272 :
2273 : default:
2274 0 : break; /* remaining states ('@', '[', ']', ':' transitions) add to no field */
2275 : }
2276 0 : s = new_s;
2277 : }
2278 :
2279 : /* Make sure we don't end somewhere unexpected */
2280 0 : switch (s) {
2281 : case s_http_host_start:
2282 : case s_http_host_v6_start:
2283 : case s_http_host_v6:
2284 : case s_http_host_v6_zone_start:
2285 : case s_http_host_v6_zone:
2286 : case s_http_host_port_start:
2287 : case s_http_userinfo:
2288 : case s_http_userinfo_start:
2289 0 : return 1; /* input ended inside an unfinished component */
2290 : default:
2291 0 : break;
2292 : }
2293 :
2294 0 : return 0;
2295 : }
2296 :
2297 : void
2298 0 : http_parser_url_init(struct http_parser_url *u) {
2299 0 : memset(u, 0, sizeof(*u));
2300 0 : }
2301 :
2302 : int
2303 0 : http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
2304 : struct http_parser_url *u)
2305 : {
2306 : enum state s;
2307 : const char *p;
2308 : enum http_parser_url_fields uf, old_uf;
2309 0 : int found_at = 0;
2310 :
2311 0 : u->port = u->field_set = 0;
2312 0 : s = is_connect ? s_req_server_start : s_req_spaces_before_url;
2313 0 : old_uf = UF_MAX;
2314 :
2315 0 : for (p = buf; p < buf + buflen; p++) {
2316 0 : s = parse_url_char(s, *p);
2317 :
2318 : /* Figure out the next field that we're operating on */
2319 0 : switch (s) {
2320 : case s_dead:
2321 0 : return 1;
2322 :
2323 : /* Skip delimeters */
2324 : case s_req_schema_slash:
2325 : case s_req_schema_slash_slash:
2326 : case s_req_server_start:
2327 : case s_req_query_string_start:
2328 : case s_req_fragment_start:
2329 0 : continue;
2330 :
2331 : case s_req_schema:
2332 0 : uf = UF_SCHEMA;
2333 0 : break;
2334 :
2335 : case s_req_server_with_at:
2336 0 : found_at = 1;
2337 :
2338 : /* FALLTHROUGH */
2339 : case s_req_server:
2340 0 : uf = UF_HOST;
2341 0 : break;
2342 :
2343 : case s_req_path:
2344 0 : uf = UF_PATH;
2345 0 : break;
2346 :
2347 : case s_req_query_string:
2348 0 : uf = UF_QUERY;
2349 0 : break;
2350 :
2351 : case s_req_fragment:
2352 0 : uf = UF_FRAGMENT;
2353 0 : break;
2354 :
2355 : default:
2356 0 : assert(!"Unexpected state");
2357 : return 1;
2358 : }
2359 :
2360 : /* Nothing's changed; soldier on */
2361 0 : if (uf == old_uf) {
2362 0 : u->field_data[uf].len++;
2363 0 : continue;
2364 : }
2365 :
2366 0 : u->field_data[uf].off = p - buf;
2367 0 : u->field_data[uf].len = 1;
2368 :
2369 0 : u->field_set |= (1 << uf);
2370 0 : old_uf = uf;
2371 : }
2372 :
2373 : /* host must be present if there is a schema */
2374 : /* parsing http:///toto will fail */
2375 0 : if ((u->field_set & (1 << UF_SCHEMA)) &&
2376 0 : (u->field_set & (1 << UF_HOST)) == 0) {
2377 0 : return 1;
2378 : }
2379 :
2380 0 : if (u->field_set & (1 << UF_HOST)) {
2381 0 : if (http_parse_host(buf, u, found_at) != 0) {
2382 0 : return 1;
2383 : }
2384 : }
2385 :
2386 : /* CONNECT requests can only contain "hostname:port" */
2387 0 : if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
2388 0 : return 1;
2389 : }
2390 :
2391 0 : if (u->field_set & (1 << UF_PORT)) {
2392 : /* Don't bother with endp; we've already validated the string */
2393 0 : unsigned long v = strtoul(buf + u->field_data[UF_PORT].off, NULL, 10);
2394 :
2395 : /* Ports have a max value of 2^16 */
2396 0 : if (v > 0xffff) {
2397 0 : return 1;
2398 : }
2399 :
2400 0 : u->port = (uint16_t) v;
2401 : }
2402 :
2403 0 : return 0;
2404 : }
2405 :
2406 : void
2407 0 : http_parser_pause(http_parser *parser, int paused) {
2408 : /* Users should only be pausing/unpausing a parser that is not in an error
2409 : * state. In non-debug builds, there's not much that we can do about this
2410 : * other than ignore it.
2411 : */
2412 0 : if (HTTP_PARSER_ERRNO(parser) == HPE_OK ||
2413 0 : HTTP_PARSER_ERRNO(parser) == HPE_PAUSED) {
2414 0 : SET_ERRNO((paused) ? HPE_PAUSED : HPE_OK);
2415 : } else {
2416 0 : assert(0 && "Attempting to pause parser in error state");
2417 : }
2418 0 : }
2419 :
2420 : int
2421 0 : http_body_is_final(const struct http_parser *parser) {
2422 0 : return parser->state == s_message_done;
2423 : }
2424 :
2425 : unsigned long
2426 0 : http_parser_version(void) {
2427 0 : return HTTP_PARSER_VERSION_MAJOR * 0x10000 |
2428 : HTTP_PARSER_VERSION_MINOR * 0x00100 |
2429 : HTTP_PARSER_VERSION_PATCH * 0x00001;
2430 : }
|