-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathhttp_parser.ml
More file actions
179 lines (159 loc) · 6.32 KB
/
http_parser.ml
File metadata and controls
179 lines (159 loc) · 6.32 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
(*
OCaml HTTP - do it yourself (fully OCaml) HTTP daemon
Copyright (C) <2002-2010> Stefano Zacchiroli <zack@cs.unibo.it>
<2010> Arlen Cuss <celtic@sairyx.org>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU Library General Public License as
published by the Free Software Foundation, version 2.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
USA
*)
open Printf;;
open Http_common;;
open Http_types;;
open Http_constants;;
let (bindings_sep, binding_sep, pieces_sep, header_sep) =
(Pcre.regexp "&", Pcre.regexp "=", Pcre.regexp " ", Pcre.regexp ":")
let header_RE = Pcre.regexp "([^:]*):(.*)"
let url_decode url = Netencoding.Url.decode ~plus:true url
let split_query_params query =
let bindings = Pcre.split ~rex:bindings_sep query in
match bindings with
| [] -> []
| bindings ->
List.map
(fun binding ->
match Pcre.split ~rex:binding_sep binding with
| [ ""; b ] -> (* '=b' *)
raise (Malformed_query_part (binding, query))
| [ a; b ] -> (* 'a=b' *) (url_decode a, url_decode b)
| [ a ] -> (* 'a=' || 'a' *) (url_decode a, "")
| _ -> raise (Malformed_query_part (binding, query)))
bindings
(** internal, used by generic_input_line *)
exception Line_completed;;
(** given an input channel and a separator
@return a line read from it (like Pervasives.input_line)
line is returned only after reading a separator string; separator string isn't
included in the returned value
TODO what about efficiency?, input is performed char-by-char
*)
let generic_input_line ~sep ~ic =
let sep_len = String.length sep in
if sep_len < 1 then
failwith ("Separator '" ^ sep ^ "' is too short!")
else (* valid separator *)
let line = ref "" in
let sep_pointer = ref 0 in
try
while true do
if !sep_pointer >= String.length sep then (* line completed *)
raise Line_completed
else begin (* incomplete line: need to read more *)
let ch = input_char ic in
if ch = String.get sep !sep_pointer then (* next piece of sep *)
incr sep_pointer
else begin (* useful char *)
for i = 0 to !sep_pointer - 1 do
line := !line ^ (String.make 1 (String.get sep i))
done;
sep_pointer := 0;
line := !line ^ (String.make 1 ch)
end
end
done;
assert false (* unreacheable statement *)
with Line_completed -> !line
let patch_empty_path = function "" -> "/" | s -> s
let debug_dump_request path params =
debug_print
(sprintf
"recevied request; path: %s; params: %s"
path
(String.concat ", " (List.map (fun (n, v) -> n ^ "=" ^ v) params)))
let parse_request_fst_line ic =
let request_line = generic_input_line ~sep:crlf ~ic in
debug_print (sprintf "HTTP request line (not yet parsed): %s" request_line);
try
(match Pcre.split ~rex:pieces_sep request_line with
| [ meth_raw; uri_raw ] -> (* ancient HTTP request line *)
(method_of_string meth_raw, (* method *)
Http_parser_sanity.url_of_string uri_raw, (* uri *)
None) (* no version given *)
| [ meth_raw; uri_raw; http_version_raw ] -> (* HTTP 1.{0,1} *)
(method_of_string meth_raw, (* method *)
Http_parser_sanity.url_of_string uri_raw, (* uri *)
Some (version_of_string http_version_raw)) (* version *)
| _ -> raise (Malformed_request request_line))
with Malformed_URL url -> raise (Malformed_request_URI url)
let parse_response_fst_line ic =
let response_line = generic_input_line ~sep:crlf ~ic in
debug_print (sprintf "HTTP response line (not yet parsed): %s" response_line);
try
(match Pcre.split ~rex:pieces_sep response_line with
| version_raw :: code_raw :: _ ->
(version_of_string version_raw, (* method *)
status_of_code (int_of_string code_raw)) (* status *)
| _ -> raise (Malformed_response response_line))
with
| Malformed_URL _ | Invalid_code _ | Failure "int_of_string" ->
raise (Malformed_response response_line)
let parse_path uri = patch_empty_path (String.concat "/" (Neturl.url_path uri))
let parse_query_get_params uri =
try (* act on HTTP encoded URIs *)
split_query_params (Neturl.url_query ~encoded:true uri)
with Not_found -> []
let parse_headers ic =
(* consume also trailing "^\r\n$" line *)
let rec parse_headers' headers =
match generic_input_line ~sep:crlf ~ic with
| "" -> List.rev headers
| line ->
(let subs =
try
Pcre.extract ~rex:header_RE line
with Not_found -> raise (Invalid_header line)
in
let header =
try
subs.(1)
with Invalid_argument "Array.get" -> raise (Invalid_header line)
in
let value =
try
Http_parser_sanity.normalize_header_value subs.(2)
with Invalid_argument "Array.get" -> ""
in
Http_parser_sanity.heal_header (header, value);
parse_headers' ((header, value) :: headers))
in
parse_headers' []
let parse_cookies raw_cookies =
let tokens =
let lexbuf = Lexing.from_string raw_cookies in
let rec aux acc =
match Cookie_lexer.token lexbuf with
| `EOF -> acc
| token -> aux (token :: acc)
in
List.rev (aux [])
in
let rec aux = function
| [ `ASSIGNMENT (n,v) ] -> [ (n,v) ]
| `ASSIGNMENT (n,v) :: `SEP :: tl -> (n,v) :: aux tl
| _ -> prerr_endline ("failed to read raw cookies: '" ^ raw_cookies ^ "'");
raise (Malformed_cookies raw_cookies)
in
aux tokens
let parse_request ic =
let (meth, uri, version) = parse_request_fst_line ic in
let path = parse_path uri in
let query_get_params = parse_query_get_params uri in
debug_dump_request path query_get_params;
(path, query_get_params)