diff --git a/docs/connections.md b/docs/connections.md index a442439..c2d0186 100644 --- a/docs/connections.md +++ b/docs/connections.md @@ -139,5 +139,5 @@ with httpx.open_connection("http://127.0.0.1:8080") as conn: --- ← [Content Types](content-types.md) -[Low Level Networking](networking.md) → +[Parsers](parsers.md) →   diff --git a/docs/index.md b/docs/index.md index ab3c22e..716675f 100644 --- a/docs/index.md +++ b/docs/index.md @@ -69,6 +69,7 @@ The httpx 1.0 [design proposal](https://www.encode.io/httpnext/) is now availabl * [Headers](docs/headers.md) * [Content Types](docs/content-types.md) * [Connections](docs/connections.md) +* [Parsers](docs/parsers.md) * [Low Level Networking](docs/networking.md) * [About](docs/about.md) diff --git a/docs/networking.md b/docs/networking.md index 0be863d..ac78a55 100644 --- a/docs/networking.md +++ b/docs/networking.md @@ -231,6 +231,6 @@ Custom network backends can also be used to provide functionality such as handli --- -← [Connections](connections.md) +← [Parsers](parsers.md) [About](about.md) →   diff --git a/docs/parsers.md b/docs/parsers.md new file mode 100644 index 0000000..152c33b --- /dev/null +++ b/docs/parsers.md @@ -0,0 +1,30 @@ +# Parsers + +```python +writer = io.BytesIO() +reader = io.BytesIO( + b"HTTP/1.1 200 OK\r\n" + b"Content-Length: 23\r\n" + b"Content-Type: application/json\r\n" + b"\r\n" + b'{"msg": "hello, world"}' +) +p = httpx.HTTPParser(writer, reader) + +# Send the request... +p.send_method_line(b"GET", b"/", b"HTTP/1.1") +p.send_headers([(b"Host", b"example.com")]) +p.send_body(b'') + +# Receive the response... +protocol, code, reason_phase = p.recv_status_line() +headers = p.recv_headers() +body = b'' +while buffer := p.recv_body(): + body += buffer +``` + +--- + +← [Connections](connections.md) +[Low Level Networking](networking.md) → diff --git a/docs/templates/base.html b/docs/templates/base.html index e6a6ca5..c5965f0 100644 --- a/docs/templates/base.html +++ b/docs/templates/base.html @@ -3,8 +3,8 @@ - - httpx + + ʜᴛᴛᴘx diff --git a/scripts/docs b/scripts/docs index 91a89b7..19771e1 100755 --- a/scripts/docs +++ b/scripts/docs @@ -23,6 +23,7 @@ pages = { '/headers': 'docs/headers.md', '/content-types': 'docs/content-types.md', '/connections': 'docs/connections.md', + '/parsers': 'docs/parsers.md', '/networking': 'docs/networking.md', '/about': 'docs/about.md', } diff --git a/scripts/unasync b/scripts/unasync index 8ae754e..dafa094 100755 --- a/scripts/unasync +++ b/scripts/unasync @@ -7,6 +7,7 @@ unasync.unasync_files( "src/ahttpx/_client.py", "src/ahttpx/_content.py", "src/ahttpx/_headers.py", + "src/ahttpx/_parsers.py", "src/ahttpx/_pool.py", "src/ahttpx/_quickstart.py", "src/ahttpx/_response.py", diff --git a/src/ahttpx/__init__.py b/src/ahttpx/__init__.py index 532cf2c..71df364 100644 --- a/src/ahttpx/__init__.py +++ b/src/ahttpx/__init__.py @@ -2,6 +2,7 @@ from ._content import * # Content, File, Files, Form, HTML, JSON, MultiPart, Text from ._headers import * # Headers from ._network import * # NetworkBackend, NetworkStream, timeout +from ._parsers import * # HTTPParser, ProtocolError from ._pool import * # Connection, ConnectionPool, Transport from ._quickstart import * # get, post, put, patch, delete from ._response import * # Response @@ -18,35 +19,37 @@ "Connection", "ConnectionPool", "Content", - "delete", "File", "FileStream", "Files", "Form", - "get", "Headers", "HTML", + "HTTPParser", "IterByteStream", "JSON", "MultiPart", "NetworkBackend", "NetworkStream", - "open_connection", - "post", - "put", - "patch", + "ProtocolError", "Response", "Request", - "serve_http", - "serve_tcp", "Stream", "Text", - "timeout", "Transport", "QueryParams", + "URL", + "delete", + "get", + "open_connection", + "patch", + "post", + "put", "quote", + "serve_http", + "serve_tcp", + "timeout", "unquote", - "URL", "urldecode", "urlencode", ] diff --git a/src/ahttpx/_content.py b/src/ahttpx/_content.py index e75d30f..ea0ceac 100644 --- a/src/ahttpx/_content.py +++ b/src/ahttpx/_content.py @@ -77,7 +77,7 @@ def encode(self) -> tuple[Stream, str]: stream = ByteStream(str(self).encode("ascii")) content_type = "application/x-www-form-urlencoded" return (stream, content_type) - + # Dict operations def keys(self) -> typing.KeysView[str]: diff --git a/src/ahttpx/_parsers.py b/src/ahttpx/_parsers.py new file mode 100644 index 0000000..fb658dd --- /dev/null +++ b/src/ahttpx/_parsers.py @@ -0,0 +1,412 @@ +import enum + +__all__ = ['HTTPParser', 'ProtocolError'] + + +# TODO... + +# * Upgrade +# * CONNECT + +# * Support 'Expect: 100 Continue' +# * Add 'Error' state transitions +# * Add tests to trickle data +# * Add type annotations + +# * Optional... HTTP/1.0 support +# * Read trailing headers on Transfer-Encoding: chunked. Not just '\r\n'. +# * When writing Transfer-Encoding: chunked, split large writes into buffer size. +# * When reading Transfer-Encoding: chunked, handle incomplete reads from large chunk sizes. +# * .read() doesn't document if will always return maximum available. + +# * validate method, target, protocol in request line +# * validate protocol, status_code, reason_phrase in response line +# * validate name, value on headers + + +class State(enum.Enum): + IDLE = 0 + SEND_STATUS_LINE = 1 + SEND_HEADERS = 2 + SEND_BODY = 3 + DONE = 4 + MUST_CLOSE = 5 + CLOSED = 6 + ERROR = 7 + + +class ProtocolError(Exception): + pass + + +class HTTPParser: + """ + Usage... + + client = HTTPParser(writer, reader) + client.send_method_line() + client.send_headers() + client.send_body() + client.recv_status_line() + client.recv_headers() + client.recv_body() + client.start_next_cycle() + client.close() + """ + def __init__(self, writer, reader): + self.writer = writer + self.reader = reader + self.parser = ReadAheadParser(reader) + + # Track client and server state... + self.our_state = State.IDLE + self.their_state = State.IDLE + + # Track message framing... + self.our_content_length = 0 + self.their_content_length = 0 + self.our_seen_length = 0 + self.their_seen_length = 0 + + # Track connection keep alive... + self.our_keep_alive = True + self.their_keep_alive = True + + # Special states... + self.processing_1xx = False + + def send_method_line(self, method, target, protocol): + """ + Send the initial request line: + + >>> p.send_method_line(b'GET', b'/', b'HTTP/1.1') + + The client will switch to SEND_HEADERS state. + The server will switch to SEND_STATUS_LINE state. + """ + if self.our_state != State.IDLE: + msg = f"Called 'send_method_line' in invalid state {self.description()}" + raise ProtocolError(msg) + + # Send initial request line, eg. "GET / HTTP/1.1" + if protocol != b'HTTP/1.1': + raise ProtocolError("Sent unsupported protocol version") + data = b" ".join([method, target, protocol]) + b"\r\n" + self.writer.write(data) + + self.our_state = State.SEND_HEADERS + self.their_state = State.SEND_STATUS_LINE + + def send_headers(self, headers): + """ + Send the request headers: + + >>> p.send_headers([(b'Host', b'www.example.com')]) + + The client will switch to SEND_BODY state. + """ + if self.our_state != State.SEND_HEADERS: + msg = f"Called 'send_headers' in invalid state {self.description()}" + raise ProtocolError(msg) + + # Update header state + seen_host = False + for name, value in headers: + lname = name.lower() + if lname == b'host': + seen_host = True + elif lname == b'content-length': + self.our_content_length = bounded_int( + value, + max_digits=20, + exc_text="Sent invalid Content-Length" + ) + elif lname == b'connection' and value == b'close': + self.our_keep_alive = False + elif lname == b'transfer-encoding' and value == b'chunked': + self.our_content_length = None + if not seen_host: + raise ProtocolError("Request missing 'Host' header") + + # Send request headers + lines = [name + b": " + value + b"\r\n" for name, value in headers] + data = b"".join(lines) + b"\r\n" + self.writer.write(data) + + self.our_state = State.SEND_BODY + + def send_body(self, body): + """ + Send the request body. An empty bytes argument indicates the end of the stream: + + >>> p.send_body(b'') + + The client will switch to DONE by default. + The client will switch to MUST_CLOSE for requests a with 'Connection: close' header. + """ + if self.our_state != State.SEND_BODY: + msg = f"Called 'send_body' in invalid state {self.description()}" + raise ProtocolError(msg) + + if self.our_content_length is None: + # Transfer-Encoding: chunked + self.our_seen_length += len(body) + self.writer.write(f'{len(body):x}\r\n'.encode('ascii')) + self.writer.write(body + b'\r\n') + + else: + # Content-Length: xxx + self.our_seen_length += len(body) + if self.our_seen_length > self.our_content_length: + msg = 'Too much data sent for declared Content-Length' + raise ProtocolError(msg) + if self.our_seen_length < self.our_content_length and body == b'': + msg = 'Not enough data sent for declared Content-Length' + raise ProtocolError(msg) + if body: + self.writer.write(body) + + if body == b'': + # Handle body close + if self.our_keep_alive: + self.our_state = State.DONE + else: + self.our_state = State.MUST_CLOSE + + def recv_status_line(self): + """ + Receive the initial response status line: + + >>> protocol, status_code, reason_phrase = p.recv_status_line() + + The server will switch to SEND_HEADERS. + """ + if self.their_state != State.SEND_STATUS_LINE: + msg = f"Called 'recv_status_line' in invalid state {self.description()}" + raise ProtocolError(msg) + + # Read initial response line, eg. "HTTP/1.1 200 OK" + exc_text = "reading response status line" + line = self.parser.read_until(b"\r\n", max_size=4096, exc_text=exc_text) + protocol, status_code_str, reason_phrase = line.split(b" ", 2) + if protocol != b'HTTP/1.1': + raise ProtocolError("Received unsupported protocol version") + + status_code = bounded_int( + status_code_str, + max_digits=3, + exc_text="Received invalid status code" + ) + if status_code < 100: + raise ProtocolError("Received invalid status code") + # 1xx status codes preceed the final response status code + self.processing_1xx = status_code < 200 + + self.their_state = State.SEND_HEADERS + return protocol, status_code, reason_phrase + + def recv_headers(self): + """ + Receive the response headers: + + >>> headers = p.recv_status_line() + + The server will switch to SEND_BODY by default. + The server will switch to SEND_STATUS_LINE for preceeding 1xx responses. + """ + if self.their_state != State.SEND_HEADERS: + msg = f"Called 'recv_headers' in invalid state {self.description()}" + raise ProtocolError(msg) + + # Read response headers + headers = [] + exc_text = "reading response headers" + while line := self.parser.read_until(b"\r\n", max_size=4096, exc_text=exc_text): + name, value = line.split(b":", 1) + value = value.strip(b" ") + headers.append((name, value)) + + # Update header state + for name, value in headers: + lname = name.lower() + if lname == b'content-length': + self.their_content_length = bounded_int( + value, + max_digits=20, + exc_text="Received invalid Content-Length" + ) + elif lname == b'connection' and value == b'close': + self.their_keep_alive = False + elif lname == b'transfer-encoding' and value == b'chunked': + self.their_content_length = None + + if self.processing_1xx: + # 1xx status codes preceed the final response status code + self.their_state = State.SEND_STATUS_LINE + else: + self.their_state = State.SEND_BODY + return headers + + def recv_body(self): + """ + Receive the response body. An empty byte string indicates the end of the stream: + + >>> buffer = bytearray() + >>> while body := p.recv_body() + >>> buffer.extend(body) + + The server will switch to DONE by default. + The server will switch to MUST_CLOSE for responses a with 'Connection: close' header. + """ + if self.their_state != State.SEND_BODY: + msg = f"Called 'recv_body' in invalid state {self.description()}" + raise ProtocolError(msg) + + if self.their_content_length is None: + # Transfer-Encoding: chunked + exc_text = 'reading chunk size' + line = self.parser.read_until(b"\r\n", max_size=4096, exc_text=exc_text) + sizestr, _, _ = line.partition(b";") + + exc_text = "Received invalid chunk size" + size = bounded_hex(sizestr, max_digits=8, exc_text=exc_text) + if size > 0: + body = self.parser.read(size=size) + exc_text = 'reading chunk data' + self.parser.read_until(b"\r\n", max_size=2, exc_text=exc_text) + self.their_seen_length += len(body) + else: + body = b'' + exc_text = 'reading chunk termination' + self.parser.read_until(b"\r\n", max_size=2, exc_text=exc_text) + + else: + # Content-Length: xxx + remaining = self.their_content_length - self.their_seen_length + size = min(remaining, 4096) + body = self.parser.read(size=size) + self.their_seen_length += len(body) + if self.their_seen_length < self.their_content_length and body == b'': + msg = 'Not enough data received for declared Content-Length' + raise ProtocolError(msg) + + if body == b'': + # Handle body close + if self.their_keep_alive: + self.their_state = State.DONE + else: + self.their_state = State.MUST_CLOSE + return body + + def start_next_cycle(self): + if self.our_state != State.DONE or self.their_state != State.DONE: + msg = f"Called 'start_next_cycle' in invalid state {self.description()}" + raise ProtocolError(msg) + + self.our_state = State.IDLE + self.their_state = State.IDLE + self.our_content_length = 0 + self.their_content_length = 0 + self.our_seen_length = 0 + self.their_seen_length = 0 + self.our_keep_alive = True + self.their_keep_alive = True + self.processing_1xx = False + + def description(self) -> str: + cl_state = self.our_state.name + sr_state = self.their_state.name + return f"client {cl_state}, server {sr_state}" + + def __repr__(self) -> str: + desc = self.description() + return f'' + + +class ReadAheadParser: + """ + A buffered I/O stream, with methods for read-ahead parsing. + """ + def __init__(self, stream): + self._buffer = b'' + self._stream = stream + self._chunk_size = 4096 + + def _read_some(self): + if self._buffer: + ret, self._buffer = self._buffer, b'' + return ret + return self._stream.read(self._chunk_size) + + def _push_back(self, buffer): + assert self._buffer == b'' + self._buffer = buffer + + def read(self, size): + """ + Read and return up to 'size' bytes from the stream, with I/O buffering provided. + + * Returns b'' to indicate connection close. + """ + buffer = bytearray() + while len(buffer) < size: + chunk = self._read_some() + if not chunk: + break + buffer.extend(chunk) + + if len(buffer) > size: + buffer, push_back = buffer[:size], buffer[size:] + self._push_back(bytes(push_back)) + return bytes(buffer) + + def read_until(self, marker, max_size, exc_text): + """ + Read and return bytes from the stream, delimited by marker. + + * The marker is not included in the return bytes. + * The marker is consumed from the I/O stream. + * Raises `ProtocolError` if the stream closes before a marker occurance. + * Raises `ProtocolError` if marker did not occur within 'max_size + len(marker)' bytes. + """ + buffer = bytearray() + while len(buffer) <= max_size: + chunk = self._read_some() + if not chunk: + # stream closed before marker found. + raise ProtocolError(f"Stream closed early {exc_text}") + start_search = max(len(buffer) - len(marker), 0) + buffer.extend(chunk) + index = buffer.find(marker, start_search) + + if index > max_size: + # marker was found, though 'max_size' exceeded. + raise ProtocolError(f"Exceeded maximum size {exc_text}") + elif index >= 0: + endindex = index + len(marker) + self._push_back(bytes(buffer[endindex:])) + return bytes(buffer[:index]) + + raise ProtocolError(f"Exceeded maximum size {exc_text}") + + +def bounded_int(intstr: bytes, max_digits: int, exc_text: str): + if len(intstr) > max_digits: + # Length of bytestring exceeds maximum. + raise ProtocolError(exc_text) + if len(intstr.strip(b'0123456789')) != 0: + # Contains invalid characters. + raise ProtocolError(exc_text) + + return int(intstr) + + +def bounded_hex(hexstr: bytes, max_digits: int, exc_text: str): + if len(hexstr) > max_digits: + # Length of bytestring exceeds maximum. + raise ProtocolError(exc_text) + if len(hexstr.strip(b'0123456789abcdefABCDEF')) != 0: + # Contains invalid characters. + raise ProtocolError(exc_text) + + return int(hexstr, base=16) diff --git a/src/httpx/__init__.py b/src/httpx/__init__.py index 532cf2c..71df364 100644 --- a/src/httpx/__init__.py +++ b/src/httpx/__init__.py @@ -2,6 +2,7 @@ from ._content import * # Content, File, Files, Form, HTML, JSON, MultiPart, Text from ._headers import * # Headers from ._network import * # NetworkBackend, NetworkStream, timeout +from ._parsers import * # HTTPParser, ProtocolError from ._pool import * # Connection, ConnectionPool, Transport from ._quickstart import * # get, post, put, patch, delete from ._response import * # Response @@ -18,35 +19,37 @@ "Connection", "ConnectionPool", "Content", - "delete", "File", "FileStream", "Files", "Form", - "get", "Headers", "HTML", + "HTTPParser", "IterByteStream", "JSON", "MultiPart", "NetworkBackend", "NetworkStream", - "open_connection", - "post", - "put", - "patch", + "ProtocolError", "Response", "Request", - "serve_http", - "serve_tcp", "Stream", "Text", - "timeout", "Transport", "QueryParams", + "URL", + "delete", + "get", + "open_connection", + "patch", + "post", + "put", "quote", + "serve_http", + "serve_tcp", + "timeout", "unquote", - "URL", "urldecode", "urlencode", ] diff --git a/src/httpx/_content.py b/src/httpx/_content.py index 1ee0ef2..4798b45 100644 --- a/src/httpx/_content.py +++ b/src/httpx/_content.py @@ -77,7 +77,7 @@ def encode(self) -> tuple[Stream, str]: stream = ByteStream(str(self).encode("ascii")) content_type = "application/x-www-form-urlencoded" return (stream, content_type) - + # Dict operations def keys(self) -> typing.KeysView[str]: diff --git a/src/httpx/_parsers.py b/src/httpx/_parsers.py new file mode 100644 index 0000000..fb658dd --- /dev/null +++ b/src/httpx/_parsers.py @@ -0,0 +1,412 @@ +import enum + +__all__ = ['HTTPParser', 'ProtocolError'] + + +# TODO... + +# * Upgrade +# * CONNECT + +# * Support 'Expect: 100 Continue' +# * Add 'Error' state transitions +# * Add tests to trickle data +# * Add type annotations + +# * Optional... HTTP/1.0 support +# * Read trailing headers on Transfer-Encoding: chunked. Not just '\r\n'. +# * When writing Transfer-Encoding: chunked, split large writes into buffer size. +# * When reading Transfer-Encoding: chunked, handle incomplete reads from large chunk sizes. +# * .read() doesn't document if will always return maximum available. + +# * validate method, target, protocol in request line +# * validate protocol, status_code, reason_phrase in response line +# * validate name, value on headers + + +class State(enum.Enum): + IDLE = 0 + SEND_STATUS_LINE = 1 + SEND_HEADERS = 2 + SEND_BODY = 3 + DONE = 4 + MUST_CLOSE = 5 + CLOSED = 6 + ERROR = 7 + + +class ProtocolError(Exception): + pass + + +class HTTPParser: + """ + Usage... + + client = HTTPParser(writer, reader) + client.send_method_line() + client.send_headers() + client.send_body() + client.recv_status_line() + client.recv_headers() + client.recv_body() + client.start_next_cycle() + client.close() + """ + def __init__(self, writer, reader): + self.writer = writer + self.reader = reader + self.parser = ReadAheadParser(reader) + + # Track client and server state... + self.our_state = State.IDLE + self.their_state = State.IDLE + + # Track message framing... + self.our_content_length = 0 + self.their_content_length = 0 + self.our_seen_length = 0 + self.their_seen_length = 0 + + # Track connection keep alive... + self.our_keep_alive = True + self.their_keep_alive = True + + # Special states... + self.processing_1xx = False + + def send_method_line(self, method, target, protocol): + """ + Send the initial request line: + + >>> p.send_method_line(b'GET', b'/', b'HTTP/1.1') + + The client will switch to SEND_HEADERS state. + The server will switch to SEND_STATUS_LINE state. + """ + if self.our_state != State.IDLE: + msg = f"Called 'send_method_line' in invalid state {self.description()}" + raise ProtocolError(msg) + + # Send initial request line, eg. "GET / HTTP/1.1" + if protocol != b'HTTP/1.1': + raise ProtocolError("Sent unsupported protocol version") + data = b" ".join([method, target, protocol]) + b"\r\n" + self.writer.write(data) + + self.our_state = State.SEND_HEADERS + self.their_state = State.SEND_STATUS_LINE + + def send_headers(self, headers): + """ + Send the request headers: + + >>> p.send_headers([(b'Host', b'www.example.com')]) + + The client will switch to SEND_BODY state. + """ + if self.our_state != State.SEND_HEADERS: + msg = f"Called 'send_headers' in invalid state {self.description()}" + raise ProtocolError(msg) + + # Update header state + seen_host = False + for name, value in headers: + lname = name.lower() + if lname == b'host': + seen_host = True + elif lname == b'content-length': + self.our_content_length = bounded_int( + value, + max_digits=20, + exc_text="Sent invalid Content-Length" + ) + elif lname == b'connection' and value == b'close': + self.our_keep_alive = False + elif lname == b'transfer-encoding' and value == b'chunked': + self.our_content_length = None + if not seen_host: + raise ProtocolError("Request missing 'Host' header") + + # Send request headers + lines = [name + b": " + value + b"\r\n" for name, value in headers] + data = b"".join(lines) + b"\r\n" + self.writer.write(data) + + self.our_state = State.SEND_BODY + + def send_body(self, body): + """ + Send the request body. An empty bytes argument indicates the end of the stream: + + >>> p.send_body(b'') + + The client will switch to DONE by default. + The client will switch to MUST_CLOSE for requests a with 'Connection: close' header. + """ + if self.our_state != State.SEND_BODY: + msg = f"Called 'send_body' in invalid state {self.description()}" + raise ProtocolError(msg) + + if self.our_content_length is None: + # Transfer-Encoding: chunked + self.our_seen_length += len(body) + self.writer.write(f'{len(body):x}\r\n'.encode('ascii')) + self.writer.write(body + b'\r\n') + + else: + # Content-Length: xxx + self.our_seen_length += len(body) + if self.our_seen_length > self.our_content_length: + msg = 'Too much data sent for declared Content-Length' + raise ProtocolError(msg) + if self.our_seen_length < self.our_content_length and body == b'': + msg = 'Not enough data sent for declared Content-Length' + raise ProtocolError(msg) + if body: + self.writer.write(body) + + if body == b'': + # Handle body close + if self.our_keep_alive: + self.our_state = State.DONE + else: + self.our_state = State.MUST_CLOSE + + def recv_status_line(self): + """ + Receive the initial response status line: + + >>> protocol, status_code, reason_phrase = p.recv_status_line() + + The server will switch to SEND_HEADERS. + """ + if self.their_state != State.SEND_STATUS_LINE: + msg = f"Called 'recv_status_line' in invalid state {self.description()}" + raise ProtocolError(msg) + + # Read initial response line, eg. "HTTP/1.1 200 OK" + exc_text = "reading response status line" + line = self.parser.read_until(b"\r\n", max_size=4096, exc_text=exc_text) + protocol, status_code_str, reason_phrase = line.split(b" ", 2) + if protocol != b'HTTP/1.1': + raise ProtocolError("Received unsupported protocol version") + + status_code = bounded_int( + status_code_str, + max_digits=3, + exc_text="Received invalid status code" + ) + if status_code < 100: + raise ProtocolError("Received invalid status code") + # 1xx status codes preceed the final response status code + self.processing_1xx = status_code < 200 + + self.their_state = State.SEND_HEADERS + return protocol, status_code, reason_phrase + + def recv_headers(self): + """ + Receive the response headers: + + >>> headers = p.recv_status_line() + + The server will switch to SEND_BODY by default. + The server will switch to SEND_STATUS_LINE for preceeding 1xx responses. + """ + if self.their_state != State.SEND_HEADERS: + msg = f"Called 'recv_headers' in invalid state {self.description()}" + raise ProtocolError(msg) + + # Read response headers + headers = [] + exc_text = "reading response headers" + while line := self.parser.read_until(b"\r\n", max_size=4096, exc_text=exc_text): + name, value = line.split(b":", 1) + value = value.strip(b" ") + headers.append((name, value)) + + # Update header state + for name, value in headers: + lname = name.lower() + if lname == b'content-length': + self.their_content_length = bounded_int( + value, + max_digits=20, + exc_text="Received invalid Content-Length" + ) + elif lname == b'connection' and value == b'close': + self.their_keep_alive = False + elif lname == b'transfer-encoding' and value == b'chunked': + self.their_content_length = None + + if self.processing_1xx: + # 1xx status codes preceed the final response status code + self.their_state = State.SEND_STATUS_LINE + else: + self.their_state = State.SEND_BODY + return headers + + def recv_body(self): + """ + Receive the response body. An empty byte string indicates the end of the stream: + + >>> buffer = bytearray() + >>> while body := p.recv_body() + >>> buffer.extend(body) + + The server will switch to DONE by default. + The server will switch to MUST_CLOSE for responses a with 'Connection: close' header. + """ + if self.their_state != State.SEND_BODY: + msg = f"Called 'recv_body' in invalid state {self.description()}" + raise ProtocolError(msg) + + if self.their_content_length is None: + # Transfer-Encoding: chunked + exc_text = 'reading chunk size' + line = self.parser.read_until(b"\r\n", max_size=4096, exc_text=exc_text) + sizestr, _, _ = line.partition(b";") + + exc_text = "Received invalid chunk size" + size = bounded_hex(sizestr, max_digits=8, exc_text=exc_text) + if size > 0: + body = self.parser.read(size=size) + exc_text = 'reading chunk data' + self.parser.read_until(b"\r\n", max_size=2, exc_text=exc_text) + self.their_seen_length += len(body) + else: + body = b'' + exc_text = 'reading chunk termination' + self.parser.read_until(b"\r\n", max_size=2, exc_text=exc_text) + + else: + # Content-Length: xxx + remaining = self.their_content_length - self.their_seen_length + size = min(remaining, 4096) + body = self.parser.read(size=size) + self.their_seen_length += len(body) + if self.their_seen_length < self.their_content_length and body == b'': + msg = 'Not enough data received for declared Content-Length' + raise ProtocolError(msg) + + if body == b'': + # Handle body close + if self.their_keep_alive: + self.their_state = State.DONE + else: + self.their_state = State.MUST_CLOSE + return body + + def start_next_cycle(self): + if self.our_state != State.DONE or self.their_state != State.DONE: + msg = f"Called 'start_next_cycle' in invalid state {self.description()}" + raise ProtocolError(msg) + + self.our_state = State.IDLE + self.their_state = State.IDLE + self.our_content_length = 0 + self.their_content_length = 0 + self.our_seen_length = 0 + self.their_seen_length = 0 + self.our_keep_alive = True + self.their_keep_alive = True + self.processing_1xx = False + + def description(self) -> str: + cl_state = self.our_state.name + sr_state = self.their_state.name + return f"client {cl_state}, server {sr_state}" + + def __repr__(self) -> str: + desc = self.description() + return f'' + + +class ReadAheadParser: + """ + A buffered I/O stream, with methods for read-ahead parsing. + """ + def __init__(self, stream): + self._buffer = b'' + self._stream = stream + self._chunk_size = 4096 + + def _read_some(self): + if self._buffer: + ret, self._buffer = self._buffer, b'' + return ret + return self._stream.read(self._chunk_size) + + def _push_back(self, buffer): + assert self._buffer == b'' + self._buffer = buffer + + def read(self, size): + """ + Read and return up to 'size' bytes from the stream, with I/O buffering provided. + + * Returns b'' to indicate connection close. + """ + buffer = bytearray() + while len(buffer) < size: + chunk = self._read_some() + if not chunk: + break + buffer.extend(chunk) + + if len(buffer) > size: + buffer, push_back = buffer[:size], buffer[size:] + self._push_back(bytes(push_back)) + return bytes(buffer) + + def read_until(self, marker, max_size, exc_text): + """ + Read and return bytes from the stream, delimited by marker. + + * The marker is not included in the return bytes. + * The marker is consumed from the I/O stream. + * Raises `ProtocolError` if the stream closes before a marker occurance. + * Raises `ProtocolError` if marker did not occur within 'max_size + len(marker)' bytes. + """ + buffer = bytearray() + while len(buffer) <= max_size: + chunk = self._read_some() + if not chunk: + # stream closed before marker found. + raise ProtocolError(f"Stream closed early {exc_text}") + start_search = max(len(buffer) - len(marker), 0) + buffer.extend(chunk) + index = buffer.find(marker, start_search) + + if index > max_size: + # marker was found, though 'max_size' exceeded. + raise ProtocolError(f"Exceeded maximum size {exc_text}") + elif index >= 0: + endindex = index + len(marker) + self._push_back(bytes(buffer[endindex:])) + return bytes(buffer[:index]) + + raise ProtocolError(f"Exceeded maximum size {exc_text}") + + +def bounded_int(intstr: bytes, max_digits: int, exc_text: str): + if len(intstr) > max_digits: + # Length of bytestring exceeds maximum. + raise ProtocolError(exc_text) + if len(intstr.strip(b'0123456789')) != 0: + # Contains invalid characters. + raise ProtocolError(exc_text) + + return int(intstr) + + +def bounded_hex(hexstr: bytes, max_digits: int, exc_text: str): + if len(hexstr) > max_digits: + # Length of bytestring exceeds maximum. + raise ProtocolError(exc_text) + if len(hexstr.strip(b'0123456789abcdefABCDEF')) != 0: + # Contains invalid characters. + raise ProtocolError(exc_text) + + return int(hexstr, base=16) diff --git a/tests/test_parsers.py b/tests/test_parsers.py new file mode 100644 index 0000000..b76a97a --- /dev/null +++ b/tests/test_parsers.py @@ -0,0 +1,684 @@ +import httpx +import io +import pytest + + +class TrickleIO(): + def __init__(self, content): + self.buffer = io.BytesIO(content) + + def read(self, size): + return self.buffer.read(1) + + +def test_parser(): + writer = io.BytesIO() + reader = io.BytesIO( + b"HTTP/1.1 200 OK\r\n" + b"Content-Length: 12\r\n" + b"Content-Type: text/plain\r\n" + b"\r\n" + b"hello, world" + ) + + p = httpx.HTTPParser(writer, reader) + p.send_method_line(b"POST", b"/", b"HTTP/1.1") + p.send_headers([ + (b"Host", b"example.com"), + (b"Content-Type", b"application/json"), + (b"Content-Length", b"23"), + ]) + p.send_body(b'{"msg": "hello, world"}') + p.send_body(b'') + + assert writer.getvalue() == ( + b"POST / HTTP/1.1\r\n" + b"Host: example.com\r\n" + b"Content-Type: application/json\r\n" + b"Content-Length: 23\r\n" + b"\r\n" + b'{"msg": "hello, world"}' + ) + + protocol, code, reason_phase = p.recv_status_line() + headers = p.recv_headers() + body = p.recv_body() + terminator = p.recv_body() + + assert protocol == b'HTTP/1.1' + assert code == 200 + assert reason_phase == b'OK' + assert headers == [ + (b'Content-Length', b'12'), + (b'Content-Type', b'text/plain'), + ] + assert body == b'hello, world' + assert terminator == b'' + + +def test_parser_trickle(): + writer = io.BytesIO() + reader = TrickleIO( + b"HTTP/1.1 200 OK\r\n" + b"Content-Length: 12\r\n" + b"Content-Type: text/plain\r\n" + b"\r\n" + b"hello, world" + ) + + p = httpx.HTTPParser(writer, reader) + p.send_method_line(b"POST", b"/", b"HTTP/1.1") + p.send_headers([ + (b"Host", b"example.com"), + (b"Content-Type", b"application/json"), + (b"Content-Length", b"23"), + ]) + p.send_body(b'{"msg": "hello, world"}') + p.send_body(b'') + + assert writer.getvalue() == ( + b"POST / HTTP/1.1\r\n" + b"Host: example.com\r\n" + b"Content-Type: application/json\r\n" + b"Content-Length: 23\r\n" + b"\r\n" + b'{"msg": "hello, world"}' + ) + + protocol, code, reason_phase = p.recv_status_line() + headers = p.recv_headers() + body = p.recv_body() + terminator = p.recv_body() + + assert protocol == b'HTTP/1.1' + assert code == 200 + assert reason_phase == b'OK' + assert headers == [ + (b'Content-Length', b'12'), + (b'Content-Type', b'text/plain'), + ] + assert body == b'hello, world' + assert terminator == b'' + + +def test_parser_transfer_encoding_chunked(): + writer = io.BytesIO() + reader = io.BytesIO( + b"HTTP/1.1 200 OK\r\n" + b"Content-Type: text/plain\r\n" + b"Transfer-Encoding: chunked\r\n" + b"\r\n" + b"c\r\n" + b"hello, world\r\n" + b"0\r\n\r\n" + ) + + p = httpx.HTTPParser(writer, reader) + p.send_method_line(b"POST", b"/", b"HTTP/1.1") + p.send_headers([ + (b"Host", b"example.com"), + (b"Content-Type", b"application/json"), + (b"Transfer-Encoding", b"chunked"), + ]) + p.send_body(b'{"msg": "hello, world"}') + p.send_body(b'') + + assert writer.getvalue() == ( + b"POST / HTTP/1.1\r\n" + b"Host: example.com\r\n" + b"Content-Type: application/json\r\n" + b"Transfer-Encoding: chunked\r\n" + b"\r\n" + b'17\r\n' + b'{"msg": "hello, world"}\r\n' + b'0\r\n\r\n' + ) + + protocol, code, reason_phase = p.recv_status_line() + headers = p.recv_headers() + body = p.recv_body() + terminator = p.recv_body() + + assert protocol == b'HTTP/1.1' + assert code == 200 + assert reason_phase == b'OK' + assert headers == [ + (b'Content-Type', b'text/plain'), + (b'Transfer-Encoding', b'chunked'), + ] + assert body == b'hello, world' + assert terminator == b'' + + +def test_parser_transfer_encoding_chunked_trickle(): + writer = io.BytesIO() + reader = TrickleIO( + b"HTTP/1.1 200 OK\r\n" + b"Content-Type: text/plain\r\n" + b"Transfer-Encoding: chunked\r\n" + b"\r\n" + b"c\r\n" + b"hello, world\r\n" + b"0\r\n\r\n" + ) + + p = httpx.HTTPParser(writer, reader) + p.send_method_line(b"POST", b"/", b"HTTP/1.1") + p.send_headers([ + (b"Host", b"example.com"), + (b"Content-Type", b"application/json"), + (b"Transfer-Encoding", b"chunked"), + ]) + p.send_body(b'{"msg": "hello, world"}') + p.send_body(b'') + + assert writer.getvalue() == ( + b"POST / HTTP/1.1\r\n" + b"Host: example.com\r\n" + b"Content-Type: application/json\r\n" + b"Transfer-Encoding: chunked\r\n" + b"\r\n" + b'17\r\n' + b'{"msg": "hello, world"}\r\n' + b'0\r\n\r\n' + ) + + protocol, code, reason_phase = p.recv_status_line() + headers = p.recv_headers() + body = p.recv_body() + terminator = p.recv_body() + + assert protocol == b'HTTP/1.1' + assert code == 200 + assert reason_phase == b'OK' + assert headers == [ + (b'Content-Type', b'text/plain'), + (b'Transfer-Encoding', b'chunked'), + ] + assert body == b'hello, world' + assert terminator == b'' + + +def test_parser_repr(): + writer = io.BytesIO() + reader = io.BytesIO( + b"HTTP/1.1 200 OK\r\n" + b"Content-Type: application/json\r\n" + b"Content-Length: 23\r\n" + b"\r\n" + b'{"msg": "hello, world"}' + ) + + p = httpx.HTTPParser(writer, reader) + assert repr(p) == "" + + p.send_method_line(b"GET", b"/", b"HTTP/1.1") + assert repr(p) == "" + + p.send_headers([(b"Host", b"example.com")]) + assert repr(p) == "" + + p.send_body(b'') + assert repr(p) == "" + + p.recv_status_line() + assert repr(p) == "" + + p.recv_headers() + assert repr(p) == "" + + p.recv_body() + assert repr(p) == "" + + p.recv_body() + assert repr(p) == "" + + p.start_next_cycle() + assert repr(p) == "" + + +def test_parser_invalid_transitions(): + writer = io.BytesIO() + reader = io.BytesIO() + + with pytest.raises(httpx.ProtocolError): + p = httpx.HTTPParser(writer, reader) + p.send_method_line(b'GET', b'/', b'HTTP/1.1') + p.send_method_line(b'GET', b'/', b'HTTP/1.1') + + with pytest.raises(httpx.ProtocolError): + p = httpx.HTTPParser(writer, reader) + p.send_headers([]) + + with pytest.raises(httpx.ProtocolError): + p = httpx.HTTPParser(writer, reader) + p.send_body(b'') + + with pytest.raises(httpx.ProtocolError): + reader = io.BytesIO(b'HTTP/1.1 200 OK\r\n') + p = httpx.HTTPParser(writer, reader) + p.recv_status_line() + + with pytest.raises(httpx.ProtocolError): + p = httpx.HTTPParser(writer, reader) + p.recv_headers() + + with pytest.raises(httpx.ProtocolError): + p = httpx.HTTPParser(writer, reader) + p.recv_body() + + with pytest.raises(httpx.ProtocolError): + p = httpx.HTTPParser(writer, reader) + p.start_next_cycle() + + +def test_parser_invalid_status_line(): + # ... + writer = io.BytesIO() + reader = io.BytesIO(b'...') + + p = httpx.HTTPParser(writer, reader) + p.send_method_line(b"GET", b"/", b"HTTP/1.1") + p.send_headers([(b"Host", b"example.com")]) + p.send_body(b'') + + msg = 'Stream closed early reading response status line' + with pytest.raises(httpx.ProtocolError, match=msg): + p.recv_status_line() + + # ... + writer = io.BytesIO() + reader = io.BytesIO(b'HTTP/1.1' + b'x' * 5000) + + p = httpx.HTTPParser(writer, reader) + p.send_method_line(b"GET", b"/", b"HTTP/1.1") + p.send_headers([(b"Host", b"example.com")]) + p.send_body(b'') + + msg = 'Exceeded maximum size reading response status line' + with pytest.raises(httpx.ProtocolError, match=msg): + p.recv_status_line() + + # ... + writer = io.BytesIO() + reader = io.BytesIO(b'HTTP/1.1' + b'x' * 5000 + b'\r\n') + + p = httpx.HTTPParser(writer, reader) + p.send_method_line(b"GET", b"/", b"HTTP/1.1") + p.send_headers([(b"Host", b"example.com")]) + p.send_body(b'') + + msg = 'Exceeded maximum size reading response status line' + with pytest.raises(httpx.ProtocolError, match=msg): + p.recv_status_line() + + +def test_parser_sent_unsupported_protocol(): + # Currently only HTTP/1.1 is supported. + writer = io.BytesIO() + reader = io.BytesIO() + + p = httpx.HTTPParser(writer, reader) + msg = 'Sent unsupported protocol version' + with pytest.raises(httpx.ProtocolError, match=msg): + p.send_method_line(b"GET", b"/", b"HTTP/1.0") + + +def test_parser_recv_unsupported_protocol(): + # Currently only HTTP/1.1 is supported. + writer = io.BytesIO() + reader = io.BytesIO(b"HTTP/1.0 200 OK\r\n") + + p = httpx.HTTPParser(writer, reader) + p.send_method_line(b"GET", b"/", b"HTTP/1.1") + msg = 'Received unsupported protocol version' + with pytest.raises(httpx.ProtocolError, match=msg): + p.recv_status_line() + + +def test_parser_large_body(): + body = b"x" * 6988 + + writer = io.BytesIO() + reader = io.BytesIO( + b"HTTP/1.1 200 OK\r\n" + b"Content-Length: 6988\r\n" + b"Content-Type: text/plain\r\n" + b"\r\n" + body + ) + + p = httpx.HTTPParser(writer, reader) + p.send_method_line(b"GET", b"/", b"HTTP/1.1") + p.send_headers([(b"Host", b"example.com")]) + p.send_body(b'') + + # Checkout our buffer sizes. + p.recv_status_line() + p.recv_headers() + assert len(p.recv_body()) == 4096 + assert len(p.recv_body()) == 2892 + assert len(p.recv_body()) == 0 + + +def test_parser_stream_large_body(): + body = b"x" * 6956 + + writer = io.BytesIO() + reader = io.BytesIO( + b"HTTP/1.1 200 OK\r\n" + b"Transfer-Encoding: chunked\r\n" + b"Content-Type: text/plain\r\n" + b"\r\n" + b"1b2c\r\n" + body + b'\r\n0\r\n\r\n' + ) + + p = httpx.HTTPParser(writer, reader) + p.send_method_line(b"GET", b"/", b"HTTP/1.1") + p.send_headers([(b"Host", b"example.com")]) + p.send_body(b'') + + # Checkout our buffer sizes. + p.recv_status_line() + p.recv_headers() + # assert len(p.recv_body()) == 4096 + # assert len(p.recv_body()) == 2860 + assert len(p.recv_body()) == 6956 + assert len(p.recv_body()) == 0 + + +def test_parser_not_enough_data_received(): + writer = io.BytesIO() + reader = io.BytesIO( + b"HTTP/1.1 200 OK\r\n" + b"Content-Length: 188\r\n" + b"Content-Type: text/plain\r\n" + b"\r\n" + b"truncated" + ) + + p = httpx.HTTPParser(writer, reader) + p.send_method_line(b"GET", b"/", b"HTTP/1.1") + p.send_headers([(b"Host", b"example.com")]) + p.send_body(b'') + + # Checkout our buffer sizes. + p.recv_status_line() + p.recv_headers() + p.recv_body() + msg = 'Not enough data received for declared Content-Length' + with pytest.raises(httpx.ProtocolError, match=msg): + p.recv_body() + + +def test_parser_not_enough_data_sent(): + writer = io.BytesIO() + reader = io.BytesIO() + + p = httpx.HTTPParser(writer, reader) + p.send_method_line(b"POST", b"/", b"HTTP/1.1") + p.send_headers([ + (b"Host", b"example.com"), + (b"Content-Type", b"application/json"), + (b"Content-Length", b"23"), + ]) + p.send_body(b'{"msg": "too smol"}') + msg = 'Not enough data sent for declared Content-Length' + with pytest.raises(httpx.ProtocolError, match=msg): + p.send_body(b'') + + +def test_parser_too_much_data_sent(): + writer = io.BytesIO() + reader = io.BytesIO() + + p = httpx.HTTPParser(writer, reader) + p.send_method_line(b"POST", b"/", b"HTTP/1.1") + p.send_headers([ + (b"Host", b"example.com"), + (b"Content-Type", b"application/json"), + (b"Content-Length", b"19"), + ]) + msg = 'Too much data sent for declared Content-Length' + with pytest.raises(httpx.ProtocolError, match=msg): + p.send_body(b'{"msg": "too chonky"}') + + +def test_parser_missing_host_header(): + writer = io.BytesIO() + reader = io.BytesIO() + + p = httpx.HTTPParser(writer, reader) + p.send_method_line(b"GET", b"/", b"HTTP/1.1") + msg = "Request missing 'Host' header" + with pytest.raises(httpx.ProtocolError, match=msg): + p.send_headers([]) + + +def test_client_connection_close(): + writer = io.BytesIO() + reader = io.BytesIO( + b"HTTP/1.1 200 OK\r\n" + b"Content-Length: 12\r\n" + b"Content-Type: text/plain\r\n" + b"\r\n" + b"hello, world" + ) + + p = httpx.HTTPParser(writer, reader) + p.send_method_line(b"GET", b"/", b"HTTP/1.1") + p.send_headers([ + (b"Host", b"example.com"), + (b"Connection", b"close"), + ]) + p.send_body(b'') + + protocol, code, reason_phase = p.recv_status_line() + headers = p.recv_headers() + body = p.recv_body() + terminator = p.recv_body() + + assert protocol == b'HTTP/1.1' + assert code == 200 + assert reason_phase == b"OK" + assert headers == [ + (b'Content-Length', b'12'), + (b'Content-Type', b'text/plain'), + ] + assert body == b"hello, world" + assert terminator == b"" + + assert repr(p) == "" + + +def test_server_connection_close(): + writer = io.BytesIO() + reader = io.BytesIO( + b"HTTP/1.1 200 OK\r\n" + b"Content-Length: 12\r\n" + b"Content-Type: text/plain\r\n" + b"Connection: close\r\n" + b"\r\n" + b"hello, world" + ) + + p = httpx.HTTPParser(writer, reader) + p.send_method_line(b"GET", b"/", b"HTTP/1.1") + p.send_headers([(b"Host", b"example.com")]) + p.send_body(b'') + + protocol, code, reason_phase = p.recv_status_line() + headers = p.recv_headers() + body = p.recv_body() + terminator = p.recv_body() + + assert protocol == b'HTTP/1.1' + assert code == 200 + assert reason_phase == b"OK" + assert headers == [ + (b'Content-Length', b'12'), + (b'Content-Type', b'text/plain'), + (b'Connection', b'close'), + ] + assert body == b"hello, world" + assert terminator == b"" + + assert repr(p) == "" + + +def test_invalid_status_code(): + writer = io.BytesIO() + reader = io.BytesIO( + b"HTTP/1.1 99 OK\r\n" + b"Content-Length: 12\r\n" + b"Content-Type: text/plain\r\n" + b"\r\n" + b"hello, world" + ) + + p = httpx.HTTPParser(writer, reader) + p.send_method_line(b"GET", b"/", b"HTTP/1.1") + p.send_headers([ + (b"Host", b"example.com"), + (b"Connection", b"close"), + ]) + p.send_body(b'') + + msg = "Received invalid status code" + with pytest.raises(httpx.ProtocolError, match=msg): + p.recv_status_line() + + +def test_1xx_status_code(): + writer = io.BytesIO() + reader = io.BytesIO( + b"HTTP/1.1 103 Early Hints\r\n" + b"Link: ; rel=preload; as=style\r\n" + b"Link: ; rel=preload; as=script\r\n" + b"\r\n" + b"HTTP/1.1 200 OK\r\n" + b"Content-Length: 12\r\n" + b"Content-Type: text/plain\r\n" + b"\r\n" + b"hello, world" + ) + + p = httpx.HTTPParser(writer, reader) + p.send_method_line(b"GET", b"/", b"HTTP/1.1") + p.send_headers([(b"Host", b"example.com")]) + p.send_body(b'') + + protocol, code, reason_phase = p.recv_status_line() + headers = p.recv_headers() + + assert protocol == b'HTTP/1.1' + assert code == 103 + assert reason_phase == b'Early Hints' + assert headers == [ + (b'Link', b'; rel=preload; as=style'), + (b'Link', b'; rel=preload; as=script'), + ] + + protocol, code, reason_phase = p.recv_status_line() + headers = p.recv_headers() + body = p.recv_body() + terminator = p.recv_body() + + assert protocol == b'HTTP/1.1' + assert code == 200 + assert reason_phase == b"OK" + assert headers == [ + (b'Content-Length', b'12'), + (b'Content-Type', b'text/plain'), + ] + assert body == b"hello, world" + assert terminator == b"" + + +def test_received_invalid_content_length(): + writer = io.BytesIO() + reader = io.BytesIO( + b"HTTP/1.1 200 OK\r\n" + b"Content-Length: -999\r\n" + b"Content-Type: text/plain\r\n" + b"\r\n" + b"hello, world" + ) + + p = httpx.HTTPParser(writer, reader) + p.send_method_line(b"GET", b"/", b"HTTP/1.1") + p.send_headers([ + (b"Host", b"example.com"), + (b"Connection", b"close"), + ]) + p.send_body(b'') + + p.recv_status_line() + msg = "Received invalid Content-Length" + with pytest.raises(httpx.ProtocolError, match=msg): + p.recv_headers() + + +def test_sent_invalid_content_length(): + writer = io.BytesIO() + reader = io.BytesIO() + + p = httpx.HTTPParser(writer, reader) + p.send_method_line(b"GET", b"/", b"HTTP/1.1") + msg = "Sent invalid Content-Length" + with pytest.raises(httpx.ProtocolError, match=msg): + # Limited to 20 digits. + # 100 million terabytes should be enough for anyone. + p.send_headers([ + (b"Host", b"example.com"), + (b"Content-Length", b"100000000000000000000"), + ]) + + +def test_received_invalid_characters_in_chunk_size(): + writer = io.BytesIO() + reader = io.BytesIO( + b"HTTP/1.1 200 OK\r\n" + b"Transfer-Encoding: chunked\r\n" + b"Content-Type: text/plain\r\n" + b"\r\n" + b"0xFF\r\n..." + ) + + p = httpx.HTTPParser(writer, reader) + p.send_method_line(b"GET", b"/", b"HTTP/1.1") + p.send_headers([ + (b"Host", b"example.com"), + (b"Connection", b"close"), + ]) + p.send_body(b'') + + p.recv_status_line() + p.recv_headers() + msg = "Received invalid chunk size" + with pytest.raises(httpx.ProtocolError, match=msg): + p.recv_body() + + +def test_received_oversized_chunk(): + writer = io.BytesIO() + reader = io.BytesIO( + b"HTTP/1.1 200 OK\r\n" + b"Transfer-Encoding: chunked\r\n" + b"Content-Type: text/plain\r\n" + b"\r\n" + b"FFFFFFFFFF\r\n..." + ) + + p = httpx.HTTPParser(writer, reader) + p.send_method_line(b"GET", b"/", b"HTTP/1.1") + p.send_headers([ + (b"Host", b"example.com"), + (b"Connection", b"close"), + ]) + p.send_body(b'') + + p.recv_status_line() + p.recv_headers() + msg = "Received invalid chunk size" + with pytest.raises(httpx.ProtocolError, match=msg): + p.recv_body()