From 46b079ba812f2c840828c213d9ea47c6ce0c5d7e Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Tue, 10 Jun 2025 11:07:53 +0100 Subject: [PATCH 01/19] First pass HTTPParser --- docs/connections.md | 2 +- docs/index.md | 1 + docs/networking.md | 2 +- docs/parsers.md | 30 +++++++ docs/templates/base.html | 4 +- scripts/docs | 1 + scripts/unasync | 1 + src/ahttpx/__init__.py | 3 + src/ahttpx/_parsers.py | 178 +++++++++++++++++++++++++++++++++++++ src/httpx/__init__.py | 3 + src/httpx/_parsers.py | 185 +++++++++++++++++++++++++++++++++++++++ 11 files changed, 406 insertions(+), 4 deletions(-) create mode 100644 docs/parsers.md create mode 100644 src/ahttpx/_parsers.py create mode 100644 src/httpx/_parsers.py diff --git a/docs/connections.md b/docs/connections.md index 234fb43..c355d5f 100644 --- a/docs/connections.md +++ b/docs/connections.md @@ -139,5 +139,5 @@ with httpx.open_connection("http://127.0.0.1:8080") as conn: --- ← [Content Types](content-types.md) -[Low Level Networking](networking.md) → +[Parsers](parsers.md) →   diff --git a/docs/index.md b/docs/index.md index 6e49bd7..cdca6c5 100644 --- a/docs/index.md +++ b/docs/index.md @@ -85,6 +85,7 @@ The httpx 1.0 [design proposal](https://www.encode.io/httpnext/) is now availabl * [Headers](headers.md) * [Content Types](content-types.md) * [Connections](connections.md) +* [Parsers](parsers.md) * [Low Level Networking](networking.md) * [About](about.md) diff --git a/docs/networking.md b/docs/networking.md index aa7c88f..4f399a9 100644 --- a/docs/networking.md +++ b/docs/networking.md @@ -231,6 +231,6 @@ Custom network backends can also be used to provide functionality such as handli --- -← [Connections](connections.md) +← [Parsers](parsers.md) [About](about.md) →   diff --git a/docs/parsers.md b/docs/parsers.md new file mode 100644 index 0000000..152c33b --- /dev/null +++ b/docs/parsers.md @@ -0,0 +1,30 @@ +# Parsers + +```python +writer = io.BytesIO() +reader = io.BytesIO( + b"HTTP/1.1 200 OK\r\n" + 
b"Content-Length: 23\r\n" + b"Content-Type: application/json\r\n" + b"\r\n" + b'{"msg": "hello, world"}' +) +p = httpx.HTTPParser(writer, reader) + +# Send the request... +p.send_method_line(b"GET", b"/", b"HTTP/1.1") +p.send_headers([(b"Host", b"example.com")]) +p.send_body(b'') + +# Receive the response... +protocol, code, reason_phase = p.recv_status_line() +headers = p.recv_headers() +body = b'' +while buffer := p.recv_body(): + body += buffer +``` + +--- + +← [Connections](connections.md) +[Low Level Networking](networking.md) → diff --git a/docs/templates/base.html b/docs/templates/base.html index e6a6ca5..c5965f0 100644 --- a/docs/templates/base.html +++ b/docs/templates/base.html @@ -3,8 +3,8 @@ - - httpx + + ʜᴛᴛᴘx diff --git a/scripts/docs b/scripts/docs index d34124e..64be3e5 100755 --- a/scripts/docs +++ b/scripts/docs @@ -22,6 +22,7 @@ pages = { '/headers': 'docs/headers.md', '/content-types': 'docs/content-types.md', '/connections': 'docs/connections.md', + '/parsers': 'docs/parsers.md', '/networking': 'docs/networking.md', '/about': 'docs/about.md', } diff --git a/scripts/unasync b/scripts/unasync index 7fcac1a..3f8b642 100755 --- a/scripts/unasync +++ b/scripts/unasync @@ -7,6 +7,7 @@ unasync.unasync_files( "src/ahttpx/_client.py", "src/ahttpx/_content.py", "src/ahttpx/_headers.py", + "src/ahttpx/_parsers.py", "src/ahttpx/_pool.py", "src/ahttpx/_response.py", "src/ahttpx/_request.py", diff --git a/src/ahttpx/__init__.py b/src/ahttpx/__init__.py index 12d0fbf..68376b7 100644 --- a/src/ahttpx/__init__.py +++ b/src/ahttpx/__init__.py @@ -2,6 +2,7 @@ from ._content import * # Content, File, Files, Form, HTML, JSON, MultiPart, Text from ._headers import * # Headers from ._network import * # NetworkBackend, NetworkStream, timeout +from ._parsers import * # HTTPParser, ProtocolError from ._pool import * # Connection, ConnectionPool, Transport, open_connection_pool, open_connection from ._response import * # Response from ._request import * # Request @@ 
-23,6 +24,7 @@ "Form", "Headers", "HTML", + "HTTPParser", "IterByteStream", "JSON", "MultiPart", @@ -31,6 +33,7 @@ "open_client", "open_connection_pool", "open_connection", + "ProtocolError", "Response", "Request", "serve_http", diff --git a/src/ahttpx/_parsers.py b/src/ahttpx/_parsers.py new file mode 100644 index 0000000..e3b43fa --- /dev/null +++ b/src/ahttpx/_parsers.py @@ -0,0 +1,178 @@ +__all__ = ['HTTPParser', 'ProtocolError'] + + +class State: + IDLE = 0 + SEND_HEADERS = 1 + SEND_BODY = 2 + DONE = 3 + MUST_CLOSE = 4 + CLOSED = 5 + ERROR = 6 + + +class ProtocolError(Exception): + pass + + +class HTTPParser: + """ + Usage... + + client = HTTPParser(stream) + client.send_method_line() IDLE -> SEND_HEADERS + client.send_headers() SEND_HEADERS -> SEND_BODY + client.send_body() SEND_BODY -> SEND_BODY or DONE or MUST_CLOSE + client.recv_status_line() + client.recv_headers() + client.recv_body() + client.start_next_cycle() DONE -> IDLE + client.close() CLOSED + """ + def __init__(self, writer, reader): + self.writer = writer + self.reader = reader + self.parser = ReadAheadParser(reader) + self.our_state = State.IDLE + self.their_state = State.IDLE + + def send_method_line(self, method, target, protocol): + if self.our_state != State.IDLE: + raise ProtocolError(f"Called 'send_method_line' in state {self.our_state}") + + data = b" ".join([method, target, protocol]) + b"\r\n" + self.writer.write(data) + + self.our_state = State.SEND_HEADERS + + def send_headers(self, headers): + if self.our_state != State.SEND_HEADERS: + raise ProtocolError(f"Called 'send_headers' in state {self.our_state}") + + lines = [name + b": " + value + b"\r\n" for name, value in headers] + data = b"".join(lines) + b"\r\n" + self.writer.write(data) + + self.our_state = State.SEND_BODY + + def send_body(self, body): + if self.our_state != State.SEND_BODY: + raise ProtocolError(f"Called 'send_body' in state {self.our_state}") + + # enforce max chunk size, enforce correct content length + if body: 
+ self.writer.write(body) + self.our_state = State.SEND_BODY + else: + self.our_state = State.DONE + + def recv_status_line(self): + if self.their_state != State.IDLE: + raise ProtocolError("Called 'recv_status_line' against state {name}") + + msg = "reading response status line" + line = self.parser.read_until(b"\r\n", max_size=4096, msg=msg) + protocol, status_code, reason_phrase = line.split(b" ", 2) + + self.their_state = State.SEND_HEADERS + return protocol, status_code, reason_phrase + + def recv_headers(self): + if self.their_state != State.SEND_HEADERS: + raise ProtocolError("Called 'recv_headers' against state {name}") + + # Handle Expect: 100-continue + headers = [] + while line := self.parser.read_until(b"\r\n", max_size=4096, msg='recv_headers'): + name, value = line.split(b":", 1) + value = value.strip(b" ") + headers.append((name, value)) + + self.their_state = State.SEND_BODY + return headers + + def recv_body(self): + if self.their_state != State.SEND_BODY: + raise ProtocolError("Called 'recv_body' against state {name}") + + # Validate against Content-Length + # Handle Transfer-Encoding: chunked (inc. trailers) + body = self.parser.read(size=4096) + + # Handle 'Connection: close' and HTTP/1.0 defaults. + if body == b'': + self.their_state = State.DONE + return body + + def start_next_cycle(self): + if self.our_state != State.DONE: + raise ProtocolError("Called 'start_next_cycle' against state {name}") + if self.their_state != State.DONE: + raise ProtocolError("Called 'start_next_cycle' against state {name}") + + self.our_state = State.IDLE + self.their_state = State.IDLE + + +class ReadAheadParser: + """ + A buffered I/O stream, with methods for read-ahead parsing. 
+ """ + def __init__(self, stream): + self._buffer = b'' + self._stream = stream + self._chunk_size = 4096 + + def _read_some(self): + if self._buffer: + ret, self._buffer = self._buffer, b'' + return ret + return self._stream.read(self._chunk_size) + + def _push_back(self, buffer): + assert self._buffer == b'' + self._buffer = buffer + + def read(self, size): + """ + Read and return up to 'size' bytes from the stream, with I/O buffering provided. + """ + buffer = bytearray() + while len(buffer) < size: + chunk = self._read_some() + if not chunk: + break + buffer.extend(chunk) + + if len(buffer) > size: + buffer, push_back = buffer[:size], buffer[size:] + self._push_back(bytes(push_back)) + return bytes(buffer) + + def read_until(self, marker, max_size, msg): + """ + Read and return bytes from the stream, delimited by marker. + * The marker is not included in the return bytes. + * The marker is consumed from the I/O stream. + * Raises `StreamClosed` if the stream closes before a marker occurance. + * Raises `ExceededMaxSize` if marker did not occur within 'max_size + len(marker)' bytes. + """ + buffer = bytearray() + while len(buffer) <= max_size: + chunk = self._read_some() + if not chunk: + # stream closed before marker found. + raise ProtocolError(f"Stream closed early - {msg}") + start_search = max(len(buffer) - len(marker), 0) + buffer.extend(chunk) + index = buffer.find(marker, start_search) + + if index > max_size: + # marker was found, though 'max_size' exceeded. 
+ raise ProtocolError(f"Exceeded maximum size - {msg}") + elif index >= 0: + endindex = index + len(marker) + self._push_back(bytes(buffer[endindex:])) + return bytes(buffer[:index]) + + raise ProtocolError(f"Exceeded maximum size - {msg}") diff --git a/src/httpx/__init__.py b/src/httpx/__init__.py index 12d0fbf..68376b7 100644 --- a/src/httpx/__init__.py +++ b/src/httpx/__init__.py @@ -2,6 +2,7 @@ from ._content import * # Content, File, Files, Form, HTML, JSON, MultiPart, Text from ._headers import * # Headers from ._network import * # NetworkBackend, NetworkStream, timeout +from ._parsers import * # HTTPParser, ProtocolError from ._pool import * # Connection, ConnectionPool, Transport, open_connection_pool, open_connection from ._response import * # Response from ._request import * # Request @@ -23,6 +24,7 @@ "Form", "Headers", "HTML", + "HTTPParser", "IterByteStream", "JSON", "MultiPart", @@ -31,6 +33,7 @@ "open_client", "open_connection_pool", "open_connection", + "ProtocolError", "Response", "Request", "serve_http", diff --git a/src/httpx/_parsers.py b/src/httpx/_parsers.py new file mode 100644 index 0000000..ccba1c4 --- /dev/null +++ b/src/httpx/_parsers.py @@ -0,0 +1,185 @@ +import enum + +__all__ = ['HTTPParser', 'ProtocolError'] + + +class State(enum.Enum): + IDLE = 0 + SEND_HEADERS = 1 + SEND_BODY = 2 + DONE = 3 + MUST_CLOSE = 4 + CLOSED = 5 + ERROR = 6 + + +class ProtocolError(Exception): + pass + + +class HTTPParser: + """ + Usage... 
+ + client = HTTPParser(stream) + client.send_method_line() IDLE -> SEND_HEADERS + client.send_headers() SEND_HEADERS -> SEND_BODY + client.send_body() SEND_BODY -> SEND_BODY or DONE or MUST_CLOSE + client.recv_status_line() + client.recv_headers() + client.recv_body() + client.start_next_cycle() DONE -> IDLE + client.close() CLOSED + """ + def __init__(self, writer, reader): + self.writer = writer + self.reader = reader + self.parser = ReadAheadParser(reader) + self.our_state = State.IDLE + self.their_state = State.IDLE + + def send_method_line(self, method, target, protocol): + if self.our_state != State.IDLE: + raise ProtocolError(f"Called 'send_method_line' in state {self.our_state}") + + data = b" ".join([method, target, protocol]) + b"\r\n" + self.writer.write(data) + + self.our_state = State.SEND_HEADERS + + def send_headers(self, headers): + if self.our_state != State.SEND_HEADERS: + raise ProtocolError(f"Called 'send_headers' in state {self.our_state}") + + lines = [name + b": " + value + b"\r\n" for name, value in headers] + data = b"".join(lines) + b"\r\n" + self.writer.write(data) + + self.our_state = State.SEND_BODY + + def send_body(self, body): + if self.our_state != State.SEND_BODY: + raise ProtocolError(f"Called 'send_body' in state {self.our_state}") + + # enforce max chunk size, enforce correct content length + if body: + self.writer.write(body) + self.our_state = State.SEND_BODY + else: + self.our_state = State.DONE + + def recv_status_line(self): + if self.their_state != State.IDLE: + raise ProtocolError("Called 'recv_status_line' against state {name}") + + msg = "reading response status line" + line = self.parser.read_until(b"\r\n", max_size=4096, msg=msg) + protocol, status_code, reason_phrase = line.split(b" ", 2) + + self.their_state = State.SEND_HEADERS + return protocol, status_code, reason_phrase + + def recv_headers(self): + if self.their_state != State.SEND_HEADERS: + raise ProtocolError("Called 'recv_headers' against state {name}") + 
+ # Handle Expect: 100-continue + headers = [] + while line := self.parser.read_until(b"\r\n", max_size=4096, msg='recv_headers'): + name, value = line.split(b":", 1) + value = value.strip(b" ") + headers.append((name, value)) + + self.their_state = State.SEND_BODY + return headers + + def recv_body(self): + if self.their_state != State.SEND_BODY: + raise ProtocolError("Called 'recv_body' against state {name}") + + # Validate against Content-Length + # Handle Transfer-Encoding: chunked (inc. trailers) + body = self.parser.read(size=4096) + + # Handle 'Connection: close' and HTTP/1.0 defaults. + if body == b'': + self.their_state = State.DONE + return body + + def start_next_cycle(self): + if self.our_state != State.DONE: + raise ProtocolError("Called 'start_next_cycle' against state {name}") + if self.their_state != State.DONE: + raise ProtocolError("Called 'start_next_cycle' against state {name}") + + self.our_state = State.IDLE + self.their_state = State.IDLE + + def __repr__(self) -> str: + cl_state = self.our_state.name + sr_state = self.their_state.name + return f'' + + +class ReadAheadParser: + """ + A buffered I/O stream, with methods for read-ahead parsing. + """ + def __init__(self, stream): + self._buffer = b'' + self._stream = stream + self._chunk_size = 4096 + + def _read_some(self): + if self._buffer: + ret, self._buffer = self._buffer, b'' + return ret + return self._stream.read(self._chunk_size) + + def _push_back(self, buffer): + assert self._buffer == b'' + self._buffer = buffer + + def read(self, size): + """ + Read and return up to 'size' bytes from the stream, with I/O buffering provided. 
+ """ + buffer = bytearray() + while len(buffer) < size: + chunk = self._read_some() + if not chunk: + break + buffer.extend(chunk) + + if len(buffer) > size: + buffer, push_back = buffer[:size], buffer[size:] + self._push_back(bytes(push_back)) + return bytes(buffer) + + def read_until(self, marker, max_size, msg): + """ + Read and return bytes from the stream, delimited by marker. + * The marker is not included in the return bytes. + * The marker is consumed from the I/O stream. + * Raises `StreamClosed` if the stream closes before a marker occurance. + * Raises `ExceededMaxSize` if marker did not occur within 'max_size + len(marker)' bytes. + """ + buffer = bytearray() + while len(buffer) <= max_size: + chunk = self._read_some() + if not chunk: + # stream closed before marker found. + raise ProtocolError(f"Stream closed early - {msg}") + start_search = max(len(buffer) - len(marker), 0) + buffer.extend(chunk) + index = buffer.find(marker, start_search) + + if index > max_size: + # marker was found, though 'max_size' exceeded. 
+ raise ProtocolError(f"Exceeded maximum size - {msg}") + elif index >= 0: + endindex = index + len(marker) + self._push_back(bytes(buffer[endindex:])) + return bytes(buffer[:index]) + + raise ProtocolError(f"Exceeded maximum size - {msg}") From 93349392e65ad7239679678f9d5a053a2cd9bf14 Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Tue, 10 Jun 2025 13:07:15 +0100 Subject: [PATCH 02/19] Checking Content-Length --- src/ahttpx/_parsers.py | 47 ++++++++- src/httpx/_parsers.py | 38 ++++++- tests/test_parsers.py | 226 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 306 insertions(+), 5 deletions(-) create mode 100644 tests/test_parsers.py diff --git a/src/ahttpx/_parsers.py b/src/ahttpx/_parsers.py index e3b43fa..6b84a29 100644 --- a/src/ahttpx/_parsers.py +++ b/src/ahttpx/_parsers.py @@ -1,7 +1,9 @@ +import enum + __all__ = ['HTTPParser', 'ProtocolError'] -class State: +class State(enum.Enum): IDLE = 0 SEND_HEADERS = 1 SEND_BODY = 2 @@ -35,6 +37,10 @@ def __init__(self, writer, reader): self.parser = ReadAheadParser(reader) self.our_state = State.IDLE self.their_state = State.IDLE + self.our_content_length = 0 + self.their_content_length = 0 + self.our_seen_length = 0 + self.their_seen_length = 0 def send_method_line(self, method, target, protocol): if self.our_state != State.IDLE: @@ -49,6 +55,13 @@ def send_headers(self, headers): if self.our_state != State.SEND_HEADERS: raise ProtocolError(f"Called 'send_headers' in state {self.our_state}") + # Header state + for name, value in headers: + lname = name.lower() + if lname == b'content-length': + self.our_content_length = int(value) + + # Data on the wire lines = [name + b": " + value + b"\r\n" for name, value in headers] data = b"".join(lines) + b"\r\n" self.writer.write(data) @@ -61,9 +74,16 @@ def send_body(self, body): # enforce max chunk size, enforce correct content length if body: + self.our_seen_length += len(body) + if self.our_seen_length > self.our_content_length: + msg = 'Too much data sent for 
declared Content-Length' + raise ProtocolError(msg) self.writer.write(body) self.our_state = State.SEND_BODY else: + if self.our_seen_length < self.our_content_length: + msg = 'Not enough data sent for declared Content-Length' + raise ProtocolError(msg) self.our_state = State.DONE def recv_status_line(self): @@ -81,13 +101,19 @@ def recv_headers(self): if self.their_state != State.SEND_HEADERS: raise ProtocolError("Called 'recv_headers' against state {name}") - # Handle Expect: 100-continue + # Data off the wire headers = [] while line := self.parser.read_until(b"\r\n", max_size=4096, msg='recv_headers'): name, value = line.split(b":", 1) value = value.strip(b" ") headers.append((name, value)) + # Header state + for name, value in headers: + lname = name.lower() + if lname == b'content-length': + self.their_content_length = int(value) + self.their_state = State.SEND_BODY return headers @@ -97,10 +123,16 @@ def recv_body(self): # Validate against Content-Length # Handle Transfer-Encoding: chunked (inc. trailers) - body = self.parser.read(size=4096) + remaining = self.their_content_length - self.their_seen_length + size = min(remaining, 4096) + body = self.parser.read(size=size) + self.their_seen_length += len(body) # Handle 'Connection: close' and HTTP/1.0 defaults. 
if body == b'': + if self.their_seen_length < self.their_content_length: + msg = 'Not enough data received for declared Content-Length' + raise ProtocolError(msg) self.their_state = State.DONE return body @@ -112,6 +144,15 @@ def start_next_cycle(self): self.our_state = State.IDLE self.their_state = State.IDLE + self.our_content_length = 0 + self.their_content_length = 0 + self.our_seen_length = 0 + self.their_seen_length = 0 + + def __repr__(self) -> str: + cl_state = self.our_state.name + sr_state = self.their_state.name + return f'' class ReadAheadParser: diff --git a/src/httpx/_parsers.py b/src/httpx/_parsers.py index ccba1c4..6b84a29 100644 --- a/src/httpx/_parsers.py +++ b/src/httpx/_parsers.py @@ -37,6 +37,10 @@ def __init__(self, writer, reader): self.parser = ReadAheadParser(reader) self.our_state = State.IDLE self.their_state = State.IDLE + self.our_content_length = 0 + self.their_content_length = 0 + self.our_seen_length = 0 + self.their_seen_length = 0 def send_method_line(self, method, target, protocol): if self.our_state != State.IDLE: @@ -51,6 +55,13 @@ def send_headers(self, headers): if self.our_state != State.SEND_HEADERS: raise ProtocolError(f"Called 'send_headers' in state {self.our_state}") + # Header state + for name, value in headers: + lname = name.lower() + if lname == b'content-length': + self.our_content_length = int(value) + + # Data on the wire lines = [name + b": " + value + b"\r\n" for name, value in headers] data = b"".join(lines) + b"\r\n" self.writer.write(data) @@ -63,9 +74,16 @@ def send_body(self, body): # enforce max chunk size, enforce correct content length if body: + self.our_seen_length += len(body) + if self.our_seen_length > self.our_content_length: + msg = 'Too much data sent for declared Content-Length' + raise ProtocolError(msg) self.writer.write(body) self.our_state = State.SEND_BODY else: + if self.our_seen_length < self.our_content_length: + msg = 'Not enough data sent for declared Content-Length' + raise 
ProtocolError(msg) self.our_state = State.DONE def recv_status_line(self): @@ -83,13 +101,19 @@ def recv_headers(self): if self.their_state != State.SEND_HEADERS: raise ProtocolError("Called 'recv_headers' against state {name}") - # Handle Expect: 100-continue + # Data off the wire headers = [] while line := self.parser.read_until(b"\r\n", max_size=4096, msg='recv_headers'): name, value = line.split(b":", 1) value = value.strip(b" ") headers.append((name, value)) + # Header state + for name, value in headers: + lname = name.lower() + if lname == b'content-length': + self.their_content_length = int(value) + self.their_state = State.SEND_BODY return headers @@ -99,10 +123,16 @@ def recv_body(self): # Validate against Content-Length # Handle Transfer-Encoding: chunked (inc. trailers) - body = self.parser.read(size=4096) + remaining = self.their_content_length - self.their_seen_length + size = min(remaining, 4096) + body = self.parser.read(size=size) + self.their_seen_length += len(body) # Handle 'Connection: close' and HTTP/1.0 defaults. 
if body == b'': + if self.their_seen_length < self.their_content_length: + msg = 'Not enough data received for declared Content-Length' + raise ProtocolError(msg) self.their_state = State.DONE return body @@ -114,6 +144,10 @@ def start_next_cycle(self): self.our_state = State.IDLE self.their_state = State.IDLE + self.our_content_length = 0 + self.their_content_length = 0 + self.our_seen_length = 0 + self.their_seen_length = 0 def __repr__(self) -> str: cl_state = self.our_state.name diff --git a/tests/test_parsers.py b/tests/test_parsers.py new file mode 100644 index 0000000..5559022 --- /dev/null +++ b/tests/test_parsers.py @@ -0,0 +1,226 @@ +import httpx +import io +import pytest + + +def test_parser(): + writer = io.BytesIO() + reader = io.BytesIO( + b"HTTP/1.1 200 OK\r\n" + b"Content-Length: 12\r\n" + b"Content-Type: text/plain\r\n" + b"\r\n" + b"hello, world" + ) + + p = httpx.HTTPParser(writer, reader) + p.send_method_line(b"POST", b"/", b"HTTP/1.1") + p.send_headers([ + (b"Host", b"example.com"), + (b"Content-Type", b"application/json"), + (b"Content-Length", b"23"), + ]) + p.send_body(b'{"msg": "hello, world"}') + p.send_body(b'') + + assert writer.getvalue() == ( + b"POST / HTTP/1.1\r\n" + b"Host: example.com\r\n" + b"Content-Type: application/json\r\n" + b"Content-Length: 23\r\n" + b"\r\n" + b'{"msg": "hello, world"}' + ) + + protocol, code, reason_phase = p.recv_status_line() + headers = p.recv_headers() + body = p.recv_body() + terminator = p.recv_body() + + assert protocol == b'HTTP/1.1' + assert code == b'200' + assert reason_phase == b'OK' + assert headers == [ + (b'Content-Length', b'12'), + (b'Content-Type', b'text/plain'), + ] + assert body == b'hello, world' + assert terminator == b'' + + +def test_parser_repr(): + writer = io.BytesIO() + reader = io.BytesIO( + b"HTTP/1.1 200 OK\r\n" + b"Content-Type: application/json\r\n" + b"Content-Length: 23\r\n" + b"\r\n" + b'{"msg": "hello, world"}' + ) + + p = httpx.HTTPParser(writer, reader) + assert 
repr(p) == "" + + p.send_method_line(b"GET", b"/", b"HTTP/1.1") + assert repr(p) == "" + + p.send_headers([(b"Host", b"example.com")]) + assert repr(p) == "" + + p.send_body(b'') + assert repr(p) == "" + + p.recv_status_line() + assert repr(p) == "" + + p.recv_headers() + assert repr(p) == "" + + p.recv_body() + assert repr(p) == "" + + p.recv_body() + assert repr(p) == "" + + +def test_parser_invalid_transitions(): + writer = io.BytesIO() + reader = io.BytesIO() + + with pytest.raises(httpx.ProtocolError): + p = httpx.HTTPParser(writer, reader) + p.send_headers([]) + + with pytest.raises(httpx.ProtocolError): + p = httpx.HTTPParser(writer, reader) + p.send_body(b'') + + with pytest.raises(httpx.ProtocolError): + p = httpx.HTTPParser(writer, reader) + p.recv_headers() + + with pytest.raises(httpx.ProtocolError): + p = httpx.HTTPParser(writer, reader) + p.recv_body() + + +def test_parser_invalid_status_line(): + # ... + writer = io.BytesIO() + reader = io.BytesIO(b'HTTP/1.1') + + p = httpx.HTTPParser(writer, reader) + p.send_method_line(b"GET", b"/", b"HTTP/1.1") + p.send_headers([(b"Host", b"example.com")]) + p.send_body(b'') + + msg = 'Stream closed early - reading response status line' + with pytest.raises(httpx.ProtocolError, match=msg): + p.recv_status_line() + + # ... + writer = io.BytesIO() + reader = io.BytesIO(b'HTTP/1.1' + b'x' * 5000) + + p = httpx.HTTPParser(writer, reader) + p.send_method_line(b"GET", b"/", b"HTTP/1.1") + p.send_headers([(b"Host", b"example.com")]) + p.send_body(b'') + + msg = 'Exceeded maximum size - reading response status line' + with pytest.raises(httpx.ProtocolError, match=msg): + p.recv_status_line() + + # ... 
+ writer = io.BytesIO() + reader = io.BytesIO(b'HTTP/1.1' + b'x' * 5000 + b'\r\n') + + p = httpx.HTTPParser(writer, reader) + p.send_method_line(b"GET", b"/", b"HTTP/1.1") + p.send_headers([(b"Host", b"example.com")]) + p.send_body(b'') + + msg = 'Exceeded maximum size - reading response status line' + with pytest.raises(httpx.ProtocolError, match=msg): + p.recv_status_line() + + +def test_parser_large_body(): + body = b"x" * 6988 + + writer = io.BytesIO() + reader = io.BytesIO( + b"HTTP/1.1 200 OK\r\n" + b"Content-Length: 6988\r\n" + b"Content-Type: text/plain\r\n" + b"\r\n" + body + ) + + p = httpx.HTTPParser(writer, reader) + p.send_method_line(b"GET", b"/", b"HTTP/1.1") + p.send_headers([(b"Host", b"example.com")]) + p.send_body(b'') + + # Checkout our buffer sizes. + p.recv_status_line() + p.recv_headers() + assert len(p.recv_body()) == 4096 + assert len(p.recv_body()) == 2892 + assert len(p.recv_body()) == 0 + + +def test_parser_not_enough_data_received(): + writer = io.BytesIO() + reader = io.BytesIO( + b"HTTP/1.1 200 OK\r\n" + b"Content-Length: 188\r\n" + b"Content-Type: text/plain\r\n" + b"\r\n" + b"truncated" + ) + + p = httpx.HTTPParser(writer, reader) + p.send_method_line(b"GET", b"/", b"HTTP/1.1") + p.send_headers([(b"Host", b"example.com")]) + p.send_body(b'') + + # Checkout our buffer sizes. 
+ p.recv_status_line() + p.recv_headers() + p.recv_body() + msg = 'Not enough data received for declared Content-Length' + with pytest.raises(httpx.ProtocolError, match=msg): + p.recv_body() + + +def test_parser_not_enough_data_sent(): + writer = io.BytesIO() + reader = io.BytesIO() + + p = httpx.HTTPParser(writer, reader) + p.send_method_line(b"POST", b"/", b"HTTP/1.1") + p.send_headers([ + (b"Host", b"example.com"), + (b"Content-Type", b"application/json"), + (b"Content-Length", b"23"), + ]) + p.send_body(b'{"msg": "too smol"}') + msg = 'Not enough data sent for declared Content-Length' + with pytest.raises(httpx.ProtocolError, match=msg): + p.send_body(b'') + + +def test_parser_too_much_data_sent(): + writer = io.BytesIO() + reader = io.BytesIO() + + p = httpx.HTTPParser(writer, reader) + p.send_method_line(b"POST", b"/", b"HTTP/1.1") + p.send_headers([ + (b"Host", b"example.com"), + (b"Content-Type", b"application/json"), + (b"Content-Length", b"19"), + ]) + msg = 'Too much data sent for declared Content-Length' + with pytest.raises(httpx.ProtocolError, match=msg): + p.send_body(b'{"msg": "too chonky"}') From e7fc4704ea7c9df1e6084db7936f5ae4855003b6 Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Tue, 10 Jun 2025 13:22:06 +0100 Subject: [PATCH 03/19] Ensure supported protocol version --- src/ahttpx/_parsers.py | 4 ++++ src/httpx/_parsers.py | 4 ++++ tests/test_parsers.py | 13 ++++++++++++- 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/src/ahttpx/_parsers.py b/src/ahttpx/_parsers.py index 6b84a29..02ae1a8 100644 --- a/src/ahttpx/_parsers.py +++ b/src/ahttpx/_parsers.py @@ -46,6 +46,8 @@ def send_method_line(self, method, target, protocol): if self.our_state != State.IDLE: raise ProtocolError(f"Called 'send_method_line' in state {self.our_state}") + if protocol != b'HTTP/1.1': + raise ProtocolError("Sent unsupported protocol version") data = b" ".join([method, target, protocol]) + b"\r\n" self.writer.write(data) @@ -93,6 +95,8 @@ def 
recv_status_line(self): msg = "reading response status line" line = self.parser.read_until(b"\r\n", max_size=4096, msg=msg) protocol, status_code, reason_phrase = line.split(b" ", 2) + if protocol != b'HTTP/1.1': + raise ProtocolError("Received unsupported protocol version") self.their_state = State.SEND_HEADERS return protocol, status_code, reason_phrase diff --git a/src/httpx/_parsers.py b/src/httpx/_parsers.py index 6b84a29..02ae1a8 100644 --- a/src/httpx/_parsers.py +++ b/src/httpx/_parsers.py @@ -46,6 +46,8 @@ def send_method_line(self, method, target, protocol): if self.our_state != State.IDLE: raise ProtocolError(f"Called 'send_method_line' in state {self.our_state}") + if protocol != b'HTTP/1.1': + raise ProtocolError("Sent unsupported protocol version") data = b" ".join([method, target, protocol]) + b"\r\n" self.writer.write(data) @@ -93,6 +95,8 @@ def recv_status_line(self): msg = "reading response status line" line = self.parser.read_until(b"\r\n", max_size=4096, msg=msg) protocol, status_code, reason_phrase = line.split(b" ", 2) + if protocol != b'HTTP/1.1': + raise ProtocolError("Received unsupported protocol version") self.their_state = State.SEND_HEADERS return protocol, status_code, reason_phrase diff --git a/tests/test_parsers.py b/tests/test_parsers.py index 5559022..efc1519 100644 --- a/tests/test_parsers.py +++ b/tests/test_parsers.py @@ -107,7 +107,7 @@ def test_parser_invalid_transitions(): def test_parser_invalid_status_line(): # ... writer = io.BytesIO() - reader = io.BytesIO(b'HTTP/1.1') + reader = io.BytesIO(b'...') p = httpx.HTTPParser(writer, reader) p.send_method_line(b"GET", b"/", b"HTTP/1.1") @@ -145,6 +145,17 @@ def test_parser_invalid_status_line(): p.recv_status_line() +def test_parser_sent_supported_protocol(): + # Currently only HTTP/1.1 is supported. 
+ writer = io.BytesIO() + reader = io.BytesIO() + + p = httpx.HTTPParser(writer, reader) + msg = 'Sent unsupported protocol version' + with pytest.raises(httpx.ProtocolError, match=msg): + p.send_method_line(b"GET", b"/", b"HTTP/1.0") + + def test_parser_large_body(): body = b"x" * 6988 From bed6e1fd79e6196e382c94916e3a75797c73c5e4 Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Tue, 10 Jun 2025 13:35:48 +0100 Subject: [PATCH 04/19] Enforce Host header --- src/ahttpx/_parsers.py | 15 +++++++++++++++ src/httpx/_parsers.py | 15 +++++++++++++++ tests/test_parsers.py | 11 +++++++++++ 3 files changed, 41 insertions(+) diff --git a/src/ahttpx/_parsers.py b/src/ahttpx/_parsers.py index 02ae1a8..e1b7057 100644 --- a/src/ahttpx/_parsers.py +++ b/src/ahttpx/_parsers.py @@ -3,6 +3,16 @@ __all__ = ['HTTPParser', 'ProtocolError'] +# TODO... +# * Expect: 100 continue +# * Connection: keep-alive / close +# * Transfer-Encoding: chunked +# * Upgrade: +# * Host: required & ordering + +# * HTTP/1.0 support +# * Bounded integer conversions + class State(enum.Enum): IDLE = 0 SEND_HEADERS = 1 @@ -58,10 +68,15 @@ def send_headers(self, headers): raise ProtocolError(f"Called 'send_headers' in state {self.our_state}") # Header state + seen_host = False for name, value in headers: lname = name.lower() if lname == b'content-length': self.our_content_length = int(value) + elif lname == b'host': + seen_host = True + if not seen_host: + raise ProtocolError("Request missing 'Host' header") # Data on the wire lines = [name + b": " + value + b"\r\n" for name, value in headers] diff --git a/src/httpx/_parsers.py b/src/httpx/_parsers.py index 02ae1a8..e1b7057 100644 --- a/src/httpx/_parsers.py +++ b/src/httpx/_parsers.py @@ -3,6 +3,16 @@ __all__ = ['HTTPParser', 'ProtocolError'] +# TODO... 
+# * Expect: 100 continue +# * Connection: keep-alive / close +# * Transfer-Encoding: chunked +# * Upgrade: +# * Host: required & ordering + +# * HTTP/1.0 support +# * Bounded integer conversions + class State(enum.Enum): IDLE = 0 SEND_HEADERS = 1 @@ -58,10 +68,15 @@ def send_headers(self, headers): raise ProtocolError(f"Called 'send_headers' in state {self.our_state}") # Header state + seen_host = False for name, value in headers: lname = name.lower() if lname == b'content-length': self.our_content_length = int(value) + elif lname == b'host': + seen_host = True + if not seen_host: + raise ProtocolError("Request missing 'Host' header") # Data on the wire lines = [name + b": " + value + b"\r\n" for name, value in headers] diff --git a/tests/test_parsers.py b/tests/test_parsers.py index efc1519..edc38df 100644 --- a/tests/test_parsers.py +++ b/tests/test_parsers.py @@ -235,3 +235,14 @@ def test_parser_too_much_data_sent(): msg = 'Too much data sent for declared Content-Length' with pytest.raises(httpx.ProtocolError, match=msg): p.send_body(b'{"msg": "too chonky"}') + + +def test_parser_missing_host_header(): + writer = io.BytesIO() + reader = io.BytesIO() + + p = httpx.HTTPParser(writer, reader) + p.send_method_line(b"GET", b"/", b"HTTP/1.1") + msg = "Request missing 'Host' header" + with pytest.raises(httpx.ProtocolError, match=msg): + p.send_headers([]) From ad15b1f7b1019dab724d1add0a9dcf48fb94ef25 Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Tue, 10 Jun 2025 15:36:57 +0100 Subject: [PATCH 05/19] Update tests --- tests/test_parsers.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/test_parsers.py b/tests/test_parsers.py index edc38df..71972f2 100644 --- a/tests/test_parsers.py +++ b/tests/test_parsers.py @@ -82,6 +82,9 @@ def test_parser_repr(): p.recv_body() assert repr(p) == "" + p.start_next_cycle() + assert repr(p) == "" + def test_parser_invalid_transitions(): writer = io.BytesIO() From ed138bc5843af2934faf58cdb6166ae2bfa78d21 Mon Sep 17 
00:00:00 2001 From: Tom Christie Date: Tue, 10 Jun 2025 15:37:50 +0100 Subject: [PATCH 06/19] Add TODOs --- src/ahttpx/_parsers.py | 15 ++++++++++++--- src/httpx/_parsers.py | 15 ++++++++++++--- 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/src/ahttpx/_parsers.py b/src/ahttpx/_parsers.py index e1b7057..b1882ef 100644 --- a/src/ahttpx/_parsers.py +++ b/src/ahttpx/_parsers.py @@ -4,14 +4,23 @@ # TODO... + # * Expect: 100 continue # * Connection: keep-alive / close # * Transfer-Encoding: chunked -# * Upgrade: -# * Host: required & ordering +# * Upgrade: ... / (CONNECT?) +# * Host: required (ordering?) # * HTTP/1.0 support # * Bounded integer conversions +# * trailers + +# * validate method, target in request line +# * validate status code in response line +# * validate name, value on headers + +# * Fixup invalid state transition messages + class State(enum.Enum): IDLE = 0 @@ -31,7 +40,7 @@ class HTTPParser: """ Usage... - client = HTTPParser(stream) + client = HTTPParser(writer, reader) client.send_method_line() IDLE -> SEND_HEADERS client.send_headers() SEND_HEADERS -> SEND_BODY client.send_body() SEND_BODY -> SEND_BODY or DONE or MUST_CLOSE diff --git a/src/httpx/_parsers.py b/src/httpx/_parsers.py index e1b7057..b1882ef 100644 --- a/src/httpx/_parsers.py +++ b/src/httpx/_parsers.py @@ -4,14 +4,23 @@ # TODO... + # * Expect: 100 continue # * Connection: keep-alive / close # * Transfer-Encoding: chunked -# * Upgrade: -# * Host: required & ordering +# * Upgrade: ... / (CONNECT?) +# * Host: required (ordering?) # * HTTP/1.0 support # * Bounded integer conversions +# * trailers + +# * validate method, target in request line +# * validate status code in response line +# * validate name, value on headers + +# * Fixup invalid state transition messages + class State(enum.Enum): IDLE = 0 @@ -31,7 +40,7 @@ class HTTPParser: """ Usage... 
- client = HTTPParser(stream) + client = HTTPParser(writer, reader) client.send_method_line() IDLE -> SEND_HEADERS client.send_headers() SEND_HEADERS -> SEND_BODY client.send_body() SEND_BODY -> SEND_BODY or DONE or MUST_CLOSE From 52689d5dbde469ca563bcfe364ccfd34d7ed1c26 Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Wed, 11 Jun 2025 11:44:32 +0100 Subject: [PATCH 07/19] Add chunked encoding to HTTPParser --- src/ahttpx/_parsers.py | 128 ++++++++++++++++++++++++++++------------- src/httpx/_parsers.py | 128 ++++++++++++++++++++++++++++------------- tests/test_parsers.py | 126 +++++++++++++++++++++++++++++++++++++++- 3 files changed, 301 insertions(+), 81 deletions(-) diff --git a/src/ahttpx/_parsers.py b/src/ahttpx/_parsers.py index b1882ef..5a7dc2a 100644 --- a/src/ahttpx/_parsers.py +++ b/src/ahttpx/_parsers.py @@ -54,16 +54,20 @@ def __init__(self, writer, reader): self.writer = writer self.reader = reader self.parser = ReadAheadParser(reader) + self.our_state = State.IDLE self.their_state = State.IDLE self.our_content_length = 0 self.their_content_length = 0 self.our_seen_length = 0 self.their_seen_length = 0 + self.our_keep_alive = True + self.their_keep_alive = True def send_method_line(self, method, target, protocol): if self.our_state != State.IDLE: - raise ProtocolError(f"Called 'send_method_line' in state {self.our_state}") + msg = f"Called 'send_method_line' in invalid state {self.description()}" + raise ProtocolError(msg) if protocol != b'HTTP/1.1': raise ProtocolError("Sent unsupported protocol version") @@ -74,16 +78,21 @@ def send_method_line(self, method, target, protocol): def send_headers(self, headers): if self.our_state != State.SEND_HEADERS: - raise ProtocolError(f"Called 'send_headers' in state {self.our_state}") + msg = f"Called 'send_headers' in invalid state {self.description()}" + raise ProtocolError(msg) # Header state seen_host = False for name, value in headers: lname = name.lower() - if lname == b'content-length': - 
self.our_content_length = int(value) - elif lname == b'host': + if lname == b'host': seen_host = True + elif lname == b'content-length': + self.our_content_length = int(value) + elif lname == b'connection' and value == b'close': + self.our_keep_alive = False + elif lname == b'transfer-encoding' and value == b'chunked': + self.our_content_length = None if not seen_host: raise ProtocolError("Request missing 'Host' header") @@ -96,25 +105,39 @@ def send_headers(self, headers): def send_body(self, body): if self.our_state != State.SEND_BODY: - raise ProtocolError(f"Called 'send_body' in state {self.our_state}") + msg = f"Called 'send_body' in invalid state {self.description()}" + raise ProtocolError(msg) + + # Track content + self.our_seen_length += len(body) - # enforce max chunk size, enforce correct content length - if body: - self.our_seen_length += len(body) + if self.our_content_length is None: + # Transfer-Encoding: chunked + self.writer.write(f'{len(body):x}\r\n'.encode('ascii')) + if body: + self.writer.write(body + b'\r\n') + else: + # Content-Length: xxx if self.our_seen_length > self.our_content_length: msg = 'Too much data sent for declared Content-Length' raise ProtocolError(msg) - self.writer.write(body) - self.our_state = State.SEND_BODY - else: - if self.our_seen_length < self.our_content_length: + if self.our_seen_length < self.our_content_length and body == b'': msg = 'Not enough data sent for declared Content-Length' raise ProtocolError(msg) - self.our_state = State.DONE + if body: + self.writer.write(body) + + if body == b'': + # Handle body close + if self.our_keep_alive: + self.our_state = State.DONE + else: + self.our_state = State.MUST_CLOSE def recv_status_line(self): if self.their_state != State.IDLE: - raise ProtocolError("Called 'recv_status_line' against state {name}") + msg = f"Called 'recv_status_line' in invalid state {self.description()}" + raise ProtocolError(msg) msg = "reading response status line" line = 
self.parser.read_until(b"\r\n", max_size=4096, msg=msg) @@ -127,11 +150,13 @@ def recv_status_line(self): def recv_headers(self): if self.their_state != State.SEND_HEADERS: - raise ProtocolError("Called 'recv_headers' against state {name}") + msg = f"Called 'recv_headers' in invalid state {self.description()}" + raise ProtocolError(msg) # Data off the wire headers = [] - while line := self.parser.read_until(b"\r\n", max_size=4096, msg='recv_headers'): + msg = "reading response headers" + while line := self.parser.read_until(b"\r\n", max_size=4096, msg=msg): name, value = line.split(b":", 1) value = value.strip(b" ") headers.append((name, value)) @@ -141,34 +166,53 @@ def recv_headers(self): lname = name.lower() if lname == b'content-length': self.their_content_length = int(value) + elif lname == b'connection' and value == b'close': + self.their_keep_alive = False + elif lname == b'transfer-encoding' and value == b'chunked': + self.their_content_length = None self.their_state = State.SEND_BODY return headers def recv_body(self): if self.their_state != State.SEND_BODY: - raise ProtocolError("Called 'recv_body' against state {name}") - - # Validate against Content-Length - # Handle Transfer-Encoding: chunked (inc. trailers) - remaining = self.their_content_length - self.their_seen_length - size = min(remaining, 4096) - body = self.parser.read(size=size) - self.their_seen_length += len(body) - - # Handle 'Connection: close' and HTTP/1.0 defaults. 
- if body == b'': - if self.their_seen_length < self.their_content_length: + msg = f"Called 'recv_body' in invalid state {self.description()}" + raise ProtocolError(msg) + + msg = 'reading response body' + if self.their_content_length is None: + line = self.parser.read_until(b"\r\n", max_size=4096, msg=msg) + sizestr, _, _ = line.partition(b";") + size = int(sizestr, base=16) + if size == 0: + body = b'' + else: + body = self.parser.read(size=size) + self.parser.read_until(b"\r\n", max_size=2, msg=msg) + self.their_seen_length += len(body) + else: + remaining = self.their_content_length - self.their_seen_length + size = min(remaining, 4096) + body = self.parser.read(size=size) + self.their_seen_length += len(body) + if self.their_seen_length < self.their_content_length and body == b'': msg = 'Not enough data received for declared Content-Length' raise ProtocolError(msg) - self.their_state = State.DONE + + if body == b'': + # Handle body close + if self.their_keep_alive: + self.their_state = State.DONE + else: + self.their_state = State.MUST_CLOSE + + # Return body return body def start_next_cycle(self): - if self.our_state != State.DONE: - raise ProtocolError("Called 'start_next_cycle' against state {name}") - if self.their_state != State.DONE: - raise ProtocolError("Called 'start_next_cycle' against state {name}") + if self.our_state != State.DONE or self.their_state != State.DONE: + msg = f"Called 'start_next_cycle' in invalid state {self.description()}" + raise ProtocolError(msg) self.our_state = State.IDLE self.their_state = State.IDLE @@ -176,11 +220,17 @@ def start_next_cycle(self): self.their_content_length = 0 self.our_seen_length = 0 self.their_seen_length = 0 + self.our_keep_alive = True + self.their_keep_alive = True - def __repr__(self) -> str: + def description(self) -> str: cl_state = self.our_state.name sr_state = self.their_state.name - return f'' + return f"client {cl_state}, server {sr_state}" + + def __repr__(self) -> str: + desc = 
self.description() + return f'' class ReadAheadParser: @@ -231,17 +281,17 @@ def read_until(self, marker, max_size, msg): chunk = self._read_some() if not chunk: # stream closed before marker found. - raise ProtocolError(f"Stream closed early - {msg}") + raise ProtocolError(f"Stream closed early {msg}") start_search = max(len(buffer) - len(marker), 0) buffer.extend(chunk) index = buffer.find(marker, start_search) if index > max_size: # marker was found, though 'max_size' exceeded. - raise ProtocolError(f"Exceeded maximum size - {msg}") + raise ProtocolError(f"Exceeded maximum size {msg}") elif index >= 0: endindex = index + len(marker) self._push_back(bytes(buffer[endindex:])) return bytes(buffer[:index]) - raise ProtocolError(f"Exceeded maximum size - {msg}") + raise ProtocolError(f"Exceeded maximum size {msg}") diff --git a/src/httpx/_parsers.py b/src/httpx/_parsers.py index b1882ef..5a7dc2a 100644 --- a/src/httpx/_parsers.py +++ b/src/httpx/_parsers.py @@ -54,16 +54,20 @@ def __init__(self, writer, reader): self.writer = writer self.reader = reader self.parser = ReadAheadParser(reader) + self.our_state = State.IDLE self.their_state = State.IDLE self.our_content_length = 0 self.their_content_length = 0 self.our_seen_length = 0 self.their_seen_length = 0 + self.our_keep_alive = True + self.their_keep_alive = True def send_method_line(self, method, target, protocol): if self.our_state != State.IDLE: - raise ProtocolError(f"Called 'send_method_line' in state {self.our_state}") + msg = f"Called 'send_method_line' in invalid state {self.description()}" + raise ProtocolError(msg) if protocol != b'HTTP/1.1': raise ProtocolError("Sent unsupported protocol version") @@ -74,16 +78,21 @@ def send_method_line(self, method, target, protocol): def send_headers(self, headers): if self.our_state != State.SEND_HEADERS: - raise ProtocolError(f"Called 'send_headers' in state {self.our_state}") + msg = f"Called 'send_headers' in invalid state {self.description()}" + raise 
ProtocolError(msg) # Header state seen_host = False for name, value in headers: lname = name.lower() - if lname == b'content-length': - self.our_content_length = int(value) - elif lname == b'host': + if lname == b'host': seen_host = True + elif lname == b'content-length': + self.our_content_length = int(value) + elif lname == b'connection' and value == b'close': + self.our_keep_alive = False + elif lname == b'transfer-encoding' and value == b'chunked': + self.our_content_length = None if not seen_host: raise ProtocolError("Request missing 'Host' header") @@ -96,25 +105,39 @@ def send_headers(self, headers): def send_body(self, body): if self.our_state != State.SEND_BODY: - raise ProtocolError(f"Called 'send_body' in state {self.our_state}") + msg = f"Called 'send_body' in invalid state {self.description()}" + raise ProtocolError(msg) + + # Track content + self.our_seen_length += len(body) - # enforce max chunk size, enforce correct content length - if body: - self.our_seen_length += len(body) + if self.our_content_length is None: + # Transfer-Encoding: chunked + self.writer.write(f'{len(body):x}\r\n'.encode('ascii')) + if body: + self.writer.write(body + b'\r\n') + else: + # Content-Length: xxx if self.our_seen_length > self.our_content_length: msg = 'Too much data sent for declared Content-Length' raise ProtocolError(msg) - self.writer.write(body) - self.our_state = State.SEND_BODY - else: - if self.our_seen_length < self.our_content_length: + if self.our_seen_length < self.our_content_length and body == b'': msg = 'Not enough data sent for declared Content-Length' raise ProtocolError(msg) - self.our_state = State.DONE + if body: + self.writer.write(body) + + if body == b'': + # Handle body close + if self.our_keep_alive: + self.our_state = State.DONE + else: + self.our_state = State.MUST_CLOSE def recv_status_line(self): if self.their_state != State.IDLE: - raise ProtocolError("Called 'recv_status_line' against state {name}") + msg = f"Called 'recv_status_line' 
in invalid state {self.description()}" + raise ProtocolError(msg) msg = "reading response status line" line = self.parser.read_until(b"\r\n", max_size=4096, msg=msg) @@ -127,11 +150,13 @@ def recv_status_line(self): def recv_headers(self): if self.their_state != State.SEND_HEADERS: - raise ProtocolError("Called 'recv_headers' against state {name}") + msg = f"Called 'recv_headers' in invalid state {self.description()}" + raise ProtocolError(msg) # Data off the wire headers = [] - while line := self.parser.read_until(b"\r\n", max_size=4096, msg='recv_headers'): + msg = "reading response headers" + while line := self.parser.read_until(b"\r\n", max_size=4096, msg=msg): name, value = line.split(b":", 1) value = value.strip(b" ") headers.append((name, value)) @@ -141,34 +166,53 @@ def recv_headers(self): lname = name.lower() if lname == b'content-length': self.their_content_length = int(value) + elif lname == b'connection' and value == b'close': + self.their_keep_alive = False + elif lname == b'transfer-encoding' and value == b'chunked': + self.their_content_length = None self.their_state = State.SEND_BODY return headers def recv_body(self): if self.their_state != State.SEND_BODY: - raise ProtocolError("Called 'recv_body' against state {name}") - - # Validate against Content-Length - # Handle Transfer-Encoding: chunked (inc. trailers) - remaining = self.their_content_length - self.their_seen_length - size = min(remaining, 4096) - body = self.parser.read(size=size) - self.their_seen_length += len(body) - - # Handle 'Connection: close' and HTTP/1.0 defaults. 
- if body == b'': - if self.their_seen_length < self.their_content_length: + msg = f"Called 'recv_body' in invalid state {self.description()}" + raise ProtocolError(msg) + + msg = 'reading response body' + if self.their_content_length is None: + line = self.parser.read_until(b"\r\n", max_size=4096, msg=msg) + sizestr, _, _ = line.partition(b";") + size = int(sizestr, base=16) + if size == 0: + body = b'' + else: + body = self.parser.read(size=size) + self.parser.read_until(b"\r\n", max_size=2, msg=msg) + self.their_seen_length += len(body) + else: + remaining = self.their_content_length - self.their_seen_length + size = min(remaining, 4096) + body = self.parser.read(size=size) + self.their_seen_length += len(body) + if self.their_seen_length < self.their_content_length and body == b'': msg = 'Not enough data received for declared Content-Length' raise ProtocolError(msg) - self.their_state = State.DONE + + if body == b'': + # Handle body close + if self.their_keep_alive: + self.their_state = State.DONE + else: + self.their_state = State.MUST_CLOSE + + # Return body return body def start_next_cycle(self): - if self.our_state != State.DONE: - raise ProtocolError("Called 'start_next_cycle' against state {name}") - if self.their_state != State.DONE: - raise ProtocolError("Called 'start_next_cycle' against state {name}") + if self.our_state != State.DONE or self.their_state != State.DONE: + msg = f"Called 'start_next_cycle' in invalid state {self.description()}" + raise ProtocolError(msg) self.our_state = State.IDLE self.their_state = State.IDLE @@ -176,11 +220,17 @@ def start_next_cycle(self): self.their_content_length = 0 self.our_seen_length = 0 self.their_seen_length = 0 + self.our_keep_alive = True + self.their_keep_alive = True - def __repr__(self) -> str: + def description(self) -> str: cl_state = self.our_state.name sr_state = self.their_state.name - return f'' + return f"client {cl_state}, server {sr_state}" + + def __repr__(self) -> str: + desc = 
self.description() + return f'' class ReadAheadParser: @@ -231,17 +281,17 @@ def read_until(self, marker, max_size, msg): chunk = self._read_some() if not chunk: # stream closed before marker found. - raise ProtocolError(f"Stream closed early - {msg}") + raise ProtocolError(f"Stream closed early {msg}") start_search = max(len(buffer) - len(marker), 0) buffer.extend(chunk) index = buffer.find(marker, start_search) if index > max_size: # marker was found, though 'max_size' exceeded. - raise ProtocolError(f"Exceeded maximum size - {msg}") + raise ProtocolError(f"Exceeded maximum size {msg}") elif index >= 0: endindex = index + len(marker) self._push_back(bytes(buffer[endindex:])) return bytes(buffer[:index]) - raise ProtocolError(f"Exceeded maximum size - {msg}") + raise ProtocolError(f"Exceeded maximum size {msg}") diff --git a/tests/test_parsers.py b/tests/test_parsers.py index 71972f2..cc5b576 100644 --- a/tests/test_parsers.py +++ b/tests/test_parsers.py @@ -48,6 +48,55 @@ def test_parser(): assert terminator == b'' +def test_parser_transfer_encoding_chunked(): + writer = io.BytesIO() + reader = io.BytesIO( + b"HTTP/1.1 200 OK\r\n" + b"Content-Type: text/plain\r\n" + b"Transfer-Encoding: chunked\r\n" + b"\r\n" + b"c\r\n" + b"hello, world\r\n" + b"0\r\n" + ) + + p = httpx.HTTPParser(writer, reader) + p.send_method_line(b"POST", b"/", b"HTTP/1.1") + p.send_headers([ + (b"Host", b"example.com"), + (b"Content-Type", b"application/json"), + (b"Transfer-Encoding", b"chunked"), + ]) + p.send_body(b'{"msg": "hello, world"}') + p.send_body(b'') + + assert writer.getvalue() == ( + b"POST / HTTP/1.1\r\n" + b"Host: example.com\r\n" + b"Content-Type: application/json\r\n" + b"Transfer-Encoding: chunked\r\n" + b"\r\n" + b'17\r\n' + b'{"msg": "hello, world"}\r\n' + b'0\r\n' + ) + + protocol, code, reason_phase = p.recv_status_line() + headers = p.recv_headers() + body = p.recv_body() + terminator = p.recv_body() + + assert protocol == b'HTTP/1.1' + assert code == b'200' + assert 
reason_phase == b'OK' + assert headers == [ + (b'Content-Type', b'text/plain'), + (b'Transfer-Encoding', b'chunked'), + ] + assert body == b'hello, world' + assert terminator == b'' + + def test_parser_repr(): writer = io.BytesIO() reader = io.BytesIO( @@ -117,7 +166,7 @@ def test_parser_invalid_status_line(): p.send_headers([(b"Host", b"example.com")]) p.send_body(b'') - msg = 'Stream closed early - reading response status line' + msg = 'Stream closed early reading response status line' with pytest.raises(httpx.ProtocolError, match=msg): p.recv_status_line() @@ -130,7 +179,7 @@ def test_parser_invalid_status_line(): p.send_headers([(b"Host", b"example.com")]) p.send_body(b'') - msg = 'Exceeded maximum size - reading response status line' + msg = 'Exceeded maximum size reading response status line' with pytest.raises(httpx.ProtocolError, match=msg): p.recv_status_line() @@ -143,7 +192,7 @@ def test_parser_invalid_status_line(): p.send_headers([(b"Host", b"example.com")]) p.send_body(b'') - msg = 'Exceeded maximum size - reading response status line' + msg = 'Exceeded maximum size reading response status line' with pytest.raises(httpx.ProtocolError, match=msg): p.recv_status_line() @@ -249,3 +298,74 @@ def test_parser_missing_host_header(): msg = "Request missing 'Host' header" with pytest.raises(httpx.ProtocolError, match=msg): p.send_headers([]) + + +def test_client_connection_close(): + writer = io.BytesIO() + reader = io.BytesIO( + b"HTTP/1.1 200 OK\r\n" + b"Content-Length: 12\r\n" + b"Content-Type: text/plain\r\n" + b"\r\n" + b"hello, world" + ) + + p = httpx.HTTPParser(writer, reader) + p.send_method_line(b"GET", b"/", b"HTTP/1.1") + p.send_headers([ + (b"Host", b"example.com"), + (b"Connection", b"close"), + ]) + p.send_body(b'') + + protocol, code, reason_phase = p.recv_status_line() + headers = p.recv_headers() + body = p.recv_body() + terminator = p.recv_body() + + assert protocol == b'HTTP/1.1' + assert code == b'200' + assert reason_phase == b"OK" + 
assert headers == [ + (b'Content-Length', b'12'), + (b'Content-Type', b'text/plain'), + ] + assert body == b"hello, world" + assert terminator == b"" + + assert repr(p) == "" + + +def test_server_connection_close(): + writer = io.BytesIO() + reader = io.BytesIO( + b"HTTP/1.1 200 OK\r\n" + b"Content-Length: 12\r\n" + b"Content-Type: text/plain\r\n" + b"Connection: close\r\n" + b"\r\n" + b"hello, world" + ) + + p = httpx.HTTPParser(writer, reader) + p.send_method_line(b"GET", b"/", b"HTTP/1.1") + p.send_headers([(b"Host", b"example.com")]) + p.send_body(b'') + + protocol, code, reason_phase = p.recv_status_line() + headers = p.recv_headers() + body = p.recv_body() + terminator = p.recv_body() + + assert protocol == b'HTTP/1.1' + assert code == b'200' + assert reason_phase == b"OK" + assert headers == [ + (b'Content-Length', b'12'), + (b'Content-Type', b'text/plain'), + (b'Connection', b'close'), + ] + assert body == b"hello, world" + assert terminator == b"" + + assert repr(p) == "" From 17b9954385eac4740d31a0e53be52e561bb84d83 Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Wed, 11 Jun 2025 12:57:44 +0100 Subject: [PATCH 08/19] Transfer-Encoding: chunked --- src/ahttpx/_parsers.py | 52 ++++++++++++++++++++++++------------------ src/httpx/_parsers.py | 52 ++++++++++++++++++++++++------------------ tests/test_parsers.py | 4 ++-- 3 files changed, 62 insertions(+), 46 deletions(-) diff --git a/src/ahttpx/_parsers.py b/src/ahttpx/_parsers.py index 5a7dc2a..7e73e4d 100644 --- a/src/ahttpx/_parsers.py +++ b/src/ahttpx/_parsers.py @@ -21,6 +21,9 @@ # * Fixup invalid state transition messages +# * read() fixup +# * chunk size + class State(enum.Enum): IDLE = 0 @@ -108,16 +111,15 @@ def send_body(self, body): msg = f"Called 'send_body' in invalid state {self.description()}" raise ProtocolError(msg) - # Track content - self.our_seen_length += len(body) - if self.our_content_length is None: # Transfer-Encoding: chunked + self.our_seen_length += len(body) 
self.writer.write(f'{len(body):x}\r\n'.encode('ascii')) - if body: - self.writer.write(body + b'\r\n') + self.writer.write(body + b'\r\n') + else: # Content-Length: xxx + self.our_seen_length += len(body) if self.our_seen_length > self.our_content_length: msg = 'Too much data sent for declared Content-Length' raise ProtocolError(msg) @@ -139,8 +141,8 @@ def recv_status_line(self): msg = f"Called 'recv_status_line' in invalid state {self.description()}" raise ProtocolError(msg) - msg = "reading response status line" - line = self.parser.read_until(b"\r\n", max_size=4096, msg=msg) + exc_text = "reading response status line" + line = self.parser.read_until(b"\r\n", max_size=4096, exc_text=exc_text) protocol, status_code, reason_phrase = line.split(b" ", 2) if protocol != b'HTTP/1.1': raise ProtocolError("Received unsupported protocol version") @@ -155,8 +157,8 @@ def recv_headers(self): # Data off the wire headers = [] - msg = "reading response headers" - while line := self.parser.read_until(b"\r\n", max_size=4096, msg=msg): + exc_text = "reading response headers" + while line := self.parser.read_until(b"\r\n", max_size=4096, exc_text=exc_text): name, value = line.split(b":", 1) value = value.strip(b" ") headers.append((name, value)) @@ -179,18 +181,24 @@ def recv_body(self): msg = f"Called 'recv_body' in invalid state {self.description()}" raise ProtocolError(msg) - msg = 'reading response body' if self.their_content_length is None: - line = self.parser.read_until(b"\r\n", max_size=4096, msg=msg) + # Transfer-Encoding: chunked + exc_text = 'reading chunk size' + line = self.parser.read_until(b"\r\n", max_size=4096, exc_text=exc_text) sizestr, _, _ = line.partition(b";") size = int(sizestr, base=16) - if size == 0: - body = b'' - else: + if size > 0: body = self.parser.read(size=size) - self.parser.read_until(b"\r\n", max_size=2, msg=msg) + exc_text = 'reading chunk data' + self.parser.read_until(b"\r\n", max_size=2, exc_text=exc_text) self.their_seen_length += 
len(body) + else: + body = b'' + exc_text = 'reading chunk termination' + self.parser.read_until(b"\r\n", max_size=2, exc_text=exc_text) + else: + # Content-Length: xxx remaining = self.their_content_length - self.their_seen_length size = min(remaining, 4096) body = self.parser.read(size=size) @@ -206,7 +214,6 @@ def recv_body(self): else: self.their_state = State.MUST_CLOSE - # Return body return body def start_next_cycle(self): @@ -268,30 +275,31 @@ def read(self, size): self._push_back(bytes(push_back)) return bytes(buffer) - def read_until(self, marker, max_size, msg): + def read_until(self, marker, max_size, exc_text): """ Read and return bytes from the stream, delimited by marker. + * The marker is not included in the return bytes. * The marker is consumed from the I/O stream. - * Raises `StreamClosed` if the stream closes before a marker occurance. - * Raises `ExceededMaxSize` if marker did not occur within 'max_size + len(marker)' bytes. + * Raises `ProtocolError` if the stream closes before a marker occurance. + * Raises `ProtocolError` if marker did not occur within 'max_size + len(marker)' bytes. """ buffer = bytearray() while len(buffer) <= max_size: chunk = self._read_some() if not chunk: # stream closed before marker found. - raise ProtocolError(f"Stream closed early {msg}") + raise ProtocolError(f"Stream closed early {exc_text}") start_search = max(len(buffer) - len(marker), 0) buffer.extend(chunk) index = buffer.find(marker, start_search) if index > max_size: # marker was found, though 'max_size' exceeded. 
- raise ProtocolError(f"Exceeded maximum size {msg}") + raise ProtocolError(f"Exceeded maximum size {exc_text}") elif index >= 0: endindex = index + len(marker) self._push_back(bytes(buffer[endindex:])) return bytes(buffer[:index]) - raise ProtocolError(f"Exceeded maximum size {msg}") + raise ProtocolError(f"Exceeded maximum size {exc_text}") diff --git a/src/httpx/_parsers.py b/src/httpx/_parsers.py index 5a7dc2a..7e73e4d 100644 --- a/src/httpx/_parsers.py +++ b/src/httpx/_parsers.py @@ -21,6 +21,9 @@ # * Fixup invalid state transition messages +# * read() fixup +# * chunk size + class State(enum.Enum): IDLE = 0 @@ -108,16 +111,15 @@ def send_body(self, body): msg = f"Called 'send_body' in invalid state {self.description()}" raise ProtocolError(msg) - # Track content - self.our_seen_length += len(body) - if self.our_content_length is None: # Transfer-Encoding: chunked + self.our_seen_length += len(body) self.writer.write(f'{len(body):x}\r\n'.encode('ascii')) - if body: - self.writer.write(body + b'\r\n') + self.writer.write(body + b'\r\n') + else: # Content-Length: xxx + self.our_seen_length += len(body) if self.our_seen_length > self.our_content_length: msg = 'Too much data sent for declared Content-Length' raise ProtocolError(msg) @@ -139,8 +141,8 @@ def recv_status_line(self): msg = f"Called 'recv_status_line' in invalid state {self.description()}" raise ProtocolError(msg) - msg = "reading response status line" - line = self.parser.read_until(b"\r\n", max_size=4096, msg=msg) + exc_text = "reading response status line" + line = self.parser.read_until(b"\r\n", max_size=4096, exc_text=exc_text) protocol, status_code, reason_phrase = line.split(b" ", 2) if protocol != b'HTTP/1.1': raise ProtocolError("Received unsupported protocol version") @@ -155,8 +157,8 @@ def recv_headers(self): # Data off the wire headers = [] - msg = "reading response headers" - while line := self.parser.read_until(b"\r\n", max_size=4096, msg=msg): + exc_text = "reading response headers" + 
while line := self.parser.read_until(b"\r\n", max_size=4096, exc_text=exc_text): name, value = line.split(b":", 1) value = value.strip(b" ") headers.append((name, value)) @@ -179,18 +181,24 @@ def recv_body(self): msg = f"Called 'recv_body' in invalid state {self.description()}" raise ProtocolError(msg) - msg = 'reading response body' if self.their_content_length is None: - line = self.parser.read_until(b"\r\n", max_size=4096, msg=msg) + # Transfer-Encoding: chunked + exc_text = 'reading chunk size' + line = self.parser.read_until(b"\r\n", max_size=4096, exc_text=exc_text) sizestr, _, _ = line.partition(b";") size = int(sizestr, base=16) - if size == 0: - body = b'' - else: + if size > 0: body = self.parser.read(size=size) - self.parser.read_until(b"\r\n", max_size=2, msg=msg) + exc_text = 'reading chunk data' + self.parser.read_until(b"\r\n", max_size=2, exc_text=exc_text) self.their_seen_length += len(body) + else: + body = b'' + exc_text = 'reading chunk termination' + self.parser.read_until(b"\r\n", max_size=2, exc_text=exc_text) + else: + # Content-Length: xxx remaining = self.their_content_length - self.their_seen_length size = min(remaining, 4096) body = self.parser.read(size=size) @@ -206,7 +214,6 @@ def recv_body(self): else: self.their_state = State.MUST_CLOSE - # Return body return body def start_next_cycle(self): @@ -268,30 +275,31 @@ def read(self, size): self._push_back(bytes(push_back)) return bytes(buffer) - def read_until(self, marker, max_size, msg): + def read_until(self, marker, max_size, exc_text): """ Read and return bytes from the stream, delimited by marker. + * The marker is not included in the return bytes. * The marker is consumed from the I/O stream. - * Raises `StreamClosed` if the stream closes before a marker occurance. - * Raises `ExceededMaxSize` if marker did not occur within 'max_size + len(marker)' bytes. + * Raises `ProtocolError` if the stream closes before a marker occurance. 
+ * Raises `ProtocolError` if marker did not occur within 'max_size + len(marker)' bytes. """ buffer = bytearray() while len(buffer) <= max_size: chunk = self._read_some() if not chunk: # stream closed before marker found. - raise ProtocolError(f"Stream closed early {msg}") + raise ProtocolError(f"Stream closed early {exc_text}") start_search = max(len(buffer) - len(marker), 0) buffer.extend(chunk) index = buffer.find(marker, start_search) if index > max_size: # marker was found, though 'max_size' exceeded. - raise ProtocolError(f"Exceeded maximum size {msg}") + raise ProtocolError(f"Exceeded maximum size {exc_text}") elif index >= 0: endindex = index + len(marker) self._push_back(bytes(buffer[endindex:])) return bytes(buffer[:index]) - raise ProtocolError(f"Exceeded maximum size {msg}") + raise ProtocolError(f"Exceeded maximum size {exc_text}") diff --git a/tests/test_parsers.py b/tests/test_parsers.py index cc5b576..077ff7b 100644 --- a/tests/test_parsers.py +++ b/tests/test_parsers.py @@ -57,7 +57,7 @@ def test_parser_transfer_encoding_chunked(): b"\r\n" b"c\r\n" b"hello, world\r\n" - b"0\r\n" + b"0\r\n\r\n" ) p = httpx.HTTPParser(writer, reader) @@ -78,7 +78,7 @@ def test_parser_transfer_encoding_chunked(): b"\r\n" b'17\r\n' b'{"msg": "hello, world"}\r\n' - b'0\r\n' + b'0\r\n\r\n' ) protocol, code, reason_phase = p.recv_status_line() From c53905902e28b21c6baf4cc1f71ee5f72d5bf112 Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Wed, 11 Jun 2025 13:07:00 +0100 Subject: [PATCH 09/19] Cleaner code --- src/ahttpx/_parsers.py | 10 ++++++---- src/httpx/_parsers.py | 10 ++++++---- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/ahttpx/_parsers.py b/src/ahttpx/_parsers.py index 7e73e4d..2dd82af 100644 --- a/src/ahttpx/_parsers.py +++ b/src/ahttpx/_parsers.py @@ -72,6 +72,7 @@ def send_method_line(self, method, target, protocol): msg = f"Called 'send_method_line' in invalid state {self.description()}" raise ProtocolError(msg) + # Send initial request 
line, eg. "GET / HTTP/1.1" if protocol != b'HTTP/1.1': raise ProtocolError("Sent unsupported protocol version") data = b" ".join([method, target, protocol]) + b"\r\n" @@ -84,7 +85,7 @@ def send_headers(self, headers): msg = f"Called 'send_headers' in invalid state {self.description()}" raise ProtocolError(msg) - # Header state + # Update header state seen_host = False for name, value in headers: lname = name.lower() @@ -99,7 +100,7 @@ def send_headers(self, headers): if not seen_host: raise ProtocolError("Request missing 'Host' header") - # Data on the wire + # Send request headers lines = [name + b": " + value + b"\r\n" for name, value in headers] data = b"".join(lines) + b"\r\n" self.writer.write(data) @@ -141,6 +142,7 @@ def recv_status_line(self): msg = f"Called 'recv_status_line' in invalid state {self.description()}" raise ProtocolError(msg) + # Read initial response line, eg. "HTTP/1.1 200 OK" exc_text = "reading response status line" line = self.parser.read_until(b"\r\n", max_size=4096, exc_text=exc_text) protocol, status_code, reason_phrase = line.split(b" ", 2) @@ -155,7 +157,7 @@ def recv_headers(self): msg = f"Called 'recv_headers' in invalid state {self.description()}" raise ProtocolError(msg) - # Data off the wire + # Read response headers headers = [] exc_text = "reading response headers" while line := self.parser.read_until(b"\r\n", max_size=4096, exc_text=exc_text): @@ -163,7 +165,7 @@ def recv_headers(self): value = value.strip(b" ") headers.append((name, value)) - # Header state + # Update header state for name, value in headers: lname = name.lower() if lname == b'content-length': diff --git a/src/httpx/_parsers.py b/src/httpx/_parsers.py index 7e73e4d..2dd82af 100644 --- a/src/httpx/_parsers.py +++ b/src/httpx/_parsers.py @@ -72,6 +72,7 @@ def send_method_line(self, method, target, protocol): msg = f"Called 'send_method_line' in invalid state {self.description()}" raise ProtocolError(msg) + # Send initial request line, eg. 
"GET / HTTP/1.1" if protocol != b'HTTP/1.1': raise ProtocolError("Sent unsupported protocol version") data = b" ".join([method, target, protocol]) + b"\r\n" @@ -84,7 +85,7 @@ def send_headers(self, headers): msg = f"Called 'send_headers' in invalid state {self.description()}" raise ProtocolError(msg) - # Header state + # Update header state seen_host = False for name, value in headers: lname = name.lower() @@ -99,7 +100,7 @@ def send_headers(self, headers): if not seen_host: raise ProtocolError("Request missing 'Host' header") - # Data on the wire + # Send request headers lines = [name + b": " + value + b"\r\n" for name, value in headers] data = b"".join(lines) + b"\r\n" self.writer.write(data) @@ -141,6 +142,7 @@ def recv_status_line(self): msg = f"Called 'recv_status_line' in invalid state {self.description()}" raise ProtocolError(msg) + # Read initial response line, eg. "HTTP/1.1 200 OK" exc_text = "reading response status line" line = self.parser.read_until(b"\r\n", max_size=4096, exc_text=exc_text) protocol, status_code, reason_phrase = line.split(b" ", 2) @@ -155,7 +157,7 @@ def recv_headers(self): msg = f"Called 'recv_headers' in invalid state {self.description()}" raise ProtocolError(msg) - # Data off the wire + # Read response headers headers = [] exc_text = "reading response headers" while line := self.parser.read_until(b"\r\n", max_size=4096, exc_text=exc_text): @@ -163,7 +165,7 @@ def recv_headers(self): value = value.strip(b" ") headers.append((name, value)) - # Header state + # Update header state for name, value in headers: lname = name.lower() if lname == b'content-length': From 4c12350b1a8d0e69e98750485e97cf30a66d1a46 Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Wed, 11 Jun 2025 13:20:23 +0100 Subject: [PATCH 10/19] Cleanup --- src/ahttpx/_parsers.py | 2 +- src/httpx/_parsers.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ahttpx/_parsers.py b/src/ahttpx/_parsers.py index 2dd82af..334ddfc 100644 --- 
a/src/ahttpx/_parsers.py +++ b/src/ahttpx/_parsers.py @@ -42,7 +42,7 @@ class ProtocolError(Exception): class HTTPParser: """ Usage... - + client = HTTPParser(writer, reader) client.send_method_line() IDLE -> SEND_HEADERS client.send_headers() SEND_HEADERS -> SEND_BODY diff --git a/src/httpx/_parsers.py b/src/httpx/_parsers.py index 2dd82af..334ddfc 100644 --- a/src/httpx/_parsers.py +++ b/src/httpx/_parsers.py @@ -42,7 +42,7 @@ class ProtocolError(Exception): class HTTPParser: """ Usage... - + client = HTTPParser(writer, reader) client.send_method_line() IDLE -> SEND_HEADERS client.send_headers() SEND_HEADERS -> SEND_BODY From c2d806ca316b7d462184e07dde93f425525ec1c1 Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Wed, 11 Jun 2025 19:39:30 +0100 Subject: [PATCH 11/19] Additional tests --- src/ahttpx/_parsers.py | 28 +++++---- src/httpx/_parsers.py | 27 +++++---- tests/test_parsers.py | 131 ++++++++++++++++++++++++++++++++++++++++- 3 files changed, 161 insertions(+), 25 deletions(-) diff --git a/src/ahttpx/_parsers.py b/src/ahttpx/_parsers.py index 334ddfc..ff79012 100644 --- a/src/ahttpx/_parsers.py +++ b/src/ahttpx/_parsers.py @@ -11,19 +11,22 @@ # * Upgrade: ... / (CONNECT?) # * Host: required (ordering?) -# * HTTP/1.0 support -# * Bounded integer conversions -# * trailers - -# * validate method, target in request line -# * validate status code in response line +# * Support 'Expect: 100 Continue' +# * Add 'Error' state transitions +# * Add tests to trickle data +# * Add type annotations + +# * Integer conversions should always be bounded and +ve. `boundedint(..., maxdigits, base)` +# * Optional... HTTP/1.0 support +# * Read trailing headers on Transfer-Encoding: chunked. Not just '\r\n'. +# * When writing Transfer-Encoding: chunked, split large writes into buffer size. +# * When reading Transfer-Encoding: chunked, handle incomplete reads from large chunk sizes. +# * .read() doesn't document if will always return maximum available. 
+ +# * validate method, target, protocol in request line +# * validate protocol, status_code, reason_phrase in response line # * validate name, value on headers -# * Fixup invalid state transition messages - -# * read() fixup -# * chunk size - class State(enum.Enum): IDLE = 0 @@ -215,7 +218,6 @@ def recv_body(self): self.their_state = State.DONE else: self.their_state = State.MUST_CLOSE - return body def start_next_cycle(self): @@ -264,6 +266,8 @@ def _push_back(self, buffer): def read(self, size): """ Read and return up to 'size' bytes from the stream, with I/O buffering provided. + + * Returns b'' to indicate connection close. """ buffer = bytearray() while len(buffer) < size: diff --git a/src/httpx/_parsers.py b/src/httpx/_parsers.py index 334ddfc..8084209 100644 --- a/src/httpx/_parsers.py +++ b/src/httpx/_parsers.py @@ -11,19 +11,21 @@ # * Upgrade: ... / (CONNECT?) # * Host: required (ordering?) -# * HTTP/1.0 support -# * Bounded integer conversions -# * trailers - -# * validate method, target in request line -# * validate status code in response line +# * Add 'Error' state transitions +# * Add tests to trickle data +# * Add type annotations + +# * Integer conversions should always be bounded and +ve. `boundedint(..., maxdigits, base)` +# * Optional... HTTP/1.0 support +# * Read trailing headers on Transfer-Encoding: chunked. Not just '\r\n'. +# * When writing Transfer-Encoding: chunked, split large writes into buffer size. +# * When reading Transfer-Encoding: chunked, handle incomplete reads from large chunk sizes. +# * .read() doesn't document if will always return maximum available. 
+ +# * validate method, target, protocol in request line +# * validate protocol, status_code, reason_phrase in response line # * validate name, value on headers -# * Fixup invalid state transition messages - -# * read() fixup -# * chunk size - class State(enum.Enum): IDLE = 0 @@ -215,7 +217,6 @@ def recv_body(self): self.their_state = State.DONE else: self.their_state = State.MUST_CLOSE - return body def start_next_cycle(self): @@ -264,6 +265,8 @@ def _push_back(self, buffer): def read(self, size): """ Read and return up to 'size' bytes from the stream, with I/O buffering provided. + + * Returns b'' to indicate connection close. """ buffer = bytearray() while len(buffer) < size: diff --git a/tests/test_parsers.py b/tests/test_parsers.py index 077ff7b..055f424 100644 --- a/tests/test_parsers.py +++ b/tests/test_parsers.py @@ -3,6 +3,14 @@ import pytest +class TrickleIO(): + def __init__(self, content): + self.buffer = io.BytesIO(content) + + def read(self, size): + return self.buffer.read(1) + + def test_parser(): writer = io.BytesIO() reader = io.BytesIO( @@ -48,6 +56,51 @@ def test_parser(): assert terminator == b'' +def test_parser_trickle(): + writer = io.BytesIO() + reader = TrickleIO( + b"HTTP/1.1 200 OK\r\n" + b"Content-Length: 12\r\n" + b"Content-Type: text/plain\r\n" + b"\r\n" + b"hello, world" + ) + + p = httpx.HTTPParser(writer, reader) + p.send_method_line(b"POST", b"/", b"HTTP/1.1") + p.send_headers([ + (b"Host", b"example.com"), + (b"Content-Type", b"application/json"), + (b"Content-Length", b"23"), + ]) + p.send_body(b'{"msg": "hello, world"}') + p.send_body(b'') + + assert writer.getvalue() == ( + b"POST / HTTP/1.1\r\n" + b"Host: example.com\r\n" + b"Content-Type: application/json\r\n" + b"Content-Length: 23\r\n" + b"\r\n" + b'{"msg": "hello, world"}' + ) + + protocol, code, reason_phase = p.recv_status_line() + headers = p.recv_headers() + body = p.recv_body() + terminator = p.recv_body() + + assert protocol == b'HTTP/1.1' + assert code == b'200' + 
assert reason_phase == b'OK' + assert headers == [ + (b'Content-Length', b'12'), + (b'Content-Type', b'text/plain'), + ] + assert body == b'hello, world' + assert terminator == b'' + + def test_parser_transfer_encoding_chunked(): writer = io.BytesIO() reader = io.BytesIO( @@ -97,6 +150,55 @@ def test_parser_transfer_encoding_chunked(): assert terminator == b'' +def test_parser_transfer_encoding_chunked_trickle(): + writer = io.BytesIO() + reader = TrickleIO( + b"HTTP/1.1 200 OK\r\n" + b"Content-Type: text/plain\r\n" + b"Transfer-Encoding: chunked\r\n" + b"\r\n" + b"c\r\n" + b"hello, world\r\n" + b"0\r\n\r\n" + ) + + p = httpx.HTTPParser(writer, reader) + p.send_method_line(b"POST", b"/", b"HTTP/1.1") + p.send_headers([ + (b"Host", b"example.com"), + (b"Content-Type", b"application/json"), + (b"Transfer-Encoding", b"chunked"), + ]) + p.send_body(b'{"msg": "hello, world"}') + p.send_body(b'') + + assert writer.getvalue() == ( + b"POST / HTTP/1.1\r\n" + b"Host: example.com\r\n" + b"Content-Type: application/json\r\n" + b"Transfer-Encoding: chunked\r\n" + b"\r\n" + b'17\r\n' + b'{"msg": "hello, world"}\r\n' + b'0\r\n\r\n' + ) + + protocol, code, reason_phase = p.recv_status_line() + headers = p.recv_headers() + body = p.recv_body() + terminator = p.recv_body() + + assert protocol == b'HTTP/1.1' + assert code == b'200' + assert reason_phase == b'OK' + assert headers == [ + (b'Content-Type', b'text/plain'), + (b'Transfer-Encoding', b'chunked'), + ] + assert body == b'hello, world' + assert terminator == b'' + + def test_parser_repr(): writer = io.BytesIO() reader = io.BytesIO( @@ -139,6 +241,11 @@ def test_parser_invalid_transitions(): writer = io.BytesIO() reader = io.BytesIO() + with pytest.raises(httpx.ProtocolError): + p = httpx.HTTPParser(writer, reader) + p.send_method_line(b'GET', b'/', b'HTTP/1.1') + p.send_method_line(b'GET', b'/', b'HTTP/1.1') + with pytest.raises(httpx.ProtocolError): p = httpx.HTTPParser(writer, reader) p.send_headers([]) @@ -147,6 +254,12 @@ 
def test_parser_invalid_transitions(): p = httpx.HTTPParser(writer, reader) p.send_body(b'') + with pytest.raises(httpx.ProtocolError): + reader = io.BytesIO(b'HTTP/1.1 200 OK\r\n') + p = httpx.HTTPParser(writer, reader) + p.recv_status_line() + p.recv_status_line() + with pytest.raises(httpx.ProtocolError): p = httpx.HTTPParser(writer, reader) p.recv_headers() @@ -155,6 +268,10 @@ def test_parser_invalid_transitions(): p = httpx.HTTPParser(writer, reader) p.recv_body() + with pytest.raises(httpx.ProtocolError): + p = httpx.HTTPParser(writer, reader) + p.start_next_cycle() + def test_parser_invalid_status_line(): # ... @@ -197,7 +314,7 @@ def test_parser_invalid_status_line(): p.recv_status_line() -def test_parser_sent_supported_protocol(): +def test_parser_sent_unsupported_protocol(): # Currently only HTTP/1.1 is supported. writer = io.BytesIO() reader = io.BytesIO() @@ -208,6 +325,18 @@ def test_parser_sent_supported_protocol(): p.send_method_line(b"GET", b"/", b"HTTP/1.0") +def test_parser_recv_unsupported_protocol(): + # Currently only HTTP/1.1 is supported. 
+ writer = io.BytesIO() + reader = io.BytesIO(b"HTTP/1.0 200 OK\r\n") + + p = httpx.HTTPParser(writer, reader) + p.send_method_line(b"GET", b"/", b"HTTP/1.1") + msg = 'Received unsupported protocol version' + with pytest.raises(httpx.ProtocolError, match=msg): + p.recv_status_line() + + def test_parser_large_body(): body = b"x" * 6988 From 2182889ec1e77b78b05e1e6624d1fa3399f839f3 Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Tue, 17 Jun 2025 13:20:08 +0100 Subject: [PATCH 12/19] Merge dev --- src/httpx/__init__.py | 27 ++++++++++----------------- src/httpx/_parsers.py | 1 + 2 files changed, 11 insertions(+), 17 deletions(-) diff --git a/src/httpx/__init__.py b/src/httpx/__init__.py index c343200..71df364 100644 --- a/src/httpx/__init__.py +++ b/src/httpx/__init__.py @@ -2,13 +2,9 @@ from ._content import * # Content, File, Files, Form, HTML, JSON, MultiPart, Text from ._headers import * # Headers from ._network import * # NetworkBackend, NetworkStream, timeout -<<<<<<< HEAD from ._parsers import * # HTTPParser, ProtocolError -from ._pool import * # Connection, ConnectionPool, Transport, open_connection_pool, open_connection -======= from ._pool import * # Connection, ConnectionPool, Transport from ._quickstart import * # get, post, put, patch, delete ->>>>>>> dev from ._response import * # Response from ._request import * # Request from ._streams import * # ByteStream, IterByteStream, FileStream, Stream @@ -23,12 +19,10 @@ "Connection", "ConnectionPool", "Content", - "delete", "File", "FileStream", "Files", "Form", - "get", "Headers", "HTML", "HTTPParser", @@ -37,26 +31,25 @@ "MultiPart", "NetworkBackend", "NetworkStream", - "open_connection", -<<<<<<< HEAD "ProtocolError", -======= - "post", - "put", - "patch", ->>>>>>> dev "Response", "Request", - "serve_http", - "serve_tcp", "Stream", "Text", - "timeout", "Transport", "QueryParams", + "URL", + "delete", + "get", + "open_connection", + "patch", + "post", + "put", "quote", + "serve_http", + "serve_tcp", + 
"timeout", "unquote", - "URL", "urldecode", "urlencode", ] diff --git a/src/httpx/_parsers.py b/src/httpx/_parsers.py index 8084209..ff79012 100644 --- a/src/httpx/_parsers.py +++ b/src/httpx/_parsers.py @@ -11,6 +11,7 @@ # * Upgrade: ... / (CONNECT?) # * Host: required (ordering?) +# * Support 'Expect: 100 Continue' # * Add 'Error' state transitions # * Add tests to trickle data # * Add type annotations From 9adaeadbe84cd74f203087d904c8d21e1a2d6d4e Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Tue, 17 Jun 2025 13:59:41 +0100 Subject: [PATCH 13/19] Validate status codes --- src/ahttpx/_parsers.py | 18 +++++++++++++++++- src/httpx/_parsers.py | 21 ++++++++++++++++++++- tests/test_parsers.py | 35 +++++++++++++++++++++++++++++------ 3 files changed, 66 insertions(+), 8 deletions(-) diff --git a/src/ahttpx/_parsers.py b/src/ahttpx/_parsers.py index ff79012..aea38e4 100644 --- a/src/ahttpx/_parsers.py +++ b/src/ahttpx/_parsers.py @@ -148,10 +148,17 @@ def recv_status_line(self): # Read initial response line, eg. 
"HTTP/1.1 200 OK" exc_text = "reading response status line" line = self.parser.read_until(b"\r\n", max_size=4096, exc_text=exc_text) - protocol, status_code, reason_phrase = line.split(b" ", 2) + protocol, status_code_str, reason_phrase = line.split(b" ", 2) if protocol != b'HTTP/1.1': raise ProtocolError("Received unsupported protocol version") + status_code = bounded_int( + status_code_str, + min=100, + max=999, + exc_text="Received invalid status code" + ) + self.their_state = State.SEND_HEADERS return protocol, status_code, reason_phrase @@ -309,3 +316,12 @@ def read_until(self, marker, max_size, exc_text): return bytes(buffer[:index]) raise ProtocolError(f"Exceeded maximum size {exc_text}") + + +def bounded_int(intstr: str, min: int, max: int, exc_text: str): + if len(intstr) > len(str(max)): + raise ProtocolError(exc_text) + converted = int(intstr) + if converted < min or converted > max: + raise ProtocolError(exc_text) + return converted diff --git a/src/httpx/_parsers.py b/src/httpx/_parsers.py index ff79012..bb66aad 100644 --- a/src/httpx/_parsers.py +++ b/src/httpx/_parsers.py @@ -148,10 +148,20 @@ def recv_status_line(self): # Read initial response line, eg. 
"HTTP/1.1 200 OK" exc_text = "reading response status line" line = self.parser.read_until(b"\r\n", max_size=4096, exc_text=exc_text) - protocol, status_code, reason_phrase = line.split(b" ", 2) + protocol, status_code_str, reason_phrase = line.split(b" ", 2) if protocol != b'HTTP/1.1': raise ProtocolError("Received unsupported protocol version") + status_code = bounded_int( + status_code_str, + min=100, + max=999, + exc_text="Received invalid status code" + ) + if status_code < 200: + # 1xx status codes are currently unsupported + raise ProtocolError("Received unsupported status code") + self.their_state = State.SEND_HEADERS return protocol, status_code, reason_phrase @@ -309,3 +319,12 @@ def read_until(self, marker, max_size, exc_text): return bytes(buffer[:index]) raise ProtocolError(f"Exceeded maximum size {exc_text}") + + +def bounded_int(intstr: str, min: int, max: int, exc_text: str): + if len(intstr) > len(str(max)): + raise ProtocolError(exc_text) + converted = int(intstr) + if converted < min or converted > max: + raise ProtocolError(exc_text) + return converted diff --git a/tests/test_parsers.py b/tests/test_parsers.py index 055f424..5f8ed30 100644 --- a/tests/test_parsers.py +++ b/tests/test_parsers.py @@ -46,7 +46,7 @@ def test_parser(): terminator = p.recv_body() assert protocol == b'HTTP/1.1' - assert code == b'200' + assert code == 200 assert reason_phase == b'OK' assert headers == [ (b'Content-Length', b'12'), @@ -91,7 +91,7 @@ def test_parser_trickle(): terminator = p.recv_body() assert protocol == b'HTTP/1.1' - assert code == b'200' + assert code == 200 assert reason_phase == b'OK' assert headers == [ (b'Content-Length', b'12'), @@ -140,7 +140,7 @@ def test_parser_transfer_encoding_chunked(): terminator = p.recv_body() assert protocol == b'HTTP/1.1' - assert code == b'200' + assert code == 200 assert reason_phase == b'OK' assert headers == [ (b'Content-Type', b'text/plain'), @@ -189,7 +189,7 @@ def test_parser_transfer_encoding_chunked_trickle(): 
terminator = p.recv_body() assert protocol == b'HTTP/1.1' - assert code == b'200' + assert code == 200 assert reason_phase == b'OK' assert headers == [ (b'Content-Type', b'text/plain'), @@ -453,7 +453,7 @@ def test_client_connection_close(): terminator = p.recv_body() assert protocol == b'HTTP/1.1' - assert code == b'200' + assert code == 200 assert reason_phase == b"OK" assert headers == [ (b'Content-Length', b'12'), @@ -487,7 +487,7 @@ def test_server_connection_close(): terminator = p.recv_body() assert protocol == b'HTTP/1.1' - assert code == b'200' + assert code == 200 assert reason_phase == b"OK" assert headers == [ (b'Content-Length', b'12'), @@ -498,3 +498,26 @@ def test_server_connection_close(): assert terminator == b"" assert repr(p) == "" + + +def test_invalid_status_code(): + writer = io.BytesIO() + reader = io.BytesIO( + b"HTTP/1.1 1000 OK\r\n" + b"Content-Length: 12\r\n" + b"Content-Type: text/plain\r\n" + b"\r\n" + b"hello, world" + ) + + p = httpx.HTTPParser(writer, reader) + p.send_method_line(b"GET", b"/", b"HTTP/1.1") + p.send_headers([ + (b"Host", b"example.com"), + (b"Connection", b"close"), + ]) + p.send_body(b'') + + msg = "Received invalid status code" + with pytest.raises(httpx.ProtocolError, match=msg): + p.recv_status_line() From 68bb9caa8629b25644286aea01b433a05a616e3e Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Tue, 17 Jun 2025 14:05:33 +0100 Subject: [PATCH 14/19] Manage unsupported 1xx codes --- src/ahttpx/_parsers.py | 3 +++ src/httpx/_parsers.py | 2 +- tests/test_parsers.py | 27 +++++++++++++++++++++++++++ 3 files changed, 31 insertions(+), 1 deletion(-) diff --git a/src/ahttpx/_parsers.py b/src/ahttpx/_parsers.py index aea38e4..30cc862 100644 --- a/src/ahttpx/_parsers.py +++ b/src/ahttpx/_parsers.py @@ -158,6 +158,9 @@ def recv_status_line(self): max=999, exc_text="Received invalid status code" ) + if status_code < 200: + # 1xx status codes should be read and skipped. 
+ raise ProtocolError("Received unsupported status code") self.their_state = State.SEND_HEADERS return protocol, status_code, reason_phrase diff --git a/src/httpx/_parsers.py b/src/httpx/_parsers.py index bb66aad..30cc862 100644 --- a/src/httpx/_parsers.py +++ b/src/httpx/_parsers.py @@ -159,7 +159,7 @@ def recv_status_line(self): exc_text="Received invalid status code" ) if status_code < 200: - # 1xx status codes are currently unsupported + # 1xx status codes should be read and skipped. raise ProtocolError("Received unsupported status code") self.their_state = State.SEND_HEADERS diff --git a/tests/test_parsers.py b/tests/test_parsers.py index 5f8ed30..9a54927 100644 --- a/tests/test_parsers.py +++ b/tests/test_parsers.py @@ -521,3 +521,30 @@ def test_invalid_status_code(): msg = "Received invalid status code" with pytest.raises(httpx.ProtocolError, match=msg): p.recv_status_line() + + +def test_unsupported_status_code(): + writer = io.BytesIO() + reader = io.BytesIO( + b"HTTP/1.1 103 Early Hints\r\n" + b"Link: ; rel=preload; as=style\r\n" + b"Link: ; rel=preload; as=script\r\n" + b"" + b"HTTP/1.1 200 OK\r\n" + b"Content-Length: 12\r\n" + b"Content-Type: text/plain\r\n" + b"\r\n" + b"hello, world" + ) + + p = httpx.HTTPParser(writer, reader) + p.send_method_line(b"GET", b"/", b"HTTP/1.1") + p.send_headers([ + (b"Host", b"example.com"), + (b"Connection", b"close"), + ]) + p.send_body(b'') + + msg = "Received unsupported status code" + with pytest.raises(httpx.ProtocolError, match=msg): + p.recv_status_line() From 9dd9992222d08ed0ab7137c7542af80dee5da14f Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Tue, 17 Jun 2025 14:32:32 +0100 Subject: [PATCH 15/19] Validate Content-Length --- src/ahttpx/_parsers.py | 6 +++-- src/httpx/_parsers.py | 6 +++-- tests/test_parsers.py | 51 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 59 insertions(+), 4 deletions(-) diff --git a/src/ahttpx/_parsers.py b/src/ahttpx/_parsers.py index 30cc862..ca00945 100644 --- 
a/src/ahttpx/_parsers.py +++ b/src/ahttpx/_parsers.py @@ -95,7 +95,8 @@ def send_headers(self, headers): if lname == b'host': seen_host = True elif lname == b'content-length': - self.our_content_length = int(value) + exc_text = "Sent invalid Content-Length" + self.our_content_length = bounded_int(value, min=0, max=2**64, exc_text=exc_text) elif lname == b'connection' and value == b'close': self.our_keep_alive = False elif lname == b'transfer-encoding' and value == b'chunked': @@ -182,7 +183,8 @@ def recv_headers(self): for name, value in headers: lname = name.lower() if lname == b'content-length': - self.their_content_length = int(value) + exc_text = "Received invalid Content-Length" + self.their_content_length = bounded_int(value, min=0, max=2**64, exc_text=exc_text) elif lname == b'connection' and value == b'close': self.their_keep_alive = False elif lname == b'transfer-encoding' and value == b'chunked': diff --git a/src/httpx/_parsers.py b/src/httpx/_parsers.py index 30cc862..ca00945 100644 --- a/src/httpx/_parsers.py +++ b/src/httpx/_parsers.py @@ -95,7 +95,8 @@ def send_headers(self, headers): if lname == b'host': seen_host = True elif lname == b'content-length': - self.our_content_length = int(value) + exc_text = "Sent invalid Content-Length" + self.our_content_length = bounded_int(value, min=0, max=2**64, exc_text=exc_text) elif lname == b'connection' and value == b'close': self.our_keep_alive = False elif lname == b'transfer-encoding' and value == b'chunked': @@ -182,7 +183,8 @@ def recv_headers(self): for name, value in headers: lname = name.lower() if lname == b'content-length': - self.their_content_length = int(value) + exc_text = "Received invalid Content-Length" + self.their_content_length = bounded_int(value, min=0, max=2**64, exc_text=exc_text) elif lname == b'connection' and value == b'close': self.their_keep_alive = False elif lname == b'transfer-encoding' and value == b'chunked': diff --git a/tests/test_parsers.py b/tests/test_parsers.py index 
9a54927..638019d 100644 --- a/tests/test_parsers.py +++ b/tests/test_parsers.py @@ -524,6 +524,12 @@ def test_invalid_status_code(): def test_unsupported_status_code(): + # We don't currently handle 1xx status codes. + # + # This example demonstrates 103 Early Hints behavior, + # which is typically unsupported by HTTP/1.1 clients... + # + # https://httpwg.org/specs/rfc8297.html#early-hints writer = io.BytesIO() reader = io.BytesIO( b"HTTP/1.1 103 Early Hints\r\n" @@ -548,3 +554,48 @@ def test_unsupported_status_code(): msg = "Received unsupported status code" with pytest.raises(httpx.ProtocolError, match=msg): p.recv_status_line() + + +def test_received_invalid_content_length(): + writer = io.BytesIO() + reader = io.BytesIO( + b"HTTP/1.1 200 OK\r\n" + b"Content-Length: -999\r\n" + b"Content-Type: text/plain\r\n" + b"\r\n" + b"hello, world" + ) + + p = httpx.HTTPParser(writer, reader) + p.send_method_line(b"GET", b"/", b"HTTP/1.1") + p.send_headers([ + (b"Host", b"example.com"), + (b"Connection", b"close"), + ]) + p.send_body(b'') + + msg = "Received invalid Content-Length" + with pytest.raises(httpx.ProtocolError, match=msg): + p.recv_status_line() + + +def test_received_invalid_content_length(): + writer = io.BytesIO() + reader = io.BytesIO( + b"HTTP/1.1 200 OK\r\n" + b"Content-Length: -999\r\n" + b"Content-Type: text/plain\r\n" + b"\r\n" + b"hello, world" + ) + + p = httpx.HTTPParser(writer, reader) + p.send_method_line(b"GET", b"/", b"HTTP/1.1") + msg = "Sent invalid Content-Length" + with pytest.raises(httpx.ProtocolError, match=msg): + # Limited to 20 digits. + # 100 million terabytes should be enough for anyone. 
+ p.send_headers([ + (b"Host", b"example.com"), + (b"Content-Length", b"100000000000000000000"), + ]) From 0bab666d7e2dece4c74689250d35e96f363a0080 Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Tue, 17 Jun 2025 17:19:01 +0100 Subject: [PATCH 16/19] Handle 1xx status codes --- src/ahttpx/_parsers.py | 29 +++++++++++++++++++---------- src/httpx/_parsers.py | 29 +++++++++++++++++++---------- tests/test_parsers.py | 42 ++++++++++++++++++++++++++++++------------ 3 files changed, 68 insertions(+), 32 deletions(-) diff --git a/src/ahttpx/_parsers.py b/src/ahttpx/_parsers.py index ca00945..e9c577a 100644 --- a/src/ahttpx/_parsers.py +++ b/src/ahttpx/_parsers.py @@ -30,12 +30,13 @@ class State(enum.Enum): IDLE = 0 - SEND_HEADERS = 1 - SEND_BODY = 2 - DONE = 3 - MUST_CLOSE = 4 - CLOSED = 5 - ERROR = 6 + SEND_STATUS_LINE = 1 + SEND_HEADERS = 2 + SEND_BODY = 3 + DONE = 4 + MUST_CLOSE = 5 + CLOSED = 6 + ERROR = 7 class ProtocolError(Exception): @@ -69,6 +70,8 @@ def __init__(self, writer, reader): self.their_seen_length = 0 self.our_keep_alive = True self.their_keep_alive = True + self.processing_1xx = False + def send_method_line(self, method, target, protocol): if self.our_state != State.IDLE: @@ -82,6 +85,7 @@ def send_method_line(self, method, target, protocol): self.writer.write(data) self.our_state = State.SEND_HEADERS + self.their_state = State.SEND_STATUS_LINE def send_headers(self, headers): if self.our_state != State.SEND_HEADERS: @@ -142,7 +146,7 @@ def send_body(self, body): self.our_state = State.MUST_CLOSE def recv_status_line(self): - if self.their_state != State.IDLE: + if self.their_state != State.SEND_STATUS_LINE: msg = f"Called 'recv_status_line' in invalid state {self.description()}" raise ProtocolError(msg) @@ -160,8 +164,7 @@ def recv_status_line(self): exc_text="Received invalid status code" ) if status_code < 200: - # 1xx status codes should be read and skipped. 
- raise ProtocolError("Received unsupported status code") + self.processing_1xx = True self.their_state = State.SEND_HEADERS return protocol, status_code, reason_phrase @@ -190,7 +193,12 @@ def recv_headers(self): elif lname == b'transfer-encoding' and value == b'chunked': self.their_content_length = None - self.their_state = State.SEND_BODY + if self.processing_1xx: + # 1xx status codes preceed the final response status code + self.their_state = State.SEND_STATUS_LINE + self.processing_1xx = False + else: + self.their_state = State.SEND_BODY return headers def recv_body(self): @@ -245,6 +253,7 @@ def start_next_cycle(self): self.their_seen_length = 0 self.our_keep_alive = True self.their_keep_alive = True + self.processing_1xx = False def description(self) -> str: cl_state = self.our_state.name diff --git a/src/httpx/_parsers.py b/src/httpx/_parsers.py index ca00945..e9c577a 100644 --- a/src/httpx/_parsers.py +++ b/src/httpx/_parsers.py @@ -30,12 +30,13 @@ class State(enum.Enum): IDLE = 0 - SEND_HEADERS = 1 - SEND_BODY = 2 - DONE = 3 - MUST_CLOSE = 4 - CLOSED = 5 - ERROR = 6 + SEND_STATUS_LINE = 1 + SEND_HEADERS = 2 + SEND_BODY = 3 + DONE = 4 + MUST_CLOSE = 5 + CLOSED = 6 + ERROR = 7 class ProtocolError(Exception): @@ -69,6 +70,8 @@ def __init__(self, writer, reader): self.their_seen_length = 0 self.our_keep_alive = True self.their_keep_alive = True + self.processing_1xx = False + def send_method_line(self, method, target, protocol): if self.our_state != State.IDLE: @@ -82,6 +85,7 @@ def send_method_line(self, method, target, protocol): self.writer.write(data) self.our_state = State.SEND_HEADERS + self.their_state = State.SEND_STATUS_LINE def send_headers(self, headers): if self.our_state != State.SEND_HEADERS: @@ -142,7 +146,7 @@ def send_body(self, body): self.our_state = State.MUST_CLOSE def recv_status_line(self): - if self.their_state != State.IDLE: + if self.their_state != State.SEND_STATUS_LINE: msg = f"Called 'recv_status_line' in invalid state 
{self.description()}" raise ProtocolError(msg) @@ -160,8 +164,7 @@ def recv_status_line(self): exc_text="Received invalid status code" ) if status_code < 200: - # 1xx status codes should be read and skipped. - raise ProtocolError("Received unsupported status code") + self.processing_1xx = True self.their_state = State.SEND_HEADERS return protocol, status_code, reason_phrase @@ -190,7 +193,12 @@ def recv_headers(self): elif lname == b'transfer-encoding' and value == b'chunked': self.their_content_length = None - self.their_state = State.SEND_BODY + if self.processing_1xx: + # 1xx status codes preceed the final response status code + self.their_state = State.SEND_STATUS_LINE + self.processing_1xx = False + else: + self.their_state = State.SEND_BODY return headers def recv_body(self): @@ -245,6 +253,7 @@ def start_next_cycle(self): self.their_seen_length = 0 self.our_keep_alive = True self.their_keep_alive = True + self.processing_1xx = False def description(self) -> str: cl_state = self.our_state.name diff --git a/tests/test_parsers.py b/tests/test_parsers.py index 638019d..7d6f4e9 100644 --- a/tests/test_parsers.py +++ b/tests/test_parsers.py @@ -213,13 +213,13 @@ def test_parser_repr(): assert repr(p) == "" p.send_method_line(b"GET", b"/", b"HTTP/1.1") - assert repr(p) == "" + assert repr(p) == "" p.send_headers([(b"Host", b"example.com")]) - assert repr(p) == "" + assert repr(p) == "" p.send_body(b'') - assert repr(p) == "" + assert repr(p) == "" p.recv_status_line() assert repr(p) == "" @@ -258,7 +258,6 @@ def test_parser_invalid_transitions(): reader = io.BytesIO(b'HTTP/1.1 200 OK\r\n') p = httpx.HTTPParser(writer, reader) p.recv_status_line() - p.recv_status_line() with pytest.raises(httpx.ProtocolError): p = httpx.HTTPParser(writer, reader) @@ -535,7 +534,7 @@ def test_unsupported_status_code(): b"HTTP/1.1 103 Early Hints\r\n" b"Link: ; rel=preload; as=style\r\n" b"Link: ; rel=preload; as=script\r\n" - b"" + b"\r\n" b"HTTP/1.1 200 OK\r\n" b"Content-Length: 
12\r\n" b"Content-Type: text/plain\r\n" @@ -545,15 +544,34 @@ def test_unsupported_status_code(): p = httpx.HTTPParser(writer, reader) p.send_method_line(b"GET", b"/", b"HTTP/1.1") - p.send_headers([ - (b"Host", b"example.com"), - (b"Connection", b"close"), - ]) + p.send_headers([(b"Host", b"example.com")]) p.send_body(b'') - msg = "Received unsupported status code" - with pytest.raises(httpx.ProtocolError, match=msg): - p.recv_status_line() + protocol, code, reason_phase = p.recv_status_line() + headers = p.recv_headers() + + assert protocol == b'HTTP/1.1' + assert code == 103 + assert reason_phase == b'Early Hints' + assert headers == [ + (b'Link', b'; rel=preload; as=style'), + (b'Link', b'; rel=preload; as=script'), + ] + + protocol, code, reason_phase = p.recv_status_line() + headers = p.recv_headers() + body = p.recv_body() + terminator = p.recv_body() + + assert protocol == b'HTTP/1.1' + assert code == 200 + assert reason_phase == b"OK" + assert headers == [ + (b'Content-Length', b'12'), + (b'Content-Type', b'text/plain'), + ] + assert body == b"hello, world" + assert terminator == b"" def test_received_invalid_content_length(): From d9b0bcb8e3de7301f00ca73aae44e06726c1a58f Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Tue, 17 Jun 2025 18:21:18 +0100 Subject: [PATCH 17/19] Tighter processing_1xx state --- src/ahttpx/_parsers.py | 4 +--- src/httpx/_parsers.py | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/src/ahttpx/_parsers.py b/src/ahttpx/_parsers.py index e9c577a..4b92d1c 100644 --- a/src/ahttpx/_parsers.py +++ b/src/ahttpx/_parsers.py @@ -163,8 +163,7 @@ def recv_status_line(self): max=999, exc_text="Received invalid status code" ) - if status_code < 200: - self.processing_1xx = True + self.processing_1xx = status_code < 200 self.their_state = State.SEND_HEADERS return protocol, status_code, reason_phrase @@ -196,7 +195,6 @@ def recv_headers(self): if self.processing_1xx: # 1xx status codes preceed the final response status 
code self.their_state = State.SEND_STATUS_LINE - self.processing_1xx = False else: self.their_state = State.SEND_BODY return headers diff --git a/src/httpx/_parsers.py b/src/httpx/_parsers.py index e9c577a..4b92d1c 100644 --- a/src/httpx/_parsers.py +++ b/src/httpx/_parsers.py @@ -163,8 +163,7 @@ def recv_status_line(self): max=999, exc_text="Received invalid status code" ) - if status_code < 200: - self.processing_1xx = True + self.processing_1xx = status_code < 200 self.their_state = State.SEND_HEADERS return protocol, status_code, reason_phrase @@ -196,7 +195,6 @@ def recv_headers(self): if self.processing_1xx: # 1xx status codes preceed the final response status code self.their_state = State.SEND_STATUS_LINE - self.processing_1xx = False else: self.their_state = State.SEND_BODY return headers From b9458a9df45240426d986e0af1472fbd74a00a2a Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Wed, 18 Jun 2025 14:05:43 +0100 Subject: [PATCH 18/19] bounded_int and bounded_hex improvements --- src/ahttpx/_content.py | 2 +- src/ahttpx/_parsers.py | 55 +++++++++++++++++------ src/httpx/_content.py | 2 +- src/httpx/_parsers.py | 55 +++++++++++++++++------ tests/test_parsers.py | 99 ++++++++++++++++++++++++++++++++++-------- 5 files changed, 168 insertions(+), 45 deletions(-) diff --git a/src/ahttpx/_content.py b/src/ahttpx/_content.py index e75d30f..ea0ceac 100644 --- a/src/ahttpx/_content.py +++ b/src/ahttpx/_content.py @@ -77,7 +77,7 @@ def encode(self) -> tuple[Stream, str]: stream = ByteStream(str(self).encode("ascii")) content_type = "application/x-www-form-urlencoded" return (stream, content_type) - + # Dict operations def keys(self) -> typing.KeysView[str]: diff --git a/src/ahttpx/_parsers.py b/src/ahttpx/_parsers.py index 4b92d1c..d7582ff 100644 --- a/src/ahttpx/_parsers.py +++ b/src/ahttpx/_parsers.py @@ -62,16 +62,22 @@ def __init__(self, writer, reader): self.reader = reader self.parser = ReadAheadParser(reader) + # Track client and server state... 
self.our_state = State.IDLE self.their_state = State.IDLE + + # Track message framing... self.our_content_length = 0 self.their_content_length = 0 self.our_seen_length = 0 self.their_seen_length = 0 + + # Track connection keep alive... self.our_keep_alive = True self.their_keep_alive = True - self.processing_1xx = False + # Special states... + self.processing_1xx = False def send_method_line(self, method, target, protocol): if self.our_state != State.IDLE: @@ -99,8 +105,11 @@ def send_headers(self, headers): if lname == b'host': seen_host = True elif lname == b'content-length': - exc_text = "Sent invalid Content-Length" - self.our_content_length = bounded_int(value, min=0, max=2**64, exc_text=exc_text) + self.our_content_length = bounded_int( + value, + max_digits=20, + exc_text="Sent invalid Content-Length" + ) elif lname == b'connection' and value == b'close': self.our_keep_alive = False elif lname == b'transfer-encoding' and value == b'chunked': @@ -159,10 +168,12 @@ def recv_status_line(self): status_code = bounded_int( status_code_str, - min=100, - max=999, + max_digits=3, exc_text="Received invalid status code" ) + if status_code < 100: + raise ProtocolError("Received invalid status code") + # 1xx status codes preceed the final response status code self.processing_1xx = status_code < 200 self.their_state = State.SEND_HEADERS @@ -185,8 +196,11 @@ def recv_headers(self): for name, value in headers: lname = name.lower() if lname == b'content-length': - exc_text = "Received invalid Content-Length" - self.their_content_length = bounded_int(value, min=0, max=2**64, exc_text=exc_text) + self.their_content_length = bounded_int( + value, + max_digits=20, + exc_text="Received invalid Content-Length" + ) elif lname == b'connection' and value == b'close': self.their_keep_alive = False elif lname == b'transfer-encoding' and value == b'chunked': @@ -209,7 +223,9 @@ def recv_body(self): exc_text = 'reading chunk size' line = self.parser.read_until(b"\r\n", max_size=4096, 
exc_text=exc_text) sizestr, _, _ = line.partition(b";") - size = int(sizestr, base=16) + + exc_text = "Received invalid chunk size" + size = bounded_hex(sizestr, max_digits=8, exc_text=exc_text) if size > 0: body = self.parser.read(size=size) exc_text = 'reading chunk data' @@ -330,10 +346,23 @@ def read_until(self, marker, max_size, exc_text): raise ProtocolError(f"Exceeded maximum size {exc_text}") -def bounded_int(intstr: str, min: int, max: int, exc_text: str): - if len(intstr) > len(str(max)): +def bounded_int(intstr: bytes, max_digits: int, exc_text: str): + if len(intstr) > max_digits: + # Length of bytestring exceeds maximum. raise ProtocolError(exc_text) - converted = int(intstr) - if converted < min or converted > max: + if len(intstr.strip(b'0123456789')) != 0: + # Contains invalid characters. raise ProtocolError(exc_text) - return converted + + return int(intstr) + + +def bounded_hex(hexstr: bytes, max_digits: int, exc_text: str): + if len(hexstr) > max_digits: + # Length of bytestring exceeds maximum. + raise ProtocolError(exc_text) + if len(hexstr.strip(b'0123456789abcdefABCDEF')) != 0: + # Contains invalid characters. + raise ProtocolError(exc_text) + + return int(hexstr, base=16) diff --git a/src/httpx/_content.py b/src/httpx/_content.py index 1ee0ef2..4798b45 100644 --- a/src/httpx/_content.py +++ b/src/httpx/_content.py @@ -77,7 +77,7 @@ def encode(self) -> tuple[Stream, str]: stream = ByteStream(str(self).encode("ascii")) content_type = "application/x-www-form-urlencoded" return (stream, content_type) - + # Dict operations def keys(self) -> typing.KeysView[str]: diff --git a/src/httpx/_parsers.py b/src/httpx/_parsers.py index 4b92d1c..d7582ff 100644 --- a/src/httpx/_parsers.py +++ b/src/httpx/_parsers.py @@ -62,16 +62,22 @@ def __init__(self, writer, reader): self.reader = reader self.parser = ReadAheadParser(reader) + # Track client and server state... self.our_state = State.IDLE self.their_state = State.IDLE + + # Track message framing... 
self.our_content_length = 0 self.their_content_length = 0 self.our_seen_length = 0 self.their_seen_length = 0 + + # Track connection keep alive... self.our_keep_alive = True self.their_keep_alive = True - self.processing_1xx = False + # Special states... + self.processing_1xx = False def send_method_line(self, method, target, protocol): if self.our_state != State.IDLE: @@ -99,8 +105,11 @@ def send_headers(self, headers): if lname == b'host': seen_host = True elif lname == b'content-length': - exc_text = "Sent invalid Content-Length" - self.our_content_length = bounded_int(value, min=0, max=2**64, exc_text=exc_text) + self.our_content_length = bounded_int( + value, + max_digits=20, + exc_text="Sent invalid Content-Length" + ) elif lname == b'connection' and value == b'close': self.our_keep_alive = False elif lname == b'transfer-encoding' and value == b'chunked': @@ -159,10 +168,12 @@ def recv_status_line(self): status_code = bounded_int( status_code_str, - min=100, - max=999, + max_digits=3, exc_text="Received invalid status code" ) + if status_code < 100: + raise ProtocolError("Received invalid status code") + # 1xx status codes preceed the final response status code self.processing_1xx = status_code < 200 self.their_state = State.SEND_HEADERS @@ -185,8 +196,11 @@ def recv_headers(self): for name, value in headers: lname = name.lower() if lname == b'content-length': - exc_text = "Received invalid Content-Length" - self.their_content_length = bounded_int(value, min=0, max=2**64, exc_text=exc_text) + self.their_content_length = bounded_int( + value, + max_digits=20, + exc_text="Received invalid Content-Length" + ) elif lname == b'connection' and value == b'close': self.their_keep_alive = False elif lname == b'transfer-encoding' and value == b'chunked': @@ -209,7 +223,9 @@ def recv_body(self): exc_text = 'reading chunk size' line = self.parser.read_until(b"\r\n", max_size=4096, exc_text=exc_text) sizestr, _, _ = line.partition(b";") - size = int(sizestr, base=16) + + 
exc_text = "Received invalid chunk size" + size = bounded_hex(sizestr, max_digits=8, exc_text=exc_text) if size > 0: body = self.parser.read(size=size) exc_text = 'reading chunk data' @@ -330,10 +346,23 @@ def read_until(self, marker, max_size, exc_text): raise ProtocolError(f"Exceeded maximum size {exc_text}") -def bounded_int(intstr: str, min: int, max: int, exc_text: str): - if len(intstr) > len(str(max)): +def bounded_int(intstr: bytes, max_digits: int, exc_text: str): + if len(intstr) > max_digits: + # Length of bytestring exceeds maximum. raise ProtocolError(exc_text) - converted = int(intstr) - if converted < min or converted > max: + if len(intstr.strip(b'0123456789')) != 0: + # Contains invalid characters. raise ProtocolError(exc_text) - return converted + + return int(intstr) + + +def bounded_hex(hexstr: bytes, max_digits: int, exc_text: str): + if len(hexstr) > max_digits: + # Length of bytestring exceeds maximum. + raise ProtocolError(exc_text) + if len(hexstr.strip(b'0123456789abcdefABCDEF')) != 0: + # Contains invalid characters. + raise ProtocolError(exc_text) + + return int(hexstr, base=16) diff --git a/tests/test_parsers.py b/tests/test_parsers.py index 7d6f4e9..b76a97a 100644 --- a/tests/test_parsers.py +++ b/tests/test_parsers.py @@ -360,6 +360,32 @@ def test_parser_large_body(): assert len(p.recv_body()) == 0 +def test_parser_stream_large_body(): + body = b"x" * 6956 + + writer = io.BytesIO() + reader = io.BytesIO( + b"HTTP/1.1 200 OK\r\n" + b"Transfer-Encoding: chunked\r\n" + b"Content-Type: text/plain\r\n" + b"\r\n" + b"1b2c\r\n" + body + b'\r\n0\r\n\r\n' + ) + + p = httpx.HTTPParser(writer, reader) + p.send_method_line(b"GET", b"/", b"HTTP/1.1") + p.send_headers([(b"Host", b"example.com")]) + p.send_body(b'') + + # Checkout our buffer sizes. 
+ p.recv_status_line() + p.recv_headers() + # assert len(p.recv_body()) == 4096 + # assert len(p.recv_body()) == 2860 + assert len(p.recv_body()) == 6956 + assert len(p.recv_body()) == 0 + + def test_parser_not_enough_data_received(): writer = io.BytesIO() reader = io.BytesIO( @@ -502,7 +528,7 @@ def test_server_connection_close(): def test_invalid_status_code(): writer = io.BytesIO() reader = io.BytesIO( - b"HTTP/1.1 1000 OK\r\n" + b"HTTP/1.1 99 OK\r\n" b"Content-Length: 12\r\n" b"Content-Type: text/plain\r\n" b"\r\n" @@ -522,13 +548,7 @@ def test_invalid_status_code(): p.recv_status_line() -def test_unsupported_status_code(): - # We don't currently handle 1xx status codes. - # - # This example demonstrates 103 Early Hints behavior, - # which is typically unsupported by HTTP/1.1 clients... - # - # https://httpwg.org/specs/rfc8297.html#early-hints +def test_1xx_status_code(): writer = io.BytesIO() reader = io.BytesIO( b"HTTP/1.1 103 Early Hints\r\n" @@ -592,20 +612,15 @@ def test_received_invalid_content_length(): ]) p.send_body(b'') + p.recv_status_line() msg = "Received invalid Content-Length" with pytest.raises(httpx.ProtocolError, match=msg): - p.recv_status_line() + p.recv_headers() -def test_received_invalid_content_length(): +def test_sent_invalid_content_length(): writer = io.BytesIO() - reader = io.BytesIO( - b"HTTP/1.1 200 OK\r\n" - b"Content-Length: -999\r\n" - b"Content-Type: text/plain\r\n" - b"\r\n" - b"hello, world" - ) + reader = io.BytesIO() p = httpx.HTTPParser(writer, reader) p.send_method_line(b"GET", b"/", b"HTTP/1.1") @@ -617,3 +632,53 @@ def test_received_invalid_content_length(): (b"Host", b"example.com"), (b"Content-Length", b"100000000000000000000"), ]) + + +def test_received_invalid_characters_in_chunk_size(): + writer = io.BytesIO() + reader = io.BytesIO( + b"HTTP/1.1 200 OK\r\n" + b"Transfer-Encoding: chunked\r\n" + b"Content-Type: text/plain\r\n" + b"\r\n" + b"0xFF\r\n..." 
+ ) + + p = httpx.HTTPParser(writer, reader) + p.send_method_line(b"GET", b"/", b"HTTP/1.1") + p.send_headers([ + (b"Host", b"example.com"), + (b"Connection", b"close"), + ]) + p.send_body(b'') + + p.recv_status_line() + p.recv_headers() + msg = "Received invalid chunk size" + with pytest.raises(httpx.ProtocolError, match=msg): + p.recv_body() + + +def test_received_oversized_chunk(): + writer = io.BytesIO() + reader = io.BytesIO( + b"HTTP/1.1 200 OK\r\n" + b"Transfer-Encoding: chunked\r\n" + b"Content-Type: text/plain\r\n" + b"\r\n" + b"FFFFFFFFFF\r\n..." + ) + + p = httpx.HTTPParser(writer, reader) + p.send_method_line(b"GET", b"/", b"HTTP/1.1") + p.send_headers([ + (b"Host", b"example.com"), + (b"Connection", b"close"), + ]) + p.send_body(b'') + + p.recv_status_line() + p.recv_headers() + msg = "Received invalid chunk size" + with pytest.raises(httpx.ProtocolError, match=msg): + p.recv_body() From 75ff4ecdc518d22260e5c12c6b8a10becc3181c8 Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Wed, 18 Jun 2025 16:00:28 +0100 Subject: [PATCH 19/19] Docstrings --- src/ahttpx/_parsers.py | 66 +++++++++++++++++++++++++++++++++++------- src/httpx/_parsers.py | 66 +++++++++++++++++++++++++++++++++++------- 2 files changed, 110 insertions(+), 22 deletions(-) diff --git a/src/ahttpx/_parsers.py b/src/ahttpx/_parsers.py index d7582ff..fb658dd 100644 --- a/src/ahttpx/_parsers.py +++ b/src/ahttpx/_parsers.py @@ -5,18 +5,14 @@ # TODO... -# * Expect: 100 continue -# * Connection: keep-alive / close -# * Transfer-Encoding: chunked -# * Upgrade: ... / (CONNECT?) -# * Host: required (ordering?) +# * Upgrade +# * CONNECT # * Support 'Expect: 100 Continue' # * Add 'Error' state transitions # * Add tests to trickle data # * Add type annotations -# * Integer conversions should always be bounded and +ve. `boundedint(..., maxdigits, base)` # * Optional... HTTP/1.0 support # * Read trailing headers on Transfer-Encoding: chunked. Not just '\r\n'. 
# * When writing Transfer-Encoding: chunked, split large writes into buffer size. @@ -48,14 +44,14 @@ class HTTPParser: Usage... client = HTTPParser(writer, reader) - client.send_method_line() IDLE -> SEND_HEADERS - client.send_headers() SEND_HEADERS -> SEND_BODY - client.send_body() SEND_BODY -> SEND_BODY or DONE or MUST_CLOSE + client.send_method_line() + client.send_headers() + client.send_body() client.recv_status_line() client.recv_headers() client.recv_body() - client.start_next_cycle() DONE -> IDLE - client.close() CLOSED + client.start_next_cycle() + client.close() """ def __init__(self, writer, reader): self.writer = writer @@ -80,6 +76,14 @@ def __init__(self, writer, reader): self.processing_1xx = False def send_method_line(self, method, target, protocol): + """ + Send the initial request line: + + >>> p.send_method_line(b'GET', b'/', b'HTTP/1.1') + + The client will switch to SEND_HEADERS state. + The server will switch to SEND_STATUS_LINE state. + """ if self.our_state != State.IDLE: msg = f"Called 'send_method_line' in invalid state {self.description()}" raise ProtocolError(msg) @@ -94,6 +98,13 @@ def send_method_line(self, method, target, protocol): self.their_state = State.SEND_STATUS_LINE def send_headers(self, headers): + """ + Send the request headers: + + >>> p.send_headers([(b'Host', b'www.example.com')]) + + The client will switch to SEND_BODY state. + """ if self.our_state != State.SEND_HEADERS: msg = f"Called 'send_headers' in invalid state {self.description()}" raise ProtocolError(msg) @@ -125,6 +136,14 @@ def send_headers(self, headers): self.our_state = State.SEND_BODY def send_body(self, body): + """ + Send the request body. An empty bytes argument indicates the end of the stream: + + >>> p.send_body(b'') + + The client will switch to DONE by default. + The client will switch to MUST_CLOSE for requests with a 'Connection: close' header.
+ """ if self.our_state != State.SEND_BODY: msg = f"Called 'send_body' in invalid state {self.description()}" raise ProtocolError(msg) @@ -155,6 +174,13 @@ def send_body(self, body): self.our_state = State.MUST_CLOSE def recv_status_line(self): + """ + Receive the initial response status line: + + >>> protocol, status_code, reason_phrase = p.recv_status_line() + + The server will switch to SEND_HEADERS. + """ if self.their_state != State.SEND_STATUS_LINE: msg = f"Called 'recv_status_line' in invalid state {self.description()}" raise ProtocolError(msg) @@ -180,6 +206,14 @@ def recv_status_line(self): return protocol, status_code, reason_phrase def recv_headers(self): + """ + Receive the response headers: + + >>> headers = p.recv_headers() + + The server will switch to SEND_BODY by default. + The server will switch to SEND_STATUS_LINE for preceding 1xx responses. + """ if self.their_state != State.SEND_HEADERS: msg = f"Called 'recv_headers' in invalid state {self.description()}" raise ProtocolError(msg) @@ -214,6 +248,16 @@ def recv_headers(self): return headers def recv_body(self): + """ + Receive the response body. An empty byte string indicates the end of the stream: + + >>> buffer = bytearray() + >>> while body := p.recv_body(): + ... buffer.extend(body) + + The server will switch to DONE by default. + The server will switch to MUST_CLOSE for responses with a 'Connection: close' header. + """ if self.their_state != State.SEND_BODY: msg = f"Called 'recv_body' in invalid state {self.description()}" raise ProtocolError(msg) diff --git a/src/httpx/_parsers.py b/src/httpx/_parsers.py index d7582ff..fb658dd 100644 --- a/src/httpx/_parsers.py +++ b/src/httpx/_parsers.py @@ -5,18 +5,14 @@ # TODO... -# * Expect: 100 continue -# * Connection: keep-alive / close -# * Transfer-Encoding: chunked -# * Upgrade: ... / (CONNECT?) -# * Host: required (ordering?)
+# * Upgrade +# * CONNECT # * Support 'Expect: 100 Continue' # * Add 'Error' state transitions # * Add tests to trickle data # * Add type annotations -# * Integer conversions should always be bounded and +ve. `boundedint(..., maxdigits, base)` # * Optional... HTTP/1.0 support # * Read trailing headers on Transfer-Encoding: chunked. Not just '\r\n'. # * When writing Transfer-Encoding: chunked, split large writes into buffer size. @@ -48,14 +44,14 @@ class HTTPParser: Usage... client = HTTPParser(writer, reader) - client.send_method_line() IDLE -> SEND_HEADERS - client.send_headers() SEND_HEADERS -> SEND_BODY - client.send_body() SEND_BODY -> SEND_BODY or DONE or MUST_CLOSE + client.send_method_line() + client.send_headers() + client.send_body() client.recv_status_line() client.recv_headers() client.recv_body() - client.start_next_cycle() DONE -> IDLE - client.close() CLOSED + client.start_next_cycle() + client.close() """ def __init__(self, writer, reader): self.writer = writer @@ -80,6 +76,14 @@ def __init__(self, writer, reader): self.processing_1xx = False def send_method_line(self, method, target, protocol): + """ + Send the initial request line: + + >>> p.send_method_line(b'GET', b'/', b'HTTP/1.1') + + The client will switch to SEND_HEADERS state. + The server will switch to SEND_STATUS_LINE state. + """ if self.our_state != State.IDLE: msg = f"Called 'send_method_line' in invalid state {self.description()}" raise ProtocolError(msg) @@ -94,6 +98,13 @@ def send_method_line(self, method, target, protocol): self.their_state = State.SEND_STATUS_LINE def send_headers(self, headers): + """ + Send the request headers: + + >>> p.send_headers([(b'Host', b'www.example.com')]) + + The client will switch to SEND_BODY state. 
+ """ if self.our_state != State.SEND_HEADERS: msg = f"Called 'send_headers' in invalid state {self.description()}" raise ProtocolError(msg) @@ -125,6 +136,14 @@ def send_headers(self, headers): self.our_state = State.SEND_BODY def send_body(self, body): + """ + Send the request body. An empty bytes argument indicates the end of the stream: + + >>> p.send_body(b'') + + The client will switch to DONE by default. + The client will switch to MUST_CLOSE for requests with a 'Connection: close' header. + """ if self.our_state != State.SEND_BODY: msg = f"Called 'send_body' in invalid state {self.description()}" raise ProtocolError(msg) @@ -155,6 +174,13 @@ def send_body(self, body): self.our_state = State.MUST_CLOSE def recv_status_line(self): + """ + Receive the initial response status line: + + >>> protocol, status_code, reason_phrase = p.recv_status_line() + + The server will switch to SEND_HEADERS. + """ if self.their_state != State.SEND_STATUS_LINE: msg = f"Called 'recv_status_line' in invalid state {self.description()}" raise ProtocolError(msg) @@ -180,6 +206,14 @@ def recv_status_line(self): return protocol, status_code, reason_phrase def recv_headers(self): + """ + Receive the response headers: + + >>> headers = p.recv_headers() + + The server will switch to SEND_BODY by default. + The server will switch to SEND_STATUS_LINE for preceding 1xx responses. + """ if self.their_state != State.SEND_HEADERS: msg = f"Called 'recv_headers' in invalid state {self.description()}" raise ProtocolError(msg) @@ -214,6 +248,16 @@ def recv_headers(self): return headers def recv_body(self): + """ + Receive the response body. An empty byte string indicates the end of the stream: + + >>> buffer = bytearray() + >>> while body := p.recv_body(): + ... buffer.extend(body) + + The server will switch to DONE by default. + The server will switch to MUST_CLOSE for responses with a 'Connection: close' header.
+ """ if self.their_state != State.SEND_BODY: msg = f"Called 'recv_body' in invalid state {self.description()}" raise ProtocolError(msg)