diff --git a/.github/workflows/cifuzz.yml b/.github/workflows/cifuzz.yml index 869f7e5..1c1f26a 100644 --- a/.github/workflows/cifuzz.yml +++ b/.github/workflows/cifuzz.yml @@ -25,13 +25,13 @@ jobs: steps: - name: Build Fuzzers id: build - uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@675ddfb89ae1c614f1dfa99d18b91cd6d1d6b88b # master 2026-04-10 + uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@9ff5089dbb11800055b6bc1af919a84b06dee2c8 # master 2026-04-27 with: oss-fuzz-project-name: "python-multipart" language: python - name: Run Fuzzers - uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@675ddfb89ae1c614f1dfa99d18b91cd6d1d6b88b # master 2026-04-10 + uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@9ff5089dbb11800055b6bc1af919a84b06dee2c8 # master 2026-04-27 with: oss-fuzz-project-name: "python-multipart" language: python diff --git a/python_multipart/multipart.py b/python_multipart/multipart.py index 2435bdc..8c850af 100644 --- a/python_multipart/multipart.py +++ b/python_multipart/multipart.py @@ -149,6 +149,27 @@ class MultipartState(IntEnum): """Default maximum size of a single multipart header line, including syntax overhead.""" +def _split_mime_parameters(value: str) -> list[str]: + """Split a MIME parameter string on semicolons that are outside quoted strings.""" + parts: list[str] = [] + start = 0 + in_quotes = False + i = 0 + while i < len(value): + c = value[i] + if in_quotes and c == "\\": + i += 2 # skip the escaped character + continue + if c == '"': + in_quotes = not in_quotes + elif c == ";" and not in_quotes: + parts.append(value[start:i]) + start = i + 1 + i += 1 + parts.append(value[start:]) + return parts + + def parse_options_header(value: str | bytes | None) -> tuple[bytes, dict[bytes, bytes]]: """Parses a Content-Type header into a value in the following format: (content_type, {parameters}).""" # Uses email.message.Message to parse the header as described in PEP 594. 
def test_rejects_oversized_rfc_2231_index(self) -> None:
    """A huge RFC 2231 section index yields the bare content type and no params."""
    # NOTE(review): 4301 digits is presumably one past CPython's default
    # 4300-digit limit for str -> int conversion -- confirm.
    oversized_index = "1" * 4301
    header = "text/plain; filename*" + oversized_index + "*=utf-8''x"

    content_type, options = parse_options_header(header)

    self.assertEqual(content_type, b"text/plain")
    self.assertEqual(options, {})


def test_rejects_mixed_rfc_2231_continuations(self) -> None:
    """Mixing `filename*` with `filename*0*` drops all parameters."""
    header = "text/plain; filename*=utf-8''a; filename*0*=utf-8''b"

    content_type, options = parse_options_header(header)

    self.assertEqual(content_type, b"text/plain")
    self.assertEqual(options, {})


def test_quoted_value_containing_rfc_2231_like_text(self) -> None:
    """RFC 2231-looking text inside a quoted value must not trigger rejection."""
    # The semicolon inside the quoted `notes` value must not be treated as a
    # parameter separator, so the embedded "filename*0*" is not a parameter.
    header = "text/plain; notes=\"a;filename*0*=utf-8''junk\"; filename*=utf-8''real.txt"

    content_type, options = parse_options_header(header)

    self.assertEqual(content_type, b"text/plain")
    # Only the genuine `filename*` parameter should be decoded.
    self.assertEqual(options[b"filename"], b"real.txt")