From c2bf11e6011b1e78024839e3810f79bf1dd5c1d5 Mon Sep 17 00:00:00 2001 From: BobTheBuidler Date: Tue, 7 Oct 2025 12:00:41 +0000 Subject: [PATCH 01/22] [mypyc] feat: improve LoadLiteral annotation determinism --- mypyc/codegen/emit.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/mypyc/codegen/emit.py b/mypyc/codegen/emit.py index 4ef53296ef0d..df9d8c8143c1 100644 --- a/mypyc/codegen/emit.py +++ b/mypyc/codegen/emit.py @@ -208,6 +208,15 @@ def object_annotation(self, obj: object, line: str) -> str: if any(x in formatted for x in ("/*", "*/", "\0")): return "" + # make frozenset annotations deterministic + if formatted.startswith("frozenset({"): + frozenset_items = formatted[11:-2] + # if our frozenset contains another frozenset or a tuple, we will need better logic + # here, but this redimentary logic will still vastly improve codegen determinism. + if "(" not in frozenset_items: + sorted_items = ", ".join(sorted(frozenset_items.split(", "))) + formatted = "frozenset({" + sorted_items + "})" + if "\n" in formatted: first_line, rest = formatted.split("\n", maxsplit=1) comment_continued = textwrap.indent(rest, (line_width + 3) * " ") From d750ec12df4d4696288c3905eb9cbcec8c749afa Mon Sep 17 00:00:00 2001 From: BobTheBuidler <70677534+BobTheBuidler@users.noreply.github.com> Date: Tue, 7 Oct 2025 12:00:41 +0000 Subject: [PATCH 02/22] Update emit.py --- mypyc/codegen/emit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mypyc/codegen/emit.py b/mypyc/codegen/emit.py index df9d8c8143c1..c8706f19e548 100644 --- a/mypyc/codegen/emit.py +++ b/mypyc/codegen/emit.py @@ -212,7 +212,7 @@ def object_annotation(self, obj: object, line: str) -> str: if formatted.startswith("frozenset({"): frozenset_items = formatted[11:-2] # if our frozenset contains another frozenset or a tuple, we will need better logic - # here, but this redimentary logic will still vastly improve codegen determinism. + # here, but this rudimentary logic will still vastly improve codegen determinism. if "(" not in frozenset_items: sorted_items = ", ".join(sorted(frozenset_items.split(", "))) formatted = "frozenset({" + sorted_items + "})" From 486f9f01b253c31bbd0b2b3faa227adc820c691a Mon Sep 17 00:00:00 2001 From: BobTheBuidler <70677534+BobTheBuidler@users.noreply.github.com> Date: Tue, 7 Oct 2025 12:00:41 +0000 Subject: [PATCH 03/22] new sort key --- mypyc/codegen/emit.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/mypyc/codegen/emit.py b/mypyc/codegen/emit.py index c8706f19e548..e33b39c48095 100644 --- a/mypyc/codegen/emit.py +++ b/mypyc/codegen/emit.py @@ -1235,3 +1235,11 @@ def c_array_initializer(components: list[str], *, indented: bool = False) -> str # Multi-line result res.append(indent + ", ".join(current)) return "{\n " + ",\n ".join(res) + "\n" + indent + "}" + + +class _mypyc_safe_key(pprint._safe_key): + """A custom sort key implementation for pprint that makes the output deterministic + for all literal types supported by mypyc + """ + def __lt__(self, other: "_mypyc_safe_key") -> bool: + return str(type(self.obj)) + repr(self.obj) < str(type(other.obj)) + repr(other.obj) From c30a72735180daed5e5b141dc95fa6e35f58b444 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 7 Oct 2025 12:00:41 +0000 Subject: [PATCH 04/22] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mypyc/codegen/emit.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mypyc/codegen/emit.py b/mypyc/codegen/emit.py index e33b39c48095..10b831be5623 100644 --- a/mypyc/codegen/emit.py +++ b/mypyc/codegen/emit.py @@ -1241,5 +1241,6 @@ class _mypyc_safe_key(pprint._safe_key): """A custom sort key implementation for pprint that makes the output deterministic for all literal types supported by mypyc """ - def __lt__(self, other: "_mypyc_safe_key") -> bool: + + def __lt__(self, other: _mypyc_safe_key) -> bool: return str(type(self.obj)) + repr(self.obj) < str(type(other.obj)) + repr(other.obj) From d174f4eeb3538ef046c05a79c44a975bc8c321ee Mon Sep 17 00:00:00 2001 From: BobTheBuidler <70677534+BobTheBuidler@users.noreply.github.com> Date: Tue, 7 Oct 2025 12:00:41 +0000 Subject: [PATCH 05/22] use new safe key --- mypyc/codegen/emit.py | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/mypyc/codegen/emit.py b/mypyc/codegen/emit.py index 10b831be5623..ac4ccb665f48 100644 --- a/mypyc/codegen/emit.py +++ b/mypyc/codegen/emit.py @@ -204,19 +204,20 @@ def object_annotation(self, obj: object, line: str) -> str: If it contains illegal characters, an empty string is returned.""" line_width = self._indent + len(line) + + # temporarily override pprint._safe_key + default_safe_key = pprint._safe_key + pprint._safe_key = _mypyc_safe_key + + # pretty print the object formatted = pprint.pformat(obj, compact=True, width=max(90 - line_width, 20)) + + # replace the _safe_key + pprint._safe_key = default_safe_key + if any(x in formatted for x in ("/*", "*/", "\0")): return "" - # make frozenset annotations deterministic - if formatted.startswith("frozenset({"): - frozenset_items = formatted[11:-2] - # if our frozenset contains another frozenset or a tuple, we will need better logic - # here, but this rudimentary logic will still vastly improve codegen determinism. - if "(" not in frozenset_items: - sorted_items = ", ".join(sorted(frozenset_items.split(", "))) - formatted = "frozenset({" + sorted_items + "})" - if "\n" in formatted: first_line, rest = formatted.split("\n", maxsplit=1) comment_continued = textwrap.indent(rest, (line_width + 3) * " ") @@ -1239,8 +1240,10 @@ def c_array_initializer(components: list[str], *, indented: bool = False) -> str class _mypyc_safe_key(pprint._safe_key): """A custom sort key implementation for pprint that makes the output deterministic - for all literal types supported by mypyc - """ + for all literal types supported by mypyc. - def __lt__(self, other: _mypyc_safe_key) -> bool: + This is NOT safe for use as a sort key for other types, so we MUST replace the + original pprint._safe_key once we've pprinted our object. + """ + def __lt__(self, other: "_mypyc_safe_key") -> bool: return str(type(self.obj)) + repr(self.obj) < str(type(other.obj)) + repr(other.obj) From 28c0399bd04939af69a1445de0d34ea845b89556 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 7 Oct 2025 12:00:41 +0000 Subject: [PATCH 06/22] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mypyc/codegen/emit.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mypyc/codegen/emit.py b/mypyc/codegen/emit.py index ac4ccb665f48..1d8b8ef5022f 100644 --- a/mypyc/codegen/emit.py +++ b/mypyc/codegen/emit.py @@ -214,7 +214,7 @@ def object_annotation(self, obj: object, line: str) -> str: # replace the _safe_key pprint._safe_key = default_safe_key - + if any(x in formatted for x in ("/*", "*/", "\0")): return "" @@ -1245,5 +1245,6 @@ class _mypyc_safe_key(pprint._safe_key): This is NOT safe for use as a sort key for other types, so we MUST replace the original pprint._safe_key once we've pprinted our object. """ - def __lt__(self, other: "_mypyc_safe_key") -> bool: + + def __lt__(self, other: _mypyc_safe_key) -> bool: return str(type(self.obj)) + repr(self.obj) < str(type(other.obj)) + repr(other.obj) From be0de786c953ed44bc8d964de28be1ac016321aa Mon Sep 17 00:00:00 2001 From: BobTheBuidler <70677534+BobTheBuidler@users.noreply.github.com> Date: Tue, 7 Oct 2025 12:00:41 +0000 Subject: [PATCH 07/22] Update emit.py --- mypyc/codegen/emit.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mypyc/codegen/emit.py b/mypyc/codegen/emit.py index 1d8b8ef5022f..849322ef26e9 100644 --- a/mypyc/codegen/emit.py +++ b/mypyc/codegen/emit.py @@ -206,14 +206,14 @@ def object_annotation(self, obj: object, line: str) -> str: line_width = self._indent + len(line) # temporarily override pprint._safe_key - default_safe_key = pprint._safe_key - pprint._safe_key = _mypyc_safe_key + default_safe_key = pprint._safe_key # type: ignore [attr-defined] + pprint._safe_key = _mypyc_safe_key # type: ignore [attr-defined] # pretty print the object formatted = pprint.pformat(obj, compact=True, width=max(90 - line_width, 20)) # replace the _safe_key - pprint._safe_key = default_safe_key + pprint._safe_key = default_safe_key # type: ignore [attr-defined] if any(x in formatted for x in ("/*", "*/", "\0")): return "" @@ -1238,7 +1238,7 @@ def c_array_initializer(components: list[str], *, indented: bool = False) -> str return "{\n " + ",\n ".join(res) + "\n" + indent + "}" -class _mypyc_safe_key(pprint._safe_key): +def _mypyc_safe_key(pprint._safe_key): # type: ignore [attr-defined, misc] """A custom sort key implementation for pprint that makes the output deterministic for all literal types supported by mypyc. From d33d2d979cfab684058ac968f5afdd4cc8de15fb Mon Sep 17 00:00:00 2001 From: BobTheBuidler <70677534+BobTheBuidler@users.noreply.github.com> Date: Tue, 7 Oct 2025 12:00:41 +0000 Subject: [PATCH 08/22] Update emit.py --- mypyc/codegen/emit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mypyc/codegen/emit.py b/mypyc/codegen/emit.py index 849322ef26e9..394ae0d11644 100644 --- a/mypyc/codegen/emit.py +++ b/mypyc/codegen/emit.py @@ -1238,7 +1238,7 @@ def c_array_initializer(components: list[str], *, indented: bool = False) -> str return "{\n " + ",\n ".join(res) + "\n" + indent + "}" -def _mypyc_safe_key(pprint._safe_key): # type: ignore [attr-defined, misc] +class _mypyc_safe_key(pprint._safe_key): # type: ignore [attr-defined, misc] """A custom sort key implementation for pprint that makes the output deterministic for all literal types supported by mypyc. From 650d99d6b74e5433de399f7231d07cd7054dd3bb Mon Sep 17 00:00:00 2001 From: BobTheBuidler <70677534+BobTheBuidler@users.noreply.github.com> Date: Tue, 7 Oct 2025 12:00:41 +0000 Subject: [PATCH 09/22] Update emit.py --- mypyc/codegen/emit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mypyc/codegen/emit.py b/mypyc/codegen/emit.py index 394ae0d11644..f2bc07d6587f 100644 --- a/mypyc/codegen/emit.py +++ b/mypyc/codegen/emit.py @@ -1238,7 +1238,7 @@ def c_array_initializer(components: list[str], *, indented: bool = False) -> str return "{\n " + ",\n ".join(res) + "\n" + indent + "}" -class _mypyc_safe_key(pprint._safe_key): # type: ignore [attr-defined, misc] +class _mypyc_safe_key(pprint._safe_key): # type: ignore [attr-defined, name-defined, misc] """A custom sort key implementation for pprint that makes the output deterministic for all literal types supported by mypyc. From da2278578d3431b5dcf8ba9f1360aa3e4bb31ccd Mon Sep 17 00:00:00 2001 From: BobTheBuidler <70677534+BobTheBuidler@users.noreply.github.com> Date: Tue, 7 Oct 2025 12:00:41 +0000 Subject: [PATCH 10/22] Update emit.py --- mypyc/codegen/emit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mypyc/codegen/emit.py b/mypyc/codegen/emit.py index f2bc07d6587f..24df8e6ecf00 100644 --- a/mypyc/codegen/emit.py +++ b/mypyc/codegen/emit.py @@ -1238,7 +1238,7 @@ def c_array_initializer(components: list[str], *, indented: bool = False) -> str return "{\n " + ",\n ".join(res) + "\n" + indent + "}" -class _mypyc_safe_key(pprint._safe_key): # type: ignore [attr-defined, name-defined, misc] +class _mypyc_safe_key(pprint._safe_key): # type: ignore [name-defined, misc] """A custom sort key implementation for pprint that makes the output deterministic for all literal types supported by mypyc. From 96aa63e6818ce44da0d0da724523d37d2da185fc Mon Sep 17 00:00:00 2001 From: BobTheBuidler <70677534+BobTheBuidler@users.noreply.github.com> Date: Tue, 7 Oct 2025 12:00:41 +0000 Subject: [PATCH 11/22] refactor --- mypyc/codegen/emit.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/mypyc/codegen/emit.py b/mypyc/codegen/emit.py index 24df8e6ecf00..9db22cc75c79 100644 --- a/mypyc/codegen/emit.py +++ b/mypyc/codegen/emit.py @@ -1238,13 +1238,11 @@ def c_array_initializer(components: list[str], *, indented: bool = False) -> str return "{\n " + ",\n ".join(res) + "\n" + indent + "}" -class _mypyc_safe_key(pprint._safe_key): # type: ignore [name-defined, misc] +def _mypyc_safe_key(obj: object) -> str: """A custom sort key implementation for pprint that makes the output deterministic for all literal types supported by mypyc. This is NOT safe for use as a sort key for other types, so we MUST replace the original pprint._safe_key once we've pprinted our object. """ - - def __lt__(self, other: _mypyc_safe_key) -> bool: - return str(type(self.obj)) + repr(self.obj) < str(type(other.obj)) + repr(other.obj) + return str(type(obj)) + repr(obj) From 58e4b6752ff39e15dcb4634db3f94c46207831ab Mon Sep 17 00:00:00 2001 From: BobTheBuidler <70677534+BobTheBuidler@users.noreply.github.com> Date: Tue, 7 Oct 2025 12:32:40 -0400 Subject: [PATCH 12/22] Update emit.py --- mypyc/codegen/emit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mypyc/codegen/emit.py b/mypyc/codegen/emit.py index 9db22cc75c79..ebb152923ff6 100644 --- a/mypyc/codegen/emit.py +++ b/mypyc/codegen/emit.py @@ -1245,4 +1245,4 @@ def _mypyc_safe_key(obj: object) -> str: This is NOT safe for use as a sort key for other types, so we MUST replace the original pprint._safe_key once we've pprinted our object. """ - return str(type(obj)) + repr(obj) + return str(type(obj)) + pprint.pformat(obj) From 629f1cf99bd7febced2ba9f4f07f6fc045c1f1b3 Mon Sep 17 00:00:00 2001 From: BobTheBuidler Date: Tue, 6 Jan 2026 06:11:39 +0000 Subject: [PATCH 13/22] extract func pformat_deterministic --- mypyc/codegen/emit.py | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/mypyc/codegen/emit.py b/mypyc/codegen/emit.py index 44f78f6510c3..bb813a6c52fd 100644 --- a/mypyc/codegen/emit.py +++ b/mypyc/codegen/emit.py @@ -210,16 +210,7 @@ def object_annotation(self, obj: object, line: str) -> str: If it contains illegal characters, an empty string is returned.""" line_width = self._indent + len(line) - - # temporarily override pprint._safe_key - default_safe_key = pprint._safe_key # type: ignore [attr-defined] - pprint._safe_key = _mypyc_safe_key # type: ignore [attr-defined] - - # pretty print the object - formatted = pprint.pformat(obj, compact=True, width=max(90 - line_width, 20)) - - # replace the _safe_key - pprint._safe_key = default_safe_key # type: ignore [attr-defined] + formatted = pformat_deterministic(obj, line_width) if any(x in formatted for x in ("/*", "*/", "\0")): return "" @@ -1283,6 +1274,19 @@ def native_function_doc_initializer(func: FuncIR) -> str: return c_string_initializer(docstring.encode("ascii", errors="backslashreplace")) +def pformat_deterministic(obj: object, line_width: int) -> str: + """Pretty-print `obj` with deterministic sorting for mypyc literal types.""" + # Temporarily override pprint._safe_key + default_safe_key = pprint._safe_key # type: ignore [attr-defined] + pprint._safe_key = _mypyc_safe_key # type: ignore [attr-defined] + + try: + return pprint.pformat(obj, compact=True, width=max(90 - line_width, 20)) + finally: + # Always restore the original key to avoid affecting other pprint users. + pprint._safe_key = default_safe_key # type: ignore [attr-defined] + + def _mypyc_safe_key(obj: object) -> str: """A custom sort key implementation for pprint that makes the output deterministic for all literal types supported by mypyc. From ac8f3ffc15abc90a304d144bfd47fed273b7f73f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 6 Jan 2026 06:15:37 +0000 Subject: [PATCH 14/22] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mypyc/codegen/emit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mypyc/codegen/emit.py b/mypyc/codegen/emit.py index bb813a6c52fd..0631ecd479c0 100644 --- a/mypyc/codegen/emit.py +++ b/mypyc/codegen/emit.py @@ -1279,7 +1279,7 @@ def pformat_deterministic(obj: object, line_width: int) -> str: # Temporarily override pprint._safe_key default_safe_key = pprint._safe_key # type: ignore [attr-defined] pprint._safe_key = _mypyc_safe_key # type: ignore [attr-defined] - + try: return pprint.pformat(obj, compact=True, width=max(90 - line_width, 20)) finally: From 866d86e5d46d1ee01edcb52fd022f8969c49a524 Mon Sep 17 00:00:00 2001 From: BobTheBuidler Date: Tue, 6 Jan 2026 06:27:51 +0000 Subject: [PATCH 15/22] add tests --- mypyc/codegen/emit.py | 10 +++++----- mypyc/test/test_emit.py | 34 +++++++++++++++++++++++++++++++++- 2 files changed, 38 insertions(+), 6 deletions(-) diff --git a/mypyc/codegen/emit.py b/mypyc/codegen/emit.py index bb813a6c52fd..a59e4850cd18 100644 --- a/mypyc/codegen/emit.py +++ b/mypyc/codegen/emit.py @@ -210,7 +210,7 @@ def object_annotation(self, obj: object, line: str) -> str: If it contains illegal characters, an empty string is returned.""" line_width = self._indent + len(line) - formatted = pformat_deterministic(obj, line_width) + formatted = pformat_deterministic(obj, max(90 - line_width, 20)) if any(x in formatted for x in ("/*", "*/", "\0")): return "" @@ -1274,14 +1274,14 @@ def native_function_doc_initializer(func: FuncIR) -> str: return c_string_initializer(docstring.encode("ascii", errors="backslashreplace")) -def pformat_deterministic(obj: object, line_width: int) -> str: +def pformat_deterministic(obj: object, width: int) -> str: """Pretty-print `obj` with deterministic sorting for mypyc literal types.""" - # Temporarily override pprint._safe_key + # Temporarily override pprint._safe_key to get deterministic ordering of containers. default_safe_key = pprint._safe_key # type: ignore [attr-defined] pprint._safe_key = _mypyc_safe_key # type: ignore [attr-defined] - + try: - return pprint.pformat(obj, compact=True, width=max(90 - line_width, 20)) + return pprint.pformat(obj, compact=True, width=width) finally: # Always restore the original key to avoid affecting other pprint users. pprint._safe_key = default_safe_key # type: ignore [attr-defined] diff --git a/mypyc/test/test_emit.py b/mypyc/test/test_emit.py index 1baed3964299..7fef63aa744c 100644 --- a/mypyc/test/test_emit.py +++ b/mypyc/test/test_emit.py @@ -1,8 +1,9 @@ from __future__ import annotations +import pprint import unittest -from mypyc.codegen.emit import Emitter, EmitterContext +from mypyc.codegen.emit import Emitter, EmitterContext, pformat_deterministic from mypyc.common import HAVE_IMMORTAL from mypyc.ir.class_ir import ClassIR from mypyc.ir.ops import BasicBlock, Register, Value @@ -21,6 +22,37 @@ from mypyc.namegen import NameGenerator +class TestPformatDeterministic(unittest.TestCase): + def test_frozenset_elements_sorted(self) -> None: + fs_small = frozenset({("a", 1)}) + fs_large = frozenset({("a", 1), ("b", 2)}) + literal_a = frozenset({fs_large, fs_small}) + literal_b = frozenset({fs_small, fs_large}) + expected = "frozenset({frozenset({('a', 1)}), frozenset({('a', 1), ('b', 2)})})" + + assert pformat_deterministic(literal_a, 80) == expected + assert pformat_deterministic(literal_b, 80) == expected + + def test_nested_supported_literals(self) -> None: + nested_frozen = frozenset({("m", 0), ("n", 1)}) + item_a = ("outer", 1, nested_frozen) + item_b = ("outer", 2, frozenset({("x", 3)})) + literal_a = frozenset({item_a, item_b}) + literal_b = frozenset({item_b, item_a}) + expected = ( + "frozenset({('outer', 1, frozenset({('m', 0), ('n', 1)})), " + "('outer', 2, frozenset({('x', 3)}))})" + ) + + assert pformat_deterministic(literal_a, 120) == expected + assert pformat_deterministic(literal_b, 120) == expected + + def test_restores_default_safe_key(self) -> None: + original_safe_key = pprint._safe_key + pformat_deterministic({"key": "value"}, 80) + assert pprint._safe_key is original_safe_key + + class TestEmitter(unittest.TestCase): def setUp(self) -> None: self.n = Register(int_rprimitive, "n") From 6e0153a749f03fb6a8a9372aa6e6c87571744e57 Mon Sep 17 00:00:00 2001 From: BobTheBuidler Date: Tue, 6 Jan 2026 06:49:32 +0000 Subject: [PATCH 16/22] fix test --- mypyc/test/test_emit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mypyc/test/test_emit.py b/mypyc/test/test_emit.py index 7fef63aa744c..b197ab036424 100644 --- a/mypyc/test/test_emit.py +++ b/mypyc/test/test_emit.py @@ -28,7 +28,7 @@ def test_frozenset_elements_sorted(self) -> None: fs_large = frozenset({("a", 1), ("b", 2)}) literal_a = frozenset({fs_large, fs_small}) literal_b = frozenset({fs_small, fs_large}) - expected = "frozenset({frozenset({('a', 1)}), frozenset({('a', 1), ('b', 2)})})" + expected = "frozenset({frozenset({('a', 1), ('b', 2)}), frozenset({('a', 1)})})" assert pformat_deterministic(literal_a, 80) == expected assert pformat_deterministic(literal_b, 80) == expected From 5aa3d4023d70904d9cab455b022af59ef51b39a9 Mon Sep 17 00:00:00 2001 From: BobTheBuidler Date: Tue, 6 Jan 2026 07:05:36 +0000 Subject: [PATCH 17/22] fix test --- mypyc/test/test_emit.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/mypyc/test/test_emit.py b/mypyc/test/test_emit.py index b197ab036424..ef0f0ae9c292 100644 --- a/mypyc/test/test_emit.py +++ b/mypyc/test/test_emit.py @@ -39,10 +39,7 @@ def test_nested_supported_literals(self) -> None: item_b = ("outer", 2, frozenset({("x", 3)})) literal_a = frozenset({item_a, item_b}) literal_b = frozenset({item_b, item_a}) - expected = ( - "frozenset({('outer', 1, frozenset({('m', 0), ('n', 1)})), " - "('outer', 2, frozenset({('x', 3)}))})" - ) + expected = "frozenset({('outer', 2, frozenset({('x', 3)})), ('outer', 1, frozenset({('m', 0), ('n', 1)}))})" assert pformat_deterministic(literal_a, 120) == expected assert pformat_deterministic(literal_b, 120) == expected From ab520a241b7f5b735d8035fb07e26631f4aa5d28 Mon Sep 17 00:00:00 2001 From: BobTheBuidler Date: Tue, 6 Jan 2026 07:17:43 +0000 Subject: [PATCH 18/22] fix test --- mypyc/test/test_emit.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mypyc/test/test_emit.py b/mypyc/test/test_emit.py index ef0f0ae9c292..c084b4758476 100644 --- a/mypyc/test/test_emit.py +++ b/mypyc/test/test_emit.py @@ -28,7 +28,7 @@ def test_frozenset_elements_sorted(self) -> None: fs_large = frozenset({("a", 1), ("b", 2)}) literal_a = frozenset({fs_large, fs_small}) literal_b = frozenset({fs_small, fs_large}) - expected = "frozenset({frozenset({('a', 1), ('b', 2)}), frozenset({('a', 1)})})" + expected = "frozenset({frozenset({('b', 2), ('a', 1)}), frozenset({('a', 1)})})" assert pformat_deterministic(literal_a, 80) == expected assert pformat_deterministic(literal_b, 80) == expected @@ -39,7 +39,7 @@ def test_nested_supported_literals(self) -> None: item_b = ("outer", 2, frozenset({("x", 3)})) literal_a = frozenset({item_a, item_b}) literal_b = frozenset({item_b, item_a}) - expected = "frozenset({('outer', 2, frozenset({('x', 3)})), ('outer', 1, frozenset({('m', 0), ('n', 1)}))})" + expected = "frozenset({('outer', 1, frozenset({('m', 0), ('n', 1)})), ('outer', 2, frozenset({('x', 3)}))})" assert pformat_deterministic(literal_a, 120) == expected assert pformat_deterministic(literal_b, 120) == expected From 99a07eadb37f85a75de4c3b96b5d63e518f0d1cc Mon Sep 17 00:00:00 2001 From: BobTheBuidler Date: Tue, 6 Jan 2026 08:09:00 +0000 Subject: [PATCH 19/22] fix recursion --- mypyc/codegen/emit.py | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/mypyc/codegen/emit.py b/mypyc/codegen/emit.py index a59e4850cd18..554f802fa344 100644 --- a/mypyc/codegen/emit.py +++ b/mypyc/codegen/emit.py @@ -1281,7 +1281,7 @@ def pformat_deterministic(obj: object, width: int) -> str: pprint._safe_key = _mypyc_safe_key # type: ignore [attr-defined] try: - return pprint.pformat(obj, compact=True, width=width) + return pprint.pformat(_normalize_sets(obj), compact=True, width=width) finally: # Always restore the original key to avoid affecting other pprint users. pprint._safe_key = default_safe_key # type: ignore [attr-defined] @@ -1293,5 +1293,22 @@ def _mypyc_safe_key(obj: object) -> str: This is NOT safe for use as a sort key for other types, so we MUST replace the original pprint._safe_key once we've pprinted our object. + + Since this is a bit hacky, see for context https://github.com/python/mypy/pull/20012 + """ + return str(type(obj)) + pprint.pformat(obj, compact=True, sort_dicts=True) + + +def _normalize_sets(obj: object) -> object: + """Recursively normalize sets/frozensets so pprint sees a stable order. + + We rebuild each set/frozenset from a deterministically sorted list of + elements (using _mypyc_safe_key), recursing into tuples those sets contain. + This keeps repr (used internally) output deterministic without otherwise changing content. """ - return str(type(obj)) + pprint.pformat(obj) + if isinstance(obj, frozenset): + return frozenset(map(_normalize_sets, sorted(obj, key=_mypyc_safe_key))) + elif isinstance(obj, tuple): + return tuple(map(_normalize_sets, obj)) + else: + return obj From 065692613959ba27fba90ff460d7619ef6e31da5 Mon Sep 17 00:00:00 2001 From: BobTheBuidler Date: Tue, 6 Jan 2026 08:51:26 +0000 Subject: [PATCH 20/22] use subclass --- mypyc/codegen/emit.py | 50 +++++++++++++++++++++++++++++++------------ 1 file changed, 36 insertions(+), 14 deletions(-) diff --git a/mypyc/codegen/emit.py b/mypyc/codegen/emit.py index 554f802fa344..63d3c27b28d1 100644 --- a/mypyc/codegen/emit.py +++ b/mypyc/codegen/emit.py @@ -6,7 +6,7 @@ import sys import textwrap from collections.abc import Callable -from typing import Final +from typing import TYPE_CHECKING, Final from mypyc.codegen.cstring import c_string_initializer from mypyc.codegen.literals import Literals @@ -59,6 +59,9 @@ from mypyc.namegen import NameGenerator, exported_name from mypyc.sametype import is_same_type +if TYPE_CHECKING: + from _typeshed import SupportsWrite + # Whether to insert debug asserts for all error handling, to quickly # catch errors propagating without exceptions set. DEBUG_ERRORS: Final = False @@ -1281,7 +1284,8 @@ def pformat_deterministic(obj: object, width: int) -> str: pprint._safe_key = _mypyc_safe_key # type: ignore [attr-defined] try: - return pprint.pformat(_normalize_sets(obj), compact=True, width=width) + printer = _DeterministicPrettyPrinter(width=width, compact=True, sort_dicts=True) + return printer.pformat(obj) finally: # Always restore the original key to avoid affecting other pprint users. pprint._safe_key = default_safe_key # type: ignore [attr-defined] @@ -1299,16 +1303,34 @@ def _mypyc_safe_key(obj: object) -> str: return str(type(obj)) + pprint.pformat(obj, compact=True, sort_dicts=True) -def _normalize_sets(obj: object) -> object: - """Recursively normalize sets/frozensets so pprint sees a stable order. +class _DeterministicPrettyPrinter(pprint.PrettyPrinter): + """PrettyPrinter that sorts set/frozenset elements deterministically.""" - We rebuild each set/frozenset from a deterministically sorted list of - elements (using _mypyc_safe_key), recursing into tuples those sets contain. - This keeps repr (used internally) output deterministic without otherwise changing content. - """ - if isinstance(obj, frozenset): - return frozenset(map(_normalize_sets, sorted(obj, key=_mypyc_safe_key))) - elif isinstance(obj, tuple): - return tuple(map(_normalize_sets, obj)) - else: - return obj + _dispatch = pprint.PrettyPrinter._dispatch.copy() + + def _pprint_set( + self, + object: set[object] | frozenset[object], + stream: "SupportsWrite[str]", + indent: int, + allowance: int, + context: dict[int, int], + level: int, + ) -> None: + if not object: + stream.write(repr(object)) + return + typ = type(object) + if typ is set: + stream.write("{") + endchar = "}" + else: + stream.write("frozenset({") + endchar = "})" + indent += len("frozenset(") + items = sorted(object, key=_mypyc_safe_key) + self._format_items(items, stream, indent, allowance + len(endchar), context, level) + stream.write(endchar) + + _dispatch[set.__repr__] = _pprint_set + _dispatch[frozenset.__repr__] = _pprint_set From 3b19181d96c03d589f6f9738a9567fa612b69962 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 6 Jan 2026 08:53:45 +0000 Subject: [PATCH 21/22] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mypyc/codegen/emit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mypyc/codegen/emit.py b/mypyc/codegen/emit.py index 63d3c27b28d1..cf648001b824 100644 --- a/mypyc/codegen/emit.py +++ b/mypyc/codegen/emit.py @@ -1311,7 +1311,7 @@ class _DeterministicPrettyPrinter(pprint.PrettyPrinter): def _pprint_set( self, object: set[object] | frozenset[object], - stream: "SupportsWrite[str]", + stream: SupportsWrite[str], indent: int, allowance: int, context: dict[int, int], From 09328d5affd274d1371c6746e5161f6efe9fcc85 Mon Sep 17 00:00:00 2001 From: BobTheBuidler Date: Tue, 6 Jan 2026 09:06:06 +0000 Subject: [PATCH 22/22] fix test --- mypyc/test/test_emit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mypyc/test/test_emit.py b/mypyc/test/test_emit.py index c084b4758476..f52da1cd8757 100644 --- a/mypyc/test/test_emit.py +++ b/mypyc/test/test_emit.py @@ -39,7 +39,7 @@ def test_nested_supported_literals(self) -> None: item_b = ("outer", 2, frozenset({("x", 3)})) literal_a = frozenset({item_a, item_b}) literal_b = frozenset({item_b, item_a}) - expected = "frozenset({('outer', 1, frozenset({('m', 0), ('n', 1)})), ('outer', 2, frozenset({('x', 3)}))})" + expected = "frozenset({('outer', 2, frozenset({('x', 3)})), ('outer', 1, frozenset({('n', 1), ('m', 0)}))})" assert pformat_deterministic(literal_a, 120) == expected assert pformat_deterministic(literal_b, 120) == expected