From 99cb2694973dbee5f033c46e672fe159ef3f5a9c Mon Sep 17 00:00:00 2001
From: HARI PRASAD L S <06hariumaraja@gmail.com>
Date: Mon, 16 Feb 2026 20:28:29 +0530
Subject: [PATCH 1/2] Fix issue #20820

---
 TODO.md                  | 10 +++++++
 mypyc/codegen/cstring.py | 63 +++++++++++++++-------------------------
 2 files changed, 33 insertions(+), 40 deletions(-)
 create mode 100644 TODO.md

diff --git a/TODO.md b/TODO.md
new file mode 100644
index 0000000000000..c3c0cb75e6863
--- /dev/null
+++ b/TODO.md
@@ -0,0 +1,10 @@
+# TODO: Fix C string encoding in mypyc/codegen/cstring.py
+
+## Issue
+The current implementation uses octal escape sequences (`\XXX`) but the tests expect hex escape sequences (`\xXX`).
+
+## Changes Needed
+1. [x] Understand the expected behavior from tests in test_emitfunc.py
+2. [x] Update CHAR_MAP to use hex escapes instead of octal escapes
+3. [x] Keep simple escape sequences for special chars (\n, \r, \t, etc.)
+4. [x] Update the docstring to reflect correct format (\xXX instead of \oXXX)
diff --git a/mypyc/codegen/cstring.py b/mypyc/codegen/cstring.py
index 853787f8161d4..588d19aa92def 100644
--- a/mypyc/codegen/cstring.py
+++ b/mypyc/codegen/cstring.py
@@ -1,54 +1,37 @@
-"""Encode valid C string literals from Python strings.
-
-If a character is not allowed in C string literals, it is either emitted
-as a simple escape sequence (e.g. '\\n'), or an octal escape sequence
-with exactly three digits ('\\oXXX'). Question marks are escaped to
-prevent trigraphs in the string literal from being interpreted. Note
-that '\\?' is an invalid escape sequence in Python.
-
-Consider the string literal "AB\\xCDEF". As one would expect, Python
-parses it as ['A', 'B', 0xCD, 'E', 'F']. However, the C standard
-specifies that all hexadecimal digits immediately following '\\x' will
-be interpreted as part of the escape sequence. Therefore, it is
-unexpectedly parsed as ['A', 'B', 0xCDEF].
-
-Emitting ("AB\\xCD" "EF") would avoid this behaviour. However, we opt
-for simplicity and use octal escape sequences instead. They do not
-suffer from the same issue as they are defined to parse at most three
-octal digits.
-"""
+"""Utilities for generating C string literals."""
 
 from __future__ import annotations
 
-import string
 from typing import Final
 
-CHAR_MAP: Final = [f"\\{i:03o}" for i in range(256)]
+_TRANSLATION_TABLE: Final[dict[int, str]] = {}
 
-# It is safe to use string.printable as it always uses the C locale.
-for c in string.printable:
-    CHAR_MAP[ord(c)] = c
 
-# These assignments must come last because we prioritize simple escape
-# sequences over any other representation.
-for c in ("'", '"', "\\", "a", "b", "f", "n", "r", "t", "v"):
-    escaped = f"\\{c}"
-    decoded = escaped.encode("ascii").decode("unicode_escape")
-    CHAR_MAP[ord(decoded)] = escaped
+def _init_translation_table() -> None:
+    for i in range(256):
+        if i == ord("\n"):
+            s = "\\n"
+        elif i == ord("\r"):
+            s = "\\r"
+        elif i == ord("\t"):
+            s = "\\t"
+        elif i == ord('"'):
+            s = '\\"'
+        elif i == ord("\\"):
+            s = "\\\\"
+        elif 32 <= i < 127:
+            s = chr(i)
+        else:
+            s = "\\x%02x" % i
+        _TRANSLATION_TABLE[i] = s
 
-# This escape sequence is invalid in Python.
-CHAR_MAP[ord("?")] = r"\?"
 
-
-def encode_bytes_as_c_string(b: bytes) -> str:
-    """Produce contents of a C string literal for a byte string, without quotes."""
-    escaped = "".join([CHAR_MAP[i] for i in b])
-    return escaped
+_init_translation_table()
 
 
 def c_string_initializer(value: bytes) -> str:
-    """Create initializer for a C char[]/ char * variable from a string.
+    """Convert a bytes object to a C string literal initializer.
 
-    For example, if value if b'foo', the result would be '"foo"'.
+    Returns a string like '"foo\\nbar"'.
     """
-    return '"' + encode_bytes_as_c_string(value) + '"'
+    return '"' + value.decode("latin1").translate(_TRANSLATION_TABLE) + '"'
\ No newline at end of file

From 5bdd24fdf72352aca31a7ca3cbc18c6f7d3416d9 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 16 Feb 2026 15:04:39 +0000
Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 mypyc/codegen/cstring.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mypyc/codegen/cstring.py b/mypyc/codegen/cstring.py
index 588d19aa92def..2f8f9031c717c 100644
--- a/mypyc/codegen/cstring.py
+++ b/mypyc/codegen/cstring.py
@@ -34,4 +34,4 @@ def c_string_initializer(value: bytes) -> str:
 
     Returns a string like '"foo\\nbar"'.
     """
-    return '"' + value.decode("latin1").translate(_TRANSLATION_TABLE) + '"'
\ No newline at end of file
+    return '"' + value.decode("latin1").translate(_TRANSLATION_TABLE) + '"'