Skip to content

[mypyc] Add support for C string literals in the IR #19383

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jul 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions mypyc/codegen/emitfunc.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
Cast,
ComparisonOp,
ControlOp,
CString,
DecRef,
Extend,
Float,
Expand Down Expand Up @@ -843,6 +844,8 @@ def reg(self, reg: Value) -> str:
elif r == "nan":
return "NAN"
return r
elif isinstance(reg, CString):
return '"' + encode_c_string_literal(reg.value) + '"'
else:
return self.emitter.reg(reg)

Expand Down Expand Up @@ -904,3 +907,30 @@ def emit_unsigned_int_cast(self, type: RType) -> str:
return "(uint64_t)"
else:
return ""


# Lazily-built map from each byte value (0-255) to its C string literal spelling.
_translation_table: Final[dict[int, str]] = {}

# Byte values of the ASCII characters that C would absorb into a preceding "\x" escape.
_HEX_DIGIT_BYTES: Final = frozenset(b"0123456789abcdefABCDEF")


def encode_c_string_literal(b: bytes) -> str:
    r"""Convert a bytestring to C string literal syntax (with necessary escaping).

    The result is meant to be placed between double quotes by the caller; the
    surrounding quotes are not added here. For example, the four bytes of
    ``b"foo\n"`` become the five characters ``foo\n`` (backslash followed by "n").

    Newline, carriage return, tab, double quote and backslash use their short
    C escapes. Other bytes below 32 or at/above 127 are written as two-digit
    ``\xNN`` escapes, and remaining printable ASCII bytes are copied verbatim.

    A C hexadecimal escape has no length limit: it consumes every hex digit
    that follows it. To keep the literal unambiguous, a byte that is an ASCII
    hex digit and directly follows a ``\xNN`` escape is itself written as a
    ``\xNN`` escape (``b"\x01a"`` becomes ``\x01\x61``, not ``\x01a``).

    Args:
        b: Raw bytes of the string (may be empty).

    Returns:
        The escaped text, containing only printable ASCII characters.
    """
    if not _translation_table:
        # Initialize the translation table on the first call.
        escapes = {
            ord("\n"): "\\n",
            ord("\r"): "\\r",
            ord("\t"): "\\t",
            ord('"'): '\\"',
            ord("\\"): "\\\\",
        }
        for i in range(256):
            if i not in escapes:
                if i < 32 or i >= 127:
                    escapes[i] = "\\x%.2x" % i
                else:
                    escapes[i] = chr(i)
        _translation_table.update(str.maketrans(escapes))

    # latin1 maps every byte to the code point with the same value, so the
    # table can be applied directly to the decoded text.
    encoded = b.decode("latin1").translate(_translation_table)
    if "\\x" not in encoded:
        # Fast path: no hex escapes were produced, so nothing can be ambiguous.
        return encoded

    # Slow path: rebuild byte by byte so that a hex digit directly following a
    # hex escape is escaped too; otherwise C would treat it as part of that escape.
    pieces = []
    after_hex_escape = False
    for byte in b:
        if after_hex_escape and byte in _HEX_DIGIT_BYTES:
            piece = "\\x%.2x" % byte
        else:
            piece = _translation_table[byte]
        pieces.append(piece)
        # Only "\xNN" pieces start with backslash + "x"; an escaped backslash
        # is two backslashes and a literal "x" has no leading backslash.
        after_hex_escape = piece.startswith("\\x")
    return "".join(pieces)
15 changes: 15 additions & 0 deletions mypyc/ir/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ class to enable the new behavior. Sometimes adding a new abstract
RVoid,
bit_rprimitive,
bool_rprimitive,
cstring_rprimitive,
float_rprimitive,
int_rprimitive,
is_bit_rprimitive,
Expand Down Expand Up @@ -230,6 +231,20 @@ def __init__(self, value: float, line: int = -1) -> None:
self.line = line


@final
class CString(Value):
    """A C string literal value (zero-terminated).

    The value may contain embedded zero bytes, but in that case the length of
    the string has to be tracked separately.
    """

    def __init__(self, value: bytes, line: int = -1) -> None:
        # Every C string literal has the same primitive type.
        self.type = cstring_rprimitive
        self.line = line
        # Raw, unescaped bytes of the literal.
        self.value = value


class Op(Value):
"""Abstract base class for all IR operations.

Expand Down
3 changes: 3 additions & 0 deletions mypyc/ir/pprint.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
Cast,
ComparisonOp,
ControlOp,
CString,
DecRef,
Extend,
Float,
Expand Down Expand Up @@ -327,6 +328,8 @@ def format(self, fmt: str, *args: Any) -> str:
result.append(str(arg.value))
elif isinstance(arg, Float):
result.append(repr(arg.value))
elif isinstance(arg, CString):
result.append(f"CString({arg.value!r})")
else:
result.append(self.names[arg])
elif typespec == "d":
Expand Down
10 changes: 6 additions & 4 deletions mypyc/ir/rtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,13 +254,11 @@ def __init__(
elif ctype == "CPyPtr":
# TODO: Invent an overlapping error value?
self.c_undefined = "0"
elif ctype == "PyObject *":
# Boxed types use the null pointer as the error value.
elif ctype.endswith("*"):
# Boxed and pointer types use the null pointer as the error value.
self.c_undefined = "NULL"
elif ctype == "char":
self.c_undefined = "2"
elif ctype in ("PyObject **", "void *"):
self.c_undefined = "NULL"
elif ctype == "double":
self.c_undefined = "-113.0"
elif ctype in ("uint8_t", "uint16_t", "uint32_t", "uint64_t"):
Expand Down Expand Up @@ -445,6 +443,10 @@ def __hash__(self) -> int:
"c_ptr", is_unboxed=False, is_refcounted=False, ctype="void *"
)

# C string type, represented as "const char *" in generated C. Declared as
# unboxed and not reference counted.
cstring_rprimitive: Final = RPrimitive(
    "cstring", is_unboxed=True, is_refcounted=False, ctype="const char *"
)

# The type corresponding to mypyc.common.BITMAP_TYPE
bitmap_rprimitive: Final = uint32_rprimitive

Expand Down
26 changes: 26 additions & 0 deletions mypyc/test/test_emitfunc.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
CallC,
Cast,
ComparisonOp,
CString,
DecRef,
Extend,
GetAttr,
Expand Down Expand Up @@ -49,6 +50,7 @@
RType,
bool_rprimitive,
c_int_rprimitive,
cstring_rprimitive,
dict_rprimitive,
int32_rprimitive,
int64_rprimitive,
Expand Down Expand Up @@ -824,6 +826,30 @@ def test_inc_ref_int_literal(self) -> None:
b = LoadLiteral(x, object_rprimitive)
self.assert_emit([b, IncRef(b)], "CPy_INCREF(cpy_r_r0);")

def test_c_string(self) -> None:
    """Check the C emitted when a CString is assigned to a cstring register.

    Covers a few hand-picked literals, then every possible single-byte value.
    """
    s = Register(cstring_rprimitive, "s")
    self.assert_emit(Assign(s, CString(b"foo")), """cpy_r_s = "foo";""")
    self.assert_emit(Assign(s, CString(b'foo "o')), r"""cpy_r_s = "foo \"o";""")
    self.assert_emit(Assign(s, CString(b"\x00")), r"""cpy_r_s = "\x00";""")
    self.assert_emit(Assign(s, CString(b"\\")), r"""cpy_r_s = "\\";""")

    # Bytes that are expected to use a short escape instead of a hex escape.
    short_escapes = {
        b"\n": "\\n",
        b"\r": "\\r",
        b"\t": "\\t",
        b'"': '\\"',
        b"\\": "\\\\",
    }
    for code in range(256):
        raw = bytes([code])
        if raw in short_escapes:
            expected = short_escapes[raw]
        elif 32 <= code < 127:
            # Remaining printable ASCII is emitted unchanged.
            expected = raw.decode("ascii")
        else:
            expected = "\\x%.2x" % code
        self.assert_emit(Assign(s, CString(raw)), f'cpy_r_s = "{expected}";')

def assert_emit(
self,
op: Op | list[Op],
Expand Down