Skip to content

Commit 3d23716

Browse files
authored
[mypyc] Add minimal, experimental librt.base64 module (#20226)
The module currently only has a `b64encode` function adapted from CPython. It's only enabled when librt is compiled with experimental features enabled, so that we are free to iterate on this and break backward compatibility until we are ready to declare the module as stable. This also adds a way to define experimental features in `librt` (and mypyc in general, but it's currently only used for `librt`). In follow-up PRs I'm planning to add a more efficient implementation of `b64encode` and add more features to the module, including decoding. I'm not planning to include every feature from the stdlib base64 module, since many of them aren't used very widely.
1 parent 568b945 commit 3d23716

File tree

13 files changed

+327
-5
lines changed

13 files changed

+327
-5
lines changed
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# Encode the bytes object `s` using Base64 and return the encoded bytes.
# Only available when librt is compiled with experimental features enabled.
def b64encode(s: bytes) -> bytes: ...

mypyc/build.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@
4242
from mypyc.namegen import exported_name
4343
from mypyc.options import CompilerOptions
4444

45-
LIBRT_MODULES = [("librt.internal", "librt_internal.c")]
45+
LIBRT_MODULES = [("librt.internal", "librt_internal.c"), ("librt.base64", "librt_base64.c")]
4646

4747
try:
4848
# Import setuptools so that it monkey-patch overrides distutils
@@ -495,7 +495,9 @@ def mypycify(
495495
group_name: str | None = None,
496496
log_trace: bool = False,
497497
depends_on_librt_internal: bool = False,
498+
depends_on_librt_base64: bool = False,
498499
install_librt: bool = False,
500+
experimental_features: bool = False,
499501
) -> list[Extension]:
500502
"""Main entry point to building using mypyc.
501503
@@ -551,6 +553,9 @@ def mypycify(
551553
those are built and published on PyPI separately, but during
552554
tests, we want to use their development versions (i.e. from
553555
current commit).
556+
experimental_features: Enable experimental features (install_librt=True is
557+
also needed if using experimental librt features). These
558+
have no backward compatibility guarantees!
554559
"""
555560

556561
# Figure out our configuration
@@ -565,6 +570,8 @@ def mypycify(
565570
group_name=group_name,
566571
log_trace=log_trace,
567572
depends_on_librt_internal=depends_on_librt_internal,
573+
depends_on_librt_base64=depends_on_librt_base64,
574+
experimental_features=experimental_features,
568575
)
569576

570577
# Generate all the actual important C code
@@ -607,6 +614,8 @@ def mypycify(
607614
]
608615
if log_trace:
609616
cflags.append("-DMYPYC_LOG_TRACE")
617+
if experimental_features:
618+
cflags.append("-DMYPYC_EXPERIMENTAL")
610619
elif compiler.compiler_type == "msvc":
611620
# msvc doesn't have levels, '/O2' is full and '/Od' is disable
612621
if opt_level == "0":
@@ -633,6 +642,8 @@ def mypycify(
633642
cflags += ["/GL-", "/wd9025"] # warning about overriding /GL
634643
if log_trace:
635644
cflags.append("/DMYPYC_LOG_TRACE")
645+
if experimental_features:
646+
cflags.append("/DMYPYC_EXPERIMENTAL")
636647

637648
# If configured to (defaults to yes in multi-file mode), copy the
638649
# runtime library in. Otherwise it just gets #included to save on

mypyc/codegen/emitmodule.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -604,6 +604,8 @@ def generate_c_for_modules(self) -> list[tuple[str, str]]:
604604
ext_declarations.emit_line("#include <CPy.h>")
605605
if self.compiler_options.depends_on_librt_internal:
606606
ext_declarations.emit_line("#include <librt_internal.h>")
607+
if self.compiler_options.depends_on_librt_base64:
608+
ext_declarations.emit_line("#include <librt_base64.h>")
607609

608610
declarations = Emitter(self.context)
609611
declarations.emit_line(f"#ifndef MYPYC_LIBRT_INTERNAL{self.group_suffix}_H")
@@ -1034,6 +1036,10 @@ def emit_module_exec_func(
10341036
emitter.emit_line("if (import_librt_internal() < 0) {")
10351037
emitter.emit_line("return -1;")
10361038
emitter.emit_line("}")
1039+
if self.compiler_options.depends_on_librt_base64:
1040+
emitter.emit_line("if (import_librt_base64() < 0) {")
1041+
emitter.emit_line("return -1;")
1042+
emitter.emit_line("}")
10371043
emitter.emit_line("PyObject* modname = NULL;")
10381044
if self.multi_phase_init:
10391045
emitter.emit_line(f"{module_static} = module;")

mypyc/ir/ops.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -707,6 +707,7 @@ def __init__(
707707
extra_int_constants: list[tuple[int, RType]],
708708
priority: int,
709709
is_pure: bool,
710+
experimental: bool,
710711
) -> None:
711712
# Each primitive must have a distinct name, but otherwise they are arbitrary.
712713
self.name: Final = name
@@ -729,6 +730,9 @@ def __init__(
729730
self.is_pure: Final = is_pure
730731
if is_pure:
731732
assert error_kind == ERR_NEVER
733+
# Experimental primitives are not used unless mypyc experimental features are
734+
# explicitly enabled
735+
self.experimental = experimental
732736

733737
def __repr__(self) -> str:
734738
return f"<PrimitiveDescription {self.name!r}: {self.arg_types}>"

mypyc/irbuild/ll_builder.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2212,6 +2212,8 @@ def matching_primitive_op(
22122212
for desc in candidates:
22132213
if len(desc.arg_types) != len(args):
22142214
continue
2215+
if desc.experimental and not self.options.experimental_features:
2216+
continue
22152217
if all(
22162218
# formal is not None and # TODO
22172219
is_subtype(actual.type, formal)

mypyc/lib-rt/librt_base64.c

Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
#define PY_SSIZE_T_CLEAN
2+
#include <Python.h>
3+
#include "librt_base64.h"
4+
#include "pythoncapi_compat.h"
5+
6+
#ifdef MYPYC_EXPERIMENTAL
7+
8+
// b64encode_internal below is adapted from the CPython 3.14.0 binascii module
9+
10+
static const unsigned char table_b2a_base64[] =
11+
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
12+
13+
#define BASE64_PAD '='
14+
15+
/* Max binary chunk size; limited only by available memory */
16+
#define BASE64_MAXBIN ((PY_SSIZE_T_MAX - 3) / 2)
17+
18+
/*
 * Encode a bytes object using the standard Base64 alphabet
 * (A-Z, a-z, 0-9, '+', '/') with '=' padding.
 *
 * obj: the object to encode; must be a bytes object.
 *
 * Returns a new bytes object on success. On failure returns NULL with an
 * exception set: TypeError if obj is not a bytes object, ValueError if the
 * input is too long for the output size computation, or the error raised
 * by PyBytesWriter_Create() if the output buffer cannot be allocated.
 */
static PyObject *
b64encode_internal(PyObject *obj) {
    unsigned char *ascii_data;
    const unsigned char *bin_data;
    int leftbits = 0;
    unsigned char this_ch;
    unsigned int leftchar = 0;
    Py_ssize_t bin_len, out_len;
    PyBytesWriter *writer;
    int newline = 0; // TODO

    if (!PyBytes_Check(obj)) {
        PyErr_SetString(PyExc_TypeError, "b64encode() expects a bytes object");
        return NULL;
    }

    bin_data = (const unsigned char *)PyBytes_AS_STRING(obj);
    bin_len = PyBytes_GET_SIZE(obj);

    assert(bin_len >= 0);

    /* Guard the size computation below against Py_ssize_t overflow */
    if ( bin_len > BASE64_MAXBIN ) {
        PyErr_SetString(PyExc_ValueError, "Too much data for base64 line");
        return NULL;
    }

    /* We're lazy and allocate too much (fixed up later).
       "+2" leaves room for up to two pad characters.
       Note that 'b' gets encoded as 'Yg==\n' (1 in, 5 out). */
    out_len = bin_len*2 + 2;
    if (newline)
        out_len++;
    writer = PyBytesWriter_Create(out_len);
    /* Check for allocation failure BEFORE touching the writer's buffer */
    if (writer == NULL)
        return NULL;
    ascii_data = PyBytesWriter_GetData(writer);

    for( ; bin_len > 0 ; bin_len--, bin_data++ ) {
        /* Shift the data into our buffer */
        leftchar = (leftchar << 8) | *bin_data;
        leftbits += 8;

        /* See if there are 6-bit groups ready */
        while ( leftbits >= 6 ) {
            this_ch = (leftchar >> (leftbits-6)) & 0x3f;
            leftbits -= 6;
            *ascii_data++ = table_b2a_base64[this_ch];
        }
    }
    /* Flush the remaining 2 or 4 bits, left-aligned in a final 6-bit group,
       and pad the output to a multiple of four characters */
    if ( leftbits == 2 ) {
        *ascii_data++ = table_b2a_base64[(leftchar&3) << 4];
        *ascii_data++ = BASE64_PAD;
        *ascii_data++ = BASE64_PAD;
    } else if ( leftbits == 4 ) {
        *ascii_data++ = table_b2a_base64[(leftchar&0xf) << 2];
        *ascii_data++ = BASE64_PAD;
    }
    if (newline)
        *ascii_data++ = '\n';       /* Append a courtesy newline */

    /* Trim the over-allocated buffer to the number of bytes actually written */
    return PyBytesWriter_FinishWithSize(writer, ascii_data - (unsigned char *)PyBytesWriter_GetData(writer));
}
80+
81+
static PyObject*
82+
b64encode(PyObject *self, PyObject *const *args, size_t nargs) {
83+
if (nargs != 1) {
84+
PyErr_SetString(PyExc_TypeError, "b64encode() takes exactly one argument");
85+
return 0;
86+
}
87+
return b64encode_internal(args[0]);
88+
}
89+
90+
#endif
91+
92+
static PyMethodDef librt_base64_module_methods[] = {
93+
#ifdef MYPYC_EXPERIMENTAL
94+
{"b64encode", (PyCFunction)b64encode, METH_FASTCALL, PyDoc_STR("Encode bytes-like object using Base64.")},
95+
#endif
96+
{NULL, NULL, 0, NULL}
97+
};
98+
99+
// ABI version of the exported C API table; import_librt_base64() in
// librt_base64.h requires it to equal LIBRT_BASE64_ABI_VERSION.
static int
base64_abi_version(void)
{
    const int abi_version = 0;
    return abi_version;
}
103+
104+
// API version of the exported C API table; import_librt_base64() in
// librt_base64.h requires it to be at least LIBRT_BASE64_API_VERSION.
static int
base64_api_version(void)
{
    const int api_version = 0;
    return api_version;
}
108+
109+
static int
110+
librt_base64_module_exec(PyObject *m)
111+
{
112+
#ifdef MYPYC_EXPERIMENTAL
113+
// Export mypy internal C API, be careful with the order!
114+
static void *base64_api[LIBRT_BASE64_API_LEN] = {
115+
(void *)base64_abi_version,
116+
(void *)base64_api_version,
117+
(void *)b64encode_internal,
118+
};
119+
PyObject *c_api_object = PyCapsule_New((void *)base64_api, "librt.base64._C_API", NULL);
120+
if (PyModule_Add(m, "_C_API", c_api_object) < 0) {
121+
return -1;
122+
}
123+
#endif
124+
return 0;
125+
}
126+
127+
static PyModuleDef_Slot librt_base64_module_slots[] = {
128+
{Py_mod_exec, librt_base64_module_exec},
129+
#ifdef Py_MOD_GIL_NOT_USED
130+
{Py_mod_gil, Py_MOD_GIL_NOT_USED},
131+
#endif
132+
{0, NULL}
133+
};
134+
135+
static PyModuleDef librt_base64_module = {
136+
.m_base = PyModuleDef_HEAD_INIT,
137+
.m_name = "base64",
138+
.m_doc = "base64 encoding and decoding optimized for mypyc",
139+
.m_size = 0,
140+
.m_methods = librt_base64_module_methods,
141+
.m_slots = librt_base64_module_slots,
142+
};
143+
144+
PyMODINIT_FUNC
145+
PyInit_base64(void)
146+
{
147+
return PyModuleDef_Init(&librt_base64_module);
148+
}

mypyc/lib-rt/librt_base64.h

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
#ifndef LIBRT_BASE64_H
2+
#define LIBRT_BASE64_H
3+
4+
#ifndef MYPYC_EXPERIMENTAL
5+
6+
// Non-experimental build: every librt.base64 feature is experimental for
// now, so there is no C API table to set up and this always succeeds.
static int
import_librt_base64(void)
{
    const int success = 0;
    return success;
}
12+
13+
#else // MYPYC_EXPERIMENTAL
14+
15+
#define LIBRT_BASE64_ABI_VERSION 0
16+
#define LIBRT_BASE64_API_VERSION 0
17+
#define LIBRT_BASE64_API_LEN 3
18+
19+
static void *LibRTBase64_API[LIBRT_BASE64_API_LEN];
20+
21+
#define LibRTBase64_ABIVersion (*(int (*)(void)) LibRTBase64_API[0])
22+
#define LibRTBase64_APIVersion (*(int (*)(void)) LibRTBase64_API[1])
23+
#define LibRTBase64_b64encode_internal (*(PyObject* (*)(PyObject *source)) LibRTBase64_API[2])
24+
25+
static int
26+
import_librt_base64(void)
27+
{
28+
PyObject *mod = PyImport_ImportModule("librt.base64");
29+
if (mod == NULL)
30+
return -1;
31+
Py_DECREF(mod); // we import just for the side effect of making the below work.
32+
void *capsule = PyCapsule_Import("librt.base64._C_API", 0);
33+
if (capsule == NULL)
34+
return -1;
35+
memcpy(LibRTBase64_API, capsule, sizeof(LibRTBase64_API));
36+
if (LibRTBase64_ABIVersion() != LIBRT_BASE64_ABI_VERSION) {
37+
char err[128];
38+
snprintf(err, sizeof(err), "ABI version conflict for librt.base64, expected %d, found %d",
39+
LIBRT_BASE64_ABI_VERSION,
40+
LibRTBase64_ABIVersion()
41+
);
42+
PyErr_SetString(PyExc_ValueError, err);
43+
return -1;
44+
}
45+
if (LibRTBase64_APIVersion() < LIBRT_BASE64_API_VERSION) {
46+
char err[128];
47+
snprintf(err, sizeof(err),
48+
"API version conflict for librt.base64, expected %d or newer, found %d (hint: upgrade librt)",
49+
LIBRT_BASE64_API_VERSION,
50+
LibRTBase64_APIVersion()
51+
);
52+
PyErr_SetString(PyExc_ValueError, err);
53+
return -1;
54+
}
55+
return 0;
56+
}
57+
58+
#endif // MYPYC_EXPERIMENTAL
59+
60+
#endif // LIBRT_BASE64_H

mypyc/lib-rt/setup.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,9 @@ def run(self) -> None:
9696
],
9797
include_dirs=["."],
9898
extra_compile_args=cflags,
99-
)
99+
),
100+
Extension(
101+
"librt.base64", ["librt_base64.c"], include_dirs=["."], extra_compile_args=cflags
102+
),
100103
]
101104
)

mypyc/options.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ def __init__(
1818
group_name: str | None = None,
1919
log_trace: bool = False,
2020
depends_on_librt_internal: bool = False,
21+
depends_on_librt_base64: bool = False,
22+
experimental_features: bool = False,
2123
) -> None:
2224
self.strip_asserts = strip_asserts
2325
self.multi_file = multi_file
@@ -55,3 +57,8 @@ def __init__(
5557
# only for mypy itself, third-party code compiled with mypyc should not use
5658
# librt.internal.
5759
self.depends_on_librt_internal = depends_on_librt_internal
60+
self.depends_on_librt_base64 = depends_on_librt_base64
61+
# Some experimental features are only available when building librt in
62+
# experimental mode (e.g. use _experimental suffix in librt run test).
63+
# These can't be used with a librt wheel installed from PyPI.
64+
self.experimental_features = experimental_features

mypyc/primitives/misc_ops.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -465,3 +465,12 @@
465465
c_function_name="cache_version_internal",
466466
error_kind=ERR_NEVER,
467467
)
468+
469+
function_op(
470+
name="librt.base64.b64encode",
471+
arg_types=[bytes_rprimitive],
472+
return_type=bytes_rprimitive,
473+
c_function_name="LibRTBase64_b64encode_internal",
474+
error_kind=ERR_MAGIC,
475+
experimental=True,
476+
)

0 commit comments

Comments
 (0)