Skip to content

Commit 8f922b3

Browse files
authored
[mypyc] Use faster base64 encode implementation in librt.base64 (#20237)
Vendor optimized base64 implementation from https://github.com/aklomp/base64. This is based on commit 9e8ed65048ff0f703fad3deb03bf66ac7f78a4d7 (May 2025). Enable SIMD on macOS (64-bit ARM only). Other platforms probably use a generic version. I'll look into enabling SIMD more generally in a follow-up PR. A `b64encode` micro-benchmark was up to 11 times faster compared to the stdlib `base64` module (on a MacBook Pro).
1 parent 9286425 commit 8f922b3

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

53 files changed

+5787
-54
lines changed

LICENSE

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -227,3 +227,38 @@ FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
227227
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
228228
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
229229
OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
230+
231+
= = = = =
232+
233+
Files under lib-rt/base64 are licensed under the following license.
234+
235+
= = = = =
236+
237+
Copyright (c) 2005-2007, Nick Galbreath
238+
Copyright (c) 2015-2018, Wojciech Muła
239+
Copyright (c) 2016-2017, Matthieu Darbois
240+
Copyright (c) 2013-2022, Alfred Klomp
241+
All rights reserved.
242+
243+
Redistribution and use in source and binary forms, with or without
244+
modification, are permitted provided that the following conditions are
245+
met:
246+
247+
- Redistributions of source code must retain the above copyright notice,
248+
this list of conditions and the following disclaimer.
249+
250+
- Redistributions in binary form must reproduce the above copyright
251+
notice, this list of conditions and the following disclaimer in the
252+
documentation and/or other materials provided with the distribution.
253+
254+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
255+
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
256+
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
257+
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
258+
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
259+
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
260+
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
261+
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
262+
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
263+
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
264+
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

mypyc/build.py

Lines changed: 55 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
import sys
2727
import time
2828
from collections.abc import Iterable
29-
from typing import TYPE_CHECKING, Any, NoReturn, Union, cast
29+
from typing import TYPE_CHECKING, Any, NamedTuple, NoReturn, Union, cast
3030

3131
from mypy.build import BuildSource
3232
from mypy.errors import CompileError
@@ -42,7 +42,52 @@
4242
from mypyc.namegen import exported_name
4343
from mypyc.options import CompilerOptions
4444

45-
LIBRT_MODULES = [("librt.internal", "librt_internal.c"), ("librt.base64", "librt_base64.c")]
45+
46+
# Describes one librt extension module and the runtime files needed to build it.
# mypycify() unpacks each entry positionally as
# (module, c_files, other_files, include_dirs), so field order matters.
class ModDesc(NamedTuple):
47+
# Module name passed as the first argument when the extension is created,
# e.g. "librt.base64".
module: str
48+
# Files copied into the build directory and listed as the extension's
# sources (together with RUNTIME_C_FILES).
c_files: list[str]
49+
# Files copied into the build directory but not listed as sources.
# NOTE(review): presumably these are pulled in via #include from c_files -- confirm.
other_files: list[str]
50+
# Directory names joined onto include_dir() and appended to the extension's
# include_dirs.
include_dirs: list[str]
51+
52+
53+
# librt extension modules built by mypycify(). Each entry is a ModDesc of
# (module, c_files, other_files, include_dirs); all paths are relative to
# include_dir().
LIBRT_MODULES = [
54+
ModDesc("librt.internal", ["librt_internal.c"], [], []),
55+
ModDesc(
56+
"librt.base64",
57+
# c_files: copied to the build directory and compiled as extension sources.
[
58+
"librt_base64.c",
59+
"base64/lib.c",
60+
"base64/codec_choose.c",
61+
"base64/tables/tables.c",
62+
"base64/arch/generic/codec.c",
63+
"base64/arch/ssse3/codec.c",
64+
"base64/arch/sse41/codec.c",
65+
"base64/arch/sse42/codec.c",
66+
"base64/arch/avx/codec.c",
67+
"base64/arch/avx2/codec.c",
68+
"base64/arch/avx512/codec.c",
69+
"base64/arch/neon32/codec.c",
70+
"base64/arch/neon64/codec.c",
71+
],
72+
# other_files: copied to the build directory only, not compiled on their own.
# NOTE(review): only generic and neon64 fragments are listed here. The x86
# codec.c files #include ssse3/avx fragments by relative path under HAVE_*
# guards; those fragments are not copied, so confirm the guards stay
# disabled or add the fragments to this list.
[
73+
"base64/arch/generic/32/enc_loop.c",
74+
"base64/arch/generic/64/enc_loop.c",
75+
"base64/arch/generic/32/dec_loop.c",
76+
"base64/arch/generic/enc_head.c",
77+
"base64/arch/generic/enc_tail.c",
78+
"base64/arch/generic/dec_head.c",
79+
"base64/arch/generic/dec_tail.c",
80+
"base64/arch/neon64/dec_loop.c",
81+
"base64/arch/neon64/enc_loop_asm.c",
82+
"base64/codecs.h",
83+
"base64/env.h",
84+
"base64/tables/tables.h",
85+
"base64/tables/table_dec_32bit.h",
86+
"base64/tables/table_enc_12bit.h",
87+
],
88+
# include_dirs: joined onto include_dir() and added to the include path.
["base64"],
89+
),
90+
]
4691

4792
try:
4893
# Import setuptools so that its monkey-patching overrides distutils
@@ -677,17 +722,19 @@ def mypycify(
677722
rt_file = os.path.join(build_dir, name)
678723
with open(os.path.join(include_dir(), name), encoding="utf-8") as f:
679724
write_file(rt_file, f.read())
680-
for mod, file_name in LIBRT_MODULES:
681-
rt_file = os.path.join(build_dir, file_name)
682-
with open(os.path.join(include_dir(), file_name), encoding="utf-8") as f:
683-
write_file(rt_file, f.read())
725+
for mod, file_names, addit_files, includes in LIBRT_MODULES:
726+
for file_name in file_names + addit_files:
727+
rt_file = os.path.join(build_dir, file_name)
728+
with open(os.path.join(include_dir(), file_name), encoding="utf-8") as f:
729+
write_file(rt_file, f.read())
684730
extensions.append(
685731
get_extension()(
686732
mod,
687733
sources=[
688-
os.path.join(build_dir, file) for file in [file_name] + RUNTIME_C_FILES
734+
os.path.join(build_dir, file) for file in file_names + RUNTIME_C_FILES
689735
],
690-
include_dirs=[include_dir()],
736+
include_dirs=[include_dir()]
737+
+ [os.path.join(include_dir(), d) for d in includes],
691738
extra_compile_args=cflags,
692739
)
693740
)
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
#include <stdint.h>
2+
#include <stddef.h>
3+
#include <stdlib.h>
4+
5+
#include "libbase64.h"
6+
#include "../../tables/tables.h"
7+
#include "../../codecs.h"
8+
#include "config.h"
9+
#include "../../env.h"
10+
11+
#if HAVE_AVX
12+
#include <immintrin.h>
13+
14+
// Only enable inline assembly on supported compilers and on 64-bit CPUs.
15+
#ifndef BASE64_AVX_USE_ASM
16+
# if (defined(__GNUC__) || defined(__clang__)) && BASE64_WORDSIZE == 64
17+
# define BASE64_AVX_USE_ASM 1
18+
# else
19+
# define BASE64_AVX_USE_ASM 0
20+
# endif
21+
#endif
22+
23+
#include "../ssse3/dec_reshuffle.c"
24+
#include "../ssse3/dec_loop.c"
25+
26+
#if BASE64_AVX_USE_ASM
27+
# include "enc_loop_asm.c"
28+
#else
29+
# include "../ssse3/enc_translate.c"
30+
# include "../ssse3/enc_reshuffle.c"
31+
# include "../ssse3/enc_loop.c"
32+
#endif
33+
34+
#endif // HAVE_AVX
35+
36+
// Streaming base64 encoder entry point for the AVX codec.
// The parameter list comes from the BASE64_ENC_PARAMS macro; judging by the
// stub call below it provides state, src, srclen, out and outlen.
// When HAVE_AVX is zero the call is forwarded to base64_enc_stub().
void
37+
base64_stream_encode_avx BASE64_ENC_PARAMS
38+
{
39+
#if HAVE_AVX
40+
// NOTE(review): enc_head.c is a code fragment pasted into this body; it
// presumably declares the s, slen, o and olen locals used below -- confirm.
#include "../generic/enc_head.c"
41+
42+
// For supported compilers, use a hand-optimized inline assembly
43+
// encoder. Otherwise fall back on the SSSE3 encoder, but compiled with
44+
// AVX flags to generate better optimized AVX code.
45+
46+
#if BASE64_AVX_USE_ASM
47+
enc_loop_avx(&s, &slen, &o, &olen);
48+
#else
49+
enc_loop_ssse3(&s, &slen, &o, &olen);
50+
#endif
51+
52+
// NOTE(review): enc_tail.c presumably handles the input left over by the
// bulk loop and stores the results -- confirm.
#include "../generic/enc_tail.c"
53+
#else
54+
base64_enc_stub(state, src, srclen, out, outlen);
55+
#endif
56+
}
57+
58+
// Streaming base64 decoder entry point for the AVX codec.
// The parameter list comes from the BASE64_DEC_PARAMS macro; judging by the
// stub call below it provides state, src, srclen, out and outlen.
// When HAVE_AVX is zero, returns the result of base64_dec_stub().
int
59+
base64_stream_decode_avx BASE64_DEC_PARAMS
60+
{
61+
#if HAVE_AVX
62+
// NOTE(review): dec_head.c is a code fragment pasted into this body; it
// presumably declares the s, slen, o and olen locals used below -- confirm.
#include "../generic/dec_head.c"
63+
// The bulk decode reuses the SSSE3 loop; there is no AVX-specific decode loop here.
dec_loop_ssse3(&s, &slen, &o, &olen);
64+
// NOTE(review): no return statement is visible on this path, so the int
// result is presumably returned from inside dec_tail.c -- confirm.
#include "../generic/dec_tail.c"
65+
#else
66+
return base64_dec_stub(state, src, srclen, out, outlen);
67+
#endif
68+
}

0 commit comments

Comments
 (0)