From d0bf832243a93f184d67e2b354c85e0629743e98 Mon Sep 17 00:00:00 2001 From: James Le Cuirot Date: Sat, 14 Feb 2026 13:42:39 +0000 Subject: [PATCH] Fix cross-compiling librt by enabling x86_64 optimisations with pragmas Although Python still lacks a supported method to cross-compile packages, many downstreams do it anyway, so librt should allow this. The current approach of enabling x86_64 optimisations after checking platform.machine() is broken. When building on x86_64 for another architecture, the build fails because the optimisations get enabled when they shouldn't. Conversely, when building on another architecture for x86_64, the build fails because the optimisations don't get enabled when they should. GCC supports enabling an optimisation with a pragma from that line onwards. Clang requires the optimisation to be pushed on and popped off the stack. Unfortunately, MSVC does not have an equivalent feature, but it is unlikely anyone would cross-compile to x86_64 with it. The remaining logic for MSVC could be simplified, but other compilers such as Borland appear to be potentially supported. I considered checking for x86_64 using CCompiler's preprocess() instead, but this seemed awkward. This has been tested with GCC and Clang, including with AVX512 enabled. 
--- mypyc/build_setup.py | 9 +-------- mypyc/lib-rt/base64/arch/avx/codec.c | 8 ++++++++ mypyc/lib-rt/base64/arch/avx2/codec.c | 8 ++++++++ mypyc/lib-rt/base64/arch/avx512/codec.c | 8 ++++++++ mypyc/lib-rt/base64/arch/sse41/codec.c | 8 ++++++++ mypyc/lib-rt/base64/arch/sse42/codec.c | 8 ++++++++ mypyc/lib-rt/base64/arch/ssse3/codec.c | 8 ++++++++ mypyc/lib-rt/build_setup.py | 9 +-------- 8 files changed, 50 insertions(+), 16 deletions(-) diff --git a/mypyc/build_setup.py b/mypyc/build_setup.py index b70a1df7930e8..ec995935990c4 100644 --- a/mypyc/build_setup.py +++ b/mypyc/build_setup.py @@ -19,18 +19,11 @@ from distutils import ccompiler EXTRA_FLAGS_PER_COMPILER_TYPE_PER_PATH_COMPONENT = { - "unix": { - "base64/arch/ssse3": ["-mssse3"], - "base64/arch/sse41": ["-msse4.1"], - "base64/arch/sse42": ["-msse4.2"], - "base64/arch/avx2": ["-mavx2"], - "base64/arch/avx": ["-mavx"], - }, "msvc": { "base64/arch/sse42": ["/arch:SSE4.2"], "base64/arch/avx2": ["/arch:AVX2"], "base64/arch/avx": ["/arch:AVX"], - }, + } } ccompiler.CCompiler.__spawn = ccompiler.CCompiler.spawn # type: ignore[attr-defined] diff --git a/mypyc/lib-rt/base64/arch/avx/codec.c b/mypyc/lib-rt/base64/arch/avx/codec.c index 7a64a94be2aff..9b0ef58cd62ff 100644 --- a/mypyc/lib-rt/base64/arch/avx/codec.c +++ b/mypyc/lib-rt/base64/arch/avx/codec.c @@ -9,6 +9,11 @@ #include "../../env.h" #if HAVE_AVX +#if defined(__clang__) +#pragma clang attribute push (__attribute__((target("avx"))), apply_to=function) +#else +#pragma GCC target("avx") +#endif #include <immintrin.h> // Only enable inline assembly on supported compilers and on 64-bit CPUs. 
@@ -62,6 +67,9 @@ base64_stream_decode_avx BASE64_DEC_PARAMS #include "../generic/dec_head.c" dec_loop_ssse3(&s, &slen, &o, &olen); #include "../generic/dec_tail.c" +#if defined(__clang__) + #pragma clang attribute pop +#endif #else return base64_dec_stub(state, src, srclen, out, outlen); #endif diff --git a/mypyc/lib-rt/base64/arch/avx2/codec.c b/mypyc/lib-rt/base64/arch/avx2/codec.c index a54385bf89bea..e7ecf5da4ec79 100644 --- a/mypyc/lib-rt/base64/arch/avx2/codec.c +++ b/mypyc/lib-rt/base64/arch/avx2/codec.c @@ -9,6 +9,11 @@ #include "../../env.h" #if HAVE_AVX2 +#if defined(__clang__) +#pragma clang attribute push (__attribute__((target("avx2"))), apply_to=function) +#else +#pragma GCC target("avx2") +#endif #include <immintrin.h> // Only enable inline assembly on supported compilers and on 64-bit CPUs. @@ -52,6 +57,9 @@ base64_stream_decode_avx2 BASE64_DEC_PARAMS #include "../generic/dec_head.c" dec_loop_avx2(&s, &slen, &o, &olen); #include "../generic/dec_tail.c" +#if defined(__clang__) + #pragma clang attribute pop +#endif #else return base64_dec_stub(state, src, srclen, out, outlen); #endif diff --git a/mypyc/lib-rt/base64/arch/avx512/codec.c b/mypyc/lib-rt/base64/arch/avx512/codec.c index 98210826a5fe9..44c11acbd028c 100644 --- a/mypyc/lib-rt/base64/arch/avx512/codec.c +++ b/mypyc/lib-rt/base64/arch/avx512/codec.c @@ -9,6 +9,11 @@ #include "../../env.h" #if HAVE_AVX512 +#if defined(__clang__) +#pragma clang attribute push (__attribute__((target("avx512vbmi"))), apply_to=function) +#else +#pragma GCC target("avx512vbmi") +#endif #include <immintrin.h> #include "../avx2/dec_reshuffle.c" @@ -38,6 +43,9 @@ base64_stream_decode_avx512 BASE64_DEC_PARAMS #include "../generic/dec_head.c" dec_loop_avx2(&s, &slen, &o, &olen); #include "../generic/dec_tail.c" +#if defined(__clang__) + #pragma clang attribute pop +#endif #else return base64_dec_stub(state, src, srclen, out, outlen); #endif diff --git a/mypyc/lib-rt/base64/arch/sse41/codec.c b/mypyc/lib-rt/base64/arch/sse41/codec.c index 
c627db5f726d4..cb8c8f3a84097 100644 --- a/mypyc/lib-rt/base64/arch/sse41/codec.c +++ b/mypyc/lib-rt/base64/arch/sse41/codec.c @@ -9,6 +9,11 @@ #include "../../env.h" #if HAVE_SSE41 +#if defined(__clang__) +#pragma clang attribute push (__attribute__((target("sse4.1"))), apply_to=function) +#else +#pragma GCC target("sse4.1") +#endif #include <smmintrin.h> // Only enable inline assembly on supported compilers and on 64-bit CPUs. @@ -52,6 +57,9 @@ base64_stream_decode_sse41 BASE64_DEC_PARAMS #include "../generic/dec_head.c" dec_loop_ssse3(&s, &slen, &o, &olen); #include "../generic/dec_tail.c" +#if defined(__clang__) + #pragma clang attribute pop +#endif #else return base64_dec_stub(state, src, srclen, out, outlen); #endif diff --git a/mypyc/lib-rt/base64/arch/sse42/codec.c b/mypyc/lib-rt/base64/arch/sse42/codec.c index 2fe4e2997aa14..ec70a02970320 100644 --- a/mypyc/lib-rt/base64/arch/sse42/codec.c +++ b/mypyc/lib-rt/base64/arch/sse42/codec.c @@ -9,6 +9,11 @@ #include "../../env.h" #if HAVE_SSE42 +#if defined(__clang__) +#pragma clang attribute push (__attribute__((target("sse4.2"))), apply_to=function) +#else +#pragma GCC target("sse4.2") +#endif #include <nmmintrin.h> // Only enable inline assembly on supported compilers and on 64-bit CPUs. 
@@ -52,6 +57,9 @@ base64_stream_decode_sse42 BASE64_DEC_PARAMS #include "../generic/dec_head.c" dec_loop_ssse3(&s, &slen, &o, &olen); #include "../generic/dec_tail.c" +#if defined(__clang__) + #pragma clang attribute pop +#endif #else return base64_dec_stub(state, src, srclen, out, outlen); #endif diff --git a/mypyc/lib-rt/base64/arch/ssse3/codec.c b/mypyc/lib-rt/base64/arch/ssse3/codec.c index e51b3dfdb1677..2a3577ff8fc3f 100644 --- a/mypyc/lib-rt/base64/arch/ssse3/codec.c +++ b/mypyc/lib-rt/base64/arch/ssse3/codec.c @@ -9,6 +9,11 @@ #include "../../env.h" #if HAVE_SSSE3 +#if defined(__clang__) +#pragma clang attribute push (__attribute__((target("ssse3"))), apply_to=function) +#else +#pragma GCC target("ssse3") +#endif #include <tmmintrin.h> // Only enable inline assembly on supported compilers and on 64-bit CPUs. @@ -54,6 +59,9 @@ base64_stream_decode_ssse3 BASE64_DEC_PARAMS #include "../generic/dec_head.c" dec_loop_ssse3(&s, &slen, &o, &olen); #include "../generic/dec_tail.c" +#if defined(__clang__) + #pragma clang attribute pop +#endif #else return base64_dec_stub(state, src, srclen, out, outlen); #endif diff --git a/mypyc/lib-rt/build_setup.py b/mypyc/lib-rt/build_setup.py index b70a1df7930e8..ec995935990c4 100644 --- a/mypyc/lib-rt/build_setup.py +++ b/mypyc/lib-rt/build_setup.py @@ -19,18 +19,11 @@ from distutils import ccompiler EXTRA_FLAGS_PER_COMPILER_TYPE_PER_PATH_COMPONENT = { - "unix": { - "base64/arch/ssse3": ["-mssse3"], - "base64/arch/sse41": ["-msse4.1"], - "base64/arch/sse42": ["-msse4.2"], - "base64/arch/avx2": ["-mavx2"], - "base64/arch/avx": ["-mavx"], - }, "msvc": { "base64/arch/sse42": ["/arch:SSE4.2"], "base64/arch/avx2": ["/arch:AVX2"], "base64/arch/avx": ["/arch:AVX"], - }, + } } ccompiler.CCompiler.__spawn = ccompiler.CCompiler.spawn # type: ignore[attr-defined]