diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index c43ec75fba3619..c7e51eea1028b8 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -191,7 +191,7 @@ jobs: macOS ${{ fromJSON(matrix.free-threading) && '(free-threading)' || '' }} needs: build-context - if: needs.build-context.outputs.run-tests == 'true' + if: needs.build-context.outputs.run-macos == 'true' strategy: fail-fast: false matrix: @@ -217,7 +217,7 @@ jobs: ${{ fromJSON(matrix.free-threading) && '(free-threading)' || '' }} ${{ fromJSON(matrix.bolt) && '(bolt)' || '' }} needs: build-context - if: needs.build-context.outputs.run-tests == 'true' + if: needs.build-context.outputs.run-ubuntu == 'true' strategy: fail-fast: false matrix: @@ -248,7 +248,7 @@ jobs: runs-on: ${{ matrix.os }} timeout-minutes: 60 needs: build-context - if: needs.build-context.outputs.run-tests == 'true' + if: needs.build-context.outputs.run-ubuntu == 'true' strategy: fail-fast: false matrix: @@ -304,7 +304,7 @@ jobs: runs-on: ${{ matrix.os }} timeout-minutes: 60 needs: build-context - if: needs.build-context.outputs.run-tests == 'true' + if: needs.build-context.outputs.run-ubuntu == 'true' strategy: fail-fast: false matrix: @@ -368,7 +368,7 @@ jobs: build-android: name: Android (${{ matrix.arch }}) needs: build-context - if: needs.build-context.outputs.run-tests == 'true' + if: needs.build-context.outputs.run-android == 'true' timeout-minutes: 60 strategy: fail-fast: false @@ -390,7 +390,7 @@ jobs: build-ios: name: iOS needs: build-context - if: needs.build-context.outputs.run-tests == 'true' + if: needs.build-context.outputs.run-ios == 'true' timeout-minutes: 60 runs-on: macos-15 steps: @@ -413,7 +413,7 @@ jobs: build-wasi: name: 'WASI' needs: build-context - if: needs.build-context.outputs.run-tests == 'true' + if: needs.build-context.outputs.run-wasi == 'true' uses: ./.github/workflows/reusable-wasi.yml test-hypothesis: @@ -421,7 +421,7 @@ jobs: runs-on: ubuntu-24.04 timeout-minutes: 60 needs: build-context - if: needs.build-context.outputs.run-tests == 'true' + if: needs.build-context.outputs.run-ubuntu == 'true' env: OPENSSL_VER: 3.0.18 PYTHONSTRICTEXTENSIONBUILD: 1 @@ -528,7 +528,7 @@ jobs: runs-on: ${{ matrix.os }} timeout-minutes: 60 needs: build-context - if: needs.build-context.outputs.run-tests == 'true' + if: needs.build-context.outputs.run-ubuntu == 'true' strategy: fail-fast: false matrix: @@ -581,7 +581,7 @@ jobs: # ${{ '' } is a hack to nest jobs under the same sidebar category. name: Sanitizers${{ '' }} # zizmor: ignore[obfuscation] needs: build-context - if: needs.build-context.outputs.run-tests == 'true' + if: needs.build-context.outputs.run-ubuntu == 'true' strategy: fail-fast: false matrix: @@ -606,7 +606,7 @@ jobs: runs-on: ubuntu-latest timeout-minutes: 60 needs: build-context - if: needs.build-context.outputs.run-tests == 'true' + if: needs.build-context.outputs.run-ubuntu == 'true' steps: - uses: actions/checkout@v4 with: @@ -713,25 +713,24 @@ jobs: test-hypothesis, cifuzz, allowed-skips: >- + ${{ !fromJSON(needs.build-context.outputs.run-docs) && 'check-docs,' || '' }} ${{ - !fromJSON(needs.build-context.outputs.run-docs) + needs.build-context.outputs.run-tests != 'true' && ' - check-docs, + check-autoconf-regen, + check-generated-files, ' || '' }} + ${{ !fromJSON(needs.build-context.outputs.run-windows-tests) && 'build-windows,' || '' }} + ${{ !fromJSON(needs.build-context.outputs.run-ci-fuzz) && 'cifuzz,' || '' }} + ${{ !fromJSON(needs.build-context.outputs.run-macos) && 'build-macos,' || '' }} ${{ - needs.build-context.outputs.run-tests != 'true' + !fromJSON(needs.build-context.outputs.run-ubuntu) && ' - check-autoconf-regen, - check-generated-files, - build-macos, build-ubuntu, build-ubuntu-ssltests-awslc, build-ubuntu-ssltests-openssl, - build-android, - build-ios, - build-wasi, test-hypothesis, build-asan, build-san, @@ -739,18 +738,7 @@ jobs: ' || '' }} - ${{ - !fromJSON(needs.build-context.outputs.run-windows-tests) - && ' - build-windows, - ' - || '' - }} - ${{ - !fromJSON(needs.build-context.outputs.run-ci-fuzz) - && ' - cifuzz, - ' - || '' - }} + ${{ !fromJSON(needs.build-context.outputs.run-android) && 'build-android,' || '' }} + ${{ !fromJSON(needs.build-context.outputs.run-ios) && 'build-ios,' || '' }} + ${{ !fromJSON(needs.build-context.outputs.run-wasi) && 'build-wasi,' || '' }} jobs: ${{ toJSON(needs) }} diff --git a/.github/workflows/reusable-context.yml b/.github/workflows/reusable-context.yml index 66c7cc47de03fb..ce5562f2d51fbb 100644 --- a/.github/workflows/reusable-context.yml +++ b/.github/workflows/reusable-context.yml @@ -17,21 +17,36 @@ on: # yamllint disable-line rule:truthy # || 'falsy-branch' # }} # + run-android: + description: Whether to run the Android tests + value: ${{ jobs.compute-changes.outputs.run-android }} # bool + run-ci-fuzz: + description: Whether to run the CIFuzz job + value: ${{ jobs.compute-changes.outputs.run-ci-fuzz }} # bool run-docs: description: Whether to build the docs value: ${{ jobs.compute-changes.outputs.run-docs }} # bool + run-ios: + description: Whether to run the iOS tests + value: ${{ jobs.compute-changes.outputs.run-ios }} # bool + run-macos: + description: Whether to run the macOS tests + value: ${{ jobs.compute-changes.outputs.run-macos }} # bool run-tests: description: Whether to run the regular tests value: ${{ jobs.compute-changes.outputs.run-tests }} # bool - run-windows-tests: - description: Whether to run the Windows tests - value: ${{ jobs.compute-changes.outputs.run-windows-tests }} # bool + run-ubuntu: + description: Whether to run the Ubuntu tests + value: ${{ jobs.compute-changes.outputs.run-ubuntu }} # bool + run-wasi: + description: Whether to run the WASI tests + value: ${{ jobs.compute-changes.outputs.run-wasi }} # bool run-windows-msi: description: Whether to run the MSI installer smoke tests value: ${{ jobs.compute-changes.outputs.run-windows-msi }} # bool - run-ci-fuzz: - description: Whether to run the CIFuzz job - value: ${{ jobs.compute-changes.outputs.run-ci-fuzz }} # bool + run-windows-tests: + description: Whether to run the Windows tests + value: ${{ jobs.compute-changes.outputs.run-windows-tests }} # bool jobs: compute-changes: @@ -39,9 +54,14 @@ jobs: runs-on: ubuntu-latest timeout-minutes: 10 outputs: + run-android: ${{ steps.changes.outputs.run-android }} run-ci-fuzz: ${{ steps.changes.outputs.run-ci-fuzz }} run-docs: ${{ steps.changes.outputs.run-docs }} + run-ios: ${{ steps.changes.outputs.run-ios }} + run-macos: ${{ steps.changes.outputs.run-macos }} run-tests: ${{ steps.changes.outputs.run-tests }} + run-ubuntu: ${{ steps.changes.outputs.run-ubuntu }} + run-wasi: ${{ steps.changes.outputs.run-wasi }} run-windows-msi: ${{ steps.changes.outputs.run-windows-msi }} run-windows-tests: ${{ steps.changes.outputs.run-windows-tests }} steps: diff --git a/Lib/profiling/sampling/_flamegraph_assets/flamegraph.css b/Lib/profiling/sampling/_flamegraph_assets/flamegraph.css index 18d2279da9b645..e1cc3a8a58cf1d 100644 --- a/Lib/profiling/sampling/_flamegraph_assets/flamegraph.css +++ b/Lib/profiling/sampling/_flamegraph_assets/flamegraph.css @@ -302,7 +302,6 @@ body.resizing-sidebar { } .section-content { - overflow: hidden; transition: max-height var(--transition-normal), opacity var(--transition-normal); max-height: 1000px; opacity: 1; diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 51234a2e40f54f..2e7485e89fdac5 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -2695,6 +2695,28 @@ def recursive_wrapper_4569(): pass """)) + def test_attribute_changes_are_watched(self): + # Just running to make sure it doesn't crash. + script_helper.assert_python_ok("-c", textwrap.dedent(""" + from concurrent.futures import ThreadPoolExecutor + from unittest import TestCase + NTHREADS = 6 + BOTTOM = 0 + TOP = 1250000 + class A: + attr = 10**1000 + class TestType(TestCase): + def read(id0): + for _ in range(BOTTOM, TOP): + A.attr + def write(id0): + x = A.attr + x += 1 + A.attr = x + with ThreadPoolExecutor(NTHREADS) as pool: + pool.submit(read, (1,)) + pool.submit(write, (1,)) + """)) def global_identity(x): return x diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-12-05-14-33-54.gh-issue-142276.H4j8hP.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-12-05-14-33-54.gh-issue-142276.H4j8hP.rst new file mode 100644 index 00000000000000..aa8e3da33580c8 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-12-05-14-33-54.gh-issue-142276.H4j8hP.rst @@ -0,0 +1 @@ +Fix missing type watcher when promoting attribute loads to constants in the JIT. Patch by Ken Jin. Reproducer by Yuancheng Jiang. diff --git a/Modules/_remote_debugging/threads.c b/Modules/_remote_debugging/threads.c index 774338f9dc241e..953c8a383f0cbc 100644 --- a/Modules/_remote_debugging/threads.c +++ b/Modules/_remote_debugging/threads.c @@ -323,6 +323,7 @@ unwind_stack_for_thread( #ifdef Py_GIL_DISABLED int active = GET_MEMBER(_thread_status, ts, unwinder->debug_offsets.thread_state.status).active; has_gil = active; + (void)gil_requested; // unused #else // Read holds_gil directly from thread state has_gil = GET_MEMBER(int, ts, unwinder->debug_offsets.thread_state.holds_gil); diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 4ba255d28bdcf6..6411049796bf12 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -982,9 +982,10 @@ dummy_func( DEOPT_IF(!_PyLong_IsNonNegativeCompact((PyLongObject *)sub)); Py_ssize_t index = ((PyLongObject*)sub)->long_value.ob_digit[0]; DEOPT_IF(PyUnicode_GET_LENGTH(str) <= index); - // Specialize for reading an ASCII character from any string: - Py_UCS4 c = PyUnicode_READ_CHAR(str, index); - DEOPT_IF(Py_ARRAY_LENGTH(_Py_SINGLETON(strings).ascii) <= c); + // Specialize for reading an ASCII character from an ASCII string: + DEOPT_IF(!PyUnicode_IS_COMPACT_ASCII(str)); + uint8_t c = PyUnicode_1BYTE_DATA(str)[index]; + assert(c < 128); STAT_INC(BINARY_OP, hit); PyObject *res_o = (PyObject*)&_Py_SINGLETON(strings).ascii[c]; PyStackRef_CLOSE_SPECIALIZED(sub_st, _PyLong_ExactDealloc); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 7273a87681b4dd..079d31da6c1b7a 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -1502,11 +1502,12 @@ UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } - Py_UCS4 c = PyUnicode_READ_CHAR(str, index); - if (Py_ARRAY_LENGTH(_Py_SINGLETON(strings).ascii) <= c) { + if (!PyUnicode_IS_COMPACT_ASCII(str)) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } + uint8_t c = PyUnicode_1BYTE_DATA(str)[index]; + assert(c < 128); STAT_INC(BINARY_OP, hit); PyObject *res_o = (PyObject*)&_Py_SINGLETON(strings).ascii[c]; PyStackRef_CLOSE_SPECIALIZED(sub_st, _PyLong_ExactDealloc); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 68d73cccec4d6b..3d5bf75ac0acae 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -892,12 +892,13 @@ assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); JUMP_TO_PREDICTED(BINARY_OP); } - Py_UCS4 c = PyUnicode_READ_CHAR(str, index); - if (Py_ARRAY_LENGTH(_Py_SINGLETON(strings).ascii) <= c) { + if (!PyUnicode_IS_COMPACT_ASCII(str)) { UPDATE_MISS_STATS(BINARY_OP); assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); JUMP_TO_PREDICTED(BINARY_OP); } + uint8_t c = PyUnicode_1BYTE_DATA(str)[index]; + assert(c < 128); STAT_INC(BINARY_OP, hit); PyObject *res_o = (PyObject*)&_Py_SINGLETON(strings).ascii[c]; PyStackRef_CLOSE_SPECIALIZED(sub_st, _PyLong_ExactDealloc); diff --git a/Python/jit.c b/Python/jit.c index 7106db8a99a77a..b0d53d156fa440 100644 --- a/Python/jit.c +++ b/Python/jit.c @@ -185,6 +185,7 @@ set_bits(uint32_t *loc, uint8_t loc_start, uint64_t value, uint8_t value_start, #define IS_AARCH64_ADRP(I) (((I) & 0x9F000000) == 0x90000000) #define IS_AARCH64_BRANCH(I) (((I) & 0x7C000000) == 0x14000000) #define IS_AARCH64_BRANCH_COND(I) (((I) & 0x7C000000) == 0x54000000) +#define IS_AARCH64_BRANCH_ZERO(I) (((I) & 0x7E000000) == 0x34000000) #define IS_AARCH64_TEST_AND_BRANCH(I) (((I) & 0x7E000000) == 0x36000000) #define IS_AARCH64_LDR_OR_STR(I) (((I) & 0x3B000000) == 0x39000000) #define IS_AARCH64_MOV(I) (((I) & 0x9F800000) == 0x92800000) @@ -352,7 +353,7 @@ void patch_aarch64_19r(unsigned char *location, uint64_t value) { uint32_t *loc32 = (uint32_t *)location; - assert(IS_AARCH64_BRANCH_COND(*loc32)); + assert(IS_AARCH64_BRANCH_COND(*loc32) || IS_AARCH64_BRANCH_ZERO(*loc32)); value -= (uintptr_t)location; // Check that we're not out of range of 21 signed bits: assert((int64_t)value >= -(1 << 20)); diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 51722556554609..c4afc6bd29086f 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -242,7 +242,7 @@ eliminate_pop_guard(_PyUOpInstruction *this_instr, bool exit) } static JitOptRef -lookup_attr(JitOptContext *ctx, _PyUOpInstruction *this_instr, +lookup_attr(JitOptContext *ctx, _PyBloomFilter *dependencies, _PyUOpInstruction *this_instr, PyTypeObject *type, PyObject *name, uint16_t immortal, uint16_t mortal) { @@ -252,6 +252,8 @@ lookup_attr(JitOptContext *ctx, _PyUOpInstruction *this_instr, if (lookup) { int opcode = _Py_IsImmortal(lookup) ? immortal : mortal; REPLACE_OP(this_instr, opcode, 0, (uintptr_t)lookup); + PyType_Watch(TYPE_WATCHER_ID, (PyObject *)type); + _Py_BloomFilter_Add(dependencies, type); return sym_new_const(ctx, lookup); } } diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 06fa8a4522a499..9eee3c69da7c86 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -607,7 +607,7 @@ dummy_func(void) { (void)descr; PyTypeObject *type = (PyTypeObject *)sym_get_const(ctx, owner); PyObject *name = get_co_name(ctx, oparg >> 1); - attr = lookup_attr(ctx, this_instr, type, name, + attr = lookup_attr(ctx, dependencies, this_instr, type, name, _POP_TOP_LOAD_CONST_INLINE_BORROW, _POP_TOP_LOAD_CONST_INLINE); } @@ -616,7 +616,7 @@ dummy_func(void) { (void)descr; PyTypeObject *type = sym_get_type(owner); PyObject *name = get_co_name(ctx, oparg >> 1); - attr = lookup_attr(ctx, this_instr, type, name, + attr = lookup_attr(ctx, dependencies, this_instr, type, name, _POP_TOP_LOAD_CONST_INLINE_BORROW, _POP_TOP_LOAD_CONST_INLINE); } @@ -625,7 +625,7 @@ dummy_func(void) { (void)descr; PyTypeObject *type = sym_get_type(owner); PyObject *name = get_co_name(ctx, oparg >> 1); - attr = lookup_attr(ctx, this_instr, type, name, + attr = lookup_attr(ctx, dependencies, this_instr, type, name, _POP_TOP_LOAD_CONST_INLINE_BORROW, _POP_TOP_LOAD_CONST_INLINE); } @@ -634,7 +634,7 @@ dummy_func(void) { (void)descr; PyTypeObject *type = sym_get_type(owner); PyObject *name = get_co_name(ctx, oparg >> 1); - attr = lookup_attr(ctx, this_instr, type, name, + attr = lookup_attr(ctx, dependencies, this_instr, type, name, _LOAD_CONST_UNDER_INLINE_BORROW, _LOAD_CONST_UNDER_INLINE); self = owner; @@ -644,7 +644,7 @@ dummy_func(void) { (void)descr; PyTypeObject *type = sym_get_type(owner); PyObject *name = get_co_name(ctx, oparg >> 1); - attr = lookup_attr(ctx, this_instr, type, name, + attr = lookup_attr(ctx, dependencies, this_instr, type, name, _LOAD_CONST_UNDER_INLINE_BORROW, _LOAD_CONST_UNDER_INLINE); self = owner; @@ -654,7 +654,7 @@ dummy_func(void) { (void)descr; PyTypeObject *type = sym_get_type(owner); PyObject *name = get_co_name(ctx, oparg >> 1); - attr = lookup_attr(ctx, this_instr, type, name, + attr = lookup_attr(ctx, dependencies, this_instr, type, name, _LOAD_CONST_UNDER_INLINE_BORROW, _LOAD_CONST_UNDER_INLINE); self = owner; diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 85bebed58677ed..36130cdb893ab1 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -1848,7 +1848,7 @@ (void)descr; PyTypeObject *type = (PyTypeObject *)sym_get_const(ctx, owner); PyObject *name = get_co_name(ctx, oparg >> 1); - attr = lookup_attr(ctx, this_instr, type, name, + attr = lookup_attr(ctx, dependencies, this_instr, type, name, _POP_TOP_LOAD_CONST_INLINE_BORROW, _POP_TOP_LOAD_CONST_INLINE); stack_pointer[-1] = attr; @@ -2495,7 +2495,7 @@ (void)descr; PyTypeObject *type = sym_get_type(owner); PyObject *name = get_co_name(ctx, oparg >> 1); - attr = lookup_attr(ctx, this_instr, type, name, + attr = lookup_attr(ctx, dependencies, this_instr, type, name, _LOAD_CONST_UNDER_INLINE_BORROW, _LOAD_CONST_UNDER_INLINE); self = owner; @@ -2516,7 +2516,7 @@ (void)descr; PyTypeObject *type = sym_get_type(owner); PyObject *name = get_co_name(ctx, oparg >> 1); - attr = lookup_attr(ctx, this_instr, type, name, + attr = lookup_attr(ctx, dependencies, this_instr, type, name, _LOAD_CONST_UNDER_INLINE_BORROW, _LOAD_CONST_UNDER_INLINE); self = owner; @@ -2536,7 +2536,7 @@ (void)descr; PyTypeObject *type = sym_get_type(owner); PyObject *name = get_co_name(ctx, oparg >> 1); - attr = lookup_attr(ctx, this_instr, type, name, + attr = lookup_attr(ctx, dependencies, this_instr, type, name, _POP_TOP_LOAD_CONST_INLINE_BORROW, _POP_TOP_LOAD_CONST_INLINE); stack_pointer[-1] = attr; @@ -2551,7 +2551,7 @@ (void)descr; PyTypeObject *type = sym_get_type(owner); PyObject *name = get_co_name(ctx, oparg >> 1); - attr = lookup_attr(ctx, this_instr, type, name, + attr = lookup_attr(ctx, dependencies, this_instr, type, name, _POP_TOP_LOAD_CONST_INLINE_BORROW, _POP_TOP_LOAD_CONST_INLINE); stack_pointer[-1] = attr; @@ -2571,7 +2571,7 @@ (void)descr; PyTypeObject *type = sym_get_type(owner); PyObject *name = get_co_name(ctx, oparg >> 1); - attr = lookup_attr(ctx, this_instr, type, name, + attr = lookup_attr(ctx, dependencies, this_instr, type, name, _LOAD_CONST_UNDER_INLINE_BORROW, _LOAD_CONST_UNDER_INLINE); self = owner; diff --git a/Tools/build/compute-changes.py b/Tools/build/compute-changes.py index b5993d29b92972..524d3066fbffa7 100644 --- a/Tools/build/compute-changes.py +++ b/Tools/build/compute-changes.py @@ -45,12 +45,22 @@ SUFFIXES_C_OR_CPP = frozenset({".c", ".h", ".cpp"}) SUFFIXES_DOCUMENTATION = frozenset({".rst", ".md"}) +ANDROID_DIRS = frozenset({"Android"}) +IOS_DIRS = frozenset({"Apple", "iOS"}) +MACOS_DIRS = frozenset({"Mac"}) +WASI_DIRS = frozenset({Path("Tools", "wasm")}) + @dataclass(kw_only=True, slots=True) class Outputs: + run_android: bool = False run_ci_fuzz: bool = False run_docs: bool = False + run_ios: bool = False + run_macos: bool = False run_tests: bool = False + run_ubuntu: bool = False + run_wasi: bool = False run_windows_msi: bool = False run_windows_tests: bool = False @@ -63,7 +73,15 @@ def compute_changes() -> None: outputs = process_changed_files(files) else: # Otherwise, just run the tests - outputs = Outputs(run_tests=True, run_windows_tests=True) + outputs = Outputs( + run_android=True, + run_ios=True, + run_macos=True, + run_tests=True, + run_ubuntu=True, + run_wasi=True, + run_windows_tests=True, + ) outputs = process_target_branch(outputs, target_branch) if outputs.run_tests: @@ -111,6 +129,21 @@ def get_changed_files( return frozenset(map(Path, filter(None, map(str.strip, changed_files)))) +def get_file_platform(file: Path) -> str | None: + if not file.parts: + return None + first_part = file.parts[0] + if first_part in MACOS_DIRS: + return "macos" + if first_part in IOS_DIRS: + return "ios" + if first_part in ANDROID_DIRS: + return "android" + if len(file.parts) >= 2 and Path(*file.parts[:2]) in WASI_DIRS: # Tools/wasm/ + return "wasi" + return None + + def process_changed_files(changed_files: Set[Path]) -> Outputs: run_tests = False run_ci_fuzz = False @@ -118,6 +151,9 @@ def process_changed_files(changed_files: Set[Path]) -> Outputs: run_windows_tests = False run_windows_msi = False + platforms_changed = set() + has_platform_specific_change = True + for file in changed_files: # Documentation files doc_or_misc = file.parts[0] in {"Doc", "Misc"} @@ -126,10 +162,15 @@ def process_changed_files(changed_files: Set[Path]) -> Outputs: if file.parent == GITHUB_WORKFLOWS_PATH: if file.name == "build.yml": run_tests = run_ci_fuzz = True + has_platform_specific_change = False if file.name == "reusable-docs.yml": run_docs = True if file.name == "reusable-windows-msi.yml": run_windows_msi = True + if file.name == "reusable-macos.yml": + platforms_changed.add("macos") + if file.name == "reusable-wasi.yml": + platforms_changed.add("wasi") if not ( doc_file @@ -138,8 +179,13 @@ def process_changed_files(changed_files: Set[Path]) -> Outputs: ): run_tests = True - if file not in UNIX_BUILD_SYSTEM_FILE_NAMES: - run_windows_tests = True + platform = get_file_platform(file) + if platform is not None: + platforms_changed.add(platform) + else: + has_platform_specific_change = False + if file not in UNIX_BUILD_SYSTEM_FILE_NAMES: + run_windows_tests = True # The fuzz tests are pretty slow so they are executed only for PRs # changing relevant files. @@ -159,12 +205,38 @@ def process_changed_files(changed_files: Set[Path]) -> Outputs: if file.parts[:2] == ("Tools", "msi"): run_windows_msi = True + # Check which platform specific tests to run + if run_tests: + if not has_platform_specific_change or not platforms_changed: + run_android = True + run_ios = True + run_macos = True + run_ubuntu = True + run_wasi = True + else: + run_android = "android" in platforms_changed + run_ios = "ios" in platforms_changed + run_macos = "macos" in platforms_changed + run_ubuntu = False + run_wasi = "wasi" in platforms_changed + else: + run_android = False + run_ios = False + run_macos = False + run_ubuntu = False + run_wasi = False + return Outputs( + run_android=run_android, run_ci_fuzz=run_ci_fuzz, run_docs=run_docs, + run_ios=run_ios, + run_macos=run_macos, run_tests=run_tests, - run_windows_tests=run_windows_tests, + run_ubuntu=run_ubuntu, + run_wasi=run_wasi, run_windows_msi=run_windows_msi, + run_windows_tests=run_windows_tests, ) @@ -191,11 +263,16 @@ def write_github_output(outputs: Outputs) -> None: return with open(os.environ["GITHUB_OUTPUT"], "a", encoding="utf-8") as f: + f.write(f"run-android={bool_lower(outputs.run_android)}\n") f.write(f"run-ci-fuzz={bool_lower(outputs.run_ci_fuzz)}\n") f.write(f"run-docs={bool_lower(outputs.run_docs)}\n") + f.write(f"run-ios={bool_lower(outputs.run_ios)}\n") + f.write(f"run-macos={bool_lower(outputs.run_macos)}\n") f.write(f"run-tests={bool_lower(outputs.run_tests)}\n") - f.write(f"run-windows-tests={bool_lower(outputs.run_windows_tests)}\n") + f.write(f"run-ubuntu={bool_lower(outputs.run_ubuntu)}\n") + f.write(f"run-wasi={bool_lower(outputs.run_wasi)}\n") f.write(f"run-windows-msi={bool_lower(outputs.run_windows_msi)}\n") + f.write(f"run-windows-tests={bool_lower(outputs.run_windows_tests)}\n") def bool_lower(value: bool, /) -> str: diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 93aa4899fe6ec8..9293a649e8a0ec 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -614,6 +614,8 @@ def has_error_without_pop(op: parser.CodeDef) -> bool: "PyUnicode_Concat", "PyUnicode_GET_LENGTH", "PyUnicode_READ_CHAR", + "PyUnicode_IS_COMPACT_ASCII", + "PyUnicode_1BYTE_DATA", "Py_ARRAY_LENGTH", "Py_FatalError", "Py_INCREF", diff --git a/Tools/jit/_optimizers.py b/Tools/jit/_optimizers.py index 0adc550ba5e84c..297b9517f6a27a 100644 --- a/Tools/jit/_optimizers.py +++ b/Tools/jit/_optimizers.py @@ -1,6 +1,7 @@ """Low-level optimization of textual assembly.""" import dataclasses +import enum import pathlib import re import typing @@ -65,23 +66,72 @@ # MyPy doesn't understand that a invariant variable can be initialized by a covariant value CUSTOM_AARCH64_BRANCH19: str | None = "CUSTOM_AARCH64_BRANCH19" -# Branches are either b.{cond} or bc.{cond} -_AARCH64_BRANCHES: dict[str, tuple[str | None, str | None]] = { - "b." + cond: (("b." + inverse if inverse else None), CUSTOM_AARCH64_BRANCH19) - for (cond, inverse) in _AARCH64_COND_CODES.items() -} | { - "bc." + cond: (("bc." + inverse if inverse else None), CUSTOM_AARCH64_BRANCH19) - for (cond, inverse) in _AARCH64_COND_CODES.items() +_AARCH64_SHORT_BRANCHES = { + "tbz": "tbnz", + "tbnz": "tbz", } +# Branches are either b.{cond}, bc.{cond}, cbz, cbnz, tbz or tbnz +_AARCH64_BRANCHES: dict[str, tuple[str | None, str | None]] = ( + { + "b." + cond: (("b." + inverse if inverse else None), CUSTOM_AARCH64_BRANCH19) + for (cond, inverse) in _AARCH64_COND_CODES.items() + } + | { + "bc." + cond: (("bc." + inverse if inverse else None), CUSTOM_AARCH64_BRANCH19) + for (cond, inverse) in _AARCH64_COND_CODES.items() + } + | { + "cbz": ("cbnz", CUSTOM_AARCH64_BRANCH19), + "cbnz": ("cbz", CUSTOM_AARCH64_BRANCH19), + } + | {cond: (inverse, None) for (cond, inverse) in _AARCH64_SHORT_BRANCHES.items()} +) + + +@enum.unique +class InstructionKind(enum.Enum): + + JUMP = enum.auto() + LONG_BRANCH = enum.auto() + SHORT_BRANCH = enum.auto() + RETURN = enum.auto() + OTHER = enum.auto() + @dataclasses.dataclass +class Instruction: + kind: InstructionKind + name: str + text: str + target: str | None + + def is_branch(self) -> bool: + return self.kind in (InstructionKind.LONG_BRANCH, InstructionKind.SHORT_BRANCH) + + def update_target(self, target: str) -> "Instruction": + assert self.target is not None + return Instruction( + self.kind, self.name, self.text.replace(self.target, target), target + ) + + def update_name_and_target(self, name: str, target: str) -> "Instruction": + assert self.target is not None + return Instruction( + self.kind, + name, + self.text.replace(self.name, name).replace(self.target, target), + target, + ) + + +@dataclasses.dataclass(eq=False) class _Block: label: str | None = None # Non-instruction lines like labels, directives, and comments: noninstructions: list[str] = dataclasses.field(default_factory=list) # Instruction lines: - instructions: list[str] = dataclasses.field(default_factory=list) + instructions: list[Instruction] = dataclasses.field(default_factory=list) # If this block ends in a jump, where to? target: typing.Self | None = None # The next block in the linked list: @@ -108,6 +158,7 @@ class Optimizer: # Prefixes used to mangle local labels and symbols: label_prefix: str symbol_prefix: str + re_global: re.Pattern[str] # The first block in the linked list: _root: _Block = dataclasses.field(init=False, default_factory=_Block) _labels: dict[str, _Block] = dataclasses.field(init=False, default_factory=dict) @@ -122,27 +173,36 @@ class Optimizer: # Override everything that follows in subclasses: _supports_external_relocations = True _branches: typing.ClassVar[dict[str, tuple[str | None, str | None]]] = {} + # Short branches are instructions that can branch within a micro-op, + # but might not have the reach to branch anywhere within a trace. + _short_branches: typing.ClassVar[dict[str, str]] = {} # Two groups (instruction and target): _re_branch: typing.ClassVar[re.Pattern[str]] = _RE_NEVER_MATCH # One group (target): _re_jump: typing.ClassVar[re.Pattern[str]] = _RE_NEVER_MATCH # No groups: _re_return: typing.ClassVar[re.Pattern[str]] = _RE_NEVER_MATCH + text: str = "" + globals: set[str] = dataclasses.field(default_factory=set) def __post_init__(self) -> None: # Split the code into a linked list of basic blocks. A basic block is an # optional label, followed by zero or more non-instruction lines, # followed by zero or more instruction lines (only the last of which may # be a branch, jump, or return): - text = self._preprocess(self.path.read_text()) + self.text = self._preprocess(self.path.read_text()) block = self._root - for line in text.splitlines(): + for line in self.text.splitlines(): # See if we need to start a new block: if match := self._re_label.match(line): # Label. New block: block.link = block = self._lookup_label(match["label"]) block.noninstructions.append(line) continue + if match := self.re_global.match(line): + self.globals.add(match["label"]) + block.noninstructions.append(line) + continue if self._re_noninstructions.match(line): if block.instructions: # Non-instruction lines. New block: @@ -152,16 +212,19 @@ def __post_init__(self) -> None: if block.target or not block.fallthrough: # Current block ends with a branch, jump, or return. New block: block.link = block = _Block() - block.instructions.append(line) - if match := self._re_branch.match(line): + inst = self._parse_instruction(line) + block.instructions.append(inst) + if inst.is_branch(): # A block ending in a branch has a target and fallthrough: - block.target = self._lookup_label(match["target"]) + assert inst.target is not None + block.target = self._lookup_label(inst.target) assert block.fallthrough - elif match := self._re_jump.match(line): + elif inst.kind == InstructionKind.JUMP: # A block ending in a jump has a target and no fallthrough: - block.target = self._lookup_label(match["target"]) + assert inst.target is not None + block.target = self._lookup_label(inst.target) block.fallthrough = False - elif self._re_return.match(line): + elif inst.kind == InstructionKind.RETURN: # A block ending in a return has no target and fallthrough: assert not block.target block.fallthrough = False @@ -174,39 +237,47 @@ def _preprocess(self, text: str) -> str: continue_label = f"{self.label_prefix}_JIT_CONTINUE" return re.sub(continue_symbol, continue_label, text) - @classmethod - def _invert_branch(cls, line: str, target: str) -> str | None: - match = cls._re_branch.match(line) - assert match - inverted_reloc = cls._branches.get(match["instruction"]) + def _parse_instruction(self, line: str) -> Instruction: + target = None + if match := self._re_branch.match(line): + target = match["target"] + name = match["instruction"] + if name in self._short_branches: + kind = InstructionKind.SHORT_BRANCH + else: + kind = InstructionKind.LONG_BRANCH + elif match := self._re_jump.match(line): + target = match["target"] + name = line[: -len(target)].strip() + kind = InstructionKind.JUMP + elif match := self._re_return.match(line): + name = line + kind = InstructionKind.RETURN + else: + name, *_ = line.split(" ") + kind = InstructionKind.OTHER + return Instruction(kind, name, line, target) + + def _invert_branch(self, inst: Instruction, target: str) -> Instruction | None: + assert inst.is_branch() + if inst.kind == InstructionKind.SHORT_BRANCH and self._is_far_target(target): + return None + inverted_reloc = self._branches.get(inst.name) if inverted_reloc is None: return None inverted = inverted_reloc[0] if not inverted: return None - (a, b), (c, d) = match.span("instruction"), match.span("target") - # Before: - # je FOO - # After: - # jne BAR - return "".join([line[:a], inverted, line[b:c], target, line[d:]]) - - @classmethod - def _update_jump(cls, line: str, target: str) -> str: - match = cls._re_jump.match(line) - assert match - a, b = match.span("target") - # Before: - # jmp FOO - # After: - # jmp BAR - return "".join([line[:a], target, line[b:]]) + return inst.update_name_and_target(inverted, target) def _lookup_label(self, label: str) -> _Block: if label not in self._labels: self._labels[label] = _Block(label) return self._labels[label] + def _is_far_target(self, label: str) -> bool: + return not label.startswith(self.label_prefix) + def _blocks(self) -> typing.Generator[_Block, None, None]: block: _Block | None = self._root while block: @@ -214,7 +285,7 @@ def _blocks(self) -> typing.Generator[_Block, None, None]: block = block.link def _body(self) -> str: - lines = [] + lines = ["#" + line for line in self.text.splitlines()] hot = True for block in self._blocks(): if hot != block.hot: @@ -222,7 +293,8 @@ def _body(self) -> str: # Make it easy to tell at a glance where cold code is: lines.append(f"# JIT: {'HOT' if hot else 'COLD'} ".ljust(80, "#")) lines.extend(block.noninstructions) - lines.extend(block.instructions) + for inst in block.instructions: + lines.append(inst.text) return "\n".join(lines) def _predecessors(self, block: _Block) -> typing.Generator[_Block, None, None]: @@ -289,8 +361,8 @@ def _invert_hot_branches(self) -> None: if inverted is None: continue branch.instructions[-1] = inverted - jump.instructions[-1] = self._update_jump( - jump.instructions[-1], branch.target.label + jump.instructions[-1] = jump.instructions[-1].update_target( + branch.target.label ) branch.target, jump.target = jump.target, branch.target jump.hot = True @@ -299,49 +371,106 @@ def _remove_redundant_jumps(self) -> None: # Zero-length jumps can be introduced by _insert_continue_label and # _invert_hot_branches: for block in self._blocks(): + target = block.target + if target is None: + continue + target = target.resolve() # Before: # jmp FOO # FOO: # After: # FOO: - if ( - block.target - and block.link - and block.target.resolve() is block.link.resolve() - ): + if block.link and target is block.link.resolve(): block.target = None block.fallthrough = True block.instructions.pop() + # Before: + # br ? FOO: + # ... + # FOO: + # jump BAR + # After: + # br cond BAR + # ... + elif ( + len(target.instructions) == 1 + and target.instructions[0].kind == InstructionKind.JUMP + ): + assert target.target is not None + assert target.target.label is not None + if block.instructions[ + -1 + ].kind == InstructionKind.SHORT_BRANCH and self._is_far_target( + target.target.label + ): + continue + block.target = target.target + block.instructions[-1] = block.instructions[-1].update_target( + target.target.label + ) + + def _find_live_blocks(self) -> set[_Block]: + live: set[_Block] = set() + # Externally reachable blocks are live + todo: set[_Block] = {b for b in self._blocks() if b.label in self.globals} + while todo: + block = todo.pop() + live.add(block) + if block.fallthrough: + next = block.link + if next is not None and next not in live: + todo.add(next) + next = block.target + if next is not None and next not in live: + todo.add(next) + return live + + def _remove_unreachable(self) -> None: + live = self._find_live_blocks() + continuation = self._lookup_label(f"{self.label_prefix}_JIT_CONTINUE") + # Keep blocks after continuation as they may contain data and + # metadata that the assembler needs + prev: _Block | None = None + block = self._root + while block is not continuation: + next = block.link + assert next is not None + if not block in live and prev: + prev.link = next + else: + prev = block + block = next + assert prev.link is block def _fixup_external_labels(self) -> None: if self._supports_external_relocations: # Nothing to fix up return - for block in self._blocks(): + for index, block in enumerate(self._blocks()): if block.target and block.fallthrough: branch = block.instructions[-1] - match = self._re_branch.match(branch) - assert match is not None - target = match["target"] - reloc = self._branches[match["instruction"]][1] - if reloc is not None and not target.startswith(self.label_prefix): + assert branch.is_branch() + target = branch.target + assert target is not None + reloc = self._branches[branch.name][1] + if reloc is not None and self._is_far_target(target): name = target[len(self.symbol_prefix) :] - block.instructions[-1] = ( - f"// target='{target}' prefix='{self.label_prefix}'" - ) - block.instructions.append( - f"{self.symbol_prefix}{reloc}_JIT_RELOCATION_{name}:" + label = f"{self.symbol_prefix}{reloc}_JIT_RELOCATION_{name}_JIT_RELOCATION_{index}:" + block.instructions[-1] = Instruction( + InstructionKind.OTHER, "", label, None ) - a, b = match.span("target") - branch = "".join([branch[:a], "0", branch[b:]]) - block.instructions.append(branch) + block.instructions.append(branch.update_target("0")) def run(self) -> None: """Run this optimizer.""" self._insert_continue_label() self._mark_hot_blocks() - self._invert_hot_branches() - self._remove_redundant_jumps() + # Removing branches can expose opportunities for more branch removal. + # Repeat a few times. 2 would probably do, but it's fast enough with 4. + for _ in range(4): + self._invert_hot_branches() + self._remove_redundant_jumps() + self._remove_unreachable() self._fixup_external_labels() self.path.write_text(self._body()) @@ -350,10 +479,12 @@ class OptimizerAArch64(Optimizer): # pylint: disable = too-few-public-methods """aarch64-pc-windows-msvc/aarch64-apple-darwin/aarch64-unknown-linux-gnu""" _branches = _AARCH64_BRANCHES + _short_branches = _AARCH64_SHORT_BRANCHES # Mach-O does not support the 19 bit branch locations needed for branch reordering _supports_external_relocations = False + _branch_patterns = [name.replace(".", r"\.") for name in _AARCH64_BRANCHES] _re_branch = re.compile( - rf"\s*(?P{'|'.join(_AARCH64_BRANCHES)})\s+(.+,\s+)*(?P[\w.]+)" + rf"\s*(?P{'|'.join(_branch_patterns)})\s+(.+,\s+)*(?P[\w.]+)" ) # https://developer.arm.com/documentation/ddi0602/2025-03/Base-Instructions/B--Branch- @@ -366,6 +497,7 @@ class OptimizerX86(Optimizer): # pylint: disable = too-few-public-methods """i686-pc-windows-msvc/x86_64-apple-darwin/x86_64-unknown-linux-gnu""" _branches = _X86_BRANCHES + _short_branches = {} _re_branch = re.compile( rf"\s*(?P{'|'.join(_X86_BRANCHES)})\s+(?P[\w.]+)" ) diff --git a/Tools/jit/_stencils.py b/Tools/jit/_stencils.py index e717365b6b9785..5c45ab930a4ac4 100644 --- a/Tools/jit/_stencils.py +++ b/Tools/jit/_stencils.py @@ -226,7 +226,7 @@ def convert_labels_to_relocations(self) -> None: for name, hole_plus in self.symbols.items(): if isinstance(name, str) and "_JIT_RELOCATION_" in name: _, offset = hole_plus - reloc, target = name.split("_JIT_RELOCATION_") + reloc, target, _ = name.split("_JIT_RELOCATION_") value, symbol = symbol_to_value(target) hole = Hole( int(offset), typing.cast(_schema.HoleKind, reloc), value, symbol, 0 diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py index 4c188d74a68602..adb8a8d8ecb8a1 100644 --- a/Tools/jit/_targets.py +++ b/Tools/jit/_targets.py @@ -46,6 +46,7 @@ class _Target(typing.Generic[_S, _R]): optimizer: type[_optimizers.Optimizer] = _optimizers.Optimizer label_prefix: typing.ClassVar[str] symbol_prefix: typing.ClassVar[str] + re_global: typing.ClassVar[re.Pattern[str]] stable: bool = False debug: bool = False verbose: bool = False @@ -180,7 +181,10 @@ async def _compile( "clang", args_s, echo=self.verbose, llvm_version=self.llvm_version ) self.optimizer( - s, label_prefix=self.label_prefix, symbol_prefix=self.symbol_prefix + s, + label_prefix=self.label_prefix, + symbol_prefix=self.symbol_prefix, + re_global=self.re_global, ).run() args_o = [f"--target={self.triple}", "-c", "-o", f"{o}", f"{s}"] await _llvm.run( @@ -355,12 +359,14 @@ class _COFF32(_COFF): # These mangle like Mach-O and other "older" formats: label_prefix = "L" symbol_prefix = "_" + re_global = re.compile(r'\s*\.def\s+(?P