diff --git a/.cspell.dict/cpython.txt b/.cspell.dict/cpython.txt index b8081e25a9b..a9fbc8f4318 100644 --- a/.cspell.dict/cpython.txt +++ b/.cspell.dict/cpython.txt @@ -127,8 +127,8 @@ NEWLOCALS newsemlockobject nfrees nkwargs -nlocalsplus nkwelts +nlocalsplus Nondescriptor noninteger nops @@ -154,12 +154,14 @@ prec preinitialized pybuilddir pycore +pyinner pydecimal Pyfunc pylifecycle pymain pyrepl PYTHONTRACEMALLOC +PYTHONUTF8 pythonw PYTHREAD_NAME releasebuffer @@ -171,9 +173,11 @@ saveall scls setdict setfunc +setprofileallthreads SETREF setresult setslice +settraceallthreads SLOTDEFINED SMALLBUF SOABI @@ -190,8 +194,10 @@ subparams subscr sval swappedbytes +sysdict templatelib testconsole +threadstate ticketer tmptype tok_oldval diff --git a/.cspell.dict/rust-more.txt b/.cspell.dict/rust-more.txt index c3ebd61833a..c4457723c6c 100644 --- a/.cspell.dict/rust-more.txt +++ b/.cspell.dict/rust-more.txt @@ -5,7 +5,9 @@ biguint bindgen bitand bitflags +bitflagset bitor +bitvec bitxor bstr byteorder @@ -58,6 +60,7 @@ powi prepended punct replacen +retag rmatch rposition rsplitn @@ -89,5 +92,3 @@ widestring winapi winresource winsock -bitvec -Bitvec diff --git a/.cspell.json b/.cspell.json index e2b1d86aaeb..07fe948c5bf 100644 --- a/.cspell.json +++ b/.cspell.json @@ -152,11 +152,6 @@ "IFEXEC", // "stat" "FIRMLINK", - // CPython internal names - "PYTHONUTF", - "sysdict", - "settraceallthreads", - "setprofileallthreads" ], // flagWords - list of words to be always considered incorrect "flagWords": [ diff --git a/.github/actions/install-linux-deps/action.yml b/.github/actions/install-linux-deps/action.yml new file mode 100644 index 00000000000..7900060fb29 --- /dev/null +++ b/.github/actions/install-linux-deps/action.yml @@ -0,0 +1,49 @@ +# This action installs a few dependencies necessary to build RustPython on Linux. 
+# It can be configured depending on which libraries are needed: +# +# ``` +# - uses: ./.github/actions/install-linux-deps +# with: +# gcc-multilib: true +# musl-tools: false +# ``` +# +# See the `inputs` section for all options and their defaults. Note that you must checkout the +# repository before you can use this action. +# +# This action will only install dependencies when the current operating system is Linux. It will do +# nothing on any other OS (macOS, Windows). + +name: Install Linux dependencies +description: Installs the dependencies necessary to build RustPython on Linux. +inputs: + gcc-multilib: + description: Install gcc-multilib (gcc-multilib) + required: false + default: "false" + musl-tools: + description: Install musl-tools (musl-tools) + required: false + default: "false" + gcc-aarch64-linux-gnu: + description: Install gcc-aarch64-linux-gnu (gcc-aarch64-linux-gnu) + required: false + default: "false" + clang: + description: Install clang (clang) + required: false + default: "false" +runs: + using: composite + steps: + - name: Install Linux dependencies + shell: bash + if: ${{ runner.os == 'Linux' }} + run: > + sudo apt-get update + + sudo apt-get install --no-install-recommends + ${{ fromJSON(inputs.gcc-multilib) && 'gcc-multilib' || '' }} + ${{ fromJSON(inputs.musl-tools) && 'musl-tools' || '' }} + ${{ fromJSON(inputs.clang) && 'clang' || '' }} + ${{ fromJSON(inputs.gcc-aarch64-linux-gnu) && 'gcc-aarch64-linux-gnu linux-libc-dev-arm64-cross libc6-dev-arm64-cross' || '' }} diff --git a/.github/actions/install-macos-deps/action.yml b/.github/actions/install-macos-deps/action.yml new file mode 100644 index 00000000000..46abef197a4 --- /dev/null +++ b/.github/actions/install-macos-deps/action.yml @@ -0,0 +1,47 @@ +# This action installs a few dependencies necessary to build RustPython on macOS. 
By default it installs +# autoconf, automake and libtool, but can be configured depending on which libraries are needed: +# +# ``` +# - uses: ./.github/actions/install-macos-deps +# with: +# openssl: true +# libtool: false +# ``` +# +# See the `inputs` section for all options and their defaults. Note that you must checkout the +# repository before you can use this action. +# +# This action will only install dependencies when the current operating system is macOS. It will do +# nothing on any other OS (Linux, Windows). + +name: Install macOS dependencies +description: Installs the dependencies necessary to build RustPython on macOS. +inputs: + autoconf: + description: Install autoconf (autoconf) + required: false + default: "true" + automake: + description: Install automake (automake) + required: false + default: "true" + libtool: + description: Install libtool (libtool) + required: false + default: "true" + openssl: + description: Install openssl (openssl@3) + required: false + default: "false" +runs: + using: composite + steps: + - name: Install macOS dependencies + shell: bash + if: ${{ runner.os == 'macOS' }} + run: > + brew install + ${{ fromJSON(inputs.autoconf) && 'autoconf' || '' }} + ${{ fromJSON(inputs.automake) && 'automake' || '' }} + ${{ fromJSON(inputs.libtool) && 'libtool' || '' }} + ${{ fromJSON(inputs.openssl) && 'openssl@3' || '' }} diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index a58490666c7..15b4997cfcf 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -129,25 +129,20 @@ jobs: os: [macos-latest, ubuntu-latest, windows-2025] fail-fast: false steps: - - uses: actions/checkout@v6.0.2 + - uses: actions/checkout@v6 - uses: dtolnay/rust-toolchain@stable with: components: clippy - uses: Swatinem/rust-cache@v2 - - name: Set up the Mac environment - run: brew install autoconf automake libtool - if: runner.os == 'macOS' + - name: Install macOS dependencies + uses: ./.github/actions/install-macos-deps - name: 
run clippy run: cargo clippy ${{ env.CARGO_ARGS }} --workspace --all-targets ${{ env.WORKSPACE_EXCLUDES }} -- -Dwarnings - name: run rust tests run: cargo test --workspace ${{ env.WORKSPACE_EXCLUDES }} --verbose --features threading ${{ env.CARGO_ARGS }} - if: runner.os != 'macOS' - - name: run rust tests - run: cargo test --workspace ${{ env.WORKSPACE_EXCLUDES }} --exclude rustpython-jit --verbose --features threading ${{ env.CARGO_ARGS }} - if: runner.os == 'macOS' - name: check compilation without threading run: cargo check ${{ env.CARGO_ARGS }} @@ -189,94 +184,58 @@ jobs: PYTHONPATH: scripts if: runner.os == 'Linux' - - name: prepare Intel MacOS build - uses: dtolnay/rust-toolchain@stable - with: - target: x86_64-apple-darwin - if: runner.os == 'macOS' - - name: Check compilation for Intel MacOS - run: cargo check --target x86_64-apple-darwin - if: runner.os == 'macOS' - - name: prepare iOS build - uses: dtolnay/rust-toolchain@stable - with: - target: aarch64-apple-ios - if: runner.os == 'macOS' - - name: Check compilation for iOS - run: cargo check --target aarch64-apple-ios ${{ env.CARGO_ARGS_NO_SSL }} - if: runner.os == 'macOS' - - exotic_targets: + cargo_check: if: ${{ !contains(github.event.pull_request.labels.*.name, 'skip:ci') }} name: Ensure compilation on various targets - runs-on: ubuntu-latest - timeout-minutes: 30 + runs-on: ${{ matrix.os }} + strategy: + matrix: + include: + - os: ubuntu-latest + targets: + - aarch64-linux-android + - i686-unknown-linux-gnu + - i686-unknown-linux-musl + - wasm32-wasip2 + - x86_64-unknown-freebsd + dependencies: + gcc-multilib: true + musl-tools: true + - os: ubuntu-latest + targets: + - aarch64-unknown-linux-gnu + dependencies: + gcc-aarch64-linux-gnu: true # conflict with `gcc-multilib` + - os: macos-latest + targets: + - aarch64-apple-ios + - x86_64-apple-darwin + fail-fast: false steps: - - uses: actions/checkout@v6.0.2 - - uses: dtolnay/rust-toolchain@stable + - uses: 
actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: - target: i686-unknown-linux-gnu + persist-credentials: false - - name: Install gcc-multilib and musl-tools - run: sudo apt-get update && sudo apt-get install gcc-multilib musl-tools - - name: Check compilation for x86 32bit - run: cargo check --target i686-unknown-linux-gnu ${{ env.CARGO_ARGS_NO_SSL }} + - uses: Swatinem/rust-cache@v2 + with: + prefix-key: v0-rust-${{ join(matrix.targets, '-') }} + + - name: Install dependencies + uses: ./.github/actions/install-linux-deps + with: ${{ matrix.dependencies || fromJSON('{}') }} - uses: dtolnay/rust-toolchain@stable with: - target: aarch64-linux-android + targets: ${{ join(matrix.targets, ',') }} - name: Setup Android NDK + if: ${{ contains(matrix.targets, 'aarch64-linux-android') }} id: setup-ndk uses: nttld/setup-ndk@v1 with: ndk-version: r27 add-to-path: true - - name: Check compilation for android - run: cargo check --target aarch64-linux-android ${{ env.CARGO_ARGS_NO_SSL }} - env: - CC_aarch64_linux_android: ${{ steps.setup-ndk.outputs.ndk-path }}/toolchains/llvm/prebuilt/linux-x86_64/bin/aarch64-linux-android24-clang - AR_aarch64_linux_android: ${{ steps.setup-ndk.outputs.ndk-path }}/toolchains/llvm/prebuilt/linux-x86_64/bin/llvm-ar - CARGO_TARGET_AARCH64_LINUX_ANDROID_LINKER: ${{ steps.setup-ndk.outputs.ndk-path }}/toolchains/llvm/prebuilt/linux-x86_64/bin/aarch64-linux-android24-clang - - - uses: dtolnay/rust-toolchain@stable - with: - target: aarch64-unknown-linux-gnu - - - name: Install gcc-aarch64-linux-gnu - run: sudo apt install gcc-aarch64-linux-gnu - - name: Check compilation for aarch64 linux gnu - run: cargo check --target aarch64-unknown-linux-gnu ${{ env.CARGO_ARGS_NO_SSL }} - - - uses: dtolnay/rust-toolchain@stable - with: - target: i686-unknown-linux-musl - - - name: Check compilation for musl - run: cargo check --target i686-unknown-linux-musl ${{ env.CARGO_ARGS_NO_SSL }} - - - uses: dtolnay/rust-toolchain@stable - with: - 
target: x86_64-unknown-freebsd - - - name: Check compilation for freebsd - run: cargo check --target x86_64-unknown-freebsd ${{ env.CARGO_ARGS_NO_SSL }} - - - uses: dtolnay/rust-toolchain@stable - with: - target: x86_64-unknown-freebsd - - - name: Check compilation for freeBSD - run: cargo check --target x86_64-unknown-freebsd ${{ env.CARGO_ARGS_NO_SSL }} - - - uses: dtolnay/rust-toolchain@stable - with: - target: wasm32-wasip2 - - - name: Check compilation for wasip2 - run: cargo check --target wasm32-wasip2 ${{ env.CARGO_ARGS_NO_SSL }} - # - name: Prepare repository for redox compilation # run: bash scripts/redox/uncomment-cargo.sh # - name: Check compilation for Redox @@ -285,6 +244,19 @@ jobs: # command: check # args: --ignore-rust-version + - name: Check compilation + run: | + for target in ${{ join(matrix.targets, ' ') }} + do + echo "::group::${target}" + cargo check --target $target ${{ env.CARGO_ARGS_NO_SSL }} + echo "::endgroup::" + done + env: + CC_aarch64_linux_android: ${{ steps.setup-ndk.outputs.ndk-path }}/toolchains/llvm/prebuilt/linux-x86_64/bin/aarch64-linux-android24-clang + AR_aarch64_linux_android: ${{ steps.setup-ndk.outputs.ndk-path }}/toolchains/llvm/prebuilt/linux-x86_64/bin/llvm-ar + CARGO_TARGET_AARCH64_LINUX_ANDROID_LINKER: ${{ steps.setup-ndk.outputs.ndk-path }}/toolchains/llvm/prebuilt/linux-x86_64/bin/aarch64-linux-android24-clang + snippets_cpython: if: ${{ !contains(github.event.pull_request.labels.*.name, 'skip:ci') }} env: @@ -293,27 +265,27 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: [macos-latest, ubuntu-latest, windows-2025] + os: + - macos-latest + - ubuntu-latest + - windows-2025 fail-fast: false steps: - - uses: actions/checkout@v6.0.2 + - uses: actions/checkout@v6 - uses: dtolnay/rust-toolchain@stable - uses: Swatinem/rust-cache@v2 - uses: actions/setup-python@v6.2.0 with: python-version: ${{ env.PYTHON_VERSION }} - - name: Set up the Mac environment - run: brew install autoconf automake libtool openssl@3 - 
if: runner.os == 'macOS' - - name: build rustpython - run: cargo build --release --verbose --features=threading ${{ env.CARGO_ARGS }} - if: runner.os == 'macOS' - - name: build rustpython - run: cargo build --release --verbose --features=threading ${{ env.CARGO_ARGS }},jit - if: runner.os != 'macOS' - - uses: actions/setup-python@v6.2.0 + + - name: Install macOS dependencies + uses: ./.github/actions/install-macos-deps with: - python-version: ${{ env.PYTHON_VERSION }} + openssl: true + + - name: build rustpython + run: cargo build --release --verbose --features=threading,jit ${{ env.CARGO_ARGS }} + - name: run snippets run: python -m pip install -r requirements.txt && pytest -v working-directory: ./extra_tests @@ -445,20 +417,16 @@ jobs: run: | target/release/rustpython -m venv testvenv testvenv/bin/rustpython -m pip install wheel - - if: runner.os != 'macOS' - name: Check whats_left is not broken - shell: bash - run: python -I scripts/whats_left.py --no-default-features --features "$(sed -e 's/--[^ ]*//g' <<< "${{ env.CARGO_ARGS }}" | tr -d '[:space:]'),threading,jit" - - if: runner.os == 'macOS' # TODO fix jit on macOS - name: Check whats_left is not broken (macOS) + + - name: Check whats_left is not broken shell: bash - run: python -I scripts/whats_left.py --no-default-features --features "$(sed -e 's/--[^ ]*//g' <<< "${{ env.CARGO_ARGS }}" | tr -d '[:space:]'),threading" # no jit on macOS for now + run: python -I scripts/whats_left.py ${{ env.CARGO_ARGS }} --features jit lint: name: Lint Rust & Python code runs-on: ubuntu-latest steps: - - uses: actions/checkout@v6.0.2 + - uses: actions/checkout@v6 - uses: actions/setup-python@v6.2.0 with: python-version: ${{ env.PYTHON_VERSION }} @@ -486,7 +454,7 @@ jobs: - name: Install ruff uses: astral-sh/ruff-action@4919ec5cf1f49eff0871dbcea0da843445b837e6 # v3.6.1 with: - version: "0.15.4" + version: "0.15.5" args: "--version" - run: ruff check --diff @@ -514,9 +482,9 @@ jobs: runs-on: ubuntu-latest timeout-minutes: 30 
env: - NIGHTLY_CHANNEL: nightly-2026-02-11 # https://github.com/rust-lang/miri/issues/4855 + NIGHTLY_CHANNEL: nightly steps: - - uses: actions/checkout@v6.0.2 + - uses: actions/checkout@v6 - uses: dtolnay/rust-toolchain@master with: @@ -538,7 +506,7 @@ jobs: runs-on: ubuntu-latest timeout-minutes: 30 steps: - - uses: actions/checkout@v6.0.2 + - uses: actions/checkout@v6 - uses: dtolnay/rust-toolchain@stable - uses: Swatinem/rust-cache@v2 @@ -601,7 +569,7 @@ jobs: runs-on: ubuntu-latest timeout-minutes: 30 steps: - - uses: actions/checkout@v6.0.2 + - uses: actions/checkout@v6 - uses: dtolnay/rust-toolchain@stable with: target: wasm32-wasip1 @@ -609,8 +577,12 @@ jobs: - uses: Swatinem/rust-cache@v2 - name: Setup Wasmer uses: wasmerio/setup-wasmer@v3 + - name: Install clang - run: sudo apt-get update && sudo apt-get install clang -y + uses: ./.github/actions/install-linux-deps + with: + clang: true + - name: build rustpython run: cargo build --release --target wasm32-wasip1 --features freeze-stdlib,stdlib --verbose - name: run snippets diff --git a/.github/workflows/cron-ci.yaml b/.github/workflows/cron-ci.yaml index f451984fb53..64a7d5c88e5 100644 --- a/.github/workflows/cron-ci.yaml +++ b/.github/workflows/cron-ci.yaml @@ -24,7 +24,7 @@ jobs: # Disable this scheduled job when running on a fork. if: ${{ github.repository == 'RustPython/RustPython' || github.event_name != 'schedule' }} steps: - - uses: actions/checkout@v6.0.2 + - uses: actions/checkout@v6 - uses: dtolnay/rust-toolchain@stable - uses: taiki-e/install-action@cargo-llvm-cov - uses: actions/setup-python@v6.2.0 @@ -53,7 +53,7 @@ jobs: # Disable this scheduled job when running on a fork. 
if: ${{ github.repository == 'RustPython/RustPython' || github.event_name != 'schedule' }} steps: - - uses: actions/checkout@v6.0.2 + - uses: actions/checkout@v6 - uses: dtolnay/rust-toolchain@stable - name: build rustpython run: cargo build --release --verbose @@ -85,7 +85,7 @@ jobs: # Disable this scheduled job when running on a fork. if: ${{ github.repository == 'RustPython/RustPython' || github.event_name != 'schedule' }} steps: - - uses: actions/checkout@v6.0.2 + - uses: actions/checkout@v6 - uses: dtolnay/rust-toolchain@stable - uses: actions/setup-python@v6.2.0 with: @@ -143,7 +143,7 @@ jobs: # Disable this scheduled job when running on a fork. if: ${{ github.repository == 'RustPython/RustPython' || github.event_name != 'schedule' }} steps: - - uses: actions/checkout@v6.0.2 + - uses: actions/checkout@v6 - uses: dtolnay/rust-toolchain@stable - uses: actions/setup-python@v6.2.0 with: diff --git a/.github/workflows/lib-deps-check.yaml b/.github/workflows/lib-deps-check.yaml index 550ba2f2529..4eed6b77b16 100644 --- a/.github/workflows/lib-deps-check.yaml +++ b/.github/workflows/lib-deps-check.yaml @@ -21,7 +21,7 @@ jobs: timeout-minutes: 10 steps: - name: Checkout base branch - uses: actions/checkout@v6.0.2 + uses: actions/checkout@v6 with: # Use base branch for scripts (security: don't run PR code with elevated permissions) ref: ${{ github.event.pull_request.base.ref }} diff --git a/.github/workflows/pr-auto-commit.yaml b/.github/workflows/pr-auto-commit.yaml deleted file mode 100644 index ceaa78ba28b..00000000000 --- a/.github/workflows/pr-auto-commit.yaml +++ /dev/null @@ -1,122 +0,0 @@ -name: Auto-format PR - -# This workflow triggers when a PR is opened/updated -on: - pull_request_target: - types: [opened, synchronize, reopened] - branches: - - main - - release - -concurrency: - group: auto-format-${{ github.event.pull_request.number }} - cancel-in-progress: true - -jobs: - auto_format: - permissions: - contents: write - pull-requests: write - runs-on: 
ubuntu-latest - timeout-minutes: 60 - steps: - - name: Checkout PR branch - uses: actions/checkout@v6.0.2 - with: - ref: ${{ github.event.pull_request.head.sha }} - repository: ${{ github.event.pull_request.head.repo.full_name }} - token: ${{ secrets.AUTO_COMMIT_PAT }} - fetch-depth: 0 - - - name: Setup Rust - uses: dtolnay/rust-toolchain@stable - with: - components: rustfmt - - - name: Configure git - run: | - git config user.name "github-actions[bot]" - git config user.email "github-actions[bot]@users.noreply.github.com" - echo "" > /tmp/committed_commands.txt - - - name: Run cargo fmt - run: | - echo "Running cargo fmt --all on PR #${{ github.event.pull_request.number }}" - cargo fmt --all - if [ -n "$(git status --porcelain)" ]; then - git add -u - git commit -m "Auto-format: cargo fmt --all" - echo "- \`cargo fmt --all\`" >> /tmp/committed_commands.txt - fi - - - name: Install ruff - uses: astral-sh/ruff-action@4919ec5cf1f49eff0871dbcea0da843445b837e6 # v3.6.1 - with: - version: "0.15.4" - args: "--version" - - - name: Run ruff format - run: | - ruff format - if [ -n "$(git status --porcelain)" ]; then - git add -u - git commit -m "Auto-format: ruff format" - echo "- \`ruff format\`" >> /tmp/committed_commands.txt - fi - - - name: Run ruff check import sorting - run: | - ruff check --select I --fix - if [ -n "$(git status --porcelain)" ]; then - git add -u - git commit -m "Auto-format: ruff check --select I --fix" - echo "- \`ruff check --select I --fix\`" >> /tmp/committed_commands.txt - fi - - - name: Run generate_opcode_metadata.py - run: | - python scripts/generate_opcode_metadata.py - if [ -n "$(git status --porcelain)" ]; then - git add -u - git commit -m "Auto-generate: generate_opcode_metadata.py" - echo "- \`python scripts/generate_opcode_metadata.py\`" >> /tmp/committed_commands.txt - fi - - - name: Check for changes - id: check-changes - run: | - if [ "$(git rev-parse HEAD)" != "${{ github.event.pull_request.head.sha }}" ]; then - echo 
"has_changes=true" >> $GITHUB_OUTPUT - else - echo "has_changes=false" >> $GITHUB_OUTPUT - fi - - - name: Push formatting changes - if: steps.check-changes.outputs.has_changes == 'true' - env: - HEAD_REF: ${{ github.event.pull_request.head.ref }} - run: | - git push origin "HEAD:${HEAD_REF}" - - - name: Read committed commands - id: committed-commands - if: steps.check-changes.outputs.has_changes == 'true' - run: | - echo "list<<EOF" >> $GITHUB_OUTPUT - cat /tmp/committed_commands.txt >> $GITHUB_OUTPUT - echo "EOF" >> $GITHUB_OUTPUT - - - name: Comment on PR - if: steps.check-changes.outputs.has_changes == 'true' - uses: marocchino/sticky-pull-request-comment@v2 - with: - number: ${{ github.event.pull_request.number }} - message: | - **Code has been automatically formatted** - - The code in this PR has been formatted using: - ${{ steps.committed-commands.outputs.list }} - Please pull the latest changes before pushing again: - ```bash - git pull origin ${{ github.event.pull_request.head.ref }} - ``` diff --git a/.github/workflows/pr-format.yaml b/.github/workflows/pr-format.yaml new file mode 100644 index 00000000000..e9d55bb40ee --- /dev/null +++ b/.github/workflows/pr-format.yaml @@ -0,0 +1,64 @@ +name: Format Check + +# This workflow triggers when a PR is opened/updated +# Posts inline suggestion comments instead of auto-committing +on: + pull_request: + types: [opened, synchronize, reopened] + branches: + - main + - release + +concurrency: + group: format-check-${{ github.event.pull_request.number }} + cancel-in-progress: true + +jobs: + format_check: + permissions: + contents: read + pull-requests: write + runs-on: ubuntu-latest + timeout-minutes: 60 + steps: + - name: Checkout PR branch + uses: actions/checkout@v6 + + - name: Setup Rust + uses: dtolnay/rust-toolchain@stable + with: + components: rustfmt + + - name: Run cargo fmt + run: cargo fmt --all + + - name: Install ruff + uses: astral-sh/ruff-action@4919ec5cf1f49eff0871dbcea0da843445b837e6 # v3.6.1 + with: + 
version: "0.15.4" + args: "--version" + + - name: Run ruff format + run: ruff format + + - name: Run ruff check import sorting + run: ruff check --select I --fix + + - name: Run generate_opcode_metadata.py + run: python scripts/generate_opcode_metadata.py + + - name: Check for formatting changes + run: | + if ! git diff --exit-code; then + echo "::error::Formatting changes detected. Please run 'cargo fmt --all', 'ruff format', and 'ruff check --select I --fix' locally." + exit 1 + fi + + - name: Post formatting suggestions + if: failure() + uses: reviewdog/action-suggester@v1 + with: + tool_name: auto-format + github_token: ${{ secrets.GITHUB_TOKEN }} + level: warning + filter_mode: diff_context diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index ab5f6e230f4..d640ac87a3b 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -52,7 +52,7 @@ jobs: # target: aarch64-pc-windows-msvc fail-fast: false steps: - - uses: actions/checkout@v6.0.2 + - uses: actions/checkout@v6 - uses: dtolnay/rust-toolchain@stable - uses: cargo-bins/cargo-binstall@main @@ -88,7 +88,7 @@ jobs: # Disable this scheduled job when running on a fork. 
if: ${{ github.repository == 'RustPython/RustPython' || github.event_name != 'schedule' }} steps: - - uses: actions/checkout@v6.0.2 + - uses: actions/checkout@v6 - uses: dtolnay/rust-toolchain@stable with: targets: wasm32-wasip1 @@ -139,7 +139,7 @@ jobs: if: ${{ github.repository == 'RustPython/RustPython' || github.event_name != 'schedule' }} needs: [build, build-wasm] steps: - - uses: actions/checkout@v6.0.2 + - uses: actions/checkout@v6 - name: Download Binary Artifacts uses: actions/download-artifact@v8.0.0 diff --git a/.github/workflows/upgrade-pylib.lock.yml b/.github/workflows/upgrade-pylib.lock.yml index 32aa8743ff7..06b4d12b42e 100644 --- a/.github/workflows/upgrade-pylib.lock.yml +++ b/.github/workflows/upgrade-pylib.lock.yml @@ -58,7 +58,7 @@ jobs: comment_repo: "" steps: - name: Setup Scripts - uses: github/gh-aw/actions/setup@88319be75ab1adc60640307a10e5cf04b3deff1e # v0.51.5 + uses: github/gh-aw/actions/setup@f1073c5498ee46fec1530555a7c953445417c69b # v0.56.2 with: destination: /opt/gh-aw/actions - name: Check workflow file timestamps @@ -99,7 +99,7 @@ jobs: secret_verification_result: ${{ steps.validate-secret.outputs.verification_result }} steps: - name: Setup Scripts - uses: github/gh-aw/actions/setup@88319be75ab1adc60640307a10e5cf04b3deff1e # v0.51.5 + uses: github/gh-aw/actions/setup@f1073c5498ee46fec1530555a7c953445417c69b # v0.56.2 with: destination: /opt/gh-aw/actions - name: Checkout repository @@ -804,7 +804,7 @@ jobs: total_count: ${{ steps.missing_tool.outputs.total_count }} steps: - name: Setup Scripts - uses: github/gh-aw/actions/setup@88319be75ab1adc60640307a10e5cf04b3deff1e # v0.51.5 + uses: github/gh-aw/actions/setup@f1073c5498ee46fec1530555a7c953445417c69b # v0.56.2 with: destination: /opt/gh-aw/actions - name: Download agent output artifact @@ -925,7 +925,7 @@ jobs: success: ${{ steps.parse_results.outputs.success }} steps: - name: Setup Scripts - uses: github/gh-aw/actions/setup@88319be75ab1adc60640307a10e5cf04b3deff1e # v0.51.5 + 
uses: github/gh-aw/actions/setup@f1073c5498ee46fec1530555a7c953445417c69b # v0.56.2 with: destination: /opt/gh-aw/actions - name: Download agent artifacts @@ -1037,7 +1037,7 @@ jobs: process_safe_outputs_temporary_id_map: ${{ steps.process_safe_outputs.outputs.temporary_id_map }} steps: - name: Setup Scripts - uses: github/gh-aw/actions/setup@88319be75ab1adc60640307a10e5cf04b3deff1e # v0.51.5 + uses: github/gh-aw/actions/setup@f1073c5498ee46fec1530555a7c953445417c69b # v0.56.2 with: destination: /opt/gh-aw/actions - name: Download agent output artifact diff --git a/Cargo.lock b/Cargo.lock index e2a2f05a733..68adc9d682c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -249,9 +249,9 @@ checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" [[package]] name = "aws-lc-fips-sys" -version = "0.13.11" +version = "0.13.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df6ea8e07e2df15b9f09f2ac5ee2977369b06d116f0c4eb5fa4ad443b73c7f53" +checksum = "5ed8cd42adddefbdb8507fb7443fa9b666631078616b78f70ed22117b5c27d90" dependencies = [ "bindgen 0.72.1", "cc", @@ -349,6 +349,18 @@ version = "2.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" +[[package]] +name = "bitflagset" +version = "0.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64b6ee310aa7af14142c8c9121775774ff601ae055ed98ba7fac96098bcde1b9" +dependencies = [ + "num-integer", + "num-traits", + "radium", + "ref-cast", +] + [[package]] name = "blake2" version = "0.10.6" @@ -1527,9 +1539,9 @@ dependencies = [ [[package]] name = "insta" -version = "1.46.1" +version = "1.46.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "248b42847813a1550dafd15296fd9748c651d0c32194559dbc05d804d54b21e8" +checksum = "e82db8c87c7f1ccecb34ce0c24399b8a73081427f3c7c50a5d597925356115e4" dependencies = [ "console", "once_cell", @@ -1721,9 
+1733,9 @@ checksum = "2c4a545a15244c7d945065b5d392b2d2d7f21526fba56ce51467b06ed445e8f7" [[package]] name = "libc" -version = "0.2.182" +version = "0.2.183" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112" +checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d" [[package]] name = "libffi" @@ -1970,9 +1982,9 @@ checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" [[package]] name = "memmap2" -version = "0.9.9" +version = "0.9.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "744133e4a0e0a658e1374cf3bf8e415c4052a15a111acd372764c55b4177d490" +checksum = "714098028fe011992e1c3962653c96b2d578c4b4bce9036e15ff220319b1e0e3" dependencies = [ "libc", ] @@ -2168,9 +2180,9 @@ checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" [[package]] name = "openssl" -version = "0.10.75" +version = "0.10.76" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08838db121398ad17ab8531ce9de97b244589089e290a384c900cb9ff7434328" +checksum = "951c002c75e16ea2c65b8c7e4d3d51d5530d8dfa7d060b4776828c88cfb18ecf" dependencies = [ "bitflags 2.11.0", "cfg-if", @@ -2209,9 +2221,9 @@ dependencies = [ [[package]] name = "openssl-sys" -version = "0.9.111" +version = "0.9.112" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "82cab2d520aa75e3c58898289429321eb788c3106963d0dc886ec7a5f4adc321" +checksum = "57d55af3b3e226502be1526dfdba67ab0e9c96fc293004e79576b2b9edb0dbdb" dependencies = [ "cc", "libc", @@ -2258,8 +2270,7 @@ dependencies = [ [[package]] name = "parking_lot_core" version = "0.9.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" +source = "git+https://github.com/youknowone/parking_lot?branch=rustpython#4392edbe879acc9c0dd94eda53d2205d3ab912c9" 
dependencies = [ "cfg-if", "libc", @@ -2611,9 +2622,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.44" +version = "1.0.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21b2ebcf727b7760c461f091f9f0f539b77b8e87f2fd88131e7f1b433b3cece4" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" dependencies = [ "proc-macro2", ] @@ -2770,6 +2781,26 @@ dependencies = [ "thiserror 1.0.69", ] +[[package]] +name = "ref-cast" +version = "1.0.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f354300ae66f76f1c85c5f84693f0ce81d747e2c3f21a45fef496d89c960bf7d" +dependencies = [ + "ref-cast-impl", +] + +[[package]] +name = "ref-cast-impl" +version = "1.0.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7186006dcb21920990093f30e3dea63b7d6e977bf1256be20c3563a5db070da" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "regalloc2" version = "0.13.5" @@ -2863,7 +2894,7 @@ dependencies = [ [[package]] name = "ruff_python_ast" version = "0.0.0" -source = "git+https://github.com/astral-sh/ruff.git?rev=f14edd8661e2803254f89265548c7487f47a09f6#f14edd8661e2803254f89265548c7487f47a09f6" +source = "git+https://github.com/astral-sh/ruff.git?rev=5e4a3d9c3b381df20f6a52caef0f56ed0ebc74be#5e4a3d9c3b381df20f6a52caef0f56ed0ebc74be" dependencies = [ "aho-corasick", "bitflags 2.11.0", @@ -2881,7 +2912,7 @@ dependencies = [ [[package]] name = "ruff_python_parser" version = "0.0.0" -source = "git+https://github.com/astral-sh/ruff.git?rev=f14edd8661e2803254f89265548c7487f47a09f6#f14edd8661e2803254f89265548c7487f47a09f6" +source = "git+https://github.com/astral-sh/ruff.git?rev=5e4a3d9c3b381df20f6a52caef0f56ed0ebc74be#5e4a3d9c3b381df20f6a52caef0f56ed0ebc74be" dependencies = [ "bitflags 2.11.0", "bstr", @@ -2901,7 +2932,7 @@ dependencies = [ [[package]] name = "ruff_python_trivia" version = "0.0.0" -source = 
"git+https://github.com/astral-sh/ruff.git?rev=f14edd8661e2803254f89265548c7487f47a09f6#f14edd8661e2803254f89265548c7487f47a09f6" +source = "git+https://github.com/astral-sh/ruff.git?rev=5e4a3d9c3b381df20f6a52caef0f56ed0ebc74be#5e4a3d9c3b381df20f6a52caef0f56ed0ebc74be" dependencies = [ "itertools 0.14.0", "ruff_source_file", @@ -2912,7 +2943,7 @@ dependencies = [ [[package]] name = "ruff_source_file" version = "0.0.0" -source = "git+https://github.com/astral-sh/ruff.git?rev=f14edd8661e2803254f89265548c7487f47a09f6#f14edd8661e2803254f89265548c7487f47a09f6" +source = "git+https://github.com/astral-sh/ruff.git?rev=5e4a3d9c3b381df20f6a52caef0f56ed0ebc74be#5e4a3d9c3b381df20f6a52caef0f56ed0ebc74be" dependencies = [ "memchr", "ruff_text_size", @@ -2921,7 +2952,7 @@ dependencies = [ [[package]] name = "ruff_text_size" version = "0.0.0" -source = "git+https://github.com/astral-sh/ruff.git?rev=f14edd8661e2803254f89265548c7487f47a09f6#f14edd8661e2803254f89265548c7487f47a09f6" +source = "git+https://github.com/astral-sh/ruff.git?rev=5e4a3d9c3b381df20f6a52caef0f56ed0ebc74be#5e4a3d9c3b381df20f6a52caef0f56ed0ebc74be" dependencies = [ "get-size2", ] @@ -2956,9 +2987,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.36" +version = "0.23.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c665f33d38cea657d9614f766881e4d510e0eda4239891eea56b4cadcf01801b" +checksum = "758025cb5fccfd3bc2fd74708fd4682be41d99e5dff73c377c0646c6012c73a4" dependencies = [ "aws-lc-rs", "once_cell", @@ -3129,6 +3160,7 @@ name = "rustpython-compiler-core" version = "0.4.0" dependencies = [ "bitflags 2.11.0", + "bitflagset", "itertools 0.14.0", "lz4_flex", "malachite-bigint", @@ -3283,6 +3315,10 @@ dependencies = [ "pkcs8", "pymath", "rand_core 0.9.5", + "ruff_python_ast", + "ruff_python_parser", + "ruff_source_file", + "ruff_text_size", "rustix", "rustls", "rustls-native-certs", @@ -3706,12 +3742,12 @@ checksum = 
"67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" [[package]] name = "socket2" -version = "0.6.2" +version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86f4aa3ad99f2088c990dfa82d367e19cb29268ed67c574d10d0a4bfe71f07e0" +checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e" dependencies = [ "libc", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -3762,9 +3798,9 @@ checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" [[package]] name = "syn" -version = "2.0.114" +version = "2.0.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4d107df263a3013ef9b1879b0df87d706ff80f65a86ea879bd9c31f9b307c2a" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" dependencies = [ "proc-macro2", "quote", @@ -4281,9 +4317,9 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "uuid" -version = "1.21.0" +version = "1.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b672338555252d43fd2240c714dc444b8c6fb0a5c5335e65a07bba7742735ddb" +checksum = "a68d3c8f01c0cfa54a75291d83601161799e4a89a39e0929f4b0354d88757a37" dependencies = [ "atomic", "js-sys", diff --git a/Cargo.toml b/Cargo.toml index 664340c23cf..2f720de968a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -101,6 +101,7 @@ opt-level = 3 lto = "thin" [patch.crates-io] +parking_lot_core = { git = "https://github.com/youknowone/parking_lot", branch = "rustpython" } # REDOX START, Uncomment when you want to compile/check with redoxer # REDOX END @@ -155,17 +156,18 @@ rustpython-sre_engine = { path = "crates/sre_engine", version = "0.4.0" } rustpython-wtf8 = { path = "crates/wtf8", version = "0.4.0" } rustpython-doc = { path = "crates/doc", version = "0.4.0" } -# Ruff tag 0.15.4 is based on commit f14edd8661e2803254f89265548c7487f47a09f6 +# Ruff tag 0.15.5 is based on commit 
5e4a3d9c3b381df20f6a52caef0f56ed0ebc74be # at the time of this capture. We use the commit hash to ensure reproducible builds. -ruff_python_parser = { git = "https://github.com/astral-sh/ruff.git", rev = "f14edd8661e2803254f89265548c7487f47a09f6" } -ruff_python_ast = { git = "https://github.com/astral-sh/ruff.git", rev = "f14edd8661e2803254f89265548c7487f47a09f6" } -ruff_text_size = { git = "https://github.com/astral-sh/ruff.git", rev = "f14edd8661e2803254f89265548c7487f47a09f6" } -ruff_source_file = { git = "https://github.com/astral-sh/ruff.git", rev = "f14edd8661e2803254f89265548c7487f47a09f6" } +ruff_python_parser = { git = "https://github.com/astral-sh/ruff.git", rev = "5e4a3d9c3b381df20f6a52caef0f56ed0ebc74be" } +ruff_python_ast = { git = "https://github.com/astral-sh/ruff.git", rev = "5e4a3d9c3b381df20f6a52caef0f56ed0ebc74be" } +ruff_text_size = { git = "https://github.com/astral-sh/ruff.git", rev = "5e4a3d9c3b381df20f6a52caef0f56ed0ebc74be" } +ruff_source_file = { git = "https://github.com/astral-sh/ruff.git", rev = "5e4a3d9c3b381df20f6a52caef0f56ed0ebc74be" } phf = { version = "0.13.1", default-features = false, features = ["macros"]} ahash = "0.8.12" ascii = "1.1" bitflags = "2.11.0" +bitflagset = "0.0.3" bstr = "1" bytes = "1.11.1" cfg-if = "1.0" @@ -182,7 +184,7 @@ insta = "1.46" itertools = "0.14.0" is-macro = "0.3.7" junction = "1.4.2" -libc = "0.2.182" +libc = "0.2.183" libffi = "5" log = "0.4.29" nix = { version = "0.30", features = ["fs", "user", "process", "term", "time", "signal", "ioctl", "socket", "sched", "zerocopy", "dir", "hostname", "net", "poll"] } @@ -199,7 +201,7 @@ parking_lot = "0.12.3" paste = "1.0.15" proc-macro2 = "1.0.105" pymath = { version = "0.1.5", features = ["mul_add", "malachite-bigint", "complex"] } -quote = "1.0.44" +quote = "1.0.45" radium = "1.1.1" rand = "0.9" rand_core = { version = "0.9", features = ["os_rng"] } diff --git a/Lib/locale.py b/Lib/locale.py index db6d0abb26b..dfedc6386cb 100644 --- a/Lib/locale.py +++ 
b/Lib/locale.py @@ -13,7 +13,6 @@ import sys import encodings import encodings.aliases -import re import _collections_abc from builtins import str as _builtin_str import functools @@ -177,8 +176,7 @@ def _strip_padding(s, amount): amount -= 1 return s[lpos:rpos+1] -_percent_re = re.compile(r'%(?:\((?P<key>.*?)\))?' - r'(?P<modifiers>[-#0-9 +*.hlL]*?)[eEfFgGdiouxXcrs%]') +_percent_re = None def _format(percent, value, grouping=False, monetary=False, *additional): if additional: @@ -217,6 +215,13 @@ def format_string(f, val, grouping=False, monetary=False): Grouping is applied if the third parameter is true. Conversion uses monetary thousands separator and grouping strings if forth parameter monetary is true.""" + global _percent_re + if _percent_re is None: + import re + + _percent_re = re.compile(r'%(?:\((?P<key>.*?)\))?(?P<modifiers>[-#0-9 +*.hlL]*?)[eEfFgGdiouxXcrs%]') + percents = list(_percent_re.finditer(f)) new_f = _percent_re.sub('%s', f) diff --git a/Lib/modulefinder.py b/Lib/modulefinder.py new file mode 100644 index 00000000000..ac478ee7f51 --- /dev/null +++ b/Lib/modulefinder.py @@ -0,0 +1,671 @@ +"""Find modules used by a script, using introspection.""" + +import dis +import importlib._bootstrap_external +import importlib.machinery +import marshal +import os +import io +import sys + +# Old imp constants: + +_SEARCH_ERROR = 0 +_PY_SOURCE = 1 +_PY_COMPILED = 2 +_C_EXTENSION = 3 +_PKG_DIRECTORY = 5 +_C_BUILTIN = 6 +_PY_FROZEN = 7 + +# Modulefinder does a good job at simulating Python's, but it can not +# handle __path__ modifications packages make at runtime. Therefore there +# is a mechanism whereby you can register extra paths in this map for a +# package, and it will be honored. + +# Note this is a mapping is lists of paths.
+packagePathMap = {} + +# A Public interface +def AddPackagePath(packagename, path): + packagePathMap.setdefault(packagename, []).append(path) + +replacePackageMap = {} + +# This ReplacePackage mechanism allows modulefinder to work around +# situations in which a package injects itself under the name +# of another package into sys.modules at runtime by calling +# ReplacePackage("real_package_name", "faked_package_name") +# before running ModuleFinder. + +def ReplacePackage(oldname, newname): + replacePackageMap[oldname] = newname + + +def _find_module(name, path=None): + """An importlib reimplementation of imp.find_module (for our purposes).""" + + # It's necessary to clear the caches for our Finder first, in case any + # modules are being added/deleted/modified at runtime. In particular, + # test_modulefinder.py changes file tree contents in a cache-breaking way: + + importlib.machinery.PathFinder.invalidate_caches() + + spec = importlib.machinery.PathFinder.find_spec(name, path) + + if spec is None: + raise ImportError("No module named {name!r}".format(name=name), name=name) + + # Some special cases: + + if spec.loader is importlib.machinery.BuiltinImporter: + return None, None, ("", "", _C_BUILTIN) + + if spec.loader is importlib.machinery.FrozenImporter: + return None, None, ("", "", _PY_FROZEN) + + file_path = spec.origin + + if spec.loader.is_package(name): + return None, os.path.dirname(file_path), ("", "", _PKG_DIRECTORY) + + if isinstance(spec.loader, importlib.machinery.SourceFileLoader): + kind = _PY_SOURCE + + elif isinstance( + spec.loader, ( + importlib.machinery.ExtensionFileLoader, + importlib.machinery.AppleFrameworkLoader, + ) + ): + kind = _C_EXTENSION + + elif isinstance(spec.loader, importlib.machinery.SourcelessFileLoader): + kind = _PY_COMPILED + + else: # Should never happen. 
+ return None, None, ("", "", _SEARCH_ERROR) + + file = io.open_code(file_path) + suffix = os.path.splitext(file_path)[-1] + + return file, file_path, (suffix, "rb", kind) + + +class Module: + + def __init__(self, name, file=None, path=None): + self.__name__ = name + self.__file__ = file + self.__path__ = path + self.__code__ = None + # The set of global names that are assigned to in the module. + # This includes those names imported through starimports of + # Python modules. + self.globalnames = {} + # The set of starimports this module did that could not be + # resolved, ie. a starimport from a non-Python module. + self.starimports = {} + + def __repr__(self): + s = "Module(%r" % (self.__name__,) + if self.__file__ is not None: + s = s + ", %r" % (self.__file__,) + if self.__path__ is not None: + s = s + ", %r" % (self.__path__,) + s = s + ")" + return s + +class ModuleFinder: + + def __init__(self, path=None, debug=0, excludes=None, replace_paths=None): + if path is None: + path = sys.path + self.path = path + self.modules = {} + self.badmodules = {} + self.debug = debug + self.indent = 0 + self.excludes = excludes if excludes is not None else [] + self.replace_paths = replace_paths if replace_paths is not None else [] + self.processed_paths = [] # Used in debugging only + + def msg(self, level, str, *args): + if level <= self.debug: + for i in range(self.indent): + print(" ", end=' ') + print(str, end=' ') + for arg in args: + print(repr(arg), end=' ') + print() + + def msgin(self, *args): + level = args[0] + if level <= self.debug: + self.indent = self.indent + 1 + self.msg(*args) + + def msgout(self, *args): + level = args[0] + if level <= self.debug: + self.indent = self.indent - 1 + self.msg(*args) + + def run_script(self, pathname): + self.msg(2, "run_script", pathname) + with io.open_code(pathname) as fp: + stuff = ("", "rb", _PY_SOURCE) + self.load_module('__main__', fp, pathname, stuff) + + def load_file(self, pathname): + dir, name = 
os.path.split(pathname) + name, ext = os.path.splitext(name) + with io.open_code(pathname) as fp: + stuff = (ext, "rb", _PY_SOURCE) + self.load_module(name, fp, pathname, stuff) + + def import_hook(self, name, caller=None, fromlist=None, level=-1): + self.msg(3, "import_hook", name, caller, fromlist, level) + parent = self.determine_parent(caller, level=level) + q, tail = self.find_head_package(parent, name) + m = self.load_tail(q, tail) + if not fromlist: + return q + if m.__path__: + self.ensure_fromlist(m, fromlist) + return None + + def determine_parent(self, caller, level=-1): + self.msgin(4, "determine_parent", caller, level) + if not caller or level == 0: + self.msgout(4, "determine_parent -> None") + return None + pname = caller.__name__ + if level >= 1: # relative import + if caller.__path__: + level -= 1 + if level == 0: + parent = self.modules[pname] + assert parent is caller + self.msgout(4, "determine_parent ->", parent) + return parent + if pname.count(".") < level: + raise ImportError("relative importpath too deep") + pname = ".".join(pname.split(".")[:-level]) + parent = self.modules[pname] + self.msgout(4, "determine_parent ->", parent) + return parent + if caller.__path__: + parent = self.modules[pname] + assert caller is parent + self.msgout(4, "determine_parent ->", parent) + return parent + if '.' in pname: + i = pname.rfind('.') + pname = pname[:i] + parent = self.modules[pname] + assert parent.__name__ == pname + self.msgout(4, "determine_parent ->", parent) + return parent + self.msgout(4, "determine_parent -> None") + return None + + def find_head_package(self, parent, name): + self.msgin(4, "find_head_package", parent, name) + if '.' 
in name: + i = name.find('.') + head = name[:i] + tail = name[i+1:] + else: + head = name + tail = "" + if parent: + qname = "%s.%s" % (parent.__name__, head) + else: + qname = head + q = self.import_module(head, qname, parent) + if q: + self.msgout(4, "find_head_package ->", (q, tail)) + return q, tail + if parent: + qname = head + parent = None + q = self.import_module(head, qname, parent) + if q: + self.msgout(4, "find_head_package ->", (q, tail)) + return q, tail + self.msgout(4, "raise ImportError: No module named", qname) + raise ImportError("No module named " + qname) + + def load_tail(self, q, tail): + self.msgin(4, "load_tail", q, tail) + m = q + while tail: + i = tail.find('.') + if i < 0: i = len(tail) + head, tail = tail[:i], tail[i+1:] + mname = "%s.%s" % (m.__name__, head) + m = self.import_module(head, mname, m) + if not m: + self.msgout(4, "raise ImportError: No module named", mname) + raise ImportError("No module named " + mname) + self.msgout(4, "load_tail ->", m) + return m + + def ensure_fromlist(self, m, fromlist, recursive=0): + self.msg(4, "ensure_fromlist", m, fromlist, recursive) + for sub in fromlist: + if sub == "*": + if not recursive: + all = self.find_all_submodules(m) + if all: + self.ensure_fromlist(m, all, 1) + elif not hasattr(m, sub): + subname = "%s.%s" % (m.__name__, sub) + submod = self.import_module(sub, subname, m) + if not submod: + raise ImportError("No module named " + subname) + + def find_all_submodules(self, m): + if not m.__path__: + return + modules = {} + # 'suffixes' used to be a list hardcoded to [".py", ".pyc"]. + # But we must also collect Python extension modules - although + # we cannot separate normal dlls from Python extensions. 
+ suffixes = [] + suffixes += importlib.machinery.EXTENSION_SUFFIXES[:] + suffixes += importlib.machinery.SOURCE_SUFFIXES[:] + suffixes += importlib.machinery.BYTECODE_SUFFIXES[:] + for dir in m.__path__: + try: + names = os.listdir(dir) + except OSError: + self.msg(2, "can't list directory", dir) + continue + for name in names: + mod = None + for suff in suffixes: + n = len(suff) + if name[-n:] == suff: + mod = name[:-n] + break + if mod and mod != "__init__": + modules[mod] = mod + return modules.keys() + + def import_module(self, partname, fqname, parent): + self.msgin(3, "import_module", partname, fqname, parent) + try: + m = self.modules[fqname] + except KeyError: + pass + else: + self.msgout(3, "import_module ->", m) + return m + if fqname in self.badmodules: + self.msgout(3, "import_module -> None") + return None + if parent and parent.__path__ is None: + self.msgout(3, "import_module -> None") + return None + try: + fp, pathname, stuff = self.find_module(partname, + parent and parent.__path__, parent) + except ImportError: + self.msgout(3, "import_module ->", None) + return None + + try: + m = self.load_module(fqname, fp, pathname, stuff) + finally: + if fp: + fp.close() + if parent: + setattr(parent, partname, m) + self.msgout(3, "import_module ->", m) + return m + + def load_module(self, fqname, fp, pathname, file_info): + suffix, mode, type = file_info + self.msgin(2, "load_module", fqname, fp and "fp", pathname) + if type == _PKG_DIRECTORY: + m = self.load_package(fqname, pathname) + self.msgout(2, "load_module ->", m) + return m + if type == _PY_SOURCE: + co = compile(fp.read(), pathname, 'exec') + elif type == _PY_COMPILED: + try: + data = fp.read() + importlib._bootstrap_external._classify_pyc(data, fqname, {}) + except ImportError as exc: + self.msgout(2, "raise ImportError: " + str(exc), pathname) + raise + co = marshal.loads(memoryview(data)[16:]) + else: + co = None + m = self.add_module(fqname) + m.__file__ = pathname + if co: + if 
self.replace_paths: + co = self.replace_paths_in_code(co) + m.__code__ = co + self.scan_code(co, m) + self.msgout(2, "load_module ->", m) + return m + + def _add_badmodule(self, name, caller): + if name not in self.badmodules: + self.badmodules[name] = {} + if caller: + self.badmodules[name][caller.__name__] = 1 + else: + self.badmodules[name]["-"] = 1 + + def _safe_import_hook(self, name, caller, fromlist, level=-1): + # wrapper for self.import_hook() that won't raise ImportError + if name in self.badmodules: + self._add_badmodule(name, caller) + return + try: + self.import_hook(name, caller, level=level) + except ImportError as msg: + self.msg(2, "ImportError:", str(msg)) + self._add_badmodule(name, caller) + except SyntaxError as msg: + self.msg(2, "SyntaxError:", str(msg)) + self._add_badmodule(name, caller) + else: + if fromlist: + for sub in fromlist: + fullname = name + "." + sub + if fullname in self.badmodules: + self._add_badmodule(fullname, caller) + continue + try: + self.import_hook(name, caller, [sub], level=level) + except ImportError as msg: + self.msg(2, "ImportError:", str(msg)) + self._add_badmodule(fullname, caller) + + def scan_opcodes(self, co): + # Scan the code, and yield 'interesting' opcode combinations + for name in dis._find_store_names(co): + yield "store", (name,) + for name, level, fromlist in dis._find_imports(co): + if level == 0: # absolute import + yield "absolute_import", (fromlist, name) + else: # relative import + yield "relative_import", (level, fromlist, name) + + def scan_code(self, co, m): + code = co.co_code + scanner = self.scan_opcodes + for what, args in scanner(co): + if what == "store": + name, = args + m.globalnames[name] = 1 + elif what == "absolute_import": + fromlist, name = args + have_star = 0 + if fromlist is not None: + if "*" in fromlist: + have_star = 1 + fromlist = [f for f in fromlist if f != "*"] + self._safe_import_hook(name, m, fromlist, level=0) + if have_star: + # We've encountered an "import *". 
If it is a Python module, + # the code has already been parsed and we can suck out the + # global names. + mm = None + if m.__path__: + # At this point we don't know whether 'name' is a + # submodule of 'm' or a global module. Let's just try + # the full name first. + mm = self.modules.get(m.__name__ + "." + name) + if mm is None: + mm = self.modules.get(name) + if mm is not None: + m.globalnames.update(mm.globalnames) + m.starimports.update(mm.starimports) + if mm.__code__ is None: + m.starimports[name] = 1 + else: + m.starimports[name] = 1 + elif what == "relative_import": + level, fromlist, name = args + if name: + self._safe_import_hook(name, m, fromlist, level=level) + else: + parent = self.determine_parent(m, level=level) + self._safe_import_hook(parent.__name__, None, fromlist, level=0) + else: + # We don't expect anything else from the generator. + raise RuntimeError(what) + + for c in co.co_consts: + if isinstance(c, type(co)): + self.scan_code(c, m) + + def load_package(self, fqname, pathname): + self.msgin(2, "load_package", fqname, pathname) + newname = replacePackageMap.get(fqname) + if newname: + fqname = newname + m = self.add_module(fqname) + m.__file__ = pathname + m.__path__ = [pathname] + + # As per comment at top of file, simulate runtime __path__ additions. 
+ m.__path__ = m.__path__ + packagePathMap.get(fqname, []) + + fp, buf, stuff = self.find_module("__init__", m.__path__) + try: + self.load_module(fqname, fp, buf, stuff) + self.msgout(2, "load_package ->", m) + return m + finally: + if fp: + fp.close() + + def add_module(self, fqname): + if fqname in self.modules: + return self.modules[fqname] + self.modules[fqname] = m = Module(fqname) + return m + + def find_module(self, name, path, parent=None): + if parent is not None: + # assert path is not None + fullname = parent.__name__+'.'+name + else: + fullname = name + if fullname in self.excludes: + self.msgout(3, "find_module -> Excluded", fullname) + raise ImportError(name) + + if path is None: + if name in sys.builtin_module_names: + return (None, None, ("", "", _C_BUILTIN)) + + path = self.path + + return _find_module(name, path) + + def report(self): + """Print a report to stdout, listing the found modules with their + paths, as well as modules that are missing, or seem to be missing. + """ + print() + print(" %-25s %s" % ("Name", "File")) + print(" %-25s %s" % ("----", "----")) + # Print modules found + keys = sorted(self.modules.keys()) + for key in keys: + m = self.modules[key] + if m.__path__: + print("P", end=' ') + else: + print("m", end=' ') + print("%-25s" % key, m.__file__ or "") + + # Print missing modules + missing, maybe = self.any_missing_maybe() + if missing: + print() + print("Missing modules:") + for name in missing: + mods = sorted(self.badmodules[name].keys()) + print("?", name, "imported from", ', '.join(mods)) + # Print modules that may be missing, but then again, maybe not... + if maybe: + print() + print("Submodules that appear to be missing, but could also be", end=' ') + print("global names in the parent package:") + for name in maybe: + mods = sorted(self.badmodules[name].keys()) + print("?", name, "imported from", ', '.join(mods)) + + def any_missing(self): + """Return a list of modules that appear to be missing. 
Use + any_missing_maybe() if you want to know which modules are + certain to be missing, and which *may* be missing. + """ + missing, maybe = self.any_missing_maybe() + return missing + maybe + + def any_missing_maybe(self): + """Return two lists, one with modules that are certainly missing + and one with modules that *may* be missing. The latter names could + either be submodules *or* just global names in the package. + + The reason it can't always be determined is that it's impossible to + tell which names are imported when "from module import *" is done + with an extension module, short of actually importing it. + """ + missing = [] + maybe = [] + for name in self.badmodules: + if name in self.excludes: + continue + i = name.rfind(".") + if i < 0: + missing.append(name) + continue + subname = name[i+1:] + pkgname = name[:i] + pkg = self.modules.get(pkgname) + if pkg is not None: + if pkgname in self.badmodules[name]: + # The package tried to import this module itself and + # failed. It's definitely missing. + missing.append(name) + elif subname in pkg.globalnames: + # It's a global in the package: definitely not missing. + pass + elif pkg.starimports: + # It could be missing, but the package did an "import *" + # from a non-Python module, so we simply can't be sure. + maybe.append(name) + else: + # It's not a global in the package, the package didn't + # do funny star imports, it's very likely to be missing. + # The symbol could be inserted into the package from the + # outside, but since that's not good style we simply list + # it missing. 
+ missing.append(name) + else: + missing.append(name) + missing.sort() + maybe.sort() + return missing, maybe + + def replace_paths_in_code(self, co): + new_filename = original_filename = os.path.normpath(co.co_filename) + for f, r in self.replace_paths: + if original_filename.startswith(f): + new_filename = r + original_filename[len(f):] + break + + if self.debug and original_filename not in self.processed_paths: + if new_filename != original_filename: + self.msgout(2, "co_filename %r changed to %r" \ + % (original_filename,new_filename,)) + else: + self.msgout(2, "co_filename %r remains unchanged" \ + % (original_filename,)) + self.processed_paths.append(original_filename) + + consts = list(co.co_consts) + for i in range(len(consts)): + if isinstance(consts[i], type(co)): + consts[i] = self.replace_paths_in_code(consts[i]) + + return co.replace(co_consts=tuple(consts), co_filename=new_filename) + + +def test(): + # Parse command line + import getopt + try: + opts, args = getopt.getopt(sys.argv[1:], "dmp:qx:") + except getopt.error as msg: + print(msg) + return + + # Process options + debug = 1 + domods = 0 + addpath = [] + exclude = [] + for o, a in opts: + if o == '-d': + debug = debug + 1 + if o == '-m': + domods = 1 + if o == '-p': + addpath = addpath + a.split(os.pathsep) + if o == '-q': + debug = 0 + if o == '-x': + exclude.append(a) + + # Provide default arguments + if not args: + script = "hello.py" + else: + script = args[0] + + # Set the path based on sys.path and the script directory + path = sys.path[:] + path[0] = os.path.dirname(script) + path = addpath + path + if debug > 1: + print("path:") + for item in path: + print(" ", repr(item)) + + # Create the module finder and turn its crank + mf = ModuleFinder(path, debug, exclude) + for arg in args[1:]: + if arg == '-m': + domods = 1 + continue + if domods: + if arg[-2:] == '.*': + mf.import_hook(arg[:-2], None, ["*"]) + else: + mf.import_hook(arg) + else: + mf.load_file(arg) + mf.run_script(script) + 
mf.report() + return mf # for -i debugging + + +if __name__ == '__main__': + try: + mf = test() + except KeyboardInterrupt: + print("\n[interrupted]") diff --git a/Lib/poplib.py b/Lib/poplib.py new file mode 100644 index 00000000000..4469bff44b4 --- /dev/null +++ b/Lib/poplib.py @@ -0,0 +1,477 @@ +"""A POP3 client class. + +Based on the J. Myers POP3 draft, Jan. 96 +""" + +# Author: David Ascher +# [heavily stealing from nntplib.py] +# Updated: Piers Lauder [Jul '97] +# String method conversion and test jig improvements by ESR, February 2001. +# Added the POP3_SSL class. Methods loosely based on IMAP_SSL. Hector Urtubia Aug 2003 + +# Example (see the test function at the end of this file) + +# Imports + +import errno +import re +import socket +import sys + +try: + import ssl + HAVE_SSL = True +except ImportError: + HAVE_SSL = False + +__all__ = ["POP3","error_proto"] + +# Exception raised when an error or invalid response is received: + +class error_proto(Exception): pass + +# Standard Port +POP3_PORT = 110 + +# POP SSL PORT +POP3_SSL_PORT = 995 + +# Line terminators (we always output CRLF, but accept any of CRLF, LFCR, LF) +CR = b'\r' +LF = b'\n' +CRLF = CR+LF + +# maximal line length when calling readline(). This is to prevent +# reading arbitrary length lines. RFC 1939 limits POP3 line length to +# 512 characters, including CRLF. We have selected 2048 just to be on +# the safe side. +_MAXLINE = 2048 + + +class POP3: + + """This class supports both the minimal and optional command sets. + Arguments can be strings or integers (where appropriate) + (e.g.: retr(1) and retr('1') both work equally well. 
+ + Minimal Command Set: + USER name user(name) + PASS string pass_(string) + STAT stat() + LIST [msg] list(msg = None) + RETR msg retr(msg) + DELE msg dele(msg) + NOOP noop() + RSET rset() + QUIT quit() + + Optional Commands (some servers support these): + RPOP name rpop(name) + APOP name digest apop(name, digest) + TOP msg n top(msg, n) + UIDL [msg] uidl(msg = None) + CAPA capa() + STLS stls() + UTF8 utf8() + + Raises one exception: 'error_proto'. + + Instantiate with: + POP3(hostname, port=110) + + NB: the POP protocol locks the mailbox from user + authorization until QUIT, so be sure to get in, suck + the messages, and quit, each time you access the + mailbox. + + POP is a line-based protocol, which means large mail + messages consume lots of python cycles reading them + line-by-line. + + If it's available on your mail server, use IMAP4 + instead, it doesn't suffer from the two problems + above. + """ + + encoding = 'UTF-8' + + def __init__(self, host, port=POP3_PORT, + timeout=socket._GLOBAL_DEFAULT_TIMEOUT): + self.host = host + self.port = port + self._tls_established = False + sys.audit("poplib.connect", self, host, port) + self.sock = self._create_socket(timeout) + self.file = self.sock.makefile('rb') + self._debugging = 0 + self.welcome = self._getresp() + + def _create_socket(self, timeout): + if timeout is not None and not timeout: + raise ValueError('Non-blocking socket (timeout=0) is not supported') + return socket.create_connection((self.host, self.port), timeout) + + def _putline(self, line): + if self._debugging > 1: print('*put*', repr(line)) + sys.audit("poplib.putline", self, line) + self.sock.sendall(line + CRLF) + + + # Internal: send one command to the server (through _putline()) + + def _putcmd(self, line): + if self._debugging: print('*cmd*', repr(line)) + line = bytes(line, self.encoding) + self._putline(line) + + + # Internal: return one line from the server, stripping CRLF. + # This is where all the CPU time of this module is consumed. 
+ # Raise error_proto('-ERR EOF') if the connection is closed. + + def _getline(self): + line = self.file.readline(_MAXLINE + 1) + if len(line) > _MAXLINE: + raise error_proto('line too long') + + if self._debugging > 1: print('*get*', repr(line)) + if not line: raise error_proto('-ERR EOF') + octets = len(line) + # server can send any combination of CR & LF + # however, 'readline()' returns lines ending in LF + # so only possibilities are ...LF, ...CRLF, CR...LF + if line[-2:] == CRLF: + return line[:-2], octets + if line[:1] == CR: + return line[1:-1], octets + return line[:-1], octets + + + # Internal: get a response from the server. + # Raise 'error_proto' if the response doesn't start with '+'. + + def _getresp(self): + resp, o = self._getline() + if self._debugging > 1: print('*resp*', repr(resp)) + if not resp.startswith(b'+'): + raise error_proto(resp) + return resp + + + # Internal: get a response plus following text from the server. + + def _getlongresp(self): + resp = self._getresp() + list = []; octets = 0 + line, o = self._getline() + while line != b'.': + if line.startswith(b'..'): + o = o-1 + line = line[1:] + octets = octets + o + list.append(line) + line, o = self._getline() + return resp, list, octets + + + # Internal: send a command and get the response + + def _shortcmd(self, line): + self._putcmd(line) + return self._getresp() + + + # Internal: send a command and get the response plus following text + + def _longcmd(self, line): + self._putcmd(line) + return self._getlongresp() + + + # These can be useful: + + def getwelcome(self): + return self.welcome + + + def set_debuglevel(self, level): + self._debugging = level + + + # Here are all the POP commands: + + def user(self, user): + """Send user name, return response + + (should indicate password required). + """ + return self._shortcmd('USER %s' % user) + + + def pass_(self, pswd): + """Send password, return response + + (response includes message count, mailbox size). 
+ + NB: mailbox is locked by server from here to 'quit()' + """ + return self._shortcmd('PASS %s' % pswd) + + + def stat(self): + """Get mailbox status. + + Result is tuple of 2 ints (message count, mailbox size) + """ + retval = self._shortcmd('STAT') + rets = retval.split() + if self._debugging: print('*stat*', repr(rets)) + + # Check if the response has enough elements + # RFC 1939 requires at least 3 elements (+OK, message count, mailbox size) + # but allows additional data after the required fields + if len(rets) < 3: + raise error_proto("Invalid STAT response format") + + try: + numMessages = int(rets[1]) + sizeMessages = int(rets[2]) + except ValueError: + raise error_proto("Invalid STAT response data: non-numeric values") + + return (numMessages, sizeMessages) + + + def list(self, which=None): + """Request listing, return result. + + Result without a message number argument is in form + ['response', ['mesg_num octets', ...], octets]. + + Result when a message number argument is given is a + single response: the "scan listing" for that message. + """ + if which is not None: + return self._shortcmd('LIST %s' % which) + return self._longcmd('LIST') + + + def retr(self, which): + """Retrieve whole message number 'which'. + + Result is in form ['response', ['line', ...], octets]. + """ + return self._longcmd('RETR %s' % which) + + + def dele(self, which): + """Delete message number 'which'. + + Result is 'response'. + """ + return self._shortcmd('DELE %s' % which) + + + def noop(self): + """Does nothing. + + One supposes the response indicates the server is alive. 
+ """ + return self._shortcmd('NOOP') + + + def rset(self): + """Unmark all messages marked for deletion.""" + return self._shortcmd('RSET') + + + def quit(self): + """Signoff: commit changes on server, unlock mailbox, close connection.""" + resp = self._shortcmd('QUIT') + self.close() + return resp + + def close(self): + """Close the connection without assuming anything about it.""" + try: + file = self.file + self.file = None + if file is not None: + file.close() + finally: + sock = self.sock + self.sock = None + if sock is not None: + try: + sock.shutdown(socket.SHUT_RDWR) + except OSError as exc: + # The server might already have closed the connection. + # On Windows, this may result in WSAEINVAL (error 10022): + # An invalid operation was attempted. + if (exc.errno != errno.ENOTCONN + and getattr(exc, 'winerror', 0) != 10022): + raise + finally: + sock.close() + + #__del__ = quit + + + # optional commands: + + def rpop(self, user): + """Send RPOP command to access the mailbox with an alternate user.""" + return self._shortcmd('RPOP %s' % user) + + + timestamp = re.compile(br'\+OK.[^<]*(<.*>)') + + def apop(self, user, password): + """Authorisation + + - only possible if server has supplied a timestamp in initial greeting. + + Args: + user - mailbox user; + password - mailbox password. + + NB: mailbox is locked by server from here to 'quit()' + """ + secret = bytes(password, self.encoding) + m = self.timestamp.match(self.welcome) + if not m: + raise error_proto('-ERR APOP not supported by server') + import hashlib + digest = m.group(1)+secret + digest = hashlib.md5(digest).hexdigest() + return self._shortcmd('APOP %s %s' % (user, digest)) + + + def top(self, which, howmuch): + """Retrieve message header of message number 'which' + and first 'howmuch' lines of message body. + + Result is in form ['response', ['line', ...], octets]. 
+ """ + return self._longcmd('TOP %s %s' % (which, howmuch)) + + + def uidl(self, which=None): + """Return message digest (unique id) list. + + If 'which', result contains unique id for that message + in the form 'response mesgnum uid', otherwise result is + the list ['response', ['mesgnum uid', ...], octets] + """ + if which is not None: + return self._shortcmd('UIDL %s' % which) + return self._longcmd('UIDL') + + + def utf8(self): + """Try to enter UTF-8 mode (see RFC 6856). Returns server response. + """ + return self._shortcmd('UTF8') + + + def capa(self): + """Return server capabilities (RFC 2449) as a dictionary + >>> c=poplib.POP3('localhost') + >>> c.capa() + {'IMPLEMENTATION': ['Cyrus', 'POP3', 'server', 'v2.2.12'], + 'TOP': [], 'LOGIN-DELAY': ['0'], 'AUTH-RESP-CODE': [], + 'EXPIRE': ['NEVER'], 'USER': [], 'STLS': [], 'PIPELINING': [], + 'UIDL': [], 'RESP-CODES': []} + >>> + + Really, according to RFC 2449, the cyrus folks should avoid + having the implementation split into multiple arguments... + """ + def _parsecap(line): + lst = line.decode('ascii').split() + return lst[0], lst[1:] + + caps = {} + try: + resp = self._longcmd('CAPA') + rawcaps = resp[1] + for capline in rawcaps: + capnm, capargs = _parsecap(capline) + caps[capnm] = capargs + except error_proto: + raise error_proto('-ERR CAPA not supported by server') + return caps + + + def stls(self, context=None): + """Start a TLS session on the active connection as specified in RFC 2595. 
+ + context - a ssl.SSLContext + """ + if not HAVE_SSL: + raise error_proto('-ERR TLS support missing') + if self._tls_established: + raise error_proto('-ERR TLS session already established') + caps = self.capa() + if not 'STLS' in caps: + raise error_proto('-ERR STLS not supported by server') + if context is None: + context = ssl._create_stdlib_context() + resp = self._shortcmd('STLS') + self.sock = context.wrap_socket(self.sock, + server_hostname=self.host) + self.file = self.sock.makefile('rb') + self._tls_established = True + return resp + + +if HAVE_SSL: + + class POP3_SSL(POP3): + """POP3 client class over SSL connection + + Instantiate with: POP3_SSL(hostname, port=995, context=None) + + hostname - the hostname of the pop3 over ssl server + port - port number + context - a ssl.SSLContext + + See the methods of the parent class POP3 for more documentation. + """ + + def __init__(self, host, port=POP3_SSL_PORT, + *, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, context=None): + if context is None: + context = ssl._create_stdlib_context() + self.context = context + POP3.__init__(self, host, port, timeout) + + def _create_socket(self, timeout): + sock = POP3._create_socket(self, timeout) + sock = self.context.wrap_socket(sock, + server_hostname=self.host) + return sock + + def stls(self, context=None): + """The method unconditionally raises an exception since the + STLS command doesn't make any sense on an already established + SSL/TLS session. 
+ """ + raise error_proto('-ERR TLS session already established') + + __all__.append("POP3_SSL") + +if __name__ == "__main__": + a = POP3(sys.argv[1]) + print(a.getwelcome()) + a.user(sys.argv[2]) + a.pass_(sys.argv[3]) + a.list() + (numMsgs, totalSize) = a.stat() + for i in range(1, numMsgs + 1): + (header, msg, octets) = a.retr(i) + print("Message %d:" % i) + for line in msg: + print(' ' + line) + print('-----------------------') + a.quit() diff --git a/Lib/test/_test_atexit.py b/Lib/test/_test_atexit.py index db4edd72c51..2e961d6a485 100644 --- a/Lib/test/_test_atexit.py +++ b/Lib/test/_test_atexit.py @@ -47,7 +47,6 @@ def func2(*args, **kwargs): ('func2', (), {}), ('func1', (1, 2), {})]) - @unittest.expectedFailure # TODO: RUSTPYTHON def test_badargs(self): def func(): pass @@ -55,14 +54,12 @@ def func(): # func() has no parameter, but it's called with 2 parameters self.assert_raises_unraisable(TypeError, func, 1 ,2) - @unittest.expectedFailure # TODO: RUSTPYTHON def test_raise(self): def raise_type_error(): raise TypeError self.assert_raises_unraisable(TypeError, raise_type_error) - @unittest.expectedFailure # TODO: RUSTPYTHON def test_raise_unnormalized(self): # bpo-10756: Make sure that an unnormalized exception is handled # properly. 
@@ -71,7 +68,6 @@ def div_zero(): self.assert_raises_unraisable(ZeroDivisionError, div_zero) - @unittest.expectedFailure # TODO: RUSTPYTHON def test_exit(self): self.assert_raises_unraisable(SystemExit, sys.exit) @@ -122,7 +118,6 @@ def test_bound_methods(self): atexit._run_exitfuncs() self.assertEqual(l, [5]) - @unittest.expectedFailure # TODO: RUSTPYTHON def test_atexit_with_unregistered_function(self): # See bpo-46025 for more info def func(): @@ -140,7 +135,6 @@ def func(): finally: atexit.unregister(func) - @unittest.skip("TODO: RUSTPYTHON; Hangs") def test_eq_unregister_clear(self): # Issue #112127: callback's __eq__ may call unregister or _clear class Evil: @@ -154,7 +148,6 @@ def __eq__(self, other): atexit.unregister(Evil()) atexit._clear() - @unittest.skip("TODO: RUSTPYTHON; Hangs") def test_eq_unregister(self): # Issue #112127: callback's __eq__ may call unregister def f1(): diff --git a/Lib/test/_test_gc_fast_cycles.py b/Lib/test/_test_gc_fast_cycles.py new file mode 100644 index 00000000000..4e2c7d72a02 --- /dev/null +++ b/Lib/test/_test_gc_fast_cycles.py @@ -0,0 +1,48 @@ +# Run by test_gc. 
+from test import support +import _testinternalcapi +import gc +import unittest + +class IncrementalGCTests(unittest.TestCase): + + # Use small increments to emulate longer running process in a shorter time + @support.gc_threshold(200, 10) + def test_incremental_gc_handles_fast_cycle_creation(self): + + class LinkedList: + + #Use slots to reduce number of implicit objects + __slots__ = "next", "prev", "surprise" + + def __init__(self, next=None, prev=None): + self.next = next + if next is not None: + next.prev = self + self.prev = prev + if prev is not None: + prev.next = self + + def make_ll(depth): + head = LinkedList() + for i in range(depth): + head = LinkedList(head, head.prev) + return head + + head = make_ll(1000) + + assert(gc.isenabled()) + olds = [] + initial_heap_size = _testinternalcapi.get_tracked_heap_size() + for i in range(20_000): + newhead = make_ll(20) + newhead.surprise = head + olds.append(newhead) + if len(olds) == 20: + new_objects = _testinternalcapi.get_tracked_heap_size() - initial_heap_size + self.assertLess(new_objects, 27_000, f"Heap growing. 
Reached limit after {i} iterations") + del olds[:] + + +if __name__ == "__main__": + unittest.main() diff --git a/Lib/test/_test_multiprocessing.py b/Lib/test/_test_multiprocessing.py index 894cebda57b..35ce70fced2 100644 --- a/Lib/test/_test_multiprocessing.py +++ b/Lib/test/_test_multiprocessing.py @@ -1459,7 +1459,7 @@ def _acquire_release(lock, timeout, l=None, n=1): for _ in range(n): lock.release() - @unittest.skip("TODO: RUSTPYTHON; flaky timeout") + @unittest.skip("TODO: RUSTPYTHON; flaky timeout - thread start latency") def test_repr_rlock(self): if self.TYPE != 'processes': self.skipTest('test not appropriate for {}'.format(self.TYPE)) @@ -4415,7 +4415,6 @@ def test_shared_memory_across_processes(self): sms.close() - @unittest.skip("TODO: RUSTPYTHON; flaky") @unittest.skipIf(os.name != "posix", "not feasible in non-posix platforms") def test_shared_memory_SharedMemoryServer_ignores_sigint(self): # bpo-36368: protect SharedMemoryManager server process from @@ -4440,7 +4439,6 @@ def test_shared_memory_SharedMemoryServer_ignores_sigint(self): smm.shutdown() - @unittest.skip("TODO: RUSTPYTHON: sem_unlink cleanup race causes spurious stderr output") @unittest.skipIf(os.name != "posix", "resource_tracker is posix only") @resource_tracker_format_subtests def test_shared_memory_SharedMemoryManager_reuses_resource_tracker(self): diff --git a/Lib/test/test_asyncio/test_events.py b/Lib/test/test_asyncio/test_events.py index 1a06b426f71..b60c7452f3f 100644 --- a/Lib/test/test_asyncio/test_events.py +++ b/Lib/test/test_asyncio/test_events.py @@ -2876,7 +2876,6 @@ def test_get_event_loop_after_set_none(self): policy.set_event_loop(None) self.assertRaises(RuntimeError, policy.get_event_loop) - @unittest.expectedFailure # TODO: RUSTPYTHON; - mock.patch doesn't work correctly with threading.current_thread @mock.patch('asyncio.events.threading.current_thread') def test_get_event_loop_thread(self, m_current_thread): diff --git a/Lib/test/test_asyncio/test_unix_events.py 
b/Lib/test/test_asyncio/test_unix_events.py index 0faf32f79ea..520f5c733c3 100644 --- a/Lib/test/test_asyncio/test_unix_events.py +++ b/Lib/test/test_asyncio/test_unix_events.py @@ -1179,8 +1179,6 @@ async def runner(): wsock.close() -# TODO: RUSTPYTHON, fork() segfaults due to stale parking_lot global state -@unittest.skip("TODO: RUSTPYTHON") @support.requires_fork() class TestFork(unittest.TestCase): diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py index a2d2e3bb395..2f38550ff94 100644 --- a/Lib/test/test_builtin.py +++ b/Lib/test/test_builtin.py @@ -1365,7 +1365,6 @@ def test_map_pickle(self): # strict map tests based on strict zip tests - @unittest.expectedFailure # TODO: RUSTPYTHON; TypeError: Unexpected keyword argument strict def test_map_pickle_strict(self): a = (1, 2, 3) b = (4, 5, 6) @@ -1374,7 +1373,6 @@ def test_map_pickle_strict(self): m1 = map(pack, a, b, strict=True) self.check_iter_pickle(m1, t, proto) - @unittest.expectedFailure # TODO: RUSTPYTHON; TypeError: Unexpected keyword argument strict def test_map_pickle_strict_fail(self): a = (1, 2, 3) b = (4, 5, 6, 7) @@ -1385,7 +1383,6 @@ def test_map_pickle_strict_fail(self): self.assertEqual(self.iter_error(m1, ValueError), t) self.assertEqual(self.iter_error(m2, ValueError), t) - @unittest.expectedFailure # TODO: RUSTPYTHON; TypeError: Unexpected keyword argument strict def test_map_strict(self): self.assertEqual(tuple(map(pack, (1, 2, 3), 'abc', strict=True)), ((1, 'a'), (2, 'b'), (3, 'c'))) @@ -1412,7 +1409,6 @@ def test_map_strict(self): self.assertRaises(ValueError, tuple, map(pack, 'a', t2, t3, strict=True)) - @unittest.expectedFailure # TODO: RUSTPYTHON; TypeError: Unexpected keyword argument strict def test_map_strict_iterators(self): x = iter(range(5)) y = [0] @@ -1422,7 +1418,6 @@ def test_map_strict_iterators(self): self.assertEqual(next(x), 2) self.assertEqual(next(z), 1) - @unittest.expectedFailure # TODO: RUSTPYTHON; TypeError: Unexpected keyword argument strict def 
test_map_strict_error_handling(self): class Error(Exception): @@ -1456,7 +1451,6 @@ def __next__(self): l8 = self.iter_error(map(pack, Iter(3), "AB", strict=True), ValueError) self.assertEqual(l8, [(2, "A"), (1, "B")]) - @unittest.expectedFailure # TODO: RUSTPYTHON; TypeError: Unexpected keyword argument strict def test_map_strict_error_handling_stopiteration(self): class Iter: diff --git a/Lib/test/test_concurrent_futures/test_process_pool.py b/Lib/test/test_concurrent_futures/test_process_pool.py index ef318dfc7e1..5d4e9677f5c 100644 --- a/Lib/test/test_concurrent_futures/test_process_pool.py +++ b/Lib/test/test_concurrent_futures/test_process_pool.py @@ -85,7 +85,6 @@ def test_traceback(self): self.assertIn('raise RuntimeError(123) # some comment', f1.getvalue()) - @unittest.skip('TODO: RUSTPYTHON flaky EOFError') @hashlib_helper.requires_hashdigest('md5') def test_ressources_gced_in_workers(self): # Ensure that argument for a job are correctly gc-ed after the job diff --git a/Lib/test/test_concurrent_futures/test_wait.py b/Lib/test/test_concurrent_futures/test_wait.py index 818e0d51a2c..6749a690f6c 100644 --- a/Lib/test/test_concurrent_futures/test_wait.py +++ b/Lib/test/test_concurrent_futures/test_wait.py @@ -200,20 +200,5 @@ def future_func(): def setUpModule(): setup_module() -class ProcessPoolForkWaitTest(ProcessPoolForkWaitTest): # TODO: RUSTPYTHON - @unittest.skipIf(sys.platform == 'linux', "TODO: RUSTPYTHON flaky") - def test_first_completed(self): super().test_first_completed() # TODO: RUSTPYTHON - @unittest.skipIf(sys.platform == 'linux', "TODO: RUSTPYTHON Fatal Python error: Segmentation fault") - def test_first_completed_some_already_completed(self): super().test_first_completed_some_already_completed() # TODO: RUSTPYTHON - @unittest.skipIf(sys.platform != 'win32', "TODO: RUSTPYTHON flaky") - def test_first_exception(self): super().test_first_exception() # TODO: RUSTPYTHON - @unittest.skipIf(sys.platform == 'linux', "TODO: RUSTPYTHON flaky") - def 
test_first_exception_one_already_failed(self): super().test_first_exception_one_already_failed() # TODO: RUSTPYTHON - @unittest.skipIf(sys.platform != 'win32', "TODO: RUSTPYTHON flaky") - def test_first_exception_some_already_complete(self): super().test_first_exception_some_already_complete() # TODO: RUSTPYTHON - @unittest.skipIf(sys.platform == 'linux', "TODO: RUSTPYTHON Fatal Python error: Segmentation fault") - def test_timeout(self): super().test_timeout() # TODO: RUSTPYTHON - - if __name__ == "__main__": unittest.main() diff --git a/Lib/test/test_dataclasses/__init__.py b/Lib/test/test_dataclasses/__init__.py index 12db84a1209..dfe6b89f1ed 100644 --- a/Lib/test/test_dataclasses/__init__.py +++ b/Lib/test/test_dataclasses/__init__.py @@ -3672,7 +3672,6 @@ class A: self.assertEqual(obj.a, 'a') self.assertEqual(obj.b, 'b') - @unittest.expectedFailure # TODO: RUSTPYTHON def test_slots_no_weakref(self): @dataclass(slots=True) class A: @@ -3687,7 +3686,6 @@ class A: with self.assertRaises(AttributeError): a.__weakref__ - @unittest.expectedFailure # TODO: RUSTPYTHON def test_slots_weakref(self): @dataclass(slots=True, weakref_slot=True) class A: @@ -3748,7 +3746,6 @@ def test_weakref_slot_make_dataclass(self): "weakref_slot is True but slots is False"): B = make_dataclass('B', [('a', int),], weakref_slot=True) - @unittest.expectedFailure # TODO: RUSTPYTHON def test_weakref_slot_subclass_weakref_slot(self): @dataclass(slots=True, weakref_slot=True) class Base: @@ -3767,7 +3764,6 @@ class A(Base): a_ref = weakref.ref(a) self.assertIs(a.__weakref__, a_ref) - @unittest.expectedFailure # TODO: RUSTPYTHON def test_weakref_slot_subclass_no_weakref_slot(self): @dataclass(slots=True, weakref_slot=True) class Base: @@ -3785,7 +3781,6 @@ class A(Base): a_ref = weakref.ref(a) self.assertIs(a.__weakref__, a_ref) - @unittest.expectedFailure # TODO: RUSTPYTHON def test_weakref_slot_normal_base_weakref_slot(self): class Base: __slots__ = ('__weakref__',) @@ -3830,7 +3825,6 @@ class 
B[T2]: self.assertTrue(B.__weakref__) B() - @unittest.expectedFailure # TODO: RUSTPYTHON def test_dataclass_derived_generic_from_base(self): T = typing.TypeVar('T') diff --git a/Lib/test/test_descr.py b/Lib/test/test_descr.py index c948d156cdb..1f7c5452c4d 100644 --- a/Lib/test/test_descr.py +++ b/Lib/test/test_descr.py @@ -1321,7 +1321,6 @@ class X(object): with self.assertRaisesRegex(AttributeError, "'X' object has no attribute 'a'"): X().a - @unittest.expectedFailure # TODO: RUSTPYTHON def test_slots_special(self): # Testing __dict__ and __weakref__ in __slots__... class D(object): @@ -2294,7 +2293,6 @@ def __contains__(self, value): self.assertIn(i, p10) self.assertNotIn(10, p10) - @unittest.expectedFailure # TODO: RUSTPYTHON def test_weakrefs(self): # Testing weak references... import weakref @@ -3976,7 +3974,6 @@ def __init__(self, x): o = trash(o) del o - @unittest.expectedFailure # TODO: RUSTPYTHON def test_slots_multiple_inheritance(self): # SF bug 575229, multiple inheritance w/ slots dumps core class A(object): diff --git a/Lib/test/test_fork1.py b/Lib/test/test_fork1.py index 4f4a5ee0507..a6523bbc518 100644 --- a/Lib/test/test_fork1.py +++ b/Lib/test/test_fork1.py @@ -19,7 +19,6 @@ class ForkTest(ForkWait): - @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: process 44587 exited with code 1, but exit code 42 is expected def test_threaded_import_lock_fork(self): """Check fork() in main thread works while a subthread is doing an import""" import_started = threading.Event() diff --git a/Lib/test/test_format.py b/Lib/test/test_format.py index 5badff612b8..6868c87171d 100644 --- a/Lib/test/test_format.py +++ b/Lib/test/test_format.py @@ -423,7 +423,6 @@ def test_non_ascii(self): self.assertEqual(format(1+2j, "\u2007^8"), "\u2007(1+2j)\u2007") self.assertEqual(format(0j, "\u2007^4"), "\u20070j\u2007") - @unittest.skip("TODO: RUSTPYTHON; formatting does not support locales. 
See https://github.com/RustPython/RustPython/issues/5181") def test_locale(self): try: oldloc = locale.setlocale(locale.LC_ALL) diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index 3e3092dcae1..879a2875aaa 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -236,6 +236,8 @@ def test_function(self): # is 3 because it includes f's code object. self.assertIn(gc.collect(), (2, 3)) + # TODO: RUSTPYTHON - weakref clear ordering differs from 3.15+ + @unittest.expectedFailure def test_function_tp_clear_leaves_consistent_state(self): # https://github.com/python/cpython/issues/91636 code = """if 1: @@ -262,9 +264,11 @@ class Cyclic(tuple): # finalizer. def __del__(self): - # 5. Create a weakref to `func` now. If we had created - # it earlier, it would have been cleared by the - # garbage collector before calling the finalizers. + # 5. Create a weakref to `func` now. In previous + # versions of Python, this would avoid having it + # cleared by the garbage collector before calling + # the finalizers. Now, weakrefs get cleared after + # calling finalizers. self[1].ref = weakref.ref(self[0]) # 6. Drop the global reference to `latefin`. The only @@ -293,16 +297,42 @@ def func(): # which will find `cyc` and `func` as garbage. gc.collect() - # 9. Previously, this would crash because `func_qualname` - # had been NULL-ed out by func_clear(). + # 9. Previously, this would crash because the weakref + # created in the finalizer revealed the function after + # `tp_clear` was called and `func_qualname` + # had been NULL-ed out by func_clear(). Now, we clear + # weakrefs to unreachable objects before calling `tp_clear` + # but after calling finalizers. print(f"{func=}") """ - # We're mostly just checking that this doesn't crash. rc, stdout, stderr = assert_python_ok("-c", code) self.assertEqual(rc, 0) - self.assertRegex(stdout, rb"""\A\s*func=\s*\z""") + # The `func` global is None because the weakref was cleared. 
+ self.assertRegex(stdout, rb"""\A\s*func=None""") self.assertFalse(stderr) + # TODO: RUSTPYTHON - _datetime module not available + @unittest.expectedFailure + def test_datetime_weakref_cycle(self): + # https://github.com/python/cpython/issues/132413 + # If the weakref used by the datetime extension gets cleared by the GC (due to being + # in an unreachable cycle) then datetime functions would crash (get_module_state() + # was returning a NULL pointer). This bug is fixed by clearing weakrefs without + # callbacks *after* running finalizers. + code = """if 1: + import _datetime + class C: + def __del__(self): + print('__del__ called') + _datetime.timedelta(days=1) # crash? + + l = [C()] + l.append(l) + """ + rc, stdout, stderr = assert_python_ok("-c", code) + self.assertEqual(rc, 0) + self.assertEqual(stdout.strip(), b'__del__ called') + @refcount_test def test_frame(self): def f(): @@ -652,9 +682,8 @@ def callback(ignored): gc.collect() self.assertEqual(len(ouch), 2) # else the callbacks didn't run for x in ouch: - # If the callback resurrected one of these guys, the instance - # would be damaged, with an empty __dict__. - self.assertEqual(x, None) + # The weakref should be cleared before executing the callback. 
+ self.assertIsNone(x) def test_bug21435(self): # This is a poor test - its only virtue is that it happened to @@ -821,11 +850,15 @@ def test_get_stats(self): self.assertEqual(len(stats), 3) for st in stats: self.assertIsInstance(st, dict) - self.assertEqual(set(st), - {"collected", "collections", "uncollectable"}) + self.assertEqual( + set(st), + {"collected", "collections", "uncollectable", "candidates", "duration"} + ) self.assertGreaterEqual(st["collected"], 0) self.assertGreaterEqual(st["collections"], 0) self.assertGreaterEqual(st["uncollectable"], 0) + self.assertGreaterEqual(st["candidates"], 0) + self.assertGreaterEqual(st["duration"], 0) # Check that collection counts are incremented correctly if gc.isenabled(): self.addCleanup(gc.enable) @@ -836,11 +869,25 @@ def test_get_stats(self): self.assertEqual(new[0]["collections"], old[0]["collections"] + 1) self.assertEqual(new[1]["collections"], old[1]["collections"]) self.assertEqual(new[2]["collections"], old[2]["collections"]) + self.assertGreater(new[0]["duration"], old[0]["duration"]) + self.assertEqual(new[1]["duration"], old[1]["duration"]) + self.assertEqual(new[2]["duration"], old[2]["duration"]) + for stat in ["collected", "uncollectable", "candidates"]: + self.assertGreaterEqual(new[0][stat], old[0][stat]) + self.assertEqual(new[1][stat], old[1][stat]) + self.assertEqual(new[2][stat], old[2][stat]) gc.collect(2) - new = gc.get_stats() - self.assertEqual(new[0]["collections"], old[0]["collections"] + 1) + old, new = new, gc.get_stats() + self.assertEqual(new[0]["collections"], old[0]["collections"]) self.assertEqual(new[1]["collections"], old[1]["collections"]) self.assertEqual(new[2]["collections"], old[2]["collections"] + 1) + self.assertEqual(new[0]["duration"], old[0]["duration"]) + self.assertEqual(new[1]["duration"], old[1]["duration"]) + self.assertGreater(new[2]["duration"], old[2]["duration"]) + for stat in ["collected", "uncollectable", "candidates"]: + self.assertEqual(new[0][stat], 
old[0][stat]) + self.assertEqual(new[1][stat], old[1][stat]) + self.assertGreaterEqual(new[2][stat], old[2][stat]) def test_freeze(self): gc.freeze() @@ -1156,6 +1203,37 @@ def test_something(self): """) assert_python_ok("-c", source) + def test_do_not_cleanup_type_subclasses_before_finalization(self): + # See https://github.com/python/cpython/issues/135552 + # If we cleanup weakrefs for tp_subclasses before calling + # the finalizer (__del__) then the line `fail = BaseNode.next.next` + # should fail because we are trying to access a subclass + # attribute. But subclass type cache was not properly invalidated. + code = """ + class BaseNode: + def __del__(self): + BaseNode.next = BaseNode.next.next + fail = BaseNode.next.next + + class Node(BaseNode): + pass + + BaseNode.next = Node() + BaseNode.next.next = Node() + """ + # this test checks garbage collection while interp + # finalization + assert_python_ok("-c", textwrap.dedent(code)) + + code_inside_function = textwrap.dedent(F""" + def test(): + {textwrap.indent(code, ' ')} + + test() + """) + # this test checks regular garbage collection + assert_python_ok("-c", code_inside_function) + @unittest.skipUnless(Py_GIL_DISABLED, "requires free-threaded GC") @unittest.skipIf(_testinternalcapi is None, "requires _testinternalcapi") @@ -1260,9 +1338,11 @@ def test_collect(self): # Check that we got the right info dict for all callbacks for v in self.visit: info = v[2] - self.assertTrue("generation" in info) - self.assertTrue("collected" in info) - self.assertTrue("uncollectable" in info) + self.assertIn("generation", info) + self.assertIn("collected", info) + self.assertIn("uncollectable", info) + self.assertIn("candidates", info) + self.assertIn("duration", info) def test_collect_generation(self): self.preclean() @@ -1450,6 +1530,7 @@ def callback(ignored): self.assertEqual(x, None) @gc_threshold(1000, 0, 0) + @unittest.skipIf(Py_GIL_DISABLED, "requires GC generations or increments") def test_bug1055820d(self): # 
Corresponds to temp2d.py in the bug report. This is very much like # test_bug1055820c, but uses a __del__ method instead of a weakref diff --git a/Lib/test/test_generator_stop.py b/Lib/test/test_generator_stop.py new file mode 100644 index 00000000000..9cacdfff4a2 --- /dev/null +++ b/Lib/test/test_generator_stop.py @@ -0,0 +1,35 @@ +from __future__ import generator_stop + +import unittest + + +class TestPEP479(unittest.TestCase): + def test_stopiteration_wrapping(self): + def f(): + raise StopIteration + def g(): + yield f() + with self.assertRaisesRegex(RuntimeError, + "generator raised StopIteration"): + next(g()) + + @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: is not + def test_stopiteration_wrapping_context(self): + def f(): + raise StopIteration + def g(): + yield f() + + try: + next(g()) + except RuntimeError as exc: + self.assertIs(type(exc.__cause__), StopIteration) + self.assertIs(type(exc.__context__), StopIteration) + self.assertTrue(exc.__suppress_context__) + else: + self.fail('__cause__, __context__, or __suppress_context__ ' + 'were not properly set') + + +if __name__ == '__main__': + unittest.main() diff --git a/Lib/test/test_generators.py b/Lib/test/test_generators.py index dc5b23b0f93..8da74ff530d 100644 --- a/Lib/test/test_generators.py +++ b/Lib/test/test_generators.py @@ -136,6 +136,19 @@ def gen(): self.assertEqual(len(resurrected), 1) self.assertIsInstance(resurrected[0].gi_code, types.CodeType) + @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: is not None + def test_exhausted_generator_frame_cycle(self): + def g(): + yield + + generator = g() + frame = generator.gi_frame + self.assertIsNone(frame.f_back) + next(generator) + self.assertIsNone(frame.f_back) + next(generator, None) + self.assertIsNone(frame.f_back) + class GeneratorTest(unittest.TestCase): @@ -292,6 +305,34 @@ def __iter__(self): self.assertEqual([1, 2], list(i for i in C())) + @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: False 
is not true + def test_close_clears_frame(self): + # gh-142766: Test that closing a generator clears its frame + class DetectDelete: + def __init__(self): + DetectDelete.deleted = False + + def __del__(self): + DetectDelete.deleted = True + + def generator(arg): + yield + + # Test a freshly created generator (not suspended) + g = generator(DetectDelete()) + g.close() + self.assertTrue(DetectDelete.deleted) + + # Test a suspended generator + g = generator(DetectDelete()) + next(g) + g.close() + self.assertTrue(DetectDelete.deleted) + + # Clear via gi_frame.clear() + g = generator(DetectDelete()) + g.gi_frame.clear() + self.assertTrue(DetectDelete.deleted) class ModifyUnderlyingIterableTest(unittest.TestCase): iterables = [ diff --git a/Lib/test/test_locale.py b/Lib/test/test_locale.py index 71d03f3a3f9..8e49aa8954e 100644 --- a/Lib/test/test_locale.py +++ b/Lib/test/test_locale.py @@ -1,7 +1,7 @@ from decimal import Decimal -from test.support import verbose, is_android, is_emscripten, is_wasi +from test.support import cpython_only, verbose, is_android, linked_to_musl, os_helper from test.support.warnings_helper import check_warnings -from test.support.import_helper import import_fresh_module +from test.support.import_helper import ensure_lazy_imports, import_fresh_module from unittest import mock import unittest import locale @@ -9,6 +9,11 @@ import sys import codecs +class LazyImportTest(unittest.TestCase): + @cpython_only + def test_lazy_import(self): + ensure_lazy_imports("locale", {"re", "warnings"}) + class BaseLocalizedTest(unittest.TestCase): # @@ -351,10 +356,7 @@ def setUp(self): @unittest.skipIf(sys.platform.startswith('aix'), 'bpo-29972: broken test on AIX') - @unittest.skipIf( - is_emscripten or is_wasi, - "musl libc issue on Emscripten/WASI, bpo-46390" - ) + @unittest.skipIf(linked_to_musl(), "musl libc issue, bpo-46390") @unittest.skipIf(sys.platform.startswith("netbsd"), "gh-124108: NetBSD doesn't support UTF-8 for LC_COLLATE") def 
test_strcoll_with_diacritic(self): @@ -362,10 +364,7 @@ def test_strcoll_with_diacritic(self): @unittest.skipIf(sys.platform.startswith('aix'), 'bpo-29972: broken test on AIX') - @unittest.skipIf( - is_emscripten or is_wasi, - "musl libc issue on Emscripten/WASI, bpo-46390" - ) + @unittest.skipIf(linked_to_musl(), "musl libc issue, bpo-46390") @unittest.skipIf(sys.platform.startswith("netbsd"), "gh-124108: NetBSD doesn't support UTF-8 for LC_COLLATE") def test_strxfrm_with_diacritic(self): @@ -541,7 +540,6 @@ def test_defaults_UTF8(self): # valid. Furthermore LC_CTYPE=UTF is used by the UTF-8 locale coercing # during interpreter startup (on macOS). import _locale - import os self.assertEqual(locale._parse_localename('UTF-8'), (None, 'UTF-8')) @@ -551,25 +549,14 @@ def test_defaults_UTF8(self): else: orig_getlocale = None - orig_env = {} try: - for key in ('LC_ALL', 'LC_CTYPE', 'LANG', 'LANGUAGE'): - if key in os.environ: - orig_env[key] = os.environ[key] - del os.environ[key] - - os.environ['LC_CTYPE'] = 'UTF-8' - - with check_warnings(('', DeprecationWarning)): - self.assertEqual(locale.getdefaultlocale(), (None, 'UTF-8')) + with os_helper.EnvironmentVarGuard() as env: + env.unset('LC_ALL', 'LC_CTYPE', 'LANG', 'LANGUAGE') + env.set('LC_CTYPE', 'UTF-8') + with check_warnings(('', DeprecationWarning)): + self.assertEqual(locale.getdefaultlocale(), (None, 'UTF-8')) finally: - for k in orig_env: - os.environ[k] = orig_env[k] - - if 'LC_CTYPE' not in orig_env: - del os.environ['LC_CTYPE'] - if orig_getlocale is not None: _locale._getdefaultlocale = orig_getlocale diff --git a/Lib/test/test_modulefinder.py b/Lib/test/test_modulefinder.py new file mode 100644 index 00000000000..51f7fd257e0 --- /dev/null +++ b/Lib/test/test_modulefinder.py @@ -0,0 +1,440 @@ +import os +import errno +import importlib.machinery +import py_compile +import shutil +import unittest +import tempfile + +from test import support + +import modulefinder + +# Each test description is a list of 5 
items: +# +# 1. a module name that will be imported by modulefinder +# 2. a list of module names that modulefinder is required to find +# 3. a list of module names that modulefinder should complain +# about because they are not found +# 4. a list of module names that modulefinder should complain +# about because they MAY be not found +# 5. a string specifying packages to create; the format is obvious imo. +# +# Each package will be created in test_dir, and test_dir will be +# removed after the tests again. +# Modulefinder searches in a path that contains test_dir, plus +# the standard Lib directory. + +maybe_test = [ + "a.module", + ["a", "a.module", "sys", + "b"], + ["c"], ["b.something"], + """\ +a/__init__.py +a/module.py + from b import something + from c import something +b/__init__.py + from sys import * +""", +] + +maybe_test_new = [ + "a.module", + ["a", "a.module", "sys", + "b", "__future__"], + ["c"], ["b.something"], + """\ +a/__init__.py +a/module.py + from b import something + from c import something +b/__init__.py + from __future__ import absolute_import + from sys import * +"""] + +package_test = [ + "a.module", + ["a", "a.b", "a.c", "a.module", "mymodule", "sys"], + ["blahblah", "c"], [], + """\ +mymodule.py +a/__init__.py + import blahblah + from a import b + import c +a/module.py + import sys + from a import b as x + from a.c import sillyname +a/b.py +a/c.py + from a.module import x + import mymodule as sillyname + from sys import version_info +"""] + +absolute_import_test = [ + "a.module", + ["a", "a.module", + "b", "b.x", "b.y", "b.z", + "__future__", "sys", "gc"], + ["blahblah", "z"], [], + """\ +mymodule.py +a/__init__.py +a/module.py + from __future__ import absolute_import + import sys # sys + import blahblah # fails + import gc # gc + import b.x # b.x + from b import y # b.y + from b.z import * # b.z.* +a/gc.py +a/sys.py + import mymodule +a/b/__init__.py +a/b/x.py +a/b/y.py +a/b/z.py +b/__init__.py + import z +b/unused.py +b/x.py +b/y.py 
+b/z.py +"""] + +relative_import_test = [ + "a.module", + ["__future__", + "a", "a.module", + "a.b", "a.b.y", "a.b.z", + "a.b.c", "a.b.c.moduleC", + "a.b.c.d", "a.b.c.e", + "a.b.x", + "gc"], + [], [], + """\ +mymodule.py +a/__init__.py + from .b import y, z # a.b.y, a.b.z +a/module.py + from __future__ import absolute_import # __future__ + import gc # gc +a/gc.py +a/sys.py +a/b/__init__.py + from ..b import x # a.b.x + #from a.b.c import moduleC + from .c import moduleC # a.b.moduleC +a/b/x.py +a/b/y.py +a/b/z.py +a/b/g.py +a/b/c/__init__.py + from ..c import e # a.b.c.e +a/b/c/moduleC.py + from ..c import d # a.b.c.d +a/b/c/d.py +a/b/c/e.py +a/b/c/x.py +"""] + +relative_import_test_2 = [ + "a.module", + ["a", "a.module", + "a.sys", + "a.b", "a.b.y", "a.b.z", + "a.b.c", "a.b.c.d", + "a.b.c.e", + "a.b.c.moduleC", + "a.b.c.f", + "a.b.x", + "a.another"], + [], [], + """\ +mymodule.py +a/__init__.py + from . import sys # a.sys +a/another.py +a/module.py + from .b import y, z # a.b.y, a.b.z +a/gc.py +a/sys.py +a/b/__init__.py + from .c import moduleC # a.b.c.moduleC + from .c import d # a.b.c.d +a/b/x.py +a/b/y.py +a/b/z.py +a/b/c/__init__.py + from . import e # a.b.c.e +a/b/c/moduleC.py + # + from . import f # a.b.c.f + from .. import x # a.b.x + from ... import another # a.another +a/b/c/d.py +a/b/c/e.py +a/b/c/f.py +"""] + +relative_import_test_3 = [ + "a.module", + ["a", "a.module"], + ["a.bar"], + [], + """\ +a/__init__.py + def foo(): pass +a/module.py + from . import foo + from . import bar +"""] + +relative_import_test_4 = [ + "a.module", + ["a", "a.module"], + [], + [], + """\ +a/__init__.py + def foo(): pass +a/module.py + from . import * +"""] + +bytecode_test = [ + "a", + ["a"], + [], + [], + "" +] + +syntax_error_test = [ + "a.module", + ["a", "a.module", "b"], + ["b.module"], [], + """\ +a/__init__.py +a/module.py + import b.module +b/__init__.py +b/module.py + ? 
# SyntaxError: invalid syntax +"""] + + +same_name_as_bad_test = [ + "a.module", + ["a", "a.module", "b", "b.c"], + ["c"], [], + """\ +a/__init__.py +a/module.py + import c + from b import c +b/__init__.py +b/c.py +"""] + +coding_default_utf8_test = [ + "a_utf8", + ["a_utf8", "b_utf8"], + [], [], + """\ +a_utf8.py + # use the default of utf8 + print('Unicode test A code point 2090 \u2090 that is not valid in cp1252') + import b_utf8 +b_utf8.py + # use the default of utf8 + print('Unicode test B code point 2090 \u2090 that is not valid in cp1252') +"""] + +coding_explicit_utf8_test = [ + "a_utf8", + ["a_utf8", "b_utf8"], + [], [], + """\ +a_utf8.py + # coding=utf8 + print('Unicode test A code point 2090 \u2090 that is not valid in cp1252') + import b_utf8 +b_utf8.py + # use the default of utf8 + print('Unicode test B code point 2090 \u2090 that is not valid in cp1252') +"""] + +coding_explicit_cp1252_test = [ + "a_cp1252", + ["a_cp1252", "b_utf8"], + [], [], + b"""\ +a_cp1252.py + # coding=cp1252 + # 0xe2 is not allowed in utf8 + print('CP1252 test P\xe2t\xe9') + import b_utf8 +""" + """\ +b_utf8.py + # use the default of utf8 + print('Unicode test A code point 2090 \u2090 that is not valid in cp1252') +""".encode('utf-8')] + +def open_file(path): + dirname = os.path.dirname(path) + try: + os.makedirs(dirname) + except OSError as e: + if e.errno != errno.EEXIST: + raise + return open(path, 'wb') + + +def create_package(test_dir, source): + ofi = None + try: + for line in source.splitlines(): + if type(line) != bytes: + line = line.encode('utf-8') + if line.startswith(b' ') or line.startswith(b'\t'): + ofi.write(line.strip() + b'\n') + else: + if ofi: + ofi.close() + if type(line) == bytes: + line = line.decode('utf-8') + ofi = open_file(os.path.join(test_dir, line.strip())) + finally: + if ofi: + ofi.close() + +class ModuleFinderTest(unittest.TestCase): + def setUp(self): + self.test_dir = tempfile.mkdtemp() + self.test_path = [self.test_dir, 
os.path.dirname(tempfile.__file__)] + + def tearDown(self): + shutil.rmtree(self.test_dir) + + def _do_test(self, info, report=False, debug=0, replace_paths=[], modulefinder_class=modulefinder.ModuleFinder): + import_this, modules, missing, maybe_missing, source = info + create_package(self.test_dir, source) + mf = modulefinder_class(path=self.test_path, debug=debug, + replace_paths=replace_paths) + mf.import_hook(import_this) + if report: + mf.report() +## # This wouldn't work in general when executed several times: +## opath = sys.path[:] +## sys.path = self.test_path +## try: +## __import__(import_this) +## except: +## import traceback; traceback.print_exc() +## sys.path = opath +## return + modules = sorted(set(modules)) + found = sorted(mf.modules) + # check if we found what we expected, not more, not less + self.assertEqual(found, modules) + + # check for missing and maybe missing modules + bad, maybe = mf.any_missing_maybe() + self.assertEqual(bad, missing) + self.assertEqual(maybe, maybe_missing) + + def test_package(self): + self._do_test(package_test) + + def test_maybe(self): + self._do_test(maybe_test) + + def test_maybe_new(self): + self._do_test(maybe_test_new) + + def test_absolute_imports(self): + self._do_test(absolute_import_test) + + def test_relative_imports(self): + self._do_test(relative_import_test) + + def test_relative_imports_2(self): + self._do_test(relative_import_test_2) + + def test_relative_imports_3(self): + self._do_test(relative_import_test_3) + + def test_relative_imports_4(self): + self._do_test(relative_import_test_4) + + def test_syntax_error(self): + self._do_test(syntax_error_test) + + def test_same_name_as_bad(self): + self._do_test(same_name_as_bad_test) + + def test_bytecode(self): + base_path = os.path.join(self.test_dir, 'a') + source_path = base_path + importlib.machinery.SOURCE_SUFFIXES[0] + bytecode_path = base_path + importlib.machinery.BYTECODE_SUFFIXES[0] + with open_file(source_path) as file: + 
file.write('testing_modulefinder = True\n'.encode('utf-8')) + py_compile.compile(source_path, cfile=bytecode_path) + os.remove(source_path) + self._do_test(bytecode_test) + + # TODO: RUSTPYTHON; panics at code.rs with 'called Option::unwrap() on a None value' + @unittest.skip("TODO: RUSTPYTHON; panics in co_filename replacement") + def test_replace_paths(self): + old_path = os.path.join(self.test_dir, 'a', 'module.py') + new_path = os.path.join(self.test_dir, 'a', 'spam.py') + with support.captured_stdout() as output: + self._do_test(maybe_test, debug=2, + replace_paths=[(old_path, new_path)]) + output = output.getvalue() + expected = "co_filename %r changed to %r" % (old_path, new_path) + self.assertIn(expected, output) + + def test_extended_opargs(self): + extended_opargs_test = [ + "a", + ["a", "b"], + [], [], + """\ +a.py + %r + import b +b.py +""" % list(range(2**16))] # 2**16 constants + self._do_test(extended_opargs_test) + + def test_coding_default_utf8(self): + self._do_test(coding_default_utf8_test) + + def test_coding_explicit_utf8(self): + self._do_test(coding_explicit_utf8_test) + + def test_coding_explicit_cp1252(self): + self._do_test(coding_explicit_cp1252_test) + + def test_load_module_api(self): + class CheckLoadModuleApi(modulefinder.ModuleFinder): + def __init__(self, *args, **kwds): + super().__init__(*args, **kwds) + + def load_module(self, fqname, fp, pathname, file_info): + # confirm that the fileinfo is a tuple of 3 elements + suffix, mode, type = file_info + return super().load_module(fqname, fp, pathname, file_info) + + self._do_test(absolute_import_test, modulefinder_class=CheckLoadModuleApi) + +if __name__ == "__main__": + unittest.main() diff --git a/Lib/test/test_multiprocessing_fork/test_manager.py b/Lib/test/test_multiprocessing_fork/test_manager.py index f8d7eddd652..9efbb83bbb7 100644 --- a/Lib/test/test_multiprocessing_fork/test_manager.py +++ b/Lib/test/test_multiprocessing_fork/test_manager.py @@ -3,22 +3,5 @@ 
install_tests_in_module_dict(globals(), 'fork', only_type="manager") -import sys # TODO: RUSTPYTHON -class WithManagerTestCondition(WithManagerTestCondition): # TODO: RUSTPYTHON - @unittest.skipIf(sys.platform == 'linux', 'TODO: RUSTPYTHON, times out') - def test_notify_all(self): super().test_notify_all() # TODO: RUSTPYTHON - -class WithManagerTestQueue(WithManagerTestQueue): # TODO: RUSTPYTHON - @unittest.skipIf(sys.platform == 'linux', 'TODO: RUSTPYTHON, times out') - def test_fork(self): super().test_fork() # TODO: RUSTPYTHON - -local_globs = globals().copy() # TODO: RUSTPYTHON -for name, base in local_globs.items(): # TODO: RUSTPYTHON - if name.startswith('WithManagerTest') and issubclass(base, unittest.TestCase): # TODO: RUSTPYTHON - base = unittest.skipIf( # TODO: RUSTPYTHON - sys.platform == 'linux', # TODO: RUSTPYTHON - 'TODO: RUSTPYTHON flaky BrokenPipeError, flaky ConnectionRefusedError, flaky ConnectionResetError, flaky EOFError' - )(base) # TODO: RUSTPYTHON - if __name__ == '__main__': unittest.main() diff --git a/Lib/test/test_multiprocessing_fork/test_misc.py b/Lib/test/test_multiprocessing_fork/test_misc.py index bcf0858258e..891a494020c 100644 --- a/Lib/test/test_multiprocessing_fork/test_misc.py +++ b/Lib/test/test_multiprocessing_fork/test_misc.py @@ -3,24 +3,5 @@ install_tests_in_module_dict(globals(), 'fork', exclude_types=True) -import sys # TODO: RUSTPYTHON -class TestManagerExceptions(TestManagerExceptions): # TODO: RUSTPYTHON - @unittest.skipIf(sys.platform == 'linux', "TODO: RUSTPYTHON flaky") - def test_queue_get(self): super().test_queue_get() # TODO: RUSTPYTHON - -@unittest.skipIf(sys.platform == 'linux', "TODO: RUSTPYTHON flaky") -class TestInitializers(TestInitializers): pass # TODO: RUSTPYTHON - -class TestStartMethod(TestStartMethod): # TODO: RUSTPYTHON - @unittest.skipIf(sys.platform == 'linux', "TODO: RUSTPYTHON flaky") - def test_nested_startmethod(self): super().test_nested_startmethod() # TODO: RUSTPYTHON - 
-@unittest.skipIf(sys.platform == 'linux', "TODO: RUSTPYTHON flaky") -class TestSyncManagerTypes(TestSyncManagerTypes): pass # TODO: RUSTPYTHON - -class MiscTestCase(MiscTestCase): # TODO: RUSTPYTHON - @unittest.skipIf(sys.platform == 'linux', "TODO: RUSTPYTHON flaky") - def test_forked_thread_not_started(self): super().test_forked_thread_not_started() # TODO: RUSTPYTHON - if __name__ == '__main__': unittest.main() diff --git a/Lib/test/test_multiprocessing_fork/test_threads.py b/Lib/test/test_multiprocessing_fork/test_threads.py index 1065ebf7fe4..1670e34cb17 100644 --- a/Lib/test/test_multiprocessing_fork/test_threads.py +++ b/Lib/test/test_multiprocessing_fork/test_threads.py @@ -3,14 +3,5 @@ install_tests_in_module_dict(globals(), 'fork', only_type="threads") -import os, sys # TODO: RUSTPYTHON -class WithThreadsTestPool(WithThreadsTestPool): # TODO: RUSTPYTHON - @unittest.skip("TODO: RUSTPYTHON; flaky environment pollution when running rustpython -m test --fail-env-changed due to unknown reason") - def test_terminate(self): super().test_terminate() # TODO: RUSTPYTHON - -class WithThreadsTestManagerRestart(WithThreadsTestManagerRestart): # TODO: RUSTPYTHON - @unittest.skipIf(sys.platform == 'linux', 'TODO: RUSTPYTHON flaky flaky BrokenPipeError, flaky ConnectionRefusedError, flaky ConnectionResetError, flaky EOFError') - def test_rapid_restart(self): super().test_rapid_restart() # TODO: RUSTPYTHON - if __name__ == '__main__': unittest.main() diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py index d63dc60be31..00bd75bab51 100644 --- a/Lib/test/test_os.py +++ b/Lib/test/test_os.py @@ -5574,7 +5574,6 @@ def test_fork_warns_when_non_python_thread_exists(self): self.assertEqual(err.decode("utf-8"), "") self.assertEqual(out.decode("utf-8"), "") - @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: b"can't fork at interpreter shutdown" not found in b"Exception ignored in: \nAttributeError: 'NoneType' object has no attribute 'fork'\n" def 
test_fork_at_finalization(self): code = """if 1: import atexit diff --git a/Lib/test/test_poplib.py b/Lib/test/test_poplib.py new file mode 100644 index 00000000000..ef2da97f867 --- /dev/null +++ b/Lib/test/test_poplib.py @@ -0,0 +1,571 @@ +"""Test script for poplib module.""" + +# Modified by Giampaolo Rodola' to give poplib.POP3 and poplib.POP3_SSL +# a real test suite + +import poplib +import socket +import os +import errno +import threading + +import unittest +from unittest import TestCase, skipUnless +from test import support as test_support +from test.support import hashlib_helper +from test.support import socket_helper +from test.support import threading_helper +from test.support import asynchat +from test.support import asyncore + + +test_support.requires_working_socket(module=True) + +HOST = socket_helper.HOST +PORT = 0 + +SUPPORTS_SSL = False +if hasattr(poplib, 'POP3_SSL'): + import ssl + + SUPPORTS_SSL = True + CERTFILE = os.path.join(os.path.dirname(__file__) or os.curdir, "certdata", "keycert3.pem") + CAFILE = os.path.join(os.path.dirname(__file__) or os.curdir, "certdata", "pycacert.pem") + +requires_ssl = skipUnless(SUPPORTS_SSL, 'SSL not supported') + +# the dummy data returned by server when LIST and RETR commands are issued +LIST_RESP = b'1 1\r\n2 2\r\n3 3\r\n4 4\r\n5 5\r\n.\r\n' +RETR_RESP = b"""From: postmaster@python.org\ +\r\nContent-Type: text/plain\r\n\ +MIME-Version: 1.0\r\n\ +Subject: Dummy\r\n\ +\r\n\ +line1\r\n\ +line2\r\n\ +line3\r\n\ +.\r\n""" + + +class DummyPOP3Handler(asynchat.async_chat): + + CAPAS = {'UIDL': [], 'IMPLEMENTATION': ['python-testlib-pop-server']} + enable_UTF8 = False + + def __init__(self, conn): + asynchat.async_chat.__init__(self, conn) + self.set_terminator(b"\r\n") + self.in_buffer = [] + self.push('+OK dummy pop3 server ready. 
') + self.tls_active = False + self.tls_starting = False + + def collect_incoming_data(self, data): + self.in_buffer.append(data) + + def found_terminator(self): + line = b''.join(self.in_buffer) + line = str(line, 'ISO-8859-1') + self.in_buffer = [] + cmd = line.split(' ')[0].lower() + space = line.find(' ') + if space != -1: + arg = line[space + 1:] + else: + arg = "" + if hasattr(self, 'cmd_' + cmd): + method = getattr(self, 'cmd_' + cmd) + method(arg) + else: + self.push('-ERR unrecognized POP3 command "%s".' %cmd) + + def handle_error(self): + raise + + def push(self, data): + asynchat.async_chat.push(self, data.encode("ISO-8859-1") + b'\r\n') + + def cmd_echo(self, arg): + # sends back the received string (used by the test suite) + self.push(arg) + + def cmd_user(self, arg): + if arg != "guido": + self.push("-ERR no such user") + self.push('+OK password required') + + def cmd_pass(self, arg): + if arg != "python": + self.push("-ERR wrong password") + self.push('+OK 10 messages') + + def cmd_stat(self, arg): + self.push('+OK 10 100') + + def cmd_list(self, arg): + if arg: + self.push('+OK %s %s' % (arg, arg)) + else: + self.push('+OK') + asynchat.async_chat.push(self, LIST_RESP) + + cmd_uidl = cmd_list + + def cmd_retr(self, arg): + self.push('+OK %s bytes' %len(RETR_RESP)) + asynchat.async_chat.push(self, RETR_RESP) + + cmd_top = cmd_retr + + def cmd_dele(self, arg): + self.push('+OK message marked for deletion.') + + def cmd_noop(self, arg): + self.push('+OK done nothing.') + + def cmd_rpop(self, arg): + self.push('+OK done nothing.') + + def cmd_apop(self, arg): + self.push('+OK done nothing.') + + def cmd_quit(self, arg): + self.push('+OK closing.') + self.close_when_done() + + def _get_capas(self): + _capas = dict(self.CAPAS) + if not self.tls_active and SUPPORTS_SSL: + _capas['STLS'] = [] + return _capas + + def cmd_capa(self, arg): + self.push('+OK Capability list follows') + if self._get_capas(): + for cap, params in self._get_capas().items(): + _ln = 
[cap] + if params: + _ln.extend(params) + self.push(' '.join(_ln)) + self.push('.') + + def cmd_utf8(self, arg): + self.push('+OK I know RFC6856' + if self.enable_UTF8 + else '-ERR What is UTF8?!') + + if SUPPORTS_SSL: + + def cmd_stls(self, arg): + if self.tls_active is False: + self.push('+OK Begin TLS negotiation') + context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER) + context.load_cert_chain(CERTFILE) + tls_sock = context.wrap_socket(self.socket, + server_side=True, + do_handshake_on_connect=False, + suppress_ragged_eofs=False) + self.del_channel() + self.set_socket(tls_sock) + self.tls_active = True + self.tls_starting = True + self.in_buffer = [] + self._do_tls_handshake() + else: + self.push('-ERR Command not permitted when TLS active') + + def _do_tls_handshake(self): + try: + self.socket.do_handshake() + except ssl.SSLError as err: + if err.args[0] in (ssl.SSL_ERROR_WANT_READ, + ssl.SSL_ERROR_WANT_WRITE): + return + elif err.args[0] == ssl.SSL_ERROR_EOF: + return self.handle_close() + # TODO: SSLError does not expose alert information + elif ("SSLV3_ALERT_BAD_CERTIFICATE" in err.args[1] or + "SSLV3_ALERT_CERTIFICATE_UNKNOWN" in err.args[1]): + return self.handle_close() + raise + except OSError as err: + if err.args[0] == errno.ECONNABORTED: + return self.handle_close() + else: + self.tls_active = True + self.tls_starting = False + + def handle_read(self): + if self.tls_starting: + self._do_tls_handshake() + else: + try: + asynchat.async_chat.handle_read(self) + except ssl.SSLEOFError: + self.handle_close() + +class DummyPOP3Server(asyncore.dispatcher, threading.Thread): + + handler = DummyPOP3Handler + + def __init__(self, address, af=socket.AF_INET): + threading.Thread.__init__(self) + asyncore.dispatcher.__init__(self) + self.daemon = True + self.create_socket(af, socket.SOCK_STREAM) + self.bind(address) + self.listen(5) + self.active = False + self.active_lock = threading.Lock() + self.host, self.port = self.socket.getsockname()[:2] + 
self.handler_instance = None + + def start(self): + assert not self.active + self.__flag = threading.Event() + threading.Thread.start(self) + self.__flag.wait() + + def run(self): + self.active = True + self.__flag.set() + try: + while self.active and asyncore.socket_map: + with self.active_lock: + asyncore.loop(timeout=0.1, count=1) + finally: + asyncore.close_all(ignore_all=True) + + def stop(self): + assert self.active + self.active = False + self.join() + + def handle_accepted(self, conn, addr): + self.handler_instance = self.handler(conn) + + def handle_connect(self): + self.close() + handle_read = handle_connect + + def writable(self): + return 0 + + def handle_error(self): + raise + + +class TestPOP3Class(TestCase): + def assertOK(self, resp): + self.assertStartsWith(resp, b"+OK") + + def setUp(self): + self.server = DummyPOP3Server((HOST, PORT)) + self.server.start() + self.client = poplib.POP3(self.server.host, self.server.port, + timeout=test_support.LOOPBACK_TIMEOUT) + + def tearDown(self): + self.client.close() + self.server.stop() + # Explicitly clear the attribute to prevent dangling thread + self.server = None + + def test_getwelcome(self): + self.assertEqual(self.client.getwelcome(), + b'+OK dummy pop3 server ready. 
') + + def test_exceptions(self): + self.assertRaises(poplib.error_proto, self.client._shortcmd, 'echo -err') + + def test_user(self): + self.assertOK(self.client.user('guido')) + self.assertRaises(poplib.error_proto, self.client.user, 'invalid') + + def test_pass_(self): + self.assertOK(self.client.pass_('python')) + self.assertRaises(poplib.error_proto, self.client.user, 'invalid') + + def test_stat(self): + self.assertEqual(self.client.stat(), (10, 100)) + + original_shortcmd = self.client._shortcmd + def mock_shortcmd_invalid_format(cmd): + if cmd == 'STAT': + return b'+OK' + return original_shortcmd(cmd) + + self.client._shortcmd = mock_shortcmd_invalid_format + with self.assertRaises(poplib.error_proto): + self.client.stat() + + def mock_shortcmd_invalid_data(cmd): + if cmd == 'STAT': + return b'+OK abc def' + return original_shortcmd(cmd) + + self.client._shortcmd = mock_shortcmd_invalid_data + with self.assertRaises(poplib.error_proto): + self.client.stat() + + def mock_shortcmd_extra_fields(cmd): + if cmd == 'STAT': + return b'+OK 1 2 3 4 5' + return original_shortcmd(cmd) + + self.client._shortcmd = mock_shortcmd_extra_fields + + result = self.client.stat() + self.assertEqual(result, (1, 2)) + + self.client._shortcmd = original_shortcmd + + def test_list(self): + self.assertEqual(self.client.list()[1:], + ([b'1 1', b'2 2', b'3 3', b'4 4', b'5 5'], + 25)) + self.assertEndsWith(self.client.list('1'), b"OK 1 1") + + def test_retr(self): + expected = (b'+OK 116 bytes', + [b'From: postmaster@python.org', b'Content-Type: text/plain', + b'MIME-Version: 1.0', b'Subject: Dummy', + b'', b'line1', b'line2', b'line3'], + 113) + foo = self.client.retr('foo') + self.assertEqual(foo, expected) + + def test_too_long_lines(self): + self.assertRaises(poplib.error_proto, self.client._shortcmd, + 'echo +%s' % ((poplib._MAXLINE + 10) * 'a')) + + def test_dele(self): + self.assertOK(self.client.dele('foo')) + + def test_noop(self): + self.assertOK(self.client.noop()) + + def 
test_rpop(self): + self.assertOK(self.client.rpop('foo')) + + @hashlib_helper.requires_hashdigest('md5', openssl=True) + def test_apop_normal(self): + self.assertOK(self.client.apop('foo', 'dummypassword')) + + @hashlib_helper.requires_hashdigest('md5', openssl=True) + def test_apop_REDOS(self): + # Replace welcome with very long evil welcome. + # NB The upper bound on welcome length is currently 2048. + # At this length, evil input makes each apop call take + # on the order of milliseconds instead of microseconds. + evil_welcome = b'+OK' + (b'<' * 1000000) + with test_support.swap_attr(self.client, 'welcome', evil_welcome): + # The evil welcome is invalid, so apop should throw. + self.assertRaises(poplib.error_proto, self.client.apop, 'a', 'kb') + + def test_top(self): + expected = (b'+OK 116 bytes', + [b'From: postmaster@python.org', b'Content-Type: text/plain', + b'MIME-Version: 1.0', b'Subject: Dummy', b'', + b'line1', b'line2', b'line3'], + 113) + self.assertEqual(self.client.top(1, 1), expected) + + def test_uidl(self): + self.client.uidl() + self.client.uidl('foo') + + def test_utf8_raises_if_unsupported(self): + self.server.handler.enable_UTF8 = False + self.assertRaises(poplib.error_proto, self.client.utf8) + + def test_utf8(self): + self.server.handler.enable_UTF8 = True + expected = b'+OK I know RFC6856' + result = self.client.utf8() + self.assertEqual(result, expected) + + def test_capa(self): + capa = self.client.capa() + self.assertTrue('IMPLEMENTATION' in capa.keys()) + + def test_quit(self): + resp = self.client.quit() + self.assertTrue(resp) + self.assertIsNone(self.client.sock) + self.assertIsNone(self.client.file) + + @requires_ssl + def test_stls_capa(self): + capa = self.client.capa() + self.assertTrue('STLS' in capa.keys()) + + @requires_ssl + def test_stls(self): + expected = b'+OK Begin TLS negotiation' + resp = self.client.stls() + self.assertEqual(resp, expected) + + @requires_ssl + def test_stls_context(self): + expected = b'+OK Begin TLS 
negotiation' + ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT) + ctx.load_verify_locations(CAFILE) + self.assertEqual(ctx.verify_mode, ssl.CERT_REQUIRED) + self.assertEqual(ctx.check_hostname, True) + with self.assertRaises(ssl.CertificateError): + resp = self.client.stls(context=ctx) + self.client = poplib.POP3("localhost", self.server.port, + timeout=test_support.LOOPBACK_TIMEOUT) + resp = self.client.stls(context=ctx) + self.assertEqual(resp, expected) + + +if SUPPORTS_SSL: + from test.test_ftplib import SSLConnection + + class DummyPOP3_SSLHandler(SSLConnection, DummyPOP3Handler): + + def __init__(self, conn): + asynchat.async_chat.__init__(self, conn) + self.secure_connection() + self.set_terminator(b"\r\n") + self.in_buffer = [] + self.push('+OK dummy pop3 server ready. ') + self.tls_active = True + self.tls_starting = False + + +@requires_ssl +class TestPOP3_SSLClass(TestPOP3Class): + # repeat previous tests by using poplib.POP3_SSL + + def setUp(self): + self.server = DummyPOP3Server((HOST, PORT)) + self.server.handler = DummyPOP3_SSLHandler + self.server.start() + self.client = poplib.POP3_SSL(self.server.host, self.server.port) + + def test__all__(self): + self.assertIn('POP3_SSL', poplib.__all__) + + def test_context(self): + ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT) + ctx.check_hostname = False + ctx.verify_mode = ssl.CERT_NONE + + self.client.quit() + self.client = poplib.POP3_SSL(self.server.host, self.server.port, + context=ctx) + self.assertIsInstance(self.client.sock, ssl.SSLSocket) + self.assertIs(self.client.sock.context, ctx) + self.assertStartsWith(self.client.noop(), b'+OK') + + def test_stls(self): + self.assertRaises(poplib.error_proto, self.client.stls) + + test_stls_context = test_stls + + def test_stls_capa(self): + capa = self.client.capa() + self.assertFalse('STLS' in capa.keys()) + + +@requires_ssl +class TestPOP3_TLSClass(TestPOP3Class): + # repeat previous tests by using poplib.POP3.stls() + + def setUp(self): + self.server = 
DummyPOP3Server((HOST, PORT)) + self.server.start() + self.client = poplib.POP3(self.server.host, self.server.port, + timeout=test_support.LOOPBACK_TIMEOUT) + self.client.stls() + + def tearDown(self): + if self.client.file is not None and self.client.sock is not None: + try: + self.client.quit() + except poplib.error_proto: + # happens in the test_too_long_lines case; the overlong + # response will be treated as response to QUIT and raise + # this exception + self.client.close() + self.server.stop() + # Explicitly clear the attribute to prevent dangling thread + self.server = None + + def test_stls(self): + self.assertRaises(poplib.error_proto, self.client.stls) + + test_stls_context = test_stls + + def test_stls_capa(self): + capa = self.client.capa() + self.assertFalse(b'STLS' in capa.keys()) + + +class TestTimeouts(TestCase): + + def setUp(self): + self.evt = threading.Event() + self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + self.sock.settimeout(60) # Safety net. Look issue 11812 + self.port = socket_helper.bind_port(self.sock) + self.thread = threading.Thread(target=self.server, args=(self.evt, self.sock)) + self.thread.daemon = True + self.thread.start() + self.evt.wait() + + def tearDown(self): + self.thread.join() + # Explicitly clear the attribute to prevent dangling thread + self.thread = None + + def server(self, evt, serv): + serv.listen() + evt.set() + try: + conn, addr = serv.accept() + conn.send(b"+ Hola mundo\n") + conn.close() + except TimeoutError: + pass + finally: + serv.close() + + def testTimeoutDefault(self): + self.assertIsNone(socket.getdefaulttimeout()) + socket.setdefaulttimeout(test_support.LOOPBACK_TIMEOUT) + try: + pop = poplib.POP3(HOST, self.port) + finally: + socket.setdefaulttimeout(None) + self.assertEqual(pop.sock.gettimeout(), test_support.LOOPBACK_TIMEOUT) + pop.close() + + def testTimeoutNone(self): + self.assertIsNone(socket.getdefaulttimeout()) + socket.setdefaulttimeout(30) + try: + pop = poplib.POP3(HOST, 
self.port, timeout=None) + finally: + socket.setdefaulttimeout(None) + self.assertIsNone(pop.sock.gettimeout()) + pop.close() + + def testTimeoutValue(self): + pop = poplib.POP3(HOST, self.port, timeout=test_support.LOOPBACK_TIMEOUT) + self.assertEqual(pop.sock.gettimeout(), test_support.LOOPBACK_TIMEOUT) + pop.close() + with self.assertRaises(ValueError): + poplib.POP3(HOST, self.port, timeout=0) + + +def setUpModule(): + thread_info = threading_helper.threading_setup() + unittest.addModuleCleanup(threading_helper.threading_cleanup, *thread_info) + + +if __name__ == '__main__': + unittest.main() diff --git a/Lib/test/test_symtable.py b/Lib/test/test_symtable.py index ae93ee8d91f..1653ab4a718 100644 --- a/Lib/test/test_symtable.py +++ b/Lib/test/test_symtable.py @@ -561,7 +561,6 @@ def get_identifiers_recursive(self, st, res): for ch in st.get_children(): self.get_identifiers_recursive(ch, res) - @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: 2 != 1 def test_loopvar_in_only_one_scope(self): # ensure that the loop variable appears only once in the symtable comps = [ diff --git a/Lib/test/test_tabnanny.py b/Lib/test/test_tabnanny.py index 372be9eb8c3..d7a77eb26e4 100644 --- a/Lib/test/test_tabnanny.py +++ b/Lib/test/test_tabnanny.py @@ -316,7 +316,6 @@ def validate_cmd(self, *args, stdout="", stderr="", partial=False, expect_failur self.assertListEqual(out.splitlines(), stdout.splitlines()) self.assertListEqual(err.splitlines(), stderr.splitlines()) - @unittest.expectedFailure # TODO: RUSTPYTHON; Should displays error when errored python file is given. 
def test_with_errored_file(self): """Should displays error when errored python file is given.""" with TemporaryPyFile(SOURCE_CODES["wrong_indented"]) as file_path: diff --git a/Lib/test/test_threading.py b/Lib/test/test_threading.py index 8db0bbdb949..17693ae093f 100644 --- a/Lib/test/test_threading.py +++ b/Lib/test/test_threading.py @@ -1162,8 +1162,6 @@ def import_threading(): self.assertEqual(out, b'') self.assertEqual(err, b'') - # TODO: RUSTPYTHON - __del__ not called during interpreter finalization (no cyclic GC) - @unittest.expectedFailure def test_start_new_thread_at_finalization(self): code = """if 1: import _thread diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py index 44ef4e24165..394a87c3601 100644 --- a/Lib/test/test_tokenize.py +++ b/Lib/test/test_tokenize.py @@ -1,17 +1,22 @@ -from test import support -from test.support import os_helper -from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP, - STRING, ENDMARKER, ENCODING, tok_name, detect_encoding, - open as tokenize_open, Untokenizer, generate_tokens, - NEWLINE) -from io import BytesIO, StringIO +import contextlib +import itertools +import os +import re +import string +import tempfile +import token +import tokenize import unittest +from io import BytesIO, StringIO from textwrap import dedent from unittest import TestCase, mock -from test.test_grammar import (VALID_UNDERSCORE_LITERALS, - INVALID_UNDERSCORE_LITERALS) -import os -import token +from test import support +from test.support import os_helper +from test.support.script_helper import run_test_script, make_script, run_python_until_end +from test.support.numbers import ( + VALID_UNDERSCORE_LITERALS, + INVALID_UNDERSCORE_LITERALS, +) # Converts a source string into a list of textual representation @@ -24,12 +29,12 @@ def stringify_tokens_from_source(token_generator, source_string): missing_trailing_nl = source_string[-1] not in '\r\n' for type, token, start, end, line in token_generator: - if type == 
ENDMARKER: + if type == tokenize.ENDMARKER: break # Ignore the new line on the last line if the input lacks one - if missing_trailing_nl and type == NEWLINE and end[0] == num_lines: + if missing_trailing_nl and type == tokenize.NEWLINE and end[0] == num_lines: continue - type = tok_name[type] + type = tokenize.tok_name[type] result.append(f" {type:10} {token!r:13} {start} {end}") return result @@ -45,18 +50,37 @@ def check_tokenize(self, s, expected): # Format the tokens in s in a table format. # The ENDMARKER and final NEWLINE are omitted. f = BytesIO(s.encode('utf-8')) - result = stringify_tokens_from_source(tokenize(f.readline), s) + result = stringify_tokens_from_source(tokenize.tokenize(f.readline), s) self.assertEqual(result, [" ENCODING 'utf-8' (0, 0) (0, 0)"] + expected.rstrip().splitlines()) + def test_invalid_readline(self): + def gen(): + yield "sdfosdg" + yield "sdfosdg" + with self.assertRaises(TypeError): + list(tokenize.tokenize(gen().__next__)) + + def gen(): + yield b"sdfosdg" + yield b"sdfosdg" + with self.assertRaises(TypeError): + list(tokenize.generate_tokens(gen().__next__)) + + def gen(): + yield "sdfosdg" + 1/0 + with self.assertRaises(ZeroDivisionError): + list(tokenize.generate_tokens(gen().__next__)) + def test_implicit_newline(self): # Make sure that the tokenizer puts in an implicit NEWLINE # when the input lacks a trailing new line. 
f = BytesIO("x".encode('utf-8')) - tokens = list(tokenize(f.readline)) - self.assertEqual(tokens[-2].type, NEWLINE) - self.assertEqual(tokens[-1].type, ENDMARKER) + tokens = list(tokenize.tokenize(f.readline)) + self.assertEqual(tokens[-2].type, tokenize.NEWLINE) + self.assertEqual(tokens[-1].type, tokenize.ENDMARKER) def test_basic(self): self.check_tokenize("1 + 1", """\ @@ -83,6 +107,32 @@ def test_basic(self): NEWLINE '\\n' (4, 26) (4, 27) DEDENT '' (5, 0) (5, 0) """) + + self.check_tokenize("if True:\r\n # NL\r\n foo='bar'\r\n\r\n", """\ + NAME 'if' (1, 0) (1, 2) + NAME 'True' (1, 3) (1, 7) + OP ':' (1, 7) (1, 8) + NEWLINE '\\r\\n' (1, 8) (1, 10) + COMMENT '# NL' (2, 4) (2, 8) + NL '\\r\\n' (2, 8) (2, 10) + INDENT ' ' (3, 0) (3, 4) + NAME 'foo' (3, 4) (3, 7) + OP '=' (3, 7) (3, 8) + STRING "\'bar\'" (3, 8) (3, 13) + NEWLINE '\\r\\n' (3, 13) (3, 15) + NL '\\r\\n' (4, 0) (4, 2) + DEDENT '' (5, 0) (5, 0) + """) + + self.check_tokenize("x = 1 + \\\r\n1\r\n", """\ + NAME 'x' (1, 0) (1, 1) + OP '=' (1, 2) (1, 3) + NUMBER '1' (1, 4) (1, 5) + OP '+' (1, 6) (1, 7) + NUMBER '1' (2, 0) (2, 1) + NEWLINE '\\r\\n' (2, 1) (2, 3) + """) + indent_error_file = b"""\ def k(x): x += 2 @@ -91,9 +141,18 @@ def k(x): readline = BytesIO(indent_error_file).readline with self.assertRaisesRegex(IndentationError, "unindent does not match any " - "outer indentation level"): - for tok in tokenize(readline): + "outer indentation level") as e: + for tok in tokenize.tokenize(readline): pass + self.assertEqual(e.exception.lineno, 3) + self.assertEqual(e.exception.filename, '') + self.assertEqual(e.exception.end_lineno, None) + self.assertEqual(e.exception.end_offset, None) + self.assertEqual( + e.exception.msg, + 'unindent does not match any outer indentation level') + self.assertEqual(e.exception.offset, 9) + self.assertEqual(e.exception.text, ' x += 5') def test_int(self): # Ordinary integers and binary operators @@ -177,7 +236,7 @@ def test_long(self): """) def test_float(self): - # 
Floating point numbers + # Floating-point numbers self.check_tokenize("x = 3.14159", """\ NAME 'x' (1, 0) (1, 1) OP '=' (1, 2) (1, 3) @@ -219,8 +278,8 @@ def test_float(self): def test_underscore_literals(self): def number_token(s): f = BytesIO(s.encode('utf-8')) - for toktype, token, start, end, line in tokenize(f.readline): - if toktype == NUMBER: + for toktype, token, start, end, line in tokenize.tokenize(f.readline): + if toktype == tokenize.NUMBER: return token return 'invalid token' for lit in VALID_UNDERSCORE_LITERALS: @@ -228,7 +287,16 @@ def number_token(s): # this won't work with compound complex inputs continue self.assertEqual(number_token(lit), lit) + # Valid cases with extra underscores in the tokenize module + # See gh-105549 for context + extra_valid_cases = {"0_7", "09_99"} for lit in INVALID_UNDERSCORE_LITERALS: + if lit in extra_valid_cases: + continue + try: + number_token(lit) + except tokenize.TokenError: + continue self.assertNotEqual(number_token(lit), lit) def test_string(self): @@ -380,21 +448,175 @@ def test_string(self): STRING 'rb"\""a\\\\\\nb\\\\\\nc"\""' (1, 0) (3, 4) """) self.check_tokenize('f"abc"', """\ - STRING 'f"abc"' (1, 0) (1, 6) + FSTRING_START 'f"' (1, 0) (1, 2) + FSTRING_MIDDLE 'abc' (1, 2) (1, 5) + FSTRING_END '"' (1, 5) (1, 6) """) self.check_tokenize('fR"a{b}c"', """\ - STRING 'fR"a{b}c"' (1, 0) (1, 9) + FSTRING_START 'fR"' (1, 0) (1, 3) + FSTRING_MIDDLE 'a' (1, 3) (1, 4) + OP '{' (1, 4) (1, 5) + NAME 'b' (1, 5) (1, 6) + OP '}' (1, 6) (1, 7) + FSTRING_MIDDLE 'c' (1, 7) (1, 8) + FSTRING_END '"' (1, 8) (1, 9) + """) + self.check_tokenize('fR"a{{{b!r}}}c"', """\ + FSTRING_START 'fR"' (1, 0) (1, 3) + FSTRING_MIDDLE 'a{' (1, 3) (1, 5) + OP '{' (1, 6) (1, 7) + NAME 'b' (1, 7) (1, 8) + OP '!' 
(1, 8) (1, 9) + NAME 'r' (1, 9) (1, 10) + OP '}' (1, 10) (1, 11) + FSTRING_MIDDLE '}' (1, 11) (1, 12) + FSTRING_MIDDLE 'c' (1, 13) (1, 14) + FSTRING_END '"' (1, 14) (1, 15) + """) + self.check_tokenize('f"{{{1+1}}}"', """\ + FSTRING_START 'f"' (1, 0) (1, 2) + FSTRING_MIDDLE '{' (1, 2) (1, 3) + OP '{' (1, 4) (1, 5) + NUMBER '1' (1, 5) (1, 6) + OP '+' (1, 6) (1, 7) + NUMBER '1' (1, 7) (1, 8) + OP '}' (1, 8) (1, 9) + FSTRING_MIDDLE '}' (1, 9) (1, 10) + FSTRING_END '"' (1, 11) (1, 12) + """) + self.check_tokenize('f"""{f\'\'\'{f\'{f"{1+1}"}\'}\'\'\'}"""', """\ + FSTRING_START 'f\"""' (1, 0) (1, 4) + OP '{' (1, 4) (1, 5) + FSTRING_START "f'''" (1, 5) (1, 9) + OP '{' (1, 9) (1, 10) + FSTRING_START "f'" (1, 10) (1, 12) + OP '{' (1, 12) (1, 13) + FSTRING_START 'f"' (1, 13) (1, 15) + OP '{' (1, 15) (1, 16) + NUMBER '1' (1, 16) (1, 17) + OP '+' (1, 17) (1, 18) + NUMBER '1' (1, 18) (1, 19) + OP '}' (1, 19) (1, 20) + FSTRING_END '"' (1, 20) (1, 21) + OP '}' (1, 21) (1, 22) + FSTRING_END "'" (1, 22) (1, 23) + OP '}' (1, 23) (1, 24) + FSTRING_END "'''" (1, 24) (1, 27) + OP '}' (1, 27) (1, 28) + FSTRING_END '\"""' (1, 28) (1, 31) + """) + self.check_tokenize('f""" x\nstr(data, encoding={invalid!r})\n"""', """\ + FSTRING_START 'f\"""' (1, 0) (1, 4) + FSTRING_MIDDLE ' x\\nstr(data, encoding=' (1, 4) (2, 19) + OP '{' (2, 19) (2, 20) + NAME 'invalid' (2, 20) (2, 27) + OP '!' 
(2, 27) (2, 28) + NAME 'r' (2, 28) (2, 29) + OP '}' (2, 29) (2, 30) + FSTRING_MIDDLE ')\\n' (2, 30) (3, 0) + FSTRING_END '\"""' (3, 0) (3, 3) + """) + self.check_tokenize('f"""123456789\nsomething{None}bad"""', """\ + FSTRING_START 'f\"""' (1, 0) (1, 4) + FSTRING_MIDDLE '123456789\\nsomething' (1, 4) (2, 9) + OP '{' (2, 9) (2, 10) + NAME 'None' (2, 10) (2, 14) + OP '}' (2, 14) (2, 15) + FSTRING_MIDDLE 'bad' (2, 15) (2, 18) + FSTRING_END '\"""' (2, 18) (2, 21) """) self.check_tokenize('f"""abc"""', """\ - STRING 'f\"\"\"abc\"\"\"' (1, 0) (1, 10) + FSTRING_START 'f\"""' (1, 0) (1, 4) + FSTRING_MIDDLE 'abc' (1, 4) (1, 7) + FSTRING_END '\"""' (1, 7) (1, 10) """) self.check_tokenize(r'f"abc\ def"', """\ - STRING 'f"abc\\\\\\ndef"' (1, 0) (2, 4) + FSTRING_START 'f"' (1, 0) (1, 2) + FSTRING_MIDDLE 'abc\\\\\\ndef' (1, 2) (2, 3) + FSTRING_END '"' (2, 3) (2, 4) """) self.check_tokenize(r'Rf"abc\ def"', """\ - STRING 'Rf"abc\\\\\\ndef"' (1, 0) (2, 4) + FSTRING_START 'Rf"' (1, 0) (1, 3) + FSTRING_MIDDLE 'abc\\\\\\ndef' (1, 3) (2, 3) + FSTRING_END '"' (2, 3) (2, 4) + """) + self.check_tokenize("f'some words {a+b:.3f} more words {c+d=} final words'", """\ + FSTRING_START "f'" (1, 0) (1, 2) + FSTRING_MIDDLE 'some words ' (1, 2) (1, 13) + OP '{' (1, 13) (1, 14) + NAME 'a' (1, 14) (1, 15) + OP '+' (1, 15) (1, 16) + NAME 'b' (1, 16) (1, 17) + OP ':' (1, 17) (1, 18) + FSTRING_MIDDLE '.3f' (1, 18) (1, 21) + OP '}' (1, 21) (1, 22) + FSTRING_MIDDLE ' more words ' (1, 22) (1, 34) + OP '{' (1, 34) (1, 35) + NAME 'c' (1, 35) (1, 36) + OP '+' (1, 36) (1, 37) + NAME 'd' (1, 37) (1, 38) + OP '=' (1, 38) (1, 39) + OP '}' (1, 39) (1, 40) + FSTRING_MIDDLE ' final words' (1, 40) (1, 52) + FSTRING_END "'" (1, 52) (1, 53) + """) + self.check_tokenize("""\ +f'''{ +3 +=}'''""", """\ + FSTRING_START "f'''" (1, 0) (1, 4) + OP '{' (1, 4) (1, 5) + NL '\\n' (1, 5) (1, 6) + NUMBER '3' (2, 0) (2, 1) + NL '\\n' (2, 1) (2, 2) + OP '=' (3, 0) (3, 1) + OP '}' (3, 1) (3, 2) + FSTRING_END "'''" (3, 2) (3, 5) + 
""") + self.check_tokenize("""\ +f'''__{ + x:a +}__'''""", """\ + FSTRING_START "f'''" (1, 0) (1, 4) + FSTRING_MIDDLE '__' (1, 4) (1, 6) + OP '{' (1, 6) (1, 7) + NL '\\n' (1, 7) (1, 8) + NAME 'x' (2, 4) (2, 5) + OP ':' (2, 5) (2, 6) + FSTRING_MIDDLE 'a\\n' (2, 6) (3, 0) + OP '}' (3, 0) (3, 1) + FSTRING_MIDDLE '__' (3, 1) (3, 3) + FSTRING_END "'''" (3, 3) (3, 6) + """) + self.check_tokenize("""\ +f'''__{ + x:a + b + c + d +}__'''""", """\ + FSTRING_START "f'''" (1, 0) (1, 4) + FSTRING_MIDDLE '__' (1, 4) (1, 6) + OP '{' (1, 6) (1, 7) + NL '\\n' (1, 7) (1, 8) + NAME 'x' (2, 4) (2, 5) + OP ':' (2, 5) (2, 6) + FSTRING_MIDDLE 'a\\n b\\n c\\n d\\n' (2, 6) (6, 0) + OP '}' (6, 0) (6, 1) + FSTRING_MIDDLE '__' (6, 1) (6, 3) + FSTRING_END "'''" (6, 3) (6, 6) + """) + + self.check_tokenize("""\ + '''Autorzy, którzy tą jednostkę mają wpisani jako AKTUALNA -- czyli + aktualni pracownicy, obecni pracownicy''' +""", """\ + INDENT ' ' (1, 0) (1, 4) + STRING "'''Autorzy, którzy tą jednostkę mają wpisani jako AKTUALNA -- czyli\\n aktualni pracownicy, obecni pracownicy'''" (1, 4) (2, 45) + NEWLINE '\\n' (2, 45) (2, 46) + DEDENT '' (3, 0) (3, 0) """) def test_function(self): @@ -945,29 +1167,95 @@ async def bar(): pass DEDENT '' (7, 0) (7, 0) """) + def test_newline_after_parenthesized_block_with_comment(self): + self.check_tokenize('''\ +[ + # A comment here + 1 +] +''', """\ + OP '[' (1, 0) (1, 1) + NL '\\n' (1, 1) (1, 2) + COMMENT '# A comment here' (2, 4) (2, 20) + NL '\\n' (2, 20) (2, 21) + NUMBER '1' (3, 4) (3, 5) + NL '\\n' (3, 5) (3, 6) + OP ']' (4, 0) (4, 1) + NEWLINE '\\n' (4, 1) (4, 2) + """) + + def test_closing_parenthesis_from_different_line(self): + self.check_tokenize("); x", """\ + OP ')' (1, 0) (1, 1) + OP ';' (1, 1) (1, 2) + NAME 'x' (1, 3) (1, 4) + """) + + def test_multiline_non_ascii_fstring(self): + self.check_tokenize("""\ +a = f''' + Autorzy, którzy tą jednostkę mają wpisani jako AKTUALNA -- czyli'''""", """\ + NAME 'a' (1, 0) (1, 1) + OP '=' (1, 2) (1, 3) + 
FSTRING_START "f\'\'\'" (1, 4) (1, 8) + FSTRING_MIDDLE '\\n Autorzy, którzy tą jednostkę mają wpisani jako AKTUALNA -- czyli' (1, 8) (2, 68) + FSTRING_END "\'\'\'" (2, 68) (2, 71) + """) + + def test_multiline_non_ascii_fstring_with_expr(self): + self.check_tokenize("""\ +f''' + 🔗 This is a test {test_arg1}🔗 +🔗'''""", """\ + FSTRING_START "f\'\'\'" (1, 0) (1, 4) + FSTRING_MIDDLE '\\n 🔗 This is a test ' (1, 4) (2, 21) + OP '{' (2, 21) (2, 22) + NAME 'test_arg1' (2, 22) (2, 31) + OP '}' (2, 31) (2, 32) + FSTRING_MIDDLE '🔗\\n🔗' (2, 32) (3, 1) + FSTRING_END "\'\'\'" (3, 1) (3, 4) + """) + + # gh-139516, the '\n' is explicit to ensure no trailing whitespace which would invalidate the test + self.check_tokenize('''f"{f(a=lambda: 'à'\n)}"''', """\ + FSTRING_START \'f"\' (1, 0) (1, 2) + OP '{' (1, 2) (1, 3) + NAME 'f' (1, 3) (1, 4) + OP '(' (1, 4) (1, 5) + NAME 'a' (1, 5) (1, 6) + OP '=' (1, 6) (1, 7) + NAME 'lambda' (1, 7) (1, 13) + OP ':' (1, 13) (1, 14) + STRING "\'à\'" (1, 15) (1, 18) + NL '\\n' (1, 18) (1, 19) + OP ')' (2, 0) (2, 1) + OP '}' (2, 1) (2, 2) + FSTRING_END \'"\' (2, 2) (2, 3) + """) + class GenerateTokensTest(TokenizeTest): def check_tokenize(self, s, expected): # Format the tokens in s in a table format. # The ENDMARKER and final NEWLINE are omitted. f = StringIO(s) - result = stringify_tokens_from_source(generate_tokens(f.readline), s) + result = stringify_tokens_from_source(tokenize.generate_tokens(f.readline), s) self.assertEqual(result, expected.rstrip().splitlines()) def decistmt(s): result = [] - g = tokenize(BytesIO(s.encode('utf-8')).readline) # tokenize the string + g = tokenize.tokenize(BytesIO(s.encode('utf-8')).readline) # tokenize the string for toknum, tokval, _, _, _ in g: - if toknum == NUMBER and '.' in tokval: # replace NUMBER tokens + if toknum == tokenize.NUMBER and '.' 
in tokval: # replace NUMBER tokens result.extend([ - (NAME, 'Decimal'), - (OP, '('), - (STRING, repr(tokval)), - (OP, ')') + (tokenize.NAME, 'Decimal'), + (tokenize.OP, '('), + (tokenize.STRING, repr(tokval)), + (tokenize.OP, ')') ]) else: result.append((toknum, tokval)) - return untokenize(result).decode('utf-8') + return tokenize.untokenize(result).decode('utf-8').strip() class TestMisc(TestCase): @@ -991,6 +1279,13 @@ def test_decistmt(self): self.assertEqual(eval(decistmt(s)), Decimal('-3.217160342717258261933904529E-7')) + def test___all__(self): + expected = token.__all__ + [ + "TokenInfo", "TokenError", "generate_tokens", + "detect_encoding", "untokenize", "open", "tokenize", + ] + self.assertCountEqual(tokenize.__all__, expected) + class TestTokenizerAdheresToPep0263(TestCase): """ @@ -998,8 +1293,9 @@ class TestTokenizerAdheresToPep0263(TestCase): """ def _testFile(self, filename): - path = os.path.join(os.path.dirname(__file__), filename) - TestRoundtrip.check_roundtrip(self, open(path, 'rb')) + path = os.path.join(os.path.dirname(__file__), 'tokenizedata', filename) + with open(path, 'rb') as f: + TestRoundtrip.check_roundtrip(self, f) def test_utf8_coding_cookie_and_no_utf8_bom(self): f = 'tokenize_tests-utf8-coding-cookie-and-no-utf8-bom-sig.txt' @@ -1024,8 +1320,6 @@ def test_utf8_coding_cookie_and_utf8_bom(self): f = 'tokenize_tests-utf8-coding-cookie-and-utf8-bom-sig.txt' self._testFile(f) - # TODO: RUSTPYTHON - @unittest.expectedFailure # "bad_coding.py" and "bad_coding2.py" make the WASM CI fail def test_bad_coding_cookie(self): self.assertRaises(SyntaxError, self._testFile, 'bad_coding.py') self.assertRaises(SyntaxError, self._testFile, 'bad_coding2.py') @@ -1041,33 +1335,18 @@ def readline(): nonlocal first if not first: first = True - return line + yield line else: - return b'' + yield b'' # skip the initial encoding token and the end tokens - tokens = list(_tokenize(readline, encoding='utf-8'))[1:-2] - expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), 
(1, 7), '"ЉЊЈЁЂ"')] + tokens = list(tokenize._generate_tokens_from_c_tokenizer(readline().__next__, + encoding='utf-8', + extra_tokens=True))[:-2] + expected_tokens = [tokenize.TokenInfo(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')] self.assertEqual(tokens, expected_tokens, "bytes not decoded with encoding") - def test__tokenize_does_not_decode_with_encoding_none(self): - literal = '"ЉЊЈЁЂ"' - first = False - def readline(): - nonlocal first - if not first: - first = True - return literal - else: - return b'' - - # skip the end tokens - tokens = list(_tokenize(readline, encoding=None))[:-2] - expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')] - self.assertEqual(tokens, expected_tokens, - "string not tokenized when encoding is None") - class TestDetectEncoding(TestCase): @@ -1084,24 +1363,63 @@ def readline(): def test_no_bom_no_encoding_cookie(self): lines = ( - b'# something\n', + b'#!/home/\xc3\xa4/bin/python\n', + b'# something \xe2\x82\xac\n', b'print(something)\n', b'do_something(else)\n' ) - encoding, consumed_lines = detect_encoding(self.get_readline(lines)) + encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines)) self.assertEqual(encoding, 'utf-8') self.assertEqual(consumed_lines, list(lines[:2])) + def test_no_bom_no_encoding_cookie_first_line_error(self): + lines = ( + b'#!/home/\xa4/bin/python\n\n', + b'print(something)\n', + b'do_something(else)\n' + ) + with self.assertRaises(SyntaxError): + tokenize.detect_encoding(self.get_readline(lines)) + + def test_no_bom_no_encoding_cookie_second_line_error(self): + lines = ( + b'#!/usr/bin/python\n', + b'# something \xe2\n', + b'print(something)\n', + b'do_something(else)\n' + ) + with self.assertRaises(SyntaxError): + tokenize.detect_encoding(self.get_readline(lines)) + def test_bom_no_cookie(self): lines = ( - b'\xef\xbb\xbf# something\n', + b'\xef\xbb\xbf#!/home/\xc3\xa4/bin/python\n', b'print(something)\n', b'do_something(else)\n' ) - encoding, consumed_lines = 
detect_encoding(self.get_readline(lines)) + encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines)) self.assertEqual(encoding, 'utf-8-sig') self.assertEqual(consumed_lines, - [b'# something\n', b'print(something)\n']) + [b'#!/home/\xc3\xa4/bin/python\n', b'print(something)\n']) + + def test_bom_no_cookie_first_line_error(self): + lines = ( + b'\xef\xbb\xbf#!/home/\xa4/bin/python\n', + b'print(something)\n', + b'do_something(else)\n' + ) + with self.assertRaises(SyntaxError): + tokenize.detect_encoding(self.get_readline(lines)) + + def test_bom_no_cookie_second_line_error(self): + lines = ( + b'\xef\xbb\xbf#!/usr/bin/python\n', + b'# something \xe2\n', + b'print(something)\n', + b'do_something(else)\n' + ) + with self.assertRaises(SyntaxError): + tokenize.detect_encoding(self.get_readline(lines)) def test_cookie_first_line_no_bom(self): lines = ( @@ -1109,7 +1427,7 @@ def test_cookie_first_line_no_bom(self): b'print(something)\n', b'do_something(else)\n' ) - encoding, consumed_lines = detect_encoding(self.get_readline(lines)) + encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines)) self.assertEqual(encoding, 'iso-8859-1') self.assertEqual(consumed_lines, [b'# -*- coding: latin-1 -*-\n']) @@ -1119,7 +1437,7 @@ def test_matched_bom_and_cookie_first_line(self): b'print(something)\n', b'do_something(else)\n' ) - encoding, consumed_lines = detect_encoding(self.get_readline(lines)) + encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines)) self.assertEqual(encoding, 'utf-8-sig') self.assertEqual(consumed_lines, [b'# coding=utf-8\n']) @@ -1130,7 +1448,7 @@ def test_mismatched_bom_and_cookie_first_line_raises_syntaxerror(self): b'do_something(else)\n' ) readline = self.get_readline(lines) - self.assertRaises(SyntaxError, detect_encoding, readline) + self.assertRaises(SyntaxError, tokenize.detect_encoding, readline) def test_cookie_second_line_no_bom(self): lines = ( @@ -1139,7 +1457,7 @@ def 
test_cookie_second_line_no_bom(self): b'print(something)\n', b'do_something(else)\n' ) - encoding, consumed_lines = detect_encoding(self.get_readline(lines)) + encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines)) self.assertEqual(encoding, 'ascii') expected = [b'#! something\n', b'# vim: set fileencoding=ascii :\n'] self.assertEqual(consumed_lines, expected) @@ -1151,7 +1469,7 @@ def test_matched_bom_and_cookie_second_line(self): b'print(something)\n', b'do_something(else)\n' ) - encoding, consumed_lines = detect_encoding(self.get_readline(lines)) + encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines)) self.assertEqual(encoding, 'utf-8-sig') self.assertEqual(consumed_lines, [b'#! something\n', b'f# coding=utf-8\n']) @@ -1164,7 +1482,7 @@ def test_mismatched_bom_and_cookie_second_line_raises_syntaxerror(self): b'do_something(else)\n' ) readline = self.get_readline(lines) - self.assertRaises(SyntaxError, detect_encoding, readline) + self.assertRaises(SyntaxError, tokenize.detect_encoding, readline) def test_cookie_second_line_noncommented_first_line(self): lines = ( @@ -1172,21 +1490,65 @@ def test_cookie_second_line_noncommented_first_line(self): b'# vim: set fileencoding=iso8859-15 :\n', b"print('\xe2\x82\xac')\n" ) - encoding, consumed_lines = detect_encoding(self.get_readline(lines)) + encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines)) self.assertEqual(encoding, 'utf-8') expected = [b"print('\xc2\xa3')\n"] self.assertEqual(consumed_lines, expected) - def test_cookie_second_line_commented_first_line(self): + def test_first_non_utf8_coding_line(self): lines = ( - b"#print('\xc2\xa3')\n", - b'# vim: set fileencoding=iso8859-15 :\n', - b"print('\xe2\x82\xac')\n" + b'#coding:iso-8859-15 \xa4\n', + b'print(something)\n' ) - encoding, consumed_lines = detect_encoding(self.get_readline(lines)) - self.assertEqual(encoding, 'iso8859-15') - expected = [b"#print('\xc2\xa3')\n", b'# vim: set 
fileencoding=iso8859-15 :\n'] - self.assertEqual(consumed_lines, expected) + encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines)) + self.assertEqual(encoding, 'iso-8859-15') + self.assertEqual(consumed_lines, list(lines[:1])) + + def test_first_utf8_coding_line_error(self): + lines = ( + b'#coding:ascii \xc3\xa4\n', + b'print(something)\n' + ) + with self.assertRaises(SyntaxError): + tokenize.detect_encoding(self.get_readline(lines)) + + def test_second_non_utf8_coding_line(self): + lines = ( + b'#!/usr/bin/python\n', + b'#coding:iso-8859-15 \xa4\n', + b'print(something)\n' + ) + encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines)) + self.assertEqual(encoding, 'iso-8859-15') + self.assertEqual(consumed_lines, list(lines[:2])) + + def test_second_utf8_coding_line_error(self): + lines = ( + b'#!/usr/bin/python\n', + b'#coding:ascii \xc3\xa4\n', + b'print(something)\n' + ) + with self.assertRaises(SyntaxError): + tokenize.detect_encoding(self.get_readline(lines)) + + def test_non_utf8_shebang(self): + lines = ( + b'#!/home/\xa4/bin/python\n', + b'#coding:iso-8859-15\n', + b'print(something)\n' + ) + encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines)) + self.assertEqual(encoding, 'iso-8859-15') + self.assertEqual(consumed_lines, list(lines[:2])) + + def test_utf8_shebang_error(self): + lines = ( + b'#!/home/\xc3\xa4/bin/python\n', + b'#coding:ascii\n', + b'print(something)\n' + ) + with self.assertRaises(SyntaxError): + tokenize.detect_encoding(self.get_readline(lines)) def test_cookie_second_line_empty_first_line(self): lines = ( @@ -1194,13 +1556,77 @@ def test_cookie_second_line_empty_first_line(self): b'# vim: set fileencoding=iso8859-15 :\n', b"print('\xe2\x82\xac')\n" ) - encoding, consumed_lines = detect_encoding(self.get_readline(lines)) + encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines)) self.assertEqual(encoding, 'iso8859-15') expected = [b'\n', b'# vim: 
set fileencoding=iso8859-15 :\n'] self.assertEqual(consumed_lines, expected) + def test_cookie_third_line(self): + lines = ( + b'#!/home/\xc3\xa4/bin/python\n', + b'# something\n', + b'# vim: set fileencoding=ascii :\n', + b'print(something)\n', + b'do_something(else)\n' + ) + encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines)) + self.assertEqual(encoding, 'utf-8') + self.assertEqual(consumed_lines, list(lines[:2])) + + def test_double_coding_line(self): + # If the first line matches the second line is ignored. + lines = ( + b'#coding:iso8859-15\n', + b'#coding:latin1\n', + b'print(something)\n' + ) + encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines)) + self.assertEqual(encoding, 'iso8859-15') + self.assertEqual(consumed_lines, list(lines[:1])) + + def test_double_coding_same_line(self): + lines = ( + b'#coding:iso8859-15 coding:latin1\n', + b'print(something)\n' + ) + encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines)) + self.assertEqual(encoding, 'iso8859-15') + self.assertEqual(consumed_lines, list(lines[:1])) + + def test_double_coding_utf8(self): + lines = ( + b'#coding:utf-8\n', + b'#coding:latin1\n', + b'print(something)\n' + ) + encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines)) + self.assertEqual(encoding, 'utf-8') + self.assertEqual(consumed_lines, list(lines[:1])) + + def test_nul_in_first_coding_line(self): + lines = ( + b'#coding:iso8859-15\x00\n', + b'\n', + b'\n', + b'print(something)\n' + ) + with self.assertRaisesRegex(SyntaxError, + "source code cannot contain null bytes"): + tokenize.detect_encoding(self.get_readline(lines)) + + def test_nul_in_second_coding_line(self): + lines = ( + b'#!/usr/bin/python\n', + b'#coding:iso8859-15\x00\n', + b'\n', + b'print(something)\n' + ) + with self.assertRaisesRegex(SyntaxError, + "source code cannot contain null bytes"): + tokenize.detect_encoding(self.get_readline(lines)) + def 
test_latin1_normalization(self): - # See get_normal_name() in tokenizer.c. + # See get_normal_name() in Parser/tokenizer/helpers.c. encodings = ("latin-1", "iso-8859-1", "iso-latin-1", "latin-1-unix", "iso-8859-1-unix", "iso-latin-1-mac") for encoding in encodings: @@ -1211,21 +1637,20 @@ def test_latin1_normalization(self): b"print(things)\n", b"do_something += 4\n") rl = self.get_readline(lines) - found, consumed_lines = detect_encoding(rl) + found, consumed_lines = tokenize.detect_encoding(rl) self.assertEqual(found, "iso-8859-1") def test_syntaxerror_latin1(self): - # Issue 14629: need to raise SyntaxError if the first + # Issue 14629: need to raise TokenError if the first # line(s) have non-UTF-8 characters lines = ( b'print("\xdf")', # Latin-1: LATIN SMALL LETTER SHARP S ) readline = self.get_readline(lines) - self.assertRaises(SyntaxError, detect_encoding, readline) - + self.assertRaises(SyntaxError, tokenize.detect_encoding, readline) def test_utf8_normalization(self): - # See get_normal_name() in tokenizer.c. + # See get_normal_name() in Parser/tokenizer/helpers.c. 
encodings = ("utf-8", "utf-8-mac", "utf-8-unix") for encoding in encodings: for rep in ("-", "_"): @@ -1234,39 +1659,40 @@ def test_utf8_normalization(self): b"# coding: " + enc.encode("ascii") + b"\n", b"1 + 3\n") rl = self.get_readline(lines) - found, consumed_lines = detect_encoding(rl) + found, consumed_lines = tokenize.detect_encoding(rl) self.assertEqual(found, "utf-8") def test_short_files(self): readline = self.get_readline((b'print(something)\n',)) - encoding, consumed_lines = detect_encoding(readline) + encoding, consumed_lines = tokenize.detect_encoding(readline) self.assertEqual(encoding, 'utf-8') self.assertEqual(consumed_lines, [b'print(something)\n']) - encoding, consumed_lines = detect_encoding(self.get_readline(())) + encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(())) self.assertEqual(encoding, 'utf-8') self.assertEqual(consumed_lines, []) readline = self.get_readline((b'\xef\xbb\xbfprint(something)\n',)) - encoding, consumed_lines = detect_encoding(readline) + encoding, consumed_lines = tokenize.detect_encoding(readline) self.assertEqual(encoding, 'utf-8-sig') self.assertEqual(consumed_lines, [b'print(something)\n']) readline = self.get_readline((b'\xef\xbb\xbf',)) - encoding, consumed_lines = detect_encoding(readline) + encoding, consumed_lines = tokenize.detect_encoding(readline) self.assertEqual(encoding, 'utf-8-sig') self.assertEqual(consumed_lines, []) readline = self.get_readline((b'# coding: bad\n',)) - self.assertRaises(SyntaxError, detect_encoding, readline) + self.assertRaises(SyntaxError, tokenize.detect_encoding, readline) def test_false_encoding(self): # Issue 18873: "Encoding" detected in non-comment lines readline = self.get_readline((b'print("#coding=fake")',)) - encoding, consumed_lines = detect_encoding(readline) + encoding, consumed_lines = tokenize.detect_encoding(readline) self.assertEqual(encoding, 'utf-8') self.assertEqual(consumed_lines, [b'print("#coding=fake")']) + @support.thread_unsafe def 
test_open(self): filename = os_helper.TESTFN + '.py' self.addCleanup(os_helper.unlink, filename) @@ -1276,14 +1702,14 @@ def test_open(self): with open(filename, 'w', encoding=encoding) as fp: print("# coding: %s" % encoding, file=fp) print("print('euro:\u20ac')", file=fp) - with tokenize_open(filename) as fp: + with tokenize.open(filename) as fp: self.assertEqual(fp.encoding, encoding) self.assertEqual(fp.mode, 'r') # test BOM (no coding cookie) with open(filename, 'w', encoding='utf-8-sig') as fp: print("print('euro:\u20ac')", file=fp) - with tokenize_open(filename) as fp: + with tokenize.open(filename) as fp: self.assertEqual(fp.encoding, 'utf-8-sig') self.assertEqual(fp.mode, 'r') @@ -1310,16 +1736,16 @@ def readline(self): ins = Bunk(lines, path) # Make sure lacking a name isn't an issue. del ins.name - detect_encoding(ins.readline) + tokenize.detect_encoding(ins.readline) with self.assertRaisesRegex(SyntaxError, '.*{}'.format(path)): ins = Bunk(lines, path) - detect_encoding(ins.readline) + tokenize.detect_encoding(ins.readline) def test_open_error(self): # Issue #23840: open() must close the binary file on error m = BytesIO(b'#coding:xxx') with mock.patch('tokenize._builtin_open', return_value=m): - self.assertRaises(SyntaxError, tokenize_open, 'foobar') + self.assertRaises(SyntaxError, tokenize.open, 'foobar') self.assertTrue(m.closed) @@ -1327,17 +1753,20 @@ class TestTokenize(TestCase): def test_tokenize(self): import tokenize as tokenize_module - encoding = object() + encoding = "utf-8" encoding_used = None def mock_detect_encoding(readline): return encoding, [b'first', b'second'] - def mock__tokenize(readline, encoding): + def mock__tokenize(readline, encoding, **kwargs): nonlocal encoding_used encoding_used = encoding out = [] while True: - next_line = readline() + try: + next_line = readline() + except StopIteration: + return out if next_line: out.append(next_line) continue @@ -1352,16 +1781,16 @@ def mock_readline(): return str(counter).encode() 
orig_detect_encoding = tokenize_module.detect_encoding - orig__tokenize = tokenize_module._tokenize + orig_c_token = tokenize_module._generate_tokens_from_c_tokenizer tokenize_module.detect_encoding = mock_detect_encoding - tokenize_module._tokenize = mock__tokenize + tokenize_module._generate_tokens_from_c_tokenizer = mock__tokenize try: - results = tokenize(mock_readline) - self.assertEqual(list(results), + results = tokenize.tokenize(mock_readline) + self.assertEqual(list(results)[1:], [b'first', b'second', b'1', b'2', b'3', b'4']) finally: tokenize_module.detect_encoding = orig_detect_encoding - tokenize_module._tokenize = orig__tokenize + tokenize_module._generate_tokens_from_c_tokenizer = orig_c_token self.assertEqual(encoding_used, encoding) @@ -1373,23 +1802,23 @@ def test_oneline_defs(self): buf = '\n'.join(buf) # Test that 500 consequent, one-line defs is OK - toks = list(tokenize(BytesIO(buf.encode('utf-8')).readline)) + toks = list(tokenize.tokenize(BytesIO(buf.encode('utf-8')).readline)) self.assertEqual(toks[-3].string, 'OK') # [-1] is always ENDMARKER # [-2] is always NEWLINE def assertExactTypeEqual(self, opstr, *optypes): - tokens = list(tokenize(BytesIO(opstr.encode('utf-8')).readline)) + tokens = list(tokenize.tokenize(BytesIO(opstr.encode('utf-8')).readline)) num_optypes = len(optypes) self.assertEqual(len(tokens), 3 + num_optypes) - self.assertEqual(tok_name[tokens[0].exact_type], - tok_name[ENCODING]) + self.assertEqual(tokenize.tok_name[tokens[0].exact_type], + tokenize.tok_name[tokenize.ENCODING]) for i in range(num_optypes): - self.assertEqual(tok_name[tokens[i + 1].exact_type], - tok_name[optypes[i]]) - self.assertEqual(tok_name[tokens[1 + num_optypes].exact_type], - tok_name[token.NEWLINE]) - self.assertEqual(tok_name[tokens[2 + num_optypes].exact_type], - tok_name[token.ENDMARKER]) + self.assertEqual(tokenize.tok_name[tokens[i + 1].exact_type], + tokenize.tok_name[optypes[i]]) + self.assertEqual(tokenize.tok_name[tokens[1 + 
num_optypes].exact_type], + tokenize.tok_name[token.NEWLINE]) + self.assertEqual(tokenize.tok_name[tokens[2 + num_optypes].exact_type], + tokenize.tok_name[token.ENDMARKER]) def test_exact_type(self): self.assertExactTypeEqual('()', token.LPAR, token.RPAR) @@ -1439,11 +1868,11 @@ def test_exact_type(self): self.assertExactTypeEqual('@=', token.ATEQUAL) self.assertExactTypeEqual('a**2+b**2==c**2', - NAME, token.DOUBLESTAR, NUMBER, + tokenize.NAME, token.DOUBLESTAR, tokenize.NUMBER, token.PLUS, - NAME, token.DOUBLESTAR, NUMBER, + tokenize.NAME, token.DOUBLESTAR, tokenize.NUMBER, token.EQEQUAL, - NAME, token.DOUBLESTAR, NUMBER) + tokenize.NAME, token.DOUBLESTAR, tokenize.NUMBER) self.assertExactTypeEqual('{1, 2, 3}', token.LBRACE, token.NUMBER, token.COMMA, @@ -1463,19 +1892,55 @@ def test_pathological_trailing_whitespace(self): def test_comment_at_the_end_of_the_source_without_newline(self): # See http://bugs.python.org/issue44667 source = 'b = 1\n\n#test' - expected_tokens = [token.NAME, token.EQUAL, token.NUMBER, token.NEWLINE, token.NL, token.COMMENT] + expected_tokens = [ + tokenize.TokenInfo(type=token.ENCODING, string='utf-8', start=(0, 0), end=(0, 0), line=''), + tokenize.TokenInfo(type=token.NAME, string='b', start=(1, 0), end=(1, 1), line='b = 1\n'), + tokenize.TokenInfo(type=token.OP, string='=', start=(1, 2), end=(1, 3), line='b = 1\n'), + tokenize.TokenInfo(type=token.NUMBER, string='1', start=(1, 4), end=(1, 5), line='b = 1\n'), + tokenize.TokenInfo(type=token.NEWLINE, string='\n', start=(1, 5), end=(1, 6), line='b = 1\n'), + tokenize.TokenInfo(type=token.NL, string='\n', start=(2, 0), end=(2, 1), line='\n'), + tokenize.TokenInfo(type=token.COMMENT, string='#test', start=(3, 0), end=(3, 5), line='#test'), + tokenize.TokenInfo(type=token.NL, string='', start=(3, 5), end=(3, 6), line='#test'), + tokenize.TokenInfo(type=token.ENDMARKER, string='', start=(4, 0), end=(4, 0), line='') + ] + + tokens = 
list(tokenize.tokenize(BytesIO(source.encode('utf-8')).readline)) + self.assertEqual(tokens, expected_tokens) + + @unittest.expectedFailure # TODO: RUSTPYTHON; Diff is 869 characters long. Set self.maxDiff to None to see it. + def test_newline_and_space_at_the_end_of_the_source_without_newline(self): + # See https://github.com/python/cpython/issues/105435 + source = 'a\n ' + expected_tokens = [ + tokenize.TokenInfo(token.ENCODING, string='utf-8', start=(0, 0), end=(0, 0), line=''), + tokenize.TokenInfo(token.NAME, string='a', start=(1, 0), end=(1, 1), line='a\n'), + tokenize.TokenInfo(token.NEWLINE, string='\n', start=(1, 1), end=(1, 2), line='a\n'), + tokenize.TokenInfo(token.NL, string='', start=(2, 1), end=(2, 2), line=' '), + tokenize.TokenInfo(token.ENDMARKER, string='', start=(3, 0), end=(3, 0), line='') + ] + + tokens = list(tokenize.tokenize(BytesIO(source.encode('utf-8')).readline)) + self.assertEqual(tokens, expected_tokens) + + @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: b'SyntaxError' not found in b'OSError: stream did not contain valid UTF-8\n' + def test_invalid_character_in_fstring_middle(self): + # See gh-103824 + script = b'''F""" + \xe5"""''' + + with os_helper.temp_dir() as temp_dir: + filename = os.path.join(temp_dir, "script.py") + with open(filename, 'wb') as file: + file.write(script) + rs, _ = run_python_until_end(filename) + self.assertIn(b"SyntaxError", rs.err) - tokens = list(tokenize(BytesIO(source.encode('utf-8')).readline)) - self.assertEqual(tok_name[tokens[0].exact_type], tok_name[ENCODING]) - for i in range(6): - self.assertEqual(tok_name[tokens[i + 1].exact_type], tok_name[expected_tokens[i]]) - self.assertEqual(tok_name[tokens[-1].exact_type], tok_name[token.ENDMARKER]) class UntokenizeTest(TestCase): def test_bad_input_order(self): # raise if previous row - u = Untokenizer() + u = tokenize.Untokenizer() u.prev_row = 2 u.prev_col = 2 with self.assertRaises(ValueError) as cm: @@ -1487,7 +1952,7 @@ def 
test_bad_input_order(self): def test_backslash_continuation(self): # The problem is that \ leaves no token - u = Untokenizer() + u = tokenize.Untokenizer() u.prev_row = 1 u.prev_col = 1 u.tokens = [] @@ -1499,17 +1964,33 @@ def test_backslash_continuation(self): TestRoundtrip.check_roundtrip(self, 'a\n b\n c\n \\\n c\n') def test_iter_compat(self): - u = Untokenizer() - token = (NAME, 'Hello') - tokens = [(ENCODING, 'utf-8'), token] + u = tokenize.Untokenizer() + token = (tokenize.NAME, 'Hello') + tokens = [(tokenize.ENCODING, 'utf-8'), token] u.compat(token, iter([])) self.assertEqual(u.tokens, ["Hello "]) - u = Untokenizer() + u = tokenize.Untokenizer() self.assertEqual(u.untokenize(iter([token])), 'Hello ') - u = Untokenizer() + u = tokenize.Untokenizer() self.assertEqual(u.untokenize(iter(tokens)), 'Hello ') self.assertEqual(u.encoding, 'utf-8') - self.assertEqual(untokenize(iter(tokens)), b'Hello ') + self.assertEqual(tokenize.untokenize(iter(tokens)), b'Hello ') + + +def contains_ambiguous_backslash(source): + """Return `True` if the source contains a backslash on a + line by itself. For example: + + a = (1 + \\ + ) + + Code like this cannot be untokenized exactly. This is because + the tokenizer does not produce any tokens for the line containing + the backslash and so there is no way to know its indent. + """ + pattern = re.compile(br'\n\s*\\\r?\n') + return pattern.search(source) is not None class TestRoundtrip(TestCase): @@ -1522,6 +2003,9 @@ def check_roundtrip(self, f): tokenize.untokenize(), and the latter tokenized again to 2-tuples. The test fails if the 3 pair tokenizations do not match. + If the source code can be untokenized unambiguously, the + untokenized code must match the original code exactly. + When untokenize bugs are fixed, untokenize with 5-tuples should reproduce code that does not contain a backslash continuation following spaces. A proper test should test this. 
@@ -1531,21 +2015,38 @@ def check_roundtrip(self, f): code = f.encode('utf-8') else: code = f.read() - f.close() readline = iter(code.splitlines(keepends=True)).__next__ - tokens5 = list(tokenize(readline)) + tokens5 = list(tokenize.tokenize(readline)) tokens2 = [tok[:2] for tok in tokens5] # Reproduce tokens2 from pairs - bytes_from2 = untokenize(tokens2) + bytes_from2 = tokenize.untokenize(tokens2) readline2 = iter(bytes_from2.splitlines(keepends=True)).__next__ - tokens2_from2 = [tok[:2] for tok in tokenize(readline2)] + tokens2_from2 = [tok[:2] for tok in tokenize.tokenize(readline2)] self.assertEqual(tokens2_from2, tokens2) # Reproduce tokens2 from 5-tuples - bytes_from5 = untokenize(tokens5) + bytes_from5 = tokenize.untokenize(tokens5) readline5 = iter(bytes_from5.splitlines(keepends=True)).__next__ - tokens2_from5 = [tok[:2] for tok in tokenize(readline5)] + tokens2_from5 = [tok[:2] for tok in tokenize.tokenize(readline5)] self.assertEqual(tokens2_from5, tokens2) + if not contains_ambiguous_backslash(code): + # The BOM does not produce a token so there is no way to preserve it. + code_without_bom = code.removeprefix(b'\xef\xbb\xbf') + readline = iter(code_without_bom.splitlines(keepends=True)).__next__ + untokenized_code = tokenize.untokenize(tokenize.tokenize(readline)) + self.assertEqual(code_without_bom, untokenized_code) + + def check_line_extraction(self, f): + if isinstance(f, str): + code = f.encode('utf-8') + else: + code = f.read() + readline = iter(code.splitlines(keepends=True)).__next__ + for tok in tokenize.tokenize(readline): + if tok.type in {tokenize.ENCODING, tokenize.ENDMARKER}: + continue + self.assertEqual(tok.string, tok.line[tok.start[1]: tok.end[1]]) + def test_roundtrip(self): # There are some standard formatting practices that are easy to get right. 
@@ -1561,7 +2062,7 @@ def test_roundtrip(self): self.check_roundtrip("if x == 1 : \n" " print(x)\n") - fn = support.findfile("tokenize_tests.txt") + fn = support.findfile("tokenize_tests.txt", subdir="tokenizedata") with open(fn, 'rb') as f: self.check_roundtrip(f) self.check_roundtrip("if x == 1:\n" @@ -1585,6 +2086,67 @@ def test_roundtrip(self): " print('Can not import' # comment2\n)" "else: print('Loaded')\n") + self.check_roundtrip("f'\\N{EXCLAMATION MARK}'") + self.check_roundtrip(r"f'\\N{SNAKE}'") + self.check_roundtrip(r"f'\\N{{SNAKE}}'") + self.check_roundtrip(r"f'\N{SNAKE}'") + self.check_roundtrip(r"f'\\\N{SNAKE}'") + self.check_roundtrip(r"f'\\\\\N{SNAKE}'") + self.check_roundtrip(r"f'\\\\\\\N{SNAKE}'") + + self.check_roundtrip(r"f'\\N{1}'") + self.check_roundtrip(r"f'\\\\N{2}'") + self.check_roundtrip(r"f'\\\\\\N{3}'") + self.check_roundtrip(r"f'\\\\\\\\N{4}'") + + self.check_roundtrip(r"f'\\N{{'") + self.check_roundtrip(r"f'\\\\N{{'") + self.check_roundtrip(r"f'\\\\\\N{{'") + self.check_roundtrip(r"f'\\\\\\\\N{{'") + + self.check_roundtrip(r"f'\n{{foo}}'") + self.check_roundtrip(r"f'\\n{{foo}}'") + self.check_roundtrip(r"f'\\\n{{foo}}'") + self.check_roundtrip(r"f'\\\\n{{foo}}'") + + self.check_roundtrip(r"f'\t{{foo}}'") + self.check_roundtrip(r"f'\\t{{foo}}'") + self.check_roundtrip(r"f'\\\t{{foo}}'") + self.check_roundtrip(r"f'\\\\t{{foo}}'") + + self.check_roundtrip(r"rf'\t{{foo}}'") + self.check_roundtrip(r"rf'\\t{{foo}}'") + self.check_roundtrip(r"rf'\\\t{{foo}}'") + self.check_roundtrip(r"rf'\\\\t{{foo}}'") + + self.check_roundtrip(r"rf'\{{foo}}'") + self.check_roundtrip(r"f'\\{{foo}}'") + self.check_roundtrip(r"rf'\\\{{foo}}'") + self.check_roundtrip(r"f'\\\\{{foo}}'") + cases = [ + """ +if 1: + "foo" +"bar" +""", + """ +if 1: + ("foo" + "bar") +""", + """ +if 1: + "foo" + "bar" +""" ] + for case in cases: + self.check_roundtrip(case) + + self.check_roundtrip(r"t'{ {}}'") + self.check_roundtrip(r"t'{f'{ {}}'}{ {}}'") + 
self.check_roundtrip(r"f'{t'{ {}}'}{ {}}'") + + def test_continuation(self): # Balancing continuation self.check_roundtrip("a = (3,4, \n" @@ -1611,26 +2173,14 @@ def test_string_concatenation(self): # Two string literals on the same line self.check_roundtrip("'' ''") - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_random_files(self): # Test roundtrip on random python modules. # pass the '-ucpu' option to process the full directory. import glob, random - fn = support.findfile("tokenize_tests.txt") - tempdir = os.path.dirname(fn) or os.curdir + tempdir = os.path.dirname(__file__) or os.curdir testfiles = glob.glob(os.path.join(glob.escape(tempdir), "test*.py")) - # Tokenize is broken on test_pep3131.py because regular expressions are - # broken on the obscure unicode identifiers in it. *sigh* - # With roundtrip extended to test the 5-tuple mode of untokenize, - # 7 more testfiles fail. Remove them also until the failure is diagnosed. - - testfiles.remove(os.path.join(tempdir, "test_unicode_identifiers.py")) - for f in ('buffer', 'builtin', 'fileio', 'inspect', 'os', 'platform', 'sys'): - testfiles.remove(os.path.join(tempdir, "test_%s.py") % f) - if not support.is_resource_enabled("cpu"): testfiles = random.sample(testfiles, 10) @@ -1640,12 +2190,13 @@ def test_random_files(self): with open(testfile, 'rb') as f: with self.subTest(file=testfile): self.check_roundtrip(f) + self.check_line_extraction(f) def roundtrip(self, code): if isinstance(code, str): code = code.encode('utf-8') - return untokenize(tokenize(BytesIO(code).readline)).decode('utf-8') + return tokenize.untokenize(tokenize.tokenize(BytesIO(code).readline)).decode('utf-8') def test_indentation_semantics_retained(self): """ @@ -1658,5 +2209,1279 @@ def test_indentation_semantics_retained(self): self.check_roundtrip(code) +class InvalidPythonTests(TestCase): + def test_number_followed_by_name(self): + # See issue #gh-105549 + source = "2sin(x)" + expected_tokens = [ + 
tokenize.TokenInfo(type=token.NUMBER, string='2', start=(1, 0), end=(1, 1), line='2sin(x)'), + tokenize.TokenInfo(type=token.NAME, string='sin', start=(1, 1), end=(1, 4), line='2sin(x)'), + tokenize.TokenInfo(type=token.OP, string='(', start=(1, 4), end=(1, 5), line='2sin(x)'), + tokenize.TokenInfo(type=token.NAME, string='x', start=(1, 5), end=(1, 6), line='2sin(x)'), + tokenize.TokenInfo(type=token.OP, string=')', start=(1, 6), end=(1, 7), line='2sin(x)'), + tokenize.TokenInfo(type=token.NEWLINE, string='', start=(1, 7), end=(1, 8), line='2sin(x)'), + tokenize.TokenInfo(type=token.ENDMARKER, string='', start=(2, 0), end=(2, 0), line='') + ] + + tokens = list(tokenize.generate_tokens(StringIO(source).readline)) + self.assertEqual(tokens, expected_tokens) + + @unittest.expectedFailure # TODO: RUSTPYTHON; Diff is 855 characters long. Set self.maxDiff to None to see it. + def test_number_starting_with_zero(self): + source = "01234" + expected_tokens = [ + tokenize.TokenInfo(type=token.NUMBER, string='01234', start=(1, 0), end=(1, 5), line='01234'), + tokenize.TokenInfo(type=token.NEWLINE, string='', start=(1, 5), end=(1, 6), line='01234'), + tokenize.TokenInfo(type=token.ENDMARKER, string='', start=(2, 0), end=(2, 0), line='') + ] + + tokens = list(tokenize.generate_tokens(StringIO(source).readline)) + self.assertEqual(tokens, expected_tokens) + +class CTokenizeTest(TestCase): + def check_tokenize(self, s, expected): + # Format the tokens in s in a table format. + # The ENDMARKER and final NEWLINE are omitted. 
+ f = StringIO(s) + with self.subTest(source=s): + result = stringify_tokens_from_source( + tokenize._generate_tokens_from_c_tokenizer(f.readline), s + ) + self.assertEqual(result, expected.rstrip().splitlines()) + + def test_encoding(self): + def readline(encoding): + yield "1+1".encode(encoding) + + expected = [ + tokenize.TokenInfo(type=tokenize.NUMBER, string='1', start=(1, 0), end=(1, 1), line='1+1'), + tokenize.TokenInfo(type=tokenize.OP, string='+', start=(1, 1), end=(1, 2), line='1+1'), + tokenize.TokenInfo(type=tokenize.NUMBER, string='1', start=(1, 2), end=(1, 3), line='1+1'), + tokenize.TokenInfo(type=tokenize.NEWLINE, string='', start=(1, 3), end=(1, 4), line='1+1'), + tokenize.TokenInfo(type=tokenize.ENDMARKER, string='', start=(2, 0), end=(2, 0), line='') + ] + for encoding in ["utf-8", "latin-1", "utf-16"]: + with self.subTest(encoding=encoding): + tokens = list(tokenize._generate_tokens_from_c_tokenizer( + readline(encoding).__next__, + extra_tokens=True, + encoding=encoding, + )) + self.assertEqual(tokens, expected) + + def test_int(self): + + self.check_tokenize('0xff <= 255', """\ + NUMBER '0xff' (1, 0) (1, 4) + LESSEQUAL '<=' (1, 5) (1, 7) + NUMBER '255' (1, 8) (1, 11) + """) + + self.check_tokenize('0b10 <= 255', """\ + NUMBER '0b10' (1, 0) (1, 4) + LESSEQUAL '<=' (1, 5) (1, 7) + NUMBER '255' (1, 8) (1, 11) + """) + + self.check_tokenize('0o123 <= 0O123', """\ + NUMBER '0o123' (1, 0) (1, 5) + LESSEQUAL '<=' (1, 6) (1, 8) + NUMBER '0O123' (1, 9) (1, 14) + """) + + self.check_tokenize('1234567 > ~0x15', """\ + NUMBER '1234567' (1, 0) (1, 7) + GREATER '>' (1, 8) (1, 9) + TILDE '~' (1, 10) (1, 11) + NUMBER '0x15' (1, 11) (1, 15) + """) + + self.check_tokenize('2134568 != 1231515', """\ + NUMBER '2134568' (1, 0) (1, 7) + NOTEQUAL '!=' (1, 8) (1, 10) + NUMBER '1231515' (1, 11) (1, 18) + """) + + self.check_tokenize('(-124561-1) & 200000000', """\ + LPAR '(' (1, 0) (1, 1) + MINUS '-' (1, 1) (1, 2) + NUMBER '124561' (1, 2) (1, 8) + MINUS '-' (1, 8) (1, 
9) + NUMBER '1' (1, 9) (1, 10) + RPAR ')' (1, 10) (1, 11) + AMPER '&' (1, 12) (1, 13) + NUMBER '200000000' (1, 14) (1, 23) + """) + + self.check_tokenize('0xdeadbeef != -1', """\ + NUMBER '0xdeadbeef' (1, 0) (1, 10) + NOTEQUAL '!=' (1, 11) (1, 13) + MINUS '-' (1, 14) (1, 15) + NUMBER '1' (1, 15) (1, 16) + """) + + self.check_tokenize('0xdeadc0de & 12345', """\ + NUMBER '0xdeadc0de' (1, 0) (1, 10) + AMPER '&' (1, 11) (1, 12) + NUMBER '12345' (1, 13) (1, 18) + """) + + self.check_tokenize('0xFF & 0x15 | 1234', """\ + NUMBER '0xFF' (1, 0) (1, 4) + AMPER '&' (1, 5) (1, 6) + NUMBER '0x15' (1, 7) (1, 11) + VBAR '|' (1, 12) (1, 13) + NUMBER '1234' (1, 14) (1, 18) + """) + + def test_float(self): + + self.check_tokenize('x = 3.14159', """\ + NAME 'x' (1, 0) (1, 1) + EQUAL '=' (1, 2) (1, 3) + NUMBER '3.14159' (1, 4) (1, 11) + """) + + self.check_tokenize('x = 314159.', """\ + NAME 'x' (1, 0) (1, 1) + EQUAL '=' (1, 2) (1, 3) + NUMBER '314159.' (1, 4) (1, 11) + """) + + self.check_tokenize('x = .314159', """\ + NAME 'x' (1, 0) (1, 1) + EQUAL '=' (1, 2) (1, 3) + NUMBER '.314159' (1, 4) (1, 11) + """) + + self.check_tokenize('x = 3e14159', """\ + NAME 'x' (1, 0) (1, 1) + EQUAL '=' (1, 2) (1, 3) + NUMBER '3e14159' (1, 4) (1, 11) + """) + + self.check_tokenize('x = 3E123', """\ + NAME 'x' (1, 0) (1, 1) + EQUAL '=' (1, 2) (1, 3) + NUMBER '3E123' (1, 4) (1, 9) + """) + + self.check_tokenize('x+y = 3e-1230', """\ + NAME 'x' (1, 0) (1, 1) + PLUS '+' (1, 1) (1, 2) + NAME 'y' (1, 2) (1, 3) + EQUAL '=' (1, 4) (1, 5) + NUMBER '3e-1230' (1, 6) (1, 13) + """) + + self.check_tokenize('x = 3.14e159', """\ + NAME 'x' (1, 0) (1, 1) + EQUAL '=' (1, 2) (1, 3) + NUMBER '3.14e159' (1, 4) (1, 12) + """) + + def test_string(self): + + self.check_tokenize('x = \'\'; y = ""', """\ + NAME 'x' (1, 0) (1, 1) + EQUAL '=' (1, 2) (1, 3) + STRING "''" (1, 4) (1, 6) + SEMI ';' (1, 6) (1, 7) + NAME 'y' (1, 8) (1, 9) + EQUAL '=' (1, 10) (1, 11) + STRING '""' (1, 12) (1, 14) + """) + + self.check_tokenize('x = 
\'"\'; y = "\'"', """\ + NAME 'x' (1, 0) (1, 1) + EQUAL '=' (1, 2) (1, 3) + STRING '\\'"\\'' (1, 4) (1, 7) + SEMI ';' (1, 7) (1, 8) + NAME 'y' (1, 9) (1, 10) + EQUAL '=' (1, 11) (1, 12) + STRING '"\\'"' (1, 13) (1, 16) + """) + + self.check_tokenize('x = "doesn\'t "shrink", does it"', """\ + NAME 'x' (1, 0) (1, 1) + EQUAL '=' (1, 2) (1, 3) + STRING '"doesn\\'t "' (1, 4) (1, 14) + NAME 'shrink' (1, 14) (1, 20) + STRING '", does it"' (1, 20) (1, 31) + """) + + self.check_tokenize("x = 'abc' + 'ABC'", """\ + NAME 'x' (1, 0) (1, 1) + EQUAL '=' (1, 2) (1, 3) + STRING "'abc'" (1, 4) (1, 9) + PLUS '+' (1, 10) (1, 11) + STRING "'ABC'" (1, 12) (1, 17) + """) + + self.check_tokenize('y = "ABC" + "ABC"', """\ + NAME 'y' (1, 0) (1, 1) + EQUAL '=' (1, 2) (1, 3) + STRING '"ABC"' (1, 4) (1, 9) + PLUS '+' (1, 10) (1, 11) + STRING '"ABC"' (1, 12) (1, 17) + """) + + self.check_tokenize("x = r'abc' + r'ABC' + R'ABC' + R'ABC'", """\ + NAME 'x' (1, 0) (1, 1) + EQUAL '=' (1, 2) (1, 3) + STRING "r'abc'" (1, 4) (1, 10) + PLUS '+' (1, 11) (1, 12) + STRING "r'ABC'" (1, 13) (1, 19) + PLUS '+' (1, 20) (1, 21) + STRING "R'ABC'" (1, 22) (1, 28) + PLUS '+' (1, 29) (1, 30) + STRING "R'ABC'" (1, 31) (1, 37) + """) + + self.check_tokenize('y = r"abc" + r"ABC" + R"ABC" + R"ABC"', """\ + NAME 'y' (1, 0) (1, 1) + EQUAL '=' (1, 2) (1, 3) + STRING 'r"abc"' (1, 4) (1, 10) + PLUS '+' (1, 11) (1, 12) + STRING 'r"ABC"' (1, 13) (1, 19) + PLUS '+' (1, 20) (1, 21) + STRING 'R"ABC"' (1, 22) (1, 28) + PLUS '+' (1, 29) (1, 30) + STRING 'R"ABC"' (1, 31) (1, 37) + """) + + self.check_tokenize("u'abc' + U'abc'", """\ + STRING "u'abc'" (1, 0) (1, 6) + PLUS '+' (1, 7) (1, 8) + STRING "U'abc'" (1, 9) (1, 15) + """) + + self.check_tokenize('u"abc" + U"abc"', """\ + STRING 'u"abc"' (1, 0) (1, 6) + PLUS '+' (1, 7) (1, 8) + STRING 'U"abc"' (1, 9) (1, 15) + """) + + self.check_tokenize("b'abc' + B'abc'", """\ + STRING "b'abc'" (1, 0) (1, 6) + PLUS '+' (1, 7) (1, 8) + STRING "B'abc'" (1, 9) (1, 15) + """) + + 
self.check_tokenize('b"abc" + B"abc"', """\ + STRING 'b"abc"' (1, 0) (1, 6) + PLUS '+' (1, 7) (1, 8) + STRING 'B"abc"' (1, 9) (1, 15) + """) + + self.check_tokenize("br'abc' + bR'abc' + Br'abc' + BR'abc'", """\ + STRING "br'abc'" (1, 0) (1, 7) + PLUS '+' (1, 8) (1, 9) + STRING "bR'abc'" (1, 10) (1, 17) + PLUS '+' (1, 18) (1, 19) + STRING "Br'abc'" (1, 20) (1, 27) + PLUS '+' (1, 28) (1, 29) + STRING "BR'abc'" (1, 30) (1, 37) + """) + + self.check_tokenize('br"abc" + bR"abc" + Br"abc" + BR"abc"', """\ + STRING 'br"abc"' (1, 0) (1, 7) + PLUS '+' (1, 8) (1, 9) + STRING 'bR"abc"' (1, 10) (1, 17) + PLUS '+' (1, 18) (1, 19) + STRING 'Br"abc"' (1, 20) (1, 27) + PLUS '+' (1, 28) (1, 29) + STRING 'BR"abc"' (1, 30) (1, 37) + """) + + self.check_tokenize("rb'abc' + rB'abc' + Rb'abc' + RB'abc'", """\ + STRING "rb'abc'" (1, 0) (1, 7) + PLUS '+' (1, 8) (1, 9) + STRING "rB'abc'" (1, 10) (1, 17) + PLUS '+' (1, 18) (1, 19) + STRING "Rb'abc'" (1, 20) (1, 27) + PLUS '+' (1, 28) (1, 29) + STRING "RB'abc'" (1, 30) (1, 37) + """) + + self.check_tokenize('rb"abc" + rB"abc" + Rb"abc" + RB"abc"', """\ + STRING 'rb"abc"' (1, 0) (1, 7) + PLUS '+' (1, 8) (1, 9) + STRING 'rB"abc"' (1, 10) (1, 17) + PLUS '+' (1, 18) (1, 19) + STRING 'Rb"abc"' (1, 20) (1, 27) + PLUS '+' (1, 28) (1, 29) + STRING 'RB"abc"' (1, 30) (1, 37) + """) + + self.check_tokenize('"a\\\nde\\\nfg"', """\ + STRING '"a\\\\\\nde\\\\\\nfg"\' (1, 0) (3, 3) + """) + + self.check_tokenize('u"a\\\nde"', """\ + STRING 'u"a\\\\\\nde"\' (1, 0) (2, 3) + """) + + self.check_tokenize('rb"a\\\nd"', """\ + STRING 'rb"a\\\\\\nd"\' (1, 0) (2, 2) + """) + + self.check_tokenize(r'"""a\ +b"""', """\ + STRING '\"\""a\\\\\\nb\"\""' (1, 0) (2, 4) + """) + self.check_tokenize(r'u"""a\ +b"""', """\ + STRING 'u\"\""a\\\\\\nb\"\""' (1, 0) (2, 4) + """) + self.check_tokenize(r'rb"""a\ +b\ +c"""', """\ + STRING 'rb"\""a\\\\\\nb\\\\\\nc"\""' (1, 0) (3, 4) + """) + + self.check_tokenize(r'"hola\\\r\ndfgf"', """\ + STRING \'"hola\\\\\\\\\\\\r\\\\ndfgf"\' (1, 
0) (1, 16) + """) + + self.check_tokenize('f"abc"', """\ + FSTRING_START 'f"' (1, 0) (1, 2) + FSTRING_MIDDLE 'abc' (1, 2) (1, 5) + FSTRING_END '"' (1, 5) (1, 6) + """) + + self.check_tokenize('fR"a{b}c"', """\ + FSTRING_START 'fR"' (1, 0) (1, 3) + FSTRING_MIDDLE 'a' (1, 3) (1, 4) + LBRACE '{' (1, 4) (1, 5) + NAME 'b' (1, 5) (1, 6) + RBRACE '}' (1, 6) (1, 7) + FSTRING_MIDDLE 'c' (1, 7) (1, 8) + FSTRING_END '"' (1, 8) (1, 9) + """) + + self.check_tokenize('f"""abc"""', """\ + FSTRING_START 'f\"""' (1, 0) (1, 4) + FSTRING_MIDDLE 'abc' (1, 4) (1, 7) + FSTRING_END '\"""' (1, 7) (1, 10) + """) + + self.check_tokenize(r'f"abc\ +def"', """\ + FSTRING_START \'f"\' (1, 0) (1, 2) + FSTRING_MIDDLE 'abc\\\\\\ndef' (1, 2) (2, 3) + FSTRING_END '"' (2, 3) (2, 4) + """) + + self.check_tokenize('''\ +f"{ +a}"''', """\ + FSTRING_START 'f"' (1, 0) (1, 2) + LBRACE '{' (1, 2) (1, 3) + NAME 'a' (2, 0) (2, 1) + RBRACE '}' (2, 1) (2, 2) + FSTRING_END '"' (2, 2) (2, 3) + """) + + self.check_tokenize(r'Rf"abc\ +def"', """\ + FSTRING_START 'Rf"' (1, 0) (1, 3) + FSTRING_MIDDLE 'abc\\\\\\ndef' (1, 3) (2, 3) + FSTRING_END '"' (2, 3) (2, 4) + """) + + self.check_tokenize(r'f"hola\\\r\ndfgf"', """\ + FSTRING_START \'f"\' (1, 0) (1, 2) + FSTRING_MIDDLE 'hola\\\\\\\\\\\\r\\\\ndfgf' (1, 2) (1, 16) + FSTRING_END \'"\' (1, 16) (1, 17) + """) + + self.check_tokenize("""\ +f'''__{ + x:a +}__'''""", """\ + FSTRING_START "f'''" (1, 0) (1, 4) + FSTRING_MIDDLE '__' (1, 4) (1, 6) + LBRACE '{' (1, 6) (1, 7) + NAME 'x' (2, 4) (2, 5) + COLON ':' (2, 5) (2, 6) + FSTRING_MIDDLE 'a\\n' (2, 6) (3, 0) + RBRACE '}' (3, 0) (3, 1) + FSTRING_MIDDLE '__' (3, 1) (3, 3) + FSTRING_END "'''" (3, 3) (3, 6) + """) + + self.check_tokenize("""\ +f'''__{ + x:a + b + c + d +}__'''""", """\ + FSTRING_START "f'''" (1, 0) (1, 4) + FSTRING_MIDDLE '__' (1, 4) (1, 6) + LBRACE '{' (1, 6) (1, 7) + NAME 'x' (2, 4) (2, 5) + COLON ':' (2, 5) (2, 6) + FSTRING_MIDDLE 'a\\n b\\n c\\n d\\n' (2, 6) (6, 0) + RBRACE '}' (6, 0) (6, 1) + 
FSTRING_MIDDLE '__' (6, 1) (6, 3) + FSTRING_END "'''" (6, 3) (6, 6) + """) + + def test_function(self): + + self.check_tokenize('def d22(a, b, c=2, d=2, *k): pass', """\ + NAME 'def' (1, 0) (1, 3) + NAME 'd22' (1, 4) (1, 7) + LPAR '(' (1, 7) (1, 8) + NAME 'a' (1, 8) (1, 9) + COMMA ',' (1, 9) (1, 10) + NAME 'b' (1, 11) (1, 12) + COMMA ',' (1, 12) (1, 13) + NAME 'c' (1, 14) (1, 15) + EQUAL '=' (1, 15) (1, 16) + NUMBER '2' (1, 16) (1, 17) + COMMA ',' (1, 17) (1, 18) + NAME 'd' (1, 19) (1, 20) + EQUAL '=' (1, 20) (1, 21) + NUMBER '2' (1, 21) (1, 22) + COMMA ',' (1, 22) (1, 23) + STAR '*' (1, 24) (1, 25) + NAME 'k' (1, 25) (1, 26) + RPAR ')' (1, 26) (1, 27) + COLON ':' (1, 27) (1, 28) + NAME 'pass' (1, 29) (1, 33) + """) + + self.check_tokenize('def d01v_(a=1, *k, **w): pass', """\ + NAME 'def' (1, 0) (1, 3) + NAME 'd01v_' (1, 4) (1, 9) + LPAR '(' (1, 9) (1, 10) + NAME 'a' (1, 10) (1, 11) + EQUAL '=' (1, 11) (1, 12) + NUMBER '1' (1, 12) (1, 13) + COMMA ',' (1, 13) (1, 14) + STAR '*' (1, 15) (1, 16) + NAME 'k' (1, 16) (1, 17) + COMMA ',' (1, 17) (1, 18) + DOUBLESTAR '**' (1, 19) (1, 21) + NAME 'w' (1, 21) (1, 22) + RPAR ')' (1, 22) (1, 23) + COLON ':' (1, 23) (1, 24) + NAME 'pass' (1, 25) (1, 29) + """) + + self.check_tokenize('def d23(a: str, b: int=3) -> int: pass', """\ + NAME 'def' (1, 0) (1, 3) + NAME 'd23' (1, 4) (1, 7) + LPAR '(' (1, 7) (1, 8) + NAME 'a' (1, 8) (1, 9) + COLON ':' (1, 9) (1, 10) + NAME 'str' (1, 11) (1, 14) + COMMA ',' (1, 14) (1, 15) + NAME 'b' (1, 16) (1, 17) + COLON ':' (1, 17) (1, 18) + NAME 'int' (1, 19) (1, 22) + EQUAL '=' (1, 22) (1, 23) + NUMBER '3' (1, 23) (1, 24) + RPAR ')' (1, 24) (1, 25) + RARROW '->' (1, 26) (1, 28) + NAME 'int' (1, 29) (1, 32) + COLON ':' (1, 32) (1, 33) + NAME 'pass' (1, 34) (1, 38) + """) + + def test_comparison(self): + + self.check_tokenize("if 1 < 1 > 1 == 1 >= 5 <= 0x15 <= 0x12 != " + "1 and 5 in 1 not in 1 is 1 or 5 is not 1: pass", """\ + NAME 'if' (1, 0) (1, 2) + NUMBER '1' (1, 3) (1, 4) + LESS '<' (1, 5) (1, 
6) + NUMBER '1' (1, 7) (1, 8) + GREATER '>' (1, 9) (1, 10) + NUMBER '1' (1, 11) (1, 12) + EQEQUAL '==' (1, 13) (1, 15) + NUMBER '1' (1, 16) (1, 17) + GREATEREQUAL '>=' (1, 18) (1, 20) + NUMBER '5' (1, 21) (1, 22) + LESSEQUAL '<=' (1, 23) (1, 25) + NUMBER '0x15' (1, 26) (1, 30) + LESSEQUAL '<=' (1, 31) (1, 33) + NUMBER '0x12' (1, 34) (1, 38) + NOTEQUAL '!=' (1, 39) (1, 41) + NUMBER '1' (1, 42) (1, 43) + NAME 'and' (1, 44) (1, 47) + NUMBER '5' (1, 48) (1, 49) + NAME 'in' (1, 50) (1, 52) + NUMBER '1' (1, 53) (1, 54) + NAME 'not' (1, 55) (1, 58) + NAME 'in' (1, 59) (1, 61) + NUMBER '1' (1, 62) (1, 63) + NAME 'is' (1, 64) (1, 66) + NUMBER '1' (1, 67) (1, 68) + NAME 'or' (1, 69) (1, 71) + NUMBER '5' (1, 72) (1, 73) + NAME 'is' (1, 74) (1, 76) + NAME 'not' (1, 77) (1, 80) + NUMBER '1' (1, 81) (1, 82) + COLON ':' (1, 82) (1, 83) + NAME 'pass' (1, 84) (1, 88) + """) + + def test_additive(self): + + self.check_tokenize('x = 1 - y + 15 - 1 + 0x124 + z + a[5]', """\ + NAME 'x' (1, 0) (1, 1) + EQUAL '=' (1, 2) (1, 3) + NUMBER '1' (1, 4) (1, 5) + MINUS '-' (1, 6) (1, 7) + NAME 'y' (1, 8) (1, 9) + PLUS '+' (1, 10) (1, 11) + NUMBER '15' (1, 12) (1, 14) + MINUS '-' (1, 15) (1, 16) + NUMBER '1' (1, 17) (1, 18) + PLUS '+' (1, 19) (1, 20) + NUMBER '0x124' (1, 21) (1, 26) + PLUS '+' (1, 27) (1, 28) + NAME 'z' (1, 29) (1, 30) + PLUS '+' (1, 31) (1, 32) + NAME 'a' (1, 33) (1, 34) + LSQB '[' (1, 34) (1, 35) + NUMBER '5' (1, 35) (1, 36) + RSQB ']' (1, 36) (1, 37) + """) + + def test_multiplicative(self): + + self.check_tokenize('x = 1//1*1/5*12%0x12@42', """\ + NAME 'x' (1, 0) (1, 1) + EQUAL '=' (1, 2) (1, 3) + NUMBER '1' (1, 4) (1, 5) + DOUBLESLASH '//' (1, 5) (1, 7) + NUMBER '1' (1, 7) (1, 8) + STAR '*' (1, 8) (1, 9) + NUMBER '1' (1, 9) (1, 10) + SLASH '/' (1, 10) (1, 11) + NUMBER '5' (1, 11) (1, 12) + STAR '*' (1, 12) (1, 13) + NUMBER '12' (1, 13) (1, 15) + PERCENT '%' (1, 15) (1, 16) + NUMBER '0x12' (1, 16) (1, 20) + AT '@' (1, 20) (1, 21) + NUMBER '42' (1, 21) (1, 23) + """) + + def 
test_unary(self): + + self.check_tokenize('~1 ^ 1 & 1 |1 ^ -1', """\ + TILDE '~' (1, 0) (1, 1) + NUMBER '1' (1, 1) (1, 2) + CIRCUMFLEX '^' (1, 3) (1, 4) + NUMBER '1' (1, 5) (1, 6) + AMPER '&' (1, 7) (1, 8) + NUMBER '1' (1, 9) (1, 10) + VBAR '|' (1, 11) (1, 12) + NUMBER '1' (1, 12) (1, 13) + CIRCUMFLEX '^' (1, 14) (1, 15) + MINUS '-' (1, 16) (1, 17) + NUMBER '1' (1, 17) (1, 18) + """) + + self.check_tokenize('-1*1/1+1*1//1 - ---1**1', """\ + MINUS '-' (1, 0) (1, 1) + NUMBER '1' (1, 1) (1, 2) + STAR '*' (1, 2) (1, 3) + NUMBER '1' (1, 3) (1, 4) + SLASH '/' (1, 4) (1, 5) + NUMBER '1' (1, 5) (1, 6) + PLUS '+' (1, 6) (1, 7) + NUMBER '1' (1, 7) (1, 8) + STAR '*' (1, 8) (1, 9) + NUMBER '1' (1, 9) (1, 10) + DOUBLESLASH '//' (1, 10) (1, 12) + NUMBER '1' (1, 12) (1, 13) + MINUS '-' (1, 14) (1, 15) + MINUS '-' (1, 16) (1, 17) + MINUS '-' (1, 17) (1, 18) + MINUS '-' (1, 18) (1, 19) + NUMBER '1' (1, 19) (1, 20) + DOUBLESTAR '**' (1, 20) (1, 22) + NUMBER '1' (1, 22) (1, 23) + """) + + def test_selector(self): + + self.check_tokenize("import sys, time\nx = sys.modules['time'].time()", """\ + NAME 'import' (1, 0) (1, 6) + NAME 'sys' (1, 7) (1, 10) + COMMA ',' (1, 10) (1, 11) + NAME 'time' (1, 12) (1, 16) + NEWLINE '' (1, 16) (1, 16) + NAME 'x' (2, 0) (2, 1) + EQUAL '=' (2, 2) (2, 3) + NAME 'sys' (2, 4) (2, 7) + DOT '.' (2, 7) (2, 8) + NAME 'modules' (2, 8) (2, 15) + LSQB '[' (2, 15) (2, 16) + STRING "'time'" (2, 16) (2, 22) + RSQB ']' (2, 22) (2, 23) + DOT '.' 
(2, 23) (2, 24) + NAME 'time' (2, 24) (2, 28) + LPAR '(' (2, 28) (2, 29) + RPAR ')' (2, 29) (2, 30) + """) + + def test_method(self): + + self.check_tokenize('@staticmethod\ndef foo(x,y): pass', """\ + AT '@' (1, 0) (1, 1) + NAME 'staticmethod' (1, 1) (1, 13) + NEWLINE '' (1, 13) (1, 13) + NAME 'def' (2, 0) (2, 3) + NAME 'foo' (2, 4) (2, 7) + LPAR '(' (2, 7) (2, 8) + NAME 'x' (2, 8) (2, 9) + COMMA ',' (2, 9) (2, 10) + NAME 'y' (2, 10) (2, 11) + RPAR ')' (2, 11) (2, 12) + COLON ':' (2, 12) (2, 13) + NAME 'pass' (2, 14) (2, 18) + """) + + def test_tabs(self): + + self.check_tokenize('@staticmethod\ndef foo(x,y): pass', """\ + AT '@' (1, 0) (1, 1) + NAME 'staticmethod' (1, 1) (1, 13) + NEWLINE '' (1, 13) (1, 13) + NAME 'def' (2, 0) (2, 3) + NAME 'foo' (2, 4) (2, 7) + LPAR '(' (2, 7) (2, 8) + NAME 'x' (2, 8) (2, 9) + COMMA ',' (2, 9) (2, 10) + NAME 'y' (2, 10) (2, 11) + RPAR ')' (2, 11) (2, 12) + COLON ':' (2, 12) (2, 13) + NAME 'pass' (2, 14) (2, 18) + """) + + @unittest.expectedFailure # TODO: RUSTPYTHON + def test_async(self): + + self.check_tokenize('async = 1', """\ + NAME 'async' (1, 0) (1, 5) + EQUAL '=' (1, 6) (1, 7) + NUMBER '1' (1, 8) (1, 9) + """) + + self.check_tokenize('a = (async = 1)', """\ + NAME 'a' (1, 0) (1, 1) + EQUAL '=' (1, 2) (1, 3) + LPAR '(' (1, 4) (1, 5) + NAME 'async' (1, 5) (1, 10) + EQUAL '=' (1, 11) (1, 12) + NUMBER '1' (1, 13) (1, 14) + RPAR ')' (1, 14) (1, 15) + """) + + self.check_tokenize('async()', """\ + NAME 'async' (1, 0) (1, 5) + LPAR '(' (1, 5) (1, 6) + RPAR ')' (1, 6) (1, 7) + """) + + self.check_tokenize('class async(Bar):pass', """\ + NAME 'class' (1, 0) (1, 5) + NAME 'async' (1, 6) (1, 11) + LPAR '(' (1, 11) (1, 12) + NAME 'Bar' (1, 12) (1, 15) + RPAR ')' (1, 15) (1, 16) + COLON ':' (1, 16) (1, 17) + NAME 'pass' (1, 17) (1, 21) + """) + + self.check_tokenize('class async:pass', """\ + NAME 'class' (1, 0) (1, 5) + NAME 'async' (1, 6) (1, 11) + COLON ':' (1, 11) (1, 12) + NAME 'pass' (1, 12) (1, 16) + """) + + 
self.check_tokenize('await = 1', """\ + NAME 'await' (1, 0) (1, 5) + EQUAL '=' (1, 6) (1, 7) + NUMBER '1' (1, 8) (1, 9) + """) + + self.check_tokenize('foo.async', """\ + NAME 'foo' (1, 0) (1, 3) + DOT '.' (1, 3) (1, 4) + NAME 'async' (1, 4) (1, 9) + """) + + self.check_tokenize('async for a in b: pass', """\ + NAME 'async' (1, 0) (1, 5) + NAME 'for' (1, 6) (1, 9) + NAME 'a' (1, 10) (1, 11) + NAME 'in' (1, 12) (1, 14) + NAME 'b' (1, 15) (1, 16) + COLON ':' (1, 16) (1, 17) + NAME 'pass' (1, 18) (1, 22) + """) + + self.check_tokenize('async with a as b: pass', """\ + NAME 'async' (1, 0) (1, 5) + NAME 'with' (1, 6) (1, 10) + NAME 'a' (1, 11) (1, 12) + NAME 'as' (1, 13) (1, 15) + NAME 'b' (1, 16) (1, 17) + COLON ':' (1, 17) (1, 18) + NAME 'pass' (1, 19) (1, 23) + """) + + self.check_tokenize('async.foo', """\ + NAME 'async' (1, 0) (1, 5) + DOT '.' (1, 5) (1, 6) + NAME 'foo' (1, 6) (1, 9) + """) + + self.check_tokenize('async', """\ + NAME 'async' (1, 0) (1, 5) + """) + + self.check_tokenize('async\n#comment\nawait', """\ + NAME 'async' (1, 0) (1, 5) + NEWLINE '' (1, 5) (1, 5) + NAME 'await' (3, 0) (3, 5) + """) + + self.check_tokenize('async\n...\nawait', """\ + NAME 'async' (1, 0) (1, 5) + NEWLINE '' (1, 5) (1, 5) + ELLIPSIS '...' (2, 0) (2, 3) + NEWLINE '' (2, 3) (2, 3) + NAME 'await' (3, 0) (3, 5) + """) + + self.check_tokenize('async\nawait', """\ + NAME 'async' (1, 0) (1, 5) + NEWLINE '' (1, 5) (1, 5) + NAME 'await' (2, 0) (2, 5) + """) + + self.check_tokenize('foo.async + 1', """\ + NAME 'foo' (1, 0) (1, 3) + DOT '.' 
(1, 3) (1, 4) + NAME 'async' (1, 4) (1, 9) + PLUS '+' (1, 10) (1, 11) + NUMBER '1' (1, 12) (1, 13) + """) + + self.check_tokenize('async def foo(): pass', """\ + NAME 'async' (1, 0) (1, 5) + NAME 'def' (1, 6) (1, 9) + NAME 'foo' (1, 10) (1, 13) + LPAR '(' (1, 13) (1, 14) + RPAR ')' (1, 14) (1, 15) + COLON ':' (1, 15) (1, 16) + NAME 'pass' (1, 17) (1, 21) + """) + + self.check_tokenize('''\ +async def foo(): + def foo(await): + await = 1 + if 1: + await +async += 1 +''', """\ + NAME 'async' (1, 0) (1, 5) + NAME 'def' (1, 6) (1, 9) + NAME 'foo' (1, 10) (1, 13) + LPAR '(' (1, 13) (1, 14) + RPAR ')' (1, 14) (1, 15) + COLON ':' (1, 15) (1, 16) + NEWLINE '' (1, 16) (1, 16) + INDENT '' (2, -1) (2, -1) + NAME 'def' (2, 2) (2, 5) + NAME 'foo' (2, 6) (2, 9) + LPAR '(' (2, 9) (2, 10) + NAME 'await' (2, 10) (2, 15) + RPAR ')' (2, 15) (2, 16) + COLON ':' (2, 16) (2, 17) + NEWLINE '' (2, 17) (2, 17) + INDENT '' (3, -1) (3, -1) + NAME 'await' (3, 4) (3, 9) + EQUAL '=' (3, 10) (3, 11) + NUMBER '1' (3, 12) (3, 13) + NEWLINE '' (3, 13) (3, 13) + DEDENT '' (4, -1) (4, -1) + NAME 'if' (4, 2) (4, 4) + NUMBER '1' (4, 5) (4, 6) + COLON ':' (4, 6) (4, 7) + NEWLINE '' (4, 7) (4, 7) + INDENT '' (5, -1) (5, -1) + NAME 'await' (5, 4) (5, 9) + NEWLINE '' (5, 9) (5, 9) + DEDENT '' (6, -1) (6, -1) + DEDENT '' (6, -1) (6, -1) + NAME 'async' (6, 0) (6, 5) + PLUSEQUAL '+=' (6, 6) (6, 8) + NUMBER '1' (6, 9) (6, 10) + NEWLINE '' (6, 10) (6, 10) + """) + + self.check_tokenize('async def foo():\n async for i in 1: pass', """\ + NAME 'async' (1, 0) (1, 5) + NAME 'def' (1, 6) (1, 9) + NAME 'foo' (1, 10) (1, 13) + LPAR '(' (1, 13) (1, 14) + RPAR ')' (1, 14) (1, 15) + COLON ':' (1, 15) (1, 16) + NEWLINE '' (1, 16) (1, 16) + INDENT '' (2, -1) (2, -1) + NAME 'async' (2, 2) (2, 7) + NAME 'for' (2, 8) (2, 11) + NAME 'i' (2, 12) (2, 13) + NAME 'in' (2, 14) (2, 16) + NUMBER '1' (2, 17) (2, 18) + COLON ':' (2, 18) (2, 19) + NAME 'pass' (2, 20) (2, 24) + DEDENT '' (2, -1) (2, -1) + """) + + 
self.check_tokenize('async def foo(async): await', """\ + NAME 'async' (1, 0) (1, 5) + NAME 'def' (1, 6) (1, 9) + NAME 'foo' (1, 10) (1, 13) + LPAR '(' (1, 13) (1, 14) + NAME 'async' (1, 14) (1, 19) + RPAR ')' (1, 19) (1, 20) + COLON ':' (1, 20) (1, 21) + NAME 'await' (1, 22) (1, 27) + """) + + self.check_tokenize('''\ +def f(): + + def baz(): pass + async def bar(): pass + + await = 2''', """\ + NAME 'def' (1, 0) (1, 3) + NAME 'f' (1, 4) (1, 5) + LPAR '(' (1, 5) (1, 6) + RPAR ')' (1, 6) (1, 7) + COLON ':' (1, 7) (1, 8) + NEWLINE '' (1, 8) (1, 8) + INDENT '' (3, -1) (3, -1) + NAME 'def' (3, 2) (3, 5) + NAME 'baz' (3, 6) (3, 9) + LPAR '(' (3, 9) (3, 10) + RPAR ')' (3, 10) (3, 11) + COLON ':' (3, 11) (3, 12) + NAME 'pass' (3, 13) (3, 17) + NEWLINE '' (3, 17) (3, 17) + NAME 'async' (4, 2) (4, 7) + NAME 'def' (4, 8) (4, 11) + NAME 'bar' (4, 12) (4, 15) + LPAR '(' (4, 15) (4, 16) + RPAR ')' (4, 16) (4, 17) + COLON ':' (4, 17) (4, 18) + NAME 'pass' (4, 19) (4, 23) + NEWLINE '' (4, 23) (4, 23) + NAME 'await' (6, 2) (6, 7) + EQUAL '=' (6, 8) (6, 9) + NUMBER '2' (6, 10) (6, 11) + DEDENT '' (6, -1) (6, -1) + """) + + self.check_tokenize('''\ +async def f(): + + def baz(): pass + async def bar(): pass + + await = 2''', """\ + NAME 'async' (1, 0) (1, 5) + NAME 'def' (1, 6) (1, 9) + NAME 'f' (1, 10) (1, 11) + LPAR '(' (1, 11) (1, 12) + RPAR ')' (1, 12) (1, 13) + COLON ':' (1, 13) (1, 14) + NEWLINE '' (1, 14) (1, 14) + INDENT '' (3, -1) (3, -1) + NAME 'def' (3, 2) (3, 5) + NAME 'baz' (3, 6) (3, 9) + LPAR '(' (3, 9) (3, 10) + RPAR ')' (3, 10) (3, 11) + COLON ':' (3, 11) (3, 12) + NAME 'pass' (3, 13) (3, 17) + NEWLINE '' (3, 17) (3, 17) + NAME 'async' (4, 2) (4, 7) + NAME 'def' (4, 8) (4, 11) + NAME 'bar' (4, 12) (4, 15) + LPAR '(' (4, 15) (4, 16) + RPAR ')' (4, 16) (4, 17) + COLON ':' (4, 17) (4, 18) + NAME 'pass' (4, 19) (4, 23) + NEWLINE '' (4, 23) (4, 23) + NAME 'await' (6, 2) (6, 7) + EQUAL '=' (6, 8) (6, 9) + NUMBER '2' (6, 10) (6, 11) + DEDENT '' (6, -1) (6, -1) + """) + + 
def test_unicode(self): + + self.check_tokenize("Örter = u'places'\ngrün = U'green'", """\ + NAME 'Örter' (1, 0) (1, 5) + EQUAL '=' (1, 6) (1, 7) + STRING "u'places'" (1, 8) (1, 17) + NEWLINE '' (1, 17) (1, 17) + NAME 'grün' (2, 0) (2, 4) + EQUAL '=' (2, 5) (2, 6) + STRING "U'green'" (2, 7) (2, 15) + """) + + @unittest.expectedFailure # TODO: RUSTPYTHON + def test_invalid_syntax(self): + def get_tokens(string): + the_string = StringIO(string) + return list(tokenize._generate_tokens_from_c_tokenizer(the_string.readline)) + + for case in [ + "(1+2]", + "(1+2}", + "{1+2]", + "1_", + "1.2_", + "1e2_", + "1e+", + + "\xa0", + "€", + "0b12", + "0b1_2", + "0b2", + "0b1_", + "0b", + "0o18", + "0o1_8", + "0o8", + "0o1_", + "0o", + "0x1_", + "0x", + "1_", + "012", + "1.2_", + "1e2_", + "1e+", + "'sdfsdf", + "'''sdfsdf''", + "("*1000+"a"+")"*1000, + "]", + """\ + f'__{ + x:d + }__'""", + " a\n\x00", + ]: + with self.subTest(case=case): + self.assertRaises(tokenize.TokenError, get_tokens, case) + + @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: IndentationError not raised by + @support.skip_wasi_stack_overflow() + def test_max_indent(self): + MAXINDENT = 100 + + def generate_source(indents): + source = ''.join((' ' * x) + 'if True:\n' for x in range(indents)) + source += ' ' * indents + 'pass\n' + return source + + valid = generate_source(MAXINDENT - 1) + the_input = StringIO(valid) + tokens = list(tokenize._generate_tokens_from_c_tokenizer(the_input.readline)) + self.assertEqual(tokens[-2].type, tokenize.DEDENT) + self.assertEqual(tokens[-1].type, tokenize.ENDMARKER) + compile(valid, "", "exec") + + invalid = generate_source(MAXINDENT) + the_input = StringIO(invalid) + self.assertRaises(IndentationError, lambda: list(tokenize._generate_tokens_from_c_tokenizer(the_input.readline))) + self.assertRaises( + IndentationError, compile, invalid, "", "exec" + ) + + @unittest.expectedFailure # TODO: RUSTPYTHON; (0, '')] + def test_continuation_lines_indentation(self): + 
def get_tokens(string): + the_string = StringIO(string) + return [(kind, string) for (kind, string, *_) + in tokenize._generate_tokens_from_c_tokenizer(the_string.readline)] + + code = dedent(""" + def fib(n): + \\ + '''Print a Fibonacci series up to n.''' + \\ + a, b = 0, 1 + """) + + self.check_tokenize(code, """\ + NAME 'def' (2, 0) (2, 3) + NAME 'fib' (2, 4) (2, 7) + LPAR '(' (2, 7) (2, 8) + NAME 'n' (2, 8) (2, 9) + RPAR ')' (2, 9) (2, 10) + COLON ':' (2, 10) (2, 11) + NEWLINE '' (2, 11) (2, 11) + INDENT '' (4, -1) (4, -1) + STRING "'''Print a Fibonacci series up to n.'''" (4, 0) (4, 39) + NEWLINE '' (4, 39) (4, 39) + NAME 'a' (6, 0) (6, 1) + COMMA ',' (6, 1) (6, 2) + NAME 'b' (6, 3) (6, 4) + EQUAL '=' (6, 5) (6, 6) + NUMBER '0' (6, 7) (6, 8) + COMMA ',' (6, 8) (6, 9) + NUMBER '1' (6, 10) (6, 11) + NEWLINE '' (6, 11) (6, 11) + DEDENT '' (6, -1) (6, -1) + """) + + code_no_cont = dedent(""" + def fib(n): + '''Print a Fibonacci series up to n.''' + a, b = 0, 1 + """) + + self.assertEqual(get_tokens(code), get_tokens(code_no_cont)) + + code = dedent(""" + pass + \\ + + pass + """) + + self.check_tokenize(code, """\ + NAME 'pass' (2, 0) (2, 4) + NEWLINE '' (2, 4) (2, 4) + NAME 'pass' (5, 0) (5, 4) + NEWLINE '' (5, 4) (5, 4) + """) + + code_no_cont = dedent(""" + pass + pass + """) + + self.assertEqual(get_tokens(code), get_tokens(code_no_cont)) + + code = dedent(""" + if x: + y = 1 + \\ + \\ + \\ + \\ + foo = 1 + """) + + self.check_tokenize(code, """\ + NAME 'if' (2, 0) (2, 2) + NAME 'x' (2, 3) (2, 4) + COLON ':' (2, 4) (2, 5) + NEWLINE '' (2, 5) (2, 5) + INDENT '' (3, -1) (3, -1) + NAME 'y' (3, 4) (3, 5) + EQUAL '=' (3, 6) (3, 7) + NUMBER '1' (3, 8) (3, 9) + NEWLINE '' (3, 9) (3, 9) + NAME 'foo' (8, 4) (8, 7) + EQUAL '=' (8, 8) (8, 9) + NUMBER '1' (8, 10) (8, 11) + NEWLINE '' (8, 11) (8, 11) + DEDENT '' (8, -1) (8, -1) + """) + + code_no_cont = dedent(""" + if x: + y = 1 + foo = 1 + """) + + self.assertEqual(get_tokens(code), get_tokens(code_no_cont)) + + +class 
CTokenizerBufferTests(unittest.TestCase): + def test_newline_at_the_end_of_buffer(self): + # See issue 99581: Make sure that if we need to add a new line at the + # end of the buffer, we have enough space in the buffer, specially when + # the current line is as long as the buffer space available. + test_script = f"""\ + #coding: latin-1 + #{"a"*10000} + #{"a"*10002}""" + with os_helper.temp_dir() as temp_dir: + file_name = make_script(temp_dir, 'foo', test_script) + run_test_script(file_name) + + +class CommandLineTest(unittest.TestCase): + def setUp(self): + self.filename = tempfile.mktemp() + self.addCleanup(os_helper.unlink, self.filename) + + @staticmethod + def text_normalize(string): + """Dedent *string* and strip it from its surrounding whitespaces. + + This method is used by the other utility functions so that any + string to write or to match against can be freely indented. + """ + return re.sub(r'\s+', ' ', string).strip() + + def set_source(self, content): + with open(self.filename, 'w') as fp: + fp.write(content) + + def invoke_tokenize(self, *flags): + output = StringIO() + with contextlib.redirect_stdout(output): + tokenize._main(args=[*flags, self.filename]) + return self.text_normalize(output.getvalue()) + + def check_output(self, source, expect, *flags): + with self.subTest(source=source, flags=flags): + self.set_source(source) + res = self.invoke_tokenize(*flags) + expect = self.text_normalize(expect) + self.assertListEqual(res.splitlines(), expect.splitlines()) + + def test_invocation(self): + # test various combinations of parameters + base_flags = ('-e', '--exact') + + self.set_source(''' + def f(): + print(x) + return None + ''') + + for flag in base_flags: + with self.subTest(args=flag): + _ = self.invoke_tokenize(flag) + + with self.assertRaises(SystemExit): + # suppress argparse error message + with contextlib.redirect_stderr(StringIO()): + _ = self.invoke_tokenize('--unknown') + + def test_without_flag(self): + # test 'python -m tokenize 
source.py' + source = 'a = 1' + expect = ''' + 0,0-0,0: ENCODING 'utf-8' + 1,0-1,1: NAME 'a' + 1,2-1,3: OP '=' + 1,4-1,5: NUMBER '1' + 1,5-1,6: NEWLINE '' + 2,0-2,0: ENDMARKER '' + ''' + self.check_output(source, expect) + + def test_exact_flag(self): + # test 'python -m tokenize -e/--exact source.py' + source = 'a = 1' + expect = ''' + 0,0-0,0: ENCODING 'utf-8' + 1,0-1,1: NAME 'a' + 1,2-1,3: EQUAL '=' + 1,4-1,5: NUMBER '1' + 1,5-1,6: NEWLINE '' + 2,0-2,0: ENDMARKER '' + ''' + for flag in ['-e', '--exact']: + self.check_output(source, expect, flag) + + +class StringPrefixTest(unittest.TestCase): + @staticmethod + def determine_valid_prefixes(): + # Try all lengths until we find a length that has zero valid + # prefixes. This will miss the case where for example there + # are no valid 3 character prefixes, but there are valid 4 + # character prefixes. That seems unlikely. + + single_char_valid_prefixes = set() + + # Find all of the single character string prefixes. Just get + # the lowercase version, we'll deal with combinations of upper + # and lower case later. I'm using this logic just in case + # some uppercase-only prefix is added. + for letter in itertools.chain(string.ascii_lowercase, string.ascii_uppercase): + try: + eval(f'{letter}""') + single_char_valid_prefixes.add(letter.lower()) + except SyntaxError: + pass + + # This logic assumes that all combinations of valid prefixes only use + # the characters that are valid single character prefixes. That seems + # like a valid assumption, but if it ever changes this will need + # adjusting. 
+ valid_prefixes = set() + for length in itertools.count(): + num_at_this_length = 0 + for prefix in ( + "".join(l) + for l in itertools.combinations(single_char_valid_prefixes, length) + ): + for t in itertools.permutations(prefix): + for u in itertools.product(*[(c, c.upper()) for c in t]): + p = "".join(u) + if p == "not": + # 'not' can never be a string prefix, + # because it's a valid expression: not "" + continue + try: + eval(f'{p}""') + + # No syntax error, so p is a valid string + # prefix. + + valid_prefixes.add(p) + num_at_this_length += 1 + except SyntaxError: + pass + if num_at_this_length == 0: + return valid_prefixes + + + def test_prefixes(self): + # Get the list of defined string prefixes. I don't see an + # obvious documented way of doing this, but probably the best + # thing is to split apart tokenize.StringPrefix. + + # Make sure StringPrefix begins and ends in parens. We're + # assuming it's of the form "(a|b|ab)", if a, b, and cd are + # valid string prefixes. + self.assertEqual(tokenize.StringPrefix[0], '(') + self.assertEqual(tokenize.StringPrefix[-1], ')') + + # Then split apart everything else by '|'. + defined_prefixes = set(tokenize.StringPrefix[1:-1].split('|')) + + # Now compute the actual allowed string prefixes and compare + # to what is defined in the tokenize module. 
+ self.assertEqual(defined_prefixes, self.determine_valid_prefixes()) + + if __name__ == "__main__": unittest.main() diff --git a/Lib/test/test_types.py b/Lib/test/test_types.py index 2c03781bc72..5042b3c17b0 100644 --- a/Lib/test/test_types.py +++ b/Lib/test/test_types.py @@ -431,7 +431,6 @@ def test(i, format_spec, result): test(123456, "1=20", '11111111111111123456') test(123456, "*=20", '**************123456') - @unittest.expectedFailure # TODO: RUSTPYTHON; + 1234.57 @run_with_locale('LC_NUMERIC', 'en_US.UTF8', '') def test_float__format__locale(self): # test locale support for __format__ code 'n' @@ -441,7 +440,6 @@ def test_float__format__locale(self): self.assertEqual(locale.format_string('%g', x, grouping=True), format(x, 'n')) self.assertEqual(locale.format_string('%.10g', x, grouping=True), format(x, '.10n')) - @unittest.expectedFailure # TODO: RUSTPYTHON; + 123456789012345678901234567890 @run_with_locale('LC_NUMERIC', 'en_US.UTF8', '') def test_int__format__locale(self): # test locale support for __format__ code 'n' for integers diff --git a/Lib/test/test_weakref.py b/Lib/test/test_weakref.py index e04afbb1af5..cd0bdacaaf0 100644 --- a/Lib/test/test_weakref.py +++ b/Lib/test/test_weakref.py @@ -337,7 +337,6 @@ def __bytes__(self): self.assertIn("__bytes__", dir(weakref.proxy(instance))) self.assertEqual(bytes(weakref.proxy(instance)), b"bytes") - @unittest.expectedFailure # TODO: RUSTPYTHON def test_proxy_index(self): class C: def __index__(self): @@ -346,7 +345,6 @@ def __index__(self): p = weakref.proxy(o) self.assertEqual(operator.index(p), 10) - @unittest.expectedFailure # TODO: RUSTPYTHON def test_proxy_div(self): class C: def __floordiv__(self, other): @@ -359,7 +357,6 @@ def __ifloordiv__(self, other): p //= 5 self.assertEqual(p, 21) - @unittest.expectedFailure # TODO: RUSTPYTHON def test_proxy_matmul(self): class C: def __matmul__(self, other): @@ -504,7 +501,6 @@ def __iter__(self): # Calls proxy.__next__ self.assertEqual(list(weak_it), [4, 5, 
6]) - @unittest.expectedFailure # TODO: RUSTPYTHON def test_proxy_bad_next(self): # bpo-44720: PyIter_Next() shouldn't be called if the reference # isn't an iterator. @@ -594,7 +590,6 @@ def test_getweakrefs(self): self.assertEqual(weakref.getweakrefs(1), [], "list of refs does not match for int") - @unittest.expectedFailure # TODO: RUSTPYTHON def test_newstyle_number_ops(self): class F(float): pass @@ -1862,7 +1857,6 @@ def test_weak_valued_delitem(self): self.assertEqual(len(d), 1) self.assertEqual(list(d.items()), [('something else', o2)]) - @unittest.expectedFailure # TODO: RUSTPYTHON def test_weak_keyed_bad_delitem(self): d = weakref.WeakKeyDictionary() o = Object('1') diff --git a/Lib/test/test_yield_from.py b/Lib/test/test_yield_from.py index e0e3db0839e..7028a606217 100644 --- a/Lib/test/test_yield_from.py +++ b/Lib/test/test_yield_from.py @@ -538,7 +538,7 @@ def g(): "finishing g", ]) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_broken_getattr_handling(self): """ Test subiterator with a broken getattr implementation @@ -882,7 +882,7 @@ def g(): yield from () self.assertRaises(StopIteration, next, g()) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_delegating_generators_claim_to_be_running(self): # Check with basic iteration def one(): @@ -909,7 +909,7 @@ def two(): pass self.assertEqual(res, [0, 1, 2, 3]) - @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: Lists differ: [0, 1, 2] != [0, 1, 2, 3] + @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: Lists differ: [0, 1, 2] != [0, 1, 2, 3] def test_delegating_generators_claim_to_be_running_with_throw(self): # Check with throw class MyErr(Exception): @@ -1071,7 +1071,7 @@ def assert_generator_raised_stop_iteration(self): def assert_generator_ignored_generator_exit(self): return self.assertRaisesRegex(RuntimeError, r"^generator ignored GeneratorExit$") - 
@unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_close_and_throw_work(self): yielded_first = object() @@ -1209,7 +1209,7 @@ def outer(): self.assertIsNone(caught.exception.__context__.__context__) self.assert_stop_iteration(g) - @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: RuntimeError not raised + @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: RuntimeError not raised def test_close_and_throw_raise_stop_iteration(self): yielded_first = object() @@ -1449,7 +1449,7 @@ def outer(): self.assertIsNone(caught.exception.__context__.__context__) self.assert_stop_iteration(g) - @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: None is not StopIteration() + @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: None is not StopIteration() def test_close_and_throw_yield(self): yielded_first = object() @@ -1531,8 +1531,9 @@ def inner(): try: yield yielded_first yield yielded_second - finally: - return returned + except: + pass + return returned def outer(): return (yield from inner()) @@ -1587,6 +1588,19 @@ def outer(): self.assertIsNone(caught.exception.__context__) self.assert_stop_iteration(g) + def test_throws_in_iter(self): + # See GH-126366: NULL pointer dereference if __iter__ + # threw an exception. 
+ class Silly: + def __iter__(self): + raise RuntimeError("nobody expects the spanish inquisition") + + def my_generator(): + yield from Silly() + + with self.assertRaisesRegex(RuntimeError, "nobody expects the spanish inquisition"): + next(iter(my_generator())) + if __name__ == '__main__': unittest.main() diff --git a/Lib/tokenize.py b/Lib/tokenize.py index d72968e4250..1f31258ce36 100644 --- a/Lib/tokenize.py +++ b/Lib/tokenize.py @@ -24,10 +24,7 @@ __credits__ = ('GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, ' 'Skip Montanaro, Raymond Hettinger, Trent Nelson, ' 'Michael Foord') -try: - from builtins import open as _builtin_open -except ImportError: - pass +from builtins import open as _builtin_open from codecs import lookup, BOM_UTF8 import collections import functools @@ -37,13 +34,14 @@ import sys from token import * from token import EXACT_TOKEN_TYPES +import _tokenize -cookie_re = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII) +cookie_re = re.compile(br'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII) blank_re = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII) import token __all__ = token.__all__ + ["tokenize", "generate_tokens", "detect_encoding", - "untokenize", "TokenInfo"] + "untokenize", "TokenInfo", "open", "TokenError"] del token class TokenInfo(collections.namedtuple('TokenInfo', 'type string start end line')): @@ -88,7 +86,7 @@ def _all_string_prefixes(): # The valid string prefixes. Only contain the lower case versions, # and don't contain any permutations (include 'fr', but not # 'rf'). The various permutations will be generated. 
- _valid_string_prefixes = ['b', 'r', 'u', 'f', 'br', 'fr'] + _valid_string_prefixes = ['b', 'r', 'u', 'f', 't', 'br', 'fr', 'tr'] # if we add binary f-strings, add: ['fb', 'fbr'] result = {''} for prefix in _valid_string_prefixes: @@ -134,7 +132,7 @@ def _compile(expr): group("'", r'\\\r?\n'), StringPrefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' + group('"', r'\\\r?\n')) -PseudoExtras = group(r'\\\r?\n|\Z', Comment, Triple) +PseudoExtras = group(r'\\\r?\n|\z', Comment, Triple) PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name) # For a given string prefix plus quotes, endpats maps it to a regex @@ -146,6 +144,7 @@ def _compile(expr): endpats[_prefix + '"'] = Double endpats[_prefix + "'''"] = Single3 endpats[_prefix + '"""'] = Double3 +del _prefix # A set of all of the single and triple quoted string prefixes, # including the opening quotes. @@ -156,13 +155,12 @@ def _compile(expr): single_quoted.add(u) for u in (t + '"""', t + "'''"): triple_quoted.add(u) +del t, u tabsize = 8 class TokenError(Exception): pass -class StopTokenizing(Exception): pass - class Untokenizer: @@ -170,6 +168,8 @@ def __init__(self): self.tokens = [] self.prev_row = 1 self.prev_col = 0 + self.prev_type = None + self.prev_line = "" self.encoding = None def add_whitespace(self, start): @@ -177,14 +177,51 @@ def add_whitespace(self, start): if row < self.prev_row or row == self.prev_row and col < self.prev_col: raise ValueError("start ({},{}) precedes previous end ({},{})" .format(row, col, self.prev_row, self.prev_col)) - row_offset = row - self.prev_row - if row_offset: - self.tokens.append("\\\n" * row_offset) - self.prev_col = 0 + self.add_backslash_continuation(start) col_offset = col - self.prev_col if col_offset: self.tokens.append(" " * col_offset) + def add_backslash_continuation(self, start): + """Add backslash continuation characters if the row has increased + without encountering a newline token. 
+ + This also inserts the correct amount of whitespace before the backslash. + """ + row = start[0] + row_offset = row - self.prev_row + if row_offset == 0: + return + + newline = '\r\n' if self.prev_line.endswith('\r\n') else '\n' + line = self.prev_line.rstrip('\\\r\n') + ws = ''.join(_itertools.takewhile(str.isspace, reversed(line))) + self.tokens.append(ws + f"\\{newline}" * row_offset) + self.prev_col = 0 + + def escape_brackets(self, token): + characters = [] + consume_until_next_bracket = False + for character in token: + if character == "}": + if consume_until_next_bracket: + consume_until_next_bracket = False + else: + characters.append(character) + if character == "{": + n_backslashes = sum( + 1 for char in _itertools.takewhile( + "\\".__eq__, + characters[-2::-1] + ) + ) + if n_backslashes % 2 == 0 or characters[-1] != "N": + characters.append(character) + else: + consume_until_next_bracket = True + characters.append(character) + return "".join(characters) + def untokenize(self, iterable): it = iter(iterable) indents = [] @@ -214,12 +251,22 @@ def untokenize(self, iterable): self.tokens.append(indent) self.prev_col = len(indent) startline = False + elif tok_type in {FSTRING_MIDDLE, TSTRING_MIDDLE}: + if '{' in token or '}' in token: + token = self.escape_brackets(token) + last_line = token.splitlines()[-1] + end_line, end_col = end + extra_chars = last_line.count("{{") + last_line.count("}}") + end = (end_line, end_col + extra_chars) + self.add_whitespace(start) self.tokens.append(token) self.prev_row, self.prev_col = end if tok_type in (NEWLINE, NL): self.prev_row += 1 self.prev_col = 0 + self.prev_type = tok_type + self.prev_line = line return "".join(self.tokens) def compat(self, token, iterable): @@ -227,6 +274,7 @@ def compat(self, token, iterable): toks_append = self.tokens.append startline = token[0] in (NEWLINE, NL) prevstring = False + in_fstring_or_tstring = 0 for tok in _itertools.chain([token], iterable): toknum, tokval = tok[:2] @@ -245,6 
+293,10 @@ def compat(self, token, iterable): else: prevstring = False + if toknum in {FSTRING_START, TSTRING_START}: + in_fstring_or_tstring += 1 + elif toknum in {FSTRING_END, TSTRING_END}: + in_fstring_or_tstring -= 1 if toknum == INDENT: indents.append(tokval) continue @@ -256,7 +308,19 @@ def compat(self, token, iterable): elif startline and indents: toks_append(indents[-1]) startline = False + elif toknum in {FSTRING_MIDDLE, TSTRING_MIDDLE}: + tokval = self.escape_brackets(tokval) + + # Insert a space between two consecutive brackets if we are in an f-string or t-string + if tokval in {"{", "}"} and self.tokens and self.tokens[-1] == tokval and in_fstring_or_tstring: + tokval = ' ' + tokval + + # Insert a space between two consecutive f-strings + if toknum in (STRING, FSTRING_START) and self.prev_type in (STRING, FSTRING_END): + self.tokens.append(" ") + toks_append(tokval) + self.prev_type = toknum def untokenize(iterable): @@ -268,16 +332,10 @@ def untokenize(iterable): with at least two elements, a token number and token value. If only two tokens are passed, the resulting output is poor. - Round-trip invariant for full input: - Untokenized source will match input source exactly - - Round-trip invariant for limited input: - # Output bytes will tokenize back to the input - t1 = [tok[:2] for tok in tokenize(f.readline)] - newcode = untokenize(t1) - readline = BytesIO(newcode).readline - t2 = [tok[:2] for tok in tokenize(readline)] - assert t1 == t2 + The result is guaranteed to tokenize back to match the input so + that the conversion is lossless and round-trips are assured. + The guarantee applies only to the token type and token string as + the spacing between tokens (column positions) may change. 
""" ut = Untokenizer() out = ut.untokenize(iterable) @@ -287,7 +345,7 @@ def untokenize(iterable): def _get_normal_name(orig_enc): - """Imitates get_normal_name in tokenizer.c.""" + """Imitates get_normal_name in Parser/tokenizer/helpers.c.""" # Only care about the first 12 characters. enc = orig_enc[:12].lower().replace("_", "-") if enc == "utf-8" or enc.startswith("utf-8-"): @@ -327,22 +385,23 @@ def read_or_stop(): except StopIteration: return b'' - def find_cookie(line): + def check(line, encoding): + # Check if the line matches the encoding. + if 0 in line: + raise SyntaxError("source code cannot contain null bytes") try: - # Decode as UTF-8. Either the line is an encoding declaration, - # in which case it should be pure ASCII, or it must be UTF-8 - # per default encoding. - line_string = line.decode('utf-8') + line.decode(encoding) except UnicodeDecodeError: msg = "invalid or missing encoding declaration" if filename is not None: msg = '{} for {!r}'.format(msg, filename) raise SyntaxError(msg) - match = cookie_re.match(line_string) + def find_cookie(line): + match = cookie_re.match(line) if not match: return None - encoding = _get_normal_name(match.group(1)) + encoding = _get_normal_name(match.group(1).decode()) try: codec = lookup(encoding) except LookupError: @@ -375,18 +434,23 @@ def find_cookie(line): encoding = find_cookie(first) if encoding: + check(first, encoding) return encoding, [first] if not blank_re.match(first): + check(first, default) return default, [first] second = read_or_stop() if not second: + check(first, default) return default, [first] encoding = find_cookie(second) if encoding: + check(first + second, encoding) return encoding, [first, second] + check(first + second, default) return default, [first, second] @@ -405,7 +469,6 @@ def open(filename): buffer.close() raise - def tokenize(readline): """ The tokenize() generator requires one argument, readline, which @@ -426,193 +489,13 @@ def tokenize(readline): which tells you which encoding 
was used to decode the bytes stream. """ encoding, consumed = detect_encoding(readline) - empty = _itertools.repeat(b"") - rl_gen = _itertools.chain(consumed, iter(readline, b""), empty) - return _tokenize(rl_gen.__next__, encoding) - - -def _tokenize(readline, encoding): - lnum = parenlev = continued = 0 - numchars = '0123456789' - contstr, needcont = '', 0 - contline = None - indents = [0] - + rl_gen = _itertools.chain(consumed, iter(readline, b"")) if encoding is not None: if encoding == "utf-8-sig": # BOM will already have been stripped. encoding = "utf-8" yield TokenInfo(ENCODING, encoding, (0, 0), (0, 0), '') - last_line = b'' - line = b'' - while True: # loop over lines in stream - try: - # We capture the value of the line variable here because - # readline uses the empty string '' to signal end of input, - # hence `line` itself will always be overwritten at the end - # of this loop. - last_line = line - line = readline() - except StopIteration: - line = b'' - - if encoding is not None: - line = line.decode(encoding) - lnum += 1 - pos, max = 0, len(line) - - if contstr: # continued string - if not line: - raise TokenError("EOF in multi-line string", strstart) - endmatch = endprog.match(line) - if endmatch: - pos = end = endmatch.end(0) - yield TokenInfo(STRING, contstr + line[:end], - strstart, (lnum, end), contline + line) - contstr, needcont = '', 0 - contline = None - elif needcont and line[-2:] != '\\\n' and line[-3:] != '\\\r\n': - yield TokenInfo(ERRORTOKEN, contstr + line, - strstart, (lnum, len(line)), contline) - contstr = '' - contline = None - continue - else: - contstr = contstr + line - contline = contline + line - continue - - elif parenlev == 0 and not continued: # new statement - if not line: break - column = 0 - while pos < max: # measure leading whitespace - if line[pos] == ' ': - column += 1 - elif line[pos] == '\t': - column = (column//tabsize + 1)*tabsize - elif line[pos] == '\f': - column = 0 - else: - break - pos += 1 - if pos == max: 
- break - - if line[pos] in '#\r\n': # skip comments or blank lines - if line[pos] == '#': - comment_token = line[pos:].rstrip('\r\n') - yield TokenInfo(COMMENT, comment_token, - (lnum, pos), (lnum, pos + len(comment_token)), line) - pos += len(comment_token) - - yield TokenInfo(NL, line[pos:], - (lnum, pos), (lnum, len(line)), line) - continue - - if column > indents[-1]: # count indents or dedents - indents.append(column) - yield TokenInfo(INDENT, line[:pos], (lnum, 0), (lnum, pos), line) - while column < indents[-1]: - if column not in indents: - raise IndentationError( - "unindent does not match any outer indentation level", - ("", lnum, pos, line)) - indents = indents[:-1] - - yield TokenInfo(DEDENT, '', (lnum, pos), (lnum, pos), line) - - else: # continued statement - if not line: - raise TokenError("EOF in multi-line statement", (lnum, 0)) - continued = 0 - - while pos < max: - pseudomatch = _compile(PseudoToken).match(line, pos) - if pseudomatch: # scan for tokens - start, end = pseudomatch.span(1) - spos, epos, pos = (lnum, start), (lnum, end), end - if start == end: - continue - token, initial = line[start:end], line[start] - - if (initial in numchars or # ordinary number - (initial == '.' and token != '.' 
and token != '...')): - yield TokenInfo(NUMBER, token, spos, epos, line) - elif initial in '\r\n': - if parenlev > 0: - yield TokenInfo(NL, token, spos, epos, line) - else: - yield TokenInfo(NEWLINE, token, spos, epos, line) - - elif initial == '#': - assert not token.endswith("\n") - yield TokenInfo(COMMENT, token, spos, epos, line) - - elif token in triple_quoted: - endprog = _compile(endpats[token]) - endmatch = endprog.match(line, pos) - if endmatch: # all on one line - pos = endmatch.end(0) - token = line[start:pos] - yield TokenInfo(STRING, token, spos, (lnum, pos), line) - else: - strstart = (lnum, start) # multiple lines - contstr = line[start:] - contline = line - break - - # Check up to the first 3 chars of the token to see if - # they're in the single_quoted set. If so, they start - # a string. - # We're using the first 3, because we're looking for - # "rb'" (for example) at the start of the token. If - # we switch to longer prefixes, this needs to be - # adjusted. - # Note that initial == token[:1]. - # Also note that single quote checking must come after - # triple quote checking (above). - elif (initial in single_quoted or - token[:2] in single_quoted or - token[:3] in single_quoted): - if token[-1] == '\n': # continued string - strstart = (lnum, start) - # Again, using the first 3 chars of the - # token. This is looking for the matching end - # regex for the correct type of quote - # character. So it's really looking for - # endpats["'"] or endpats['"'], by trying to - # skip string prefix characters, if any. 
- endprog = _compile(endpats.get(initial) or - endpats.get(token[1]) or - endpats.get(token[2])) - contstr, needcont = line[start:], 1 - contline = line - break - else: # ordinary string - yield TokenInfo(STRING, token, spos, epos, line) - - elif initial.isidentifier(): # ordinary name - yield TokenInfo(NAME, token, spos, epos, line) - elif initial == '\\': # continued stmt - continued = 1 - else: - if initial in '([{': - parenlev += 1 - elif initial in ')]}': - parenlev -= 1 - yield TokenInfo(OP, token, spos, epos, line) - else: - yield TokenInfo(ERRORTOKEN, line[pos], - (lnum, pos), (lnum, pos+1), line) - pos += 1 - - # Add an implicit NEWLINE if the input doesn't end in one - if last_line and last_line[-1] not in '\r\n' and not last_line.strip().startswith("#"): - yield TokenInfo(NEWLINE, '', (lnum - 1, len(last_line)), (lnum - 1, len(last_line) + 1), '') - for indent in indents[1:]: # pop remaining indent levels - yield TokenInfo(DEDENT, '', (lnum, 0), (lnum, 0), '') - yield TokenInfo(ENDMARKER, '', (lnum, 0), (lnum, 0), '') - + yield from _generate_tokens_from_c_tokenizer(rl_gen.__next__, encoding, extra_tokens=True) def generate_tokens(readline): """Tokenize a source reading Python code as unicode strings. @@ -620,9 +503,9 @@ def generate_tokens(readline): This has the same API as tokenize(), except that it expects the *readline* callable to return str objects instead of bytes. 
""" - return _tokenize(readline, None) + return _generate_tokens_from_c_tokenizer(readline, extra_tokens=True) -def main(): +def _main(args=None): import argparse # Helper error handling routines @@ -641,13 +524,13 @@ def error(message, filename=None, location=None): sys.exit(1) # Parse the arguments and options - parser = argparse.ArgumentParser(prog='python -m tokenize') + parser = argparse.ArgumentParser(color=True) parser.add_argument(dest='filename', nargs='?', metavar='filename.py', help='the file to tokenize; defaults to stdin') parser.add_argument('-e', '--exact', dest='exact', action='store_true', help='display token names using the exact type') - args = parser.parse_args() + args = parser.parse_args(args) try: # Tokenize the input @@ -657,7 +540,9 @@ def error(message, filename=None, location=None): tokens = list(tokenize(f.readline)) else: filename = "" - tokens = _tokenize(sys.stdin.readline, None) + tokens = _generate_tokens_from_c_tokenizer( + sys.stdin.readline, extra_tokens=True) + # Output the tokenization for token in tokens: @@ -683,5 +568,31 @@ def error(message, filename=None, location=None): perror("unexpected error: %s" % err) raise +def _transform_msg(msg): + """Transform error messages from the C tokenizer into the Python tokenize + + The C tokenizer is more picky than the Python one, so we need to massage + the error messages a bit for backwards compatibility. 
+ """ + if "unterminated triple-quoted string literal" in msg: + return "EOF in multi-line string" + return msg + +def _generate_tokens_from_c_tokenizer(source, encoding=None, extra_tokens=False): + """Tokenize a source reading Python code as unicode strings using the internal C tokenizer""" + if encoding is None: + it = _tokenize.TokenizerIter(source, extra_tokens=extra_tokens) + else: + it = _tokenize.TokenizerIter(source, encoding=encoding, extra_tokens=extra_tokens) + try: + for info in it: + yield TokenInfo._make(info) + except SyntaxError as e: + if type(e) != SyntaxError: + raise e from None + msg = _transform_msg(e.msg) + raise TokenError(msg, (e.lineno, e.offset)) from None + + if __name__ == "__main__": - main() + _main() diff --git a/crates/codegen/src/compile.rs b/crates/codegen/src/compile.rs index 6f7d8c15236..94c6642aac2 100644 --- a/crates/codegen/src/compile.rs +++ b/crates/codegen/src/compile.rs @@ -29,7 +29,7 @@ use rustpython_compiler_core::{ self, AnyInstruction, Arg as OpArgMarker, BinaryOperator, BuildSliceArgCount, CodeObject, ComparisonOperator, ConstantData, ConvertValueOparg, Instruction, IntrinsicFunction1, Invert, LoadAttr, LoadSuperAttr, OpArg, OpArgType, PseudoInstruction, SpecialMethod, - UnpackExArgs, + UnpackExArgs, oparg, }, }; use rustpython_wtf8::Wtf8Buf; @@ -715,14 +715,14 @@ impl Compiler { } /// Get the index of a local variable. - fn get_local_var_index(&mut self, name: &str) -> CompileResult { + fn get_local_var_index(&mut self, name: &str) -> CompileResult { let info = self.code_stack.last_mut().unwrap(); let idx = info .metadata .varnames .get_index_of(name) .unwrap_or_else(|| info.metadata.varnames.insert_full(name.to_owned()).0); - Ok(idx.to_u32()) + Ok(idx.to_u32().into()) } /// Get the index of a global name. 
@@ -1283,7 +1283,12 @@ impl Compiler { /// if format > VALUE_WITH_FAKE_GLOBALS (2): raise NotImplementedError fn emit_format_validation(&mut self) -> CompileResult<()> { // Load format parameter (first local variable, index 0) - emit!(self, Instruction::LoadFast { var_num: 0 }); + emit!( + self, + Instruction::LoadFast { + var_num: oparg::VarNum::from_u32(0) + } + ); // Load VALUE_WITH_FAKE_GLOBALS constant (2) self.emit_load_const(ConstantData::Integer { value: 2.into() }); @@ -1562,15 +1567,19 @@ impl Compiler { fn name(&mut self, name: &str) -> bytecode::NameIdx { self._name_inner(name, |i| &mut i.metadata.names) } - fn varname(&mut self, name: &str) -> CompileResult { + + fn varname(&mut self, name: &str) -> CompileResult { // Note: __debug__ checks are now handled in symboltable phase - Ok(self._name_inner(name, |i| &mut i.metadata.varnames)) + Ok(oparg::VarNum::from_u32( + self._name_inner(name, |i| &mut i.metadata.varnames), + )) } + fn _name_inner( &mut self, name: &str, cache: impl FnOnce(&mut ir::CodeInfo) -> &mut IndexSet, - ) -> bytecode::NameIdx { + ) -> u32 { let name = self.mangle(name); let cache = cache(self.current_code_info()); cache @@ -2500,7 +2509,7 @@ impl Compiler { self.compile_expression(value)?; emit!(self, Instruction::ReturnValue); let value_code = self.exit_scope(); - self.make_closure(value_code, bytecode::MakeFunctionFlags::empty())?; + self.make_closure(value_code, bytecode::MakeFunctionFlags::new())?; // Stack: [type_params_tuple, value_closure] // Swap so unpack_sequence reverse gives correct order @@ -2513,7 +2522,7 @@ impl Compiler { let code = self.exit_scope(); self.ctx = prev_ctx; - self.make_closure(code, bytecode::MakeFunctionFlags::empty())?; + self.make_closure(code, bytecode::MakeFunctionFlags::new())?; emit!(self, Instruction::PushNull); emit!(self, Instruction::Call { argc: 0 }); @@ -2552,7 +2561,7 @@ impl Compiler { let code = self.exit_scope(); self.ctx = prev_ctx; - self.make_closure(code, 
bytecode::MakeFunctionFlags::empty())?; + self.make_closure(code, bytecode::MakeFunctionFlags::new())?; // Stack: [name, None, closure] } @@ -2716,7 +2725,7 @@ impl Compiler { self.ctx = prev_ctx; // Create closure for lazy evaluation - self.make_closure(code, bytecode::MakeFunctionFlags::empty())?; + self.make_closure(code, bytecode::MakeFunctionFlags::new())?; Ok(()) } @@ -3640,7 +3649,7 @@ impl Compiler { &mut self, parameters: &ast::Parameters, ) -> CompileResult { - let mut funcflags = bytecode::MakeFunctionFlags::empty(); + let mut funcflags = bytecode::MakeFunctionFlags::new(); // Handle positional defaults let defaults: Vec<_> = core::iter::empty() @@ -3660,7 +3669,7 @@ impl Compiler { count: defaults.len().to_u32() } ); - funcflags |= bytecode::MakeFunctionFlags::DEFAULTS; + funcflags.insert(bytecode::MakeFunctionFlag::Defaults); } // Handle keyword-only defaults @@ -3685,7 +3694,7 @@ impl Compiler { count: kw_with_defaults.len().to_u32(), } ); - funcflags |= bytecode::MakeFunctionFlags::KW_ONLY_DEFAULTS; + funcflags.insert(bytecode::MakeFunctionFlag::KwOnlyDefaults); } Ok(funcflags) @@ -3835,7 +3844,7 @@ impl Compiler { let annotate_code = self.exit_annotation_scope(saved_ctx); // Make a closure from the code object - self.make_closure(annotate_code, bytecode::MakeFunctionFlags::empty())?; + self.make_closure(annotate_code, bytecode::MakeFunctionFlags::new())?; Ok(true) } @@ -4045,7 +4054,7 @@ impl Compiler { ); // Make a closure from the code object - self.make_closure(annotate_code, bytecode::MakeFunctionFlags::empty())?; + self.make_closure(annotate_code, bytecode::MakeFunctionFlags::new())?; // Store as __annotate_func__ for classes, __annotate__ for modules let name = if parent_scope_type == CompilerScope::Class { @@ -4083,10 +4092,10 @@ impl Compiler { if is_generic { // Count args to pass to type params scope - if funcflags.contains(bytecode::MakeFunctionFlags::DEFAULTS) { + if funcflags.contains(&bytecode::MakeFunctionFlag::Defaults) { 
num_typeparam_args += 1; } - if funcflags.contains(bytecode::MakeFunctionFlags::KW_ONLY_DEFAULTS) { + if funcflags.contains(&bytecode::MakeFunctionFlag::KwOnlyDefaults) { num_typeparam_args += 1; } @@ -4111,13 +4120,13 @@ impl Compiler { // Add parameter names to varnames for the type params scope // These will be passed as arguments when the closure is called let current_info = self.current_code_info(); - if funcflags.contains(bytecode::MakeFunctionFlags::DEFAULTS) { + if funcflags.contains(&bytecode::MakeFunctionFlag::Defaults) { current_info .metadata .varnames .insert(".defaults".to_owned()); } - if funcflags.contains(bytecode::MakeFunctionFlags::KW_ONLY_DEFAULTS) { + if funcflags.contains(&bytecode::MakeFunctionFlag::KwOnlyDefaults) { current_info .metadata .varnames @@ -4129,16 +4138,16 @@ impl Compiler { // Load defaults/kwdefaults with LOAD_FAST for i in 0..num_typeparam_args { - emit!(self, Instruction::LoadFast { var_num: i as u32 }); + let var_num = oparg::VarNum::from(i as u32); + emit!(self, Instruction::LoadFast { var_num }); } } // Compile annotations as closure (PEP 649) - let annotations_flag = if self.compile_annotations_closure(name, parameters, returns)? { - bytecode::MakeFunctionFlags::ANNOTATE - } else { - bytecode::MakeFunctionFlags::empty() - }; + let mut annotations_flag = bytecode::MakeFunctionFlags::new(); + if self.compile_annotations_closure(name, parameters, returns)? { + annotations_flag.insert(bytecode::MakeFunctionFlag::Annotate); + } // Compile function body let final_funcflags = funcflags | annotations_flag; @@ -4169,7 +4178,7 @@ impl Compiler { self.ctx = saved_ctx; // Make closure for type params code - self.make_closure(type_params_code, bytecode::MakeFunctionFlags::empty())?; + self.make_closure(type_params_code, bytecode::MakeFunctionFlags::new())?; // Call the type params closure with defaults/kwdefaults as arguments. 
// Call protocol: [callable, self_or_null, arg1, ..., argN] @@ -4337,57 +4346,57 @@ impl Compiler { emit!( self, Instruction::SetFunctionAttribute { - flag: bytecode::MakeFunctionFlags::CLOSURE + flag: bytecode::MakeFunctionFlag::Closure } ); } // Set annotations if present - if flags.contains(bytecode::MakeFunctionFlags::ANNOTATIONS) { + if flags.contains(&bytecode::MakeFunctionFlag::Annotations) { emit!( self, Instruction::SetFunctionAttribute { - flag: bytecode::MakeFunctionFlags::ANNOTATIONS + flag: bytecode::MakeFunctionFlag::Annotations } ); } // Set __annotate__ closure if present (PEP 649) - if flags.contains(bytecode::MakeFunctionFlags::ANNOTATE) { + if flags.contains(&bytecode::MakeFunctionFlag::Annotate) { emit!( self, Instruction::SetFunctionAttribute { - flag: bytecode::MakeFunctionFlags::ANNOTATE + flag: bytecode::MakeFunctionFlag::Annotate } ); } // Set kwdefaults if present - if flags.contains(bytecode::MakeFunctionFlags::KW_ONLY_DEFAULTS) { + if flags.contains(&bytecode::MakeFunctionFlag::KwOnlyDefaults) { emit!( self, Instruction::SetFunctionAttribute { - flag: bytecode::MakeFunctionFlags::KW_ONLY_DEFAULTS + flag: bytecode::MakeFunctionFlag::KwOnlyDefaults } ); } // Set defaults if present - if flags.contains(bytecode::MakeFunctionFlags::DEFAULTS) { + if flags.contains(&bytecode::MakeFunctionFlag::Defaults) { emit!( self, Instruction::SetFunctionAttribute { - flag: bytecode::MakeFunctionFlags::DEFAULTS + flag: bytecode::MakeFunctionFlag::Defaults } ); } // Set type_params if present - if flags.contains(bytecode::MakeFunctionFlags::TYPE_PARAMS) { + if flags.contains(&bytecode::MakeFunctionFlag::TypeParams) { emit!( self, Instruction::SetFunctionAttribute { - flag: bytecode::MakeFunctionFlags::TYPE_PARAMS + flag: bytecode::MakeFunctionFlag::TypeParams } ); } @@ -4679,14 +4688,14 @@ impl Compiler { emit!(self, Instruction::PushNull); // Set up the class function with type params - let mut func_flags = bytecode::MakeFunctionFlags::empty(); + let mut 
func_flags = bytecode::MakeFunctionFlags::new(); emit!( self, Instruction::LoadName { namei: dot_type_params } ); - func_flags |= bytecode::MakeFunctionFlags::TYPE_PARAMS; + func_flags.insert(bytecode::MakeFunctionFlag::TypeParams); // Create class function with closure self.make_closure(class_code, func_flags)?; @@ -4809,7 +4818,7 @@ impl Compiler { self.ctx = saved_ctx; // Execute the type params function - self.make_closure(type_params_code, bytecode::MakeFunctionFlags::empty())?; + self.make_closure(type_params_code, bytecode::MakeFunctionFlags::new())?; emit!(self, Instruction::PushNull); emit!(self, Instruction::Call { argc: 0 }); } else { @@ -4818,7 +4827,7 @@ impl Compiler { emit!(self, Instruction::PushNull); // Create class function with closure - self.make_closure(class_code, bytecode::MakeFunctionFlags::empty())?; + self.make_closure(class_code, bytecode::MakeFunctionFlags::new())?; self.emit_load_const(ConstantData::Str { value: name.into() }); if let Some(arguments) = arguments { @@ -7086,12 +7095,12 @@ impl Compiler { } self.enter_function(&name, params)?; - let mut func_flags = bytecode::MakeFunctionFlags::empty(); + let mut func_flags = bytecode::MakeFunctionFlags::new(); if have_defaults { - func_flags |= bytecode::MakeFunctionFlags::DEFAULTS; + func_flags.insert(bytecode::MakeFunctionFlag::Defaults); } if have_kwdefaults { - func_flags |= bytecode::MakeFunctionFlags::KW_ONLY_DEFAULTS; + func_flags.insert(bytecode::MakeFunctionFlag::KwOnlyDefaults); } // Set qualname for lambda @@ -7775,7 +7784,7 @@ impl Compiler { self.ctx = prev_ctx; // Create comprehension function with closure - self.make_closure(code, bytecode::MakeFunctionFlags::empty())?; + self.make_closure(code, bytecode::MakeFunctionFlags::new())?; emit!(self, Instruction::PushNull); // Evaluate iterated item: @@ -8075,9 +8084,9 @@ impl Compiler { // fn block_done() - fn arg_constant(&mut self, constant: ConstantData) -> u32 { + fn arg_constant(&mut self, constant: ConstantData) -> 
oparg::ConstIdx { let info = self.current_code_info(); - info.metadata.consts.insert_full(constant).0.to_u32() + info.metadata.consts.insert_full(constant).0.to_u32().into() } fn emit_load_const(&mut self, constant: ConstantData) { @@ -9069,6 +9078,18 @@ mod tests { fn compile_exec(source: &str) -> CodeObject { let opts = CompileOpts::default(); + compile_exec_with_options(source, opts) + } + + fn compile_exec_optimized(source: &str) -> CodeObject { + let opts = CompileOpts { + optimize: 1, + ..CompileOpts::default() + }; + compile_exec_with_options(source, opts) + } + + fn compile_exec_with_options(source: &str, opts: CompileOpts) -> CodeObject { let source_file = SourceFileBuilder::new("source_path", source).finish(); let parsed = ruff_python_parser::parse( source_file.source_text(), @@ -9137,6 +9158,15 @@ x = Test() and False or False )); } + #[test] + fn test_const_bool_not_op() { + assert_dis_snapshot!(compile_exec_optimized( + "\ +x = not True +" + )); + } + #[test] fn test_nested_double_async_with() { assert_dis_snapshot!(compile_exec( diff --git a/crates/codegen/src/ir.rs b/crates/codegen/src/ir.rs index 43a2dfa5107..67c60dd561d 100644 --- a/crates/codegen/src/ir.rs +++ b/crates/codegen/src/ir.rs @@ -10,7 +10,7 @@ use rustpython_compiler_core::{ bytecode::{ AnyInstruction, Arg, CodeFlags, CodeObject, CodeUnit, CodeUnits, ConstantData, ExceptionTableEntry, InstrDisplayContext, Instruction, InstructionMetadata, Label, OpArg, - PseudoInstruction, PyCodeLocationInfoKind, encode_exception_table, + PseudoInstruction, PyCodeLocationInfoKind, encode_exception_table, oparg, }, varint::{write_signed_varint, write_varint}, }; @@ -342,7 +342,7 @@ impl CodeInfo { } } - let mut block_to_offset = vec![Label(0); blocks.len()]; + let mut block_to_offset = vec![Label::new(0); blocks.len()]; // block_to_index: maps block idx to instruction index (for exception table) // This is the index into the final instructions array, including EXTENDED_ARG and CACHE let mut 
block_to_index = vec![0u32; blocks.len()]; @@ -351,7 +351,7 @@ impl CodeInfo { loop { let mut num_instructions = 0; for (idx, block) in iter_blocks(&blocks) { - block_to_offset[idx.idx()] = Label(num_instructions as u32); + block_to_offset[idx.idx()] = Label::new(num_instructions as u32); // block_to_index uses the same value as block_to_offset but as u32 // because lasti in frame.rs is the index into instructions array // and instructions array index == byte offset (each instruction is 1 CodeUnit) @@ -369,7 +369,7 @@ impl CodeInfo { while next_block != BlockIdx::NULL { let block = &mut blocks[next_block]; // Track current instruction offset for jump direction resolution - let mut current_offset = block_to_offset[next_block.idx()].0; + let mut current_offset = block_to_offset[next_block.idx()].as_u32(); for info in &mut block.instructions { let target = info.target; let mut op = info.instr.expect_real(); @@ -380,7 +380,7 @@ impl CodeInfo { let offset_after = current_offset + old_arg_size as u32 + old_cache_entries; if target != BlockIdx::NULL { - let target_offset = block_to_offset[target.idx()].0; + let target_offset = block_to_offset[target.idx()].as_u32(); // Direction must be based on concrete instruction offsets. // Empty blocks can share offsets, so block-order-based resolution // may classify some jumps incorrectly. @@ -693,6 +693,33 @@ impl CodeInfo { None } } + (Instruction::LoadConst { consti }, Instruction::ToBool) => { + let consti = consti.get(curr.arg); + let constant = &self.metadata.consts[consti.as_usize()]; + if let ConstantData::Boolean { .. 
} = constant { + Some((curr_instr, OpArg::from(consti.as_u32()))) + } else { + None + } + } + (Instruction::LoadConst { consti }, Instruction::UnaryNot) => { + let constant = &self.metadata.consts[consti.get(curr.arg).as_usize()]; + match constant { + ConstantData::Boolean { value } => { + let (const_idx, _) = self + .metadata + .consts + .insert_full(ConstantData::Boolean { value: !value }); + Some(( + (Instruction::LoadConst { + consti: Arg::marker(), + }), + OpArg::new(const_idx as u32), + )) + } + _ => None, + } + } _ => None, } }; @@ -1073,15 +1100,19 @@ impl CodeInfo { impl InstrDisplayContext for CodeInfo { type Constant = ConstantData; - fn get_constant(&self, i: usize) -> &ConstantData { - &self.metadata.consts[i] + + fn get_constant(&self, consti: oparg::ConstIdx) -> &ConstantData { + &self.metadata.consts[consti.as_usize()] } + fn get_name(&self, i: usize) -> &str { self.metadata.names[i].as_ref() } - fn get_varname(&self, i: usize) -> &str { - self.metadata.varnames[i].as_ref() + + fn get_varname(&self, var_num: oparg::VarNum) -> &str { + self.metadata.varnames[var_num.as_usize()].as_ref() } + fn get_cell_name(&self, i: usize) -> &str { self.metadata .cellvars diff --git a/crates/codegen/src/snapshots/rustpython_codegen__compile__tests__const_bool_not_op.snap b/crates/codegen/src/snapshots/rustpython_codegen__compile__tests__const_bool_not_op.snap new file mode 100644 index 00000000000..f9a74c2055c --- /dev/null +++ b/crates/codegen/src/snapshots/rustpython_codegen__compile__tests__const_bool_not_op.snap @@ -0,0 +1,9 @@ +--- +source: crates/codegen/src/compile.rs +expression: "compile_exec_optimized(\"\\\nx = not True\n\")" +--- + 1 0 RESUME (0) + 1 LOAD_CONST (False) + 2 STORE_NAME (0, x) + 3 LOAD_CONST (None) + 4 RETURN_VALUE diff --git a/crates/codegen/src/symboltable.rs b/crates/codegen/src/symboltable.rs index 0d868bc0468..fdbdac2b2a7 100644 --- a/crates/codegen/src/symboltable.rs +++ b/crates/codegen/src/symboltable.rs @@ -2037,20 +2037,13 @@ impl 
SymbolTableBuilder { self.line_index_start(range), ); - // Mark non-generator comprehensions as inlined (PEP 709) - // inline_comp = entry->ste_comprehension && !entry->ste_generator && !ste->ste_can_see_class_scope - // We check is_generator and can_see_class_scope of parent - let parent_can_see_class = self - .tables - .get(self.tables.len().saturating_sub(2)) - .map(|t| t.can_see_class_scope) - .unwrap_or(false); - if !is_generator - && !parent_can_see_class - && let Some(table) = self.tables.last_mut() - { - table.comp_inlined = true; - } + // PEP 709: inlined comprehensions are not yet implemented in the + // compiler (is_inlined_comprehension_context always returns false), + // so do NOT mark comp_inlined here. Setting it would cause the + // symbol-table analyzer to merge comprehension-local symbols into + // the parent scope, while the compiler still emits a separate code + // object — leading to the merged symbols being missing from the + // comprehension's own symbol table lookup. // Register the passed argument to the generator function as the name ".0" self.register_name(".0", SymbolUsage::Parameter, range)?; diff --git a/crates/common/src/format.rs b/crates/common/src/format.rs index 40bc9e53046..930c764acf3 100644 --- a/crates/common/src/format.rs +++ b/crates/common/src/format.rs @@ -12,6 +12,19 @@ use rustpython_literal::format::Case; use crate::wtf8::{CodePoint, Wtf8, Wtf8Buf}; +/// Locale information for 'n' format specifier. +/// Contains thousands separator, decimal point, and grouping pattern +/// from the C library's `localeconv()`. +#[derive(Clone, Debug)] +pub struct LocaleInfo { + pub thousands_sep: String, + pub decimal_point: String, + /// Grouping pattern from `lconv.grouping`. + /// Each element is a group size. The last non-zero element repeats. + /// e.g. `[3, 0]` means groups of 3 repeating forever. 
+ pub grouping: Vec, +} + trait FormatParse { fn parse(text: &Wtf8) -> (Option, &Wtf8) where @@ -460,6 +473,189 @@ impl FormatSpec { } } + /// Returns true if this format spec uses the locale-aware 'n' format type. + pub fn has_locale_format(&self) -> bool { + matches!(self.format_type, Some(FormatType::Number(Case::Lower))) + } + + /// Insert locale-aware thousands separators into an integer string. + /// Follows CPython's GroupGenerator logic for variable-width grouping. + fn insert_locale_grouping(int_part: &str, locale: &LocaleInfo) -> String { + if locale.grouping.is_empty() || locale.thousands_sep.is_empty() || int_part.len() <= 1 { + return int_part.to_string(); + } + + let mut group_idx = 0; + let mut group_size = locale.grouping[0] as usize; + + if group_size == 0 { + return int_part.to_string(); + } + + // Collect groups of digits from right to left + let len = int_part.len(); + let mut groups: Vec<&str> = Vec::new(); + let mut pos = len; + + loop { + if pos <= group_size { + groups.push(&int_part[..pos]); + break; + } + + groups.push(&int_part[pos - group_size..pos]); + pos -= group_size; + + // Advance to next group size + if group_idx + 1 < locale.grouping.len() { + let next = locale.grouping[group_idx + 1] as usize; + if next != 0 { + group_size = next; + group_idx += 1; + } + // 0 means repeat previous group size forever + } + } + + // Groups were collected right-to-left, reverse to get left-to-right + groups.reverse(); + groups.join(&locale.thousands_sep) + } + + /// Apply locale-aware grouping and decimal point replacement to a formatted number. 
+ fn apply_locale_formatting(magnitude_str: String, locale: &LocaleInfo) -> String { + let mut parts = magnitude_str.splitn(2, '.'); + let int_part = parts.next().unwrap(); + let grouped = Self::insert_locale_grouping(int_part, locale); + + if let Some(frac_part) = parts.next() { + format!("{grouped}{}{frac_part}", locale.decimal_point) + } else { + grouped + } + } + + /// Format an integer with locale-aware 'n' format. + pub fn format_int_locale( + &self, + num: &BigInt, + locale: &LocaleInfo, + ) -> Result { + self.validate_format(FormatType::Decimal)?; + let magnitude = num.abs(); + + let raw_magnitude_str = match self.format_type { + Some(FormatType::Number(Case::Lower)) => self.format_int_radix(magnitude, 10), + _ => return self.format_int(num), + }?; + + let magnitude_str = Self::apply_locale_formatting(raw_magnitude_str, locale); + + let format_sign = self.sign.unwrap_or(FormatSign::Minus); + let sign_str = match num.sign() { + Sign::Minus => "-", + _ => match format_sign { + FormatSign::Plus => "+", + FormatSign::Minus => "", + FormatSign::MinusOrSpace => " ", + }, + }; + + self.format_sign_and_align(&AsciiStr::new(&magnitude_str), sign_str, FormatAlign::Right) + } + + /// Format a float with locale-aware 'n' format. 
+ pub fn format_float_locale( + &self, + num: f64, + locale: &LocaleInfo, + ) -> Result { + self.validate_format(FormatType::FixedPoint(Case::Lower))?; + let precision = self.precision.unwrap_or(6); + let magnitude = num.abs(); + + let raw_magnitude_str = match &self.format_type { + Some(FormatType::Number(case)) => { + let precision = if precision == 0 { 1 } else { precision }; + Ok(float::format_general( + precision, + magnitude, + *case, + self.alternate_form, + false, + )) + } + _ => return self.format_float(num), + }?; + + let magnitude_str = Self::apply_locale_formatting(raw_magnitude_str, locale); + + let format_sign = self.sign.unwrap_or(FormatSign::Minus); + let sign_str = if num.is_sign_negative() && !num.is_nan() { + "-" + } else { + match format_sign { + FormatSign::Plus => "+", + FormatSign::Minus => "", + FormatSign::MinusOrSpace => " ", + } + }; + + self.format_sign_and_align(&AsciiStr::new(&magnitude_str), sign_str, FormatAlign::Right) + } + + /// Format a complex number with locale-aware 'n' format. + pub fn format_complex_locale( + &self, + num: &Complex64, + locale: &LocaleInfo, + ) -> Result { + // Reuse format_complex_re_im with 'g' type to get the base formatted parts, + // then apply locale grouping. This matches CPython's format_complex_internal: + // 'n' → 'g', add_parens=0, skip_re=0. 
+ let locale_spec = FormatSpec { + format_type: Some(FormatType::GeneralFormat(Case::Lower)), + ..*self + }; + let (formatted_re, formatted_im) = locale_spec.format_complex_re_im(num)?; + + // Apply locale grouping to both parts + let grouped_re = if formatted_re.is_empty() { + formatted_re + } else { + // Split sign from magnitude, apply grouping, recombine + let (sign, mag) = if formatted_re.starts_with('-') + || formatted_re.starts_with('+') + || formatted_re.starts_with(' ') + { + formatted_re.split_at(1) + } else { + ("", formatted_re.as_str()) + }; + format!( + "{sign}{}", + Self::apply_locale_formatting(mag.to_string(), locale) + ) + }; + + // formatted_im is like "+1234j" or "-1234j" or "1234j" + // Split sign, magnitude, and 'j' suffix + let im_str = &formatted_im; + let (im_sign, im_rest) = if im_str.starts_with('+') || im_str.starts_with('-') { + im_str.split_at(1) + } else { + ("", im_str.as_str()) + }; + let im_mag = im_rest.strip_suffix('j').unwrap_or(im_rest); + let im_grouped = Self::apply_locale_formatting(im_mag.to_string(), locale); + let grouped_im = format!("{im_sign}{im_grouped}j"); + + // No parentheses for 'n' format (CPython: add_parens=0) + let magnitude_str = format!("{grouped_re}{grouped_im}"); + + self.format_sign_and_align(&AsciiStr::new(&magnitude_str), "", FormatAlign::Right) + } + pub fn format_bool(&self, input: bool) -> Result { let x = u8::from(input); match &self.format_type { diff --git a/crates/common/src/lock.rs b/crates/common/src/lock.rs index af680010821..cd7df512d83 100644 --- a/crates/common/src/lock.rs +++ b/crates/common/src/lock.rs @@ -68,32 +68,37 @@ pub type PyMappedRwLockWriteGuard<'a, T> = MappedRwLockWriteGuard<'a, RawRwLock, // can add fn const_{mutex,rw_lock}() if necessary, but we probably won't need to -/// Reset a `PyMutex` to its initial (unlocked) state after `fork()`. +/// Reset a lock to its initial (unlocked) state by zeroing its bytes. 
/// -/// After `fork()`, locks held by dead parent threads would deadlock in the -/// child. This writes `RawMutex::INIT` via the `Mutex::raw()` accessor, -/// bypassing the normal unlock path which may interact with parking_lot's -/// internal waiter queues. +/// After `fork()`, any lock held by a now-dead thread would remain +/// permanently locked. We zero the raw bytes (the unlocked state for all +/// `parking_lot` raw lock types) instead of using the normal unlock path, +/// which would interact with stale waiter queues. /// /// # Safety /// /// Must only be called from the single-threaded child process immediately /// after `fork()`, before any other thread is created. -#[cfg(unix)] -pub unsafe fn reinit_mutex_after_fork(mutex: &PyMutex) { - // Use Mutex::raw() to access the underlying lock without layout assumptions. - // parking_lot::RawMutex (AtomicU8) and RawCellMutex (Cell) both - // represent the unlocked state as all-zero bytes. +/// The type `T` must represent the unlocked state as all-zero bytes +/// (true for `parking_lot::RawMutex`, `RawRwLock`, `RawReentrantMutex`, etc.). +pub unsafe fn zero_reinit_after_fork(lock: *const T) { unsafe { - let raw = mutex.raw() as *const RawMutex as *mut u8; - core::ptr::write_bytes(raw, 0, core::mem::size_of::()); + core::ptr::write_bytes(lock as *mut u8, 0, core::mem::size_of::()); } } -/// Reset a `PyRwLock` to its initial (unlocked) state after `fork()`. +/// Reset a `PyMutex` after `fork()`. See [`zero_reinit_after_fork`]. +/// +/// # Safety /// -/// Same rationale as [`reinit_mutex_after_fork`] — dead threads' read or -/// write locks would cause permanent deadlock in the child. +/// Must only be called from the single-threaded child process immediately +/// after `fork()`, before any other thread is created. +#[cfg(unix)] +pub unsafe fn reinit_mutex_after_fork(mutex: &PyMutex) { + unsafe { zero_reinit_after_fork(mutex.raw()) } +} + +/// Reset a `PyRwLock` after `fork()`. See [`zero_reinit_after_fork`]. 
/// /// # Safety /// @@ -101,10 +106,7 @@ pub unsafe fn reinit_mutex_after_fork(mutex: &PyMutex) { /// after `fork()`, before any other thread is created. #[cfg(unix)] pub unsafe fn reinit_rwlock_after_fork(rwlock: &PyRwLock) { - unsafe { - let raw = rwlock.raw() as *const RawRwLock as *mut u8; - core::ptr::write_bytes(raw, 0, core::mem::size_of::()); - } + unsafe { zero_reinit_after_fork(rwlock.raw()) } } /// Reset a `PyThreadMutex` to its initial (unlocked, unowned) state after `fork()`. diff --git a/crates/common/src/lock/thread_mutex.rs b/crates/common/src/lock/thread_mutex.rs index 5b5b89f4eb1..884556c4476 100644 --- a/crates/common/src/lock/thread_mutex.rs +++ b/crates/common/src/lock/thread_mutex.rs @@ -54,6 +54,18 @@ impl RawThreadMutex { .is_some() } + /// Like `lock()` but wraps the blocking wait in `wrap_fn`. + /// The caller can use this to detach thread state while waiting. + pub fn lock_wrapped(&self, wrap_fn: F) -> bool { + let id = self.get_thread_id.nonzero_thread_id().get(); + if self.owner.load(Ordering::Relaxed) == id { + return false; + } + wrap_fn(&|| self.mutex.lock()); + self.owner.store(id, Ordering::Relaxed); + true + } + /// Returns `Some(true)` if able to successfully lock without blocking, `Some(false)` /// otherwise, and `None` when the mutex is already locked on the current thread. pub fn try_lock(&self) -> Option { @@ -135,6 +147,23 @@ impl ThreadMutex { None } } + + /// Like `lock()` but wraps the blocking wait in `wrap_fn`. + /// The caller can use this to detach thread state while waiting. 
+ pub fn lock_wrapped( + &self, + wrap_fn: F, + ) -> Option> { + if self.raw.lock_wrapped(wrap_fn) { + Some(ThreadMutexGuard { + mu: self, + marker: PhantomData, + }) + } else { + None + } + } + pub fn try_lock(&self) -> Result, TryLockThreadError> { match self.raw.try_lock() { Some(true) => Ok(ThreadMutexGuard { diff --git a/crates/compiler-core/Cargo.toml b/crates/compiler-core/Cargo.toml index f4e619b95a4..7be58432cdf 100644 --- a/crates/compiler-core/Cargo.toml +++ b/crates/compiler-core/Cargo.toml @@ -14,6 +14,7 @@ ruff_source_file = { workspace = true } rustpython-wtf8 = { workspace = true } bitflags = { workspace = true } +bitflagset = { workspace = true } itertools = { workspace = true } malachite-bigint = { workspace = true } num-complex = { workspace = true } diff --git a/crates/compiler-core/src/bytecode.rs b/crates/compiler-core/src/bytecode.rs index 46182962654..5120d371a39 100644 --- a/crates/compiler-core/src/bytecode.rs +++ b/crates/compiler-core/src/bytecode.rs @@ -11,7 +11,7 @@ use bitflags::bitflags; use core::{ cell::UnsafeCell, hash, mem, - ops::Deref, + ops::{Deref, Index, IndexMut}, sync::atomic::{AtomicU8, AtomicU16, AtomicUsize, Ordering}, }; use itertools::Itertools; @@ -26,13 +26,13 @@ pub use crate::bytecode::{ oparg::{ BinaryOperator, BuildSliceArgCount, CommonConstant, ComparisonOperator, ConvertValueOparg, IntrinsicFunction1, IntrinsicFunction2, Invert, Label, LoadAttr, LoadSuperAttr, - MakeFunctionFlags, NameIdx, OpArg, OpArgByte, OpArgState, OpArgType, RaiseKind, ResumeType, - SpecialMethod, UnpackExArgs, + MakeFunctionFlag, MakeFunctionFlags, NameIdx, OpArg, OpArgByte, OpArgState, OpArgType, + RaiseKind, ResumeType, SpecialMethod, UnpackExArgs, }, }; mod instruction; -mod oparg; +pub mod oparg; /// Exception table entry for zero-cost exception handling /// Format: (start, size, target, depth<<1|lasti) @@ -293,6 +293,47 @@ impl ConstantBag for BasicBag { } } +#[derive(Clone)] +pub struct Constants(Box<[C]>); + +impl Deref for 
Constants { + type Target = [C]; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl Index for Constants { + type Output = C; + + fn index(&self, consti: oparg::ConstIdx) -> &Self::Output { + &self.0[consti.as_usize()] + } +} + +impl FromIterator for Constants { + fn from_iter>(iter: T) -> Self { + Self(iter.into_iter().collect()) + } +} + +// TODO: Newtype "CodeObject.varnames". Make sure only `oparg:VarNum` can be used as index +impl Index for [T] { + type Output = T; + + fn index(&self, var_num: oparg::VarNum) -> &Self::Output { + &self[var_num.as_usize()] + } +} + +// TODO: Newtype "CodeObject.varnames". Make sure only `oparg:VarNum` can be used as index +impl IndexMut for [T] { + fn index_mut(&mut self, var_num: oparg::VarNum) -> &mut Self::Output { + &mut self[var_num.as_usize()] + } +} + /// Primary container of a single code object. Each python function has /// a code object. Also a module has a code object. #[derive(Clone)] @@ -312,7 +353,7 @@ pub struct CodeObject { /// Qualified name of the object (like CPython's co_qualname) pub qualname: C::Name, pub cell2arg: Option>, - pub constants: Box<[C]>, + pub constants: Constants, pub names: Box<[C::Name]>, pub varnames: Box<[C::Name]>, pub cellvars: Box<[C::Name]>, @@ -983,7 +1024,7 @@ impl CodeObject { } // arrow and offset - let arrow = if label_targets.contains(&Label(offset as u32)) { + let arrow = if label_targets.contains(&Label::new(offset as u32)) { ">>" } else { " " @@ -1012,16 +1053,14 @@ impl CodeObject { pub fn map_bag(self, bag: Bag) -> CodeObject { let map_names = |names: Box<[C::Name]>| { names - .into_vec() - .into_iter() + .iter() .map(|x| bag.make_name(x.as_ref())) .collect::>() }; CodeObject { constants: self .constants - .into_vec() - .into_iter() + .iter() .map(|x| bag.make_constant(x.borrow_constant())) .collect(), names: map_names(self.names), @@ -1095,11 +1134,11 @@ impl fmt::Display for CodeObject { pub trait InstrDisplayContext { type Constant: Constant; - fn 
get_constant(&self, i: usize) -> &Self::Constant; + fn get_constant(&self, consti: oparg::ConstIdx) -> &Self::Constant; fn get_name(&self, i: usize) -> &str; - fn get_varname(&self, i: usize) -> &str; + fn get_varname(&self, var_num: oparg::VarNum) -> &str; fn get_cell_name(&self, i: usize) -> &str; } @@ -1107,16 +1146,16 @@ pub trait InstrDisplayContext { impl InstrDisplayContext for CodeObject { type Constant = C; - fn get_constant(&self, i: usize) -> &C { - &self.constants[i] + fn get_constant(&self, consti: oparg::ConstIdx) -> &C { + &self.constants[consti] } fn get_name(&self, i: usize) -> &str { self.names[i].as_ref() } - fn get_varname(&self, i: usize) -> &str { - self.varnames[i].as_ref() + fn get_varname(&self, var_num: oparg::VarNum) -> &str { + self.varnames[var_num].as_ref() } fn get_cell_name(&self, i: usize) -> &str { diff --git a/crates/compiler-core/src/bytecode/instruction.rs b/crates/compiler-core/src/bytecode/instruction.rs index 754447956fa..9eca2220977 100644 --- a/crates/compiler-core/src/bytecode/instruction.rs +++ b/crates/compiler-core/src/bytecode/instruction.rs @@ -4,9 +4,9 @@ use crate::{ bytecode::{ BorrowedConstant, Constant, InstrDisplayContext, oparg::{ - BinaryOperator, BuildSliceArgCount, CommonConstant, ComparisonOperator, + self, BinaryOperator, BuildSliceArgCount, CommonConstant, ComparisonOperator, ConvertValueOparg, IntrinsicFunction1, IntrinsicFunction2, Invert, Label, LoadAttr, - LoadSuperAttr, MakeFunctionFlags, NameIdx, OpArg, OpArgByte, OpArgType, RaiseKind, + LoadSuperAttr, MakeFunctionFlag, NameIdx, OpArg, OpArgByte, OpArgType, RaiseKind, SpecialMethod, StoreFastLoadFast, UnpackExArgs, }, }, @@ -133,7 +133,7 @@ pub enum Instruction { i: Arg, } = 62, DeleteFast { - var_num: Arg, + var_num: Arg, } = 63, DeleteGlobal { namei: Arg, @@ -186,25 +186,25 @@ pub enum Instruction { idx: Arg, } = 81, LoadConst { - consti: Arg, + consti: Arg, } = 82, LoadDeref { i: Arg, } = 83, LoadFast { - var_num: Arg, + var_num: Arg, } = 84, 
LoadFastAndClear { - var_num: Arg, + var_num: Arg, } = 85, LoadFastBorrow { - var_num: Arg, + var_num: Arg, } = 86, LoadFastBorrowLoadFastBorrow { var_nums: Arg, } = 87, LoadFastCheck { - var_num: Arg, + var_num: Arg, } = 88, LoadFastLoadFast { var_nums: Arg, @@ -264,7 +264,7 @@ pub enum Instruction { i: Arg, } = 107, SetFunctionAttribute { - flag: Arg, + flag: Arg, } = 108, SetUpdate { i: Arg, @@ -276,7 +276,7 @@ pub enum Instruction { i: Arg, } = 111, StoreFast { - var_num: Arg, + var_num: Arg, } = 112, StoreFastLoadFast { var_nums: Arg, @@ -1120,26 +1120,29 @@ impl InstructionMetadata for Instruction { }; } - let varname = |i: u32| ctx.get_varname(i as usize); + let varname = |var_num: oparg::VarNum| ctx.get_varname(var_num); let name = |i: u32| ctx.get_name(i as usize); let cell_name = |i: u32| ctx.get_cell_name(i as usize); - let fmt_const = - |op: &str, arg: OpArg, f: &mut fmt::Formatter<'_>, idx: &Arg| -> fmt::Result { - let value = ctx.get_constant(idx.get(arg) as usize); - match value.borrow_constant() { - BorrowedConstant::Code { code } if expand_code_objects => { - write!(f, "{op:pad$}({code:?}):")?; - code.display_inner(f, true, level + 1)?; - Ok(()) - } - c => { - write!(f, "{op:pad$}(")?; - c.fmt_display(f)?; - write!(f, ")") - } + let fmt_const = |op: &str, + arg: OpArg, + f: &mut fmt::Formatter<'_>, + consti: &Arg| + -> fmt::Result { + let value = ctx.get_constant(consti.get(arg)); + match value.borrow_constant() { + BorrowedConstant::Code { code } if expand_code_objects => { + write!(f, "{op:pad$}({code:?}):")?; + code.display_inner(f, true, level + 1)?; + Ok(()) } - }; + c => { + write!(f, "{op:pad$}(")?; + c.fmt_display(f)?; + write!(f, ")") + } + } + }; match self { Self::BinarySlice => w!(BINARY_SLICE), @@ -1223,16 +1226,16 @@ impl InstructionMetadata for Instruction { let oparg = var_nums.get(arg); let idx1 = oparg >> 4; let idx2 = oparg & 15; - let name1 = varname(idx1); - let name2 = varname(idx2); + let name1 = varname(idx1.into()); + let 
name2 = varname(idx2.into()); write!(f, "{:pad$}({}, {})", "LOAD_FAST_LOAD_FAST", name1, name2) } Self::LoadFastBorrowLoadFastBorrow { var_nums } => { let oparg = var_nums.get(arg); let idx1 = oparg >> 4; let idx2 = oparg & 15; - let name1 = varname(idx1); - let name2 = varname(idx2); + let name1 = varname(idx1.into()); + let name2 = varname(idx2.into()); write!( f, "{:pad$}({}, {})", @@ -1359,8 +1362,8 @@ impl InstructionMetadata for Instruction { f, "{:pad$}({}, {})", "STORE_FAST_STORE_FAST", - varname(idx1), - varname(idx2) + varname(idx1.into()), + varname(idx2.into()) ) } Self::StoreGlobal { namei } => w!(STORE_GLOBAL, name = namei), diff --git a/crates/compiler-core/src/bytecode/oparg.rs b/crates/compiler-core/src/bytecode/oparg.rs index 729b84db591..6de567048fe 100644 --- a/crates/compiler-core/src/bytecode/oparg.rs +++ b/crates/compiler-core/src/bytecode/oparg.rs @@ -1,5 +1,3 @@ -use bitflags::bitflags; - use core::fmt; use crate::{ @@ -293,77 +291,6 @@ pub type NameIdx = u32; impl OpArgType for u32 {} -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Ord, PartialOrd)] -#[repr(transparent)] -pub struct Label(pub u32); - -impl Label { - pub const fn new(value: u32) -> Self { - Self(value) - } -} - -impl From for Label { - fn from(value: u32) -> Self { - Self::new(value) - } -} - -impl From