feat: AMX tile matmul via inline asm (stable Rust 1.94) amx_matmul.rs: tile_loadconfig, tile_zero, tile_release, tile_dpbusd All via asm!() — no nightly needed. Verified working on this CPU. TileConfig::for_dpbusd(): configures 3 tiles for TDPBUSD operation. tile_dpbusd(): C[16×16 i32] += A[16×64 u8] × B[64×16 i8] = 16384 MACs in ONE instruction. For GGUF codebook distance table build: 4096² pairs × dim dot products Tiled: (4096/16)² = 65536 tiles × (dim/64) TDPBUSD per tile ~20 min for all models combined (vs ~1:20h VNNI, 24-48h scalar) 2 tests passing. Processor: Sapphire Rapids+ with AMX-TILE+INT8+BF16. https://claude.ai/code/session_01ChLvBfpJS8dQhHxRD4pYNp #127
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Continuous-integration workflow.
# Runs on pull requests, merge-queue groups, and pushes to master/main.
on:
  pull_request:
    # Changes that only touch the latest-deps workflow do not trigger this one.
    paths-ignore:
      - '.github/workflows/latest-deps.yaml'
  merge_group:
  push:
    branches:
      - master
      - main

name: Continuous integration

# Workflow-level environment, visible to every job and step below.
env:
  CARGO_TERM_COLOR: always
  HOST: x86_64-unknown-linux-gnu
  FEATURES: "approx,serde,rayon"
  RUSTFLAGS: "-D warnings"
  MSRV: 1.64.0
  BLAS_MSRV: 1.71.1

jobs:
  # Re-exports the MSRV values as job outputs so that other jobs can use them
  # in places that read `needs.*.outputs` (the `tests` matrix and the
  # `blas-msrv` toolchain input).
  pass-msrv:
    runs-on: ubuntu-latest
    name: Pass MSRV values to other jobs
    outputs:
      # The outputs are read straight from the `env` context; the step below
      # has no `id`, so nothing here refers to what it writes.
      MSRV: ${{ env.MSRV }}
      BLAS_MSRV: ${{ env.BLAS_MSRV }}
    steps:
      - name: Pass MSRV
        run: |
          echo "MSRV=${{ env.MSRV }}" >> $GITHUB_OUTPUT
          echo "BLAS_MSRV=${{ env.BLAS_MSRV }}" >> $GITHUB_OUTPUT

  # Lints with clippy, once with the default CI feature set and once with the
  # `native` feature.
  clippy:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        rust:
          - stable
    name: clippy/${{ matrix.rust }}
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@master
        with:
          toolchain: ${{ matrix.rust }}
          components: clippy
      - uses: Swatinem/rust-cache@v2
      - run: cargo clippy --features approx,serde,rayon -- -D warnings
      - run: cargo clippy --features native -- -D warnings

  # Formatting check for the whole workspace.
  format:
    runs-on: ubuntu-latest
    name: format/stable
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@stable
        with:
          components: rustfmt
      - run: cargo fmt --all --check

  # Builds without default features for a bare-metal target.
  nostd:
    runs-on: ubuntu-latest
    # Matrix entries marked `experimental: true` may fail without failing the job.
    continue-on-error: ${{ matrix.experimental }}
    strategy:
      matrix:
        include:
          - rust: stable
            experimental: false
            target: thumbv6m-none-eabi
    name: nostd/${{ matrix.target }}/${{ matrix.rust }}
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@stable
        with:
          toolchain: ${{ matrix.rust }}
          targets: ${{ matrix.target }}
      - name: Tests
        run: |
          cargo rustc "--target=${{ matrix.target }}" --no-default-features --features portable-atomic-critical-section

  # Main test run on stable, beta, and the MSRV toolchain from `pass-msrv`.
  tests:
    runs-on: ubuntu-latest
    needs: pass-msrv
    strategy:
      matrix:
        rust:
          - stable
          - beta
          - ${{ needs.pass-msrv.outputs.MSRV }}
    name: tests/${{ matrix.rust }}
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@master
        with:
          toolchain: ${{ matrix.rust }}
      - uses: rui314/setup-mold@v1
      - uses: Swatinem/rust-cache@v2
      - uses: taiki-e/install-action@nextest
      - name: Install openblas
        run: sudo apt-get install libopenblas-dev gfortran
      - run: ./scripts/all-tests.sh "$FEATURES" ${{ matrix.rust }}

  # Builds and tests the `native` feature, alone and together with `approx`.
  native-backend:
    runs-on: ubuntu-latest
    name: native-backend/stable
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@stable
      - uses: Swatinem/rust-cache@v2
      - uses: taiki-e/install-action@nextest
      - name: Build native backend
        run: cargo build --features native
      - name: Test native backend
        run: cargo nextest run -p ndarray --features native
      - name: Test native + approx
        run: cargo nextest run -p ndarray --features native,approx

  # BLAS integration tests on the BLAS-specific MSRV toolchain.
  blas-msrv:
    runs-on: ubuntu-latest
    name: blas-msrv
    needs: pass-msrv
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@master
        with:
          toolchain: ${{ needs.pass-msrv.outputs.BLAS_MSRV }}
      - uses: rui314/setup-mold@v1
      - uses: Swatinem/rust-cache@v2
      - uses: taiki-e/install-action@nextest
      - name: Install openblas
        run: sudo apt-get install libopenblas-dev gfortran
      # $BLAS_MSRV comes from the workflow-level `env` block.
      - run: ./scripts/blas-integ-tests.sh $BLAS_MSRV

  miri:
    # Nightly-only. Cannot run SIMD intrinsics or FFI.
    # Only runs on merge queue / push to main — never blocks PR checks.
    if: github.event_name == 'merge_group' || github.event_name == 'push'
    runs-on: ubuntu-latest
    name: miri
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@nightly
        with:
          components: miri
      - uses: Swatinem/rust-cache@v2
      - uses: taiki-e/install-action@nextest
      - run: ./scripts/miri-tests.sh

  # Runs the test script for non-host targets via `cross`.
  cross_test:
    #if: ${{ github.event_name == 'merge_group' }}
    runs-on: ubuntu-latest
    strategy:
      matrix:
        include:
          - rust: stable
            target: s390x-unknown-linux-gnu
          - rust: stable
            target: i686-unknown-linux-gnu
    name: cross_test/${{ matrix.target }}/${{ matrix.rust }}
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@master
        with:
          toolchain: ${{ matrix.rust }}
          targets: ${{ matrix.target }}
      - uses: rui314/setup-mold@v1
      - uses: Swatinem/rust-cache@v2
      - name: Install cross
        run: cargo install cross
      - run: ./scripts/cross-tests.sh "approx,serde,rayon" ${{ matrix.rust }} ${{ matrix.target }}

  cargo-careful:
    # Nightly-only. Only runs on merge queue / push — never blocks PR checks.
    if: github.event_name == 'merge_group' || github.event_name == 'push'
    runs-on: ubuntu-latest
    name: cargo-careful
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@nightly
      - uses: Swatinem/rust-cache@v2
      - uses: taiki-e/install-action@nextest
      - name: Install cargo-careful
        run: cargo install cargo-careful
      - run: cargo careful nextest run -Zcareful-sanitizer --features="$FEATURES"

  docs:
    # Nightly needed for --cfg docsrs. Only on merge queue / push.
    if: github.event_name == 'merge_group' || github.event_name == 'push'
    runs-on: ubuntu-latest
    name: docs/nightly
    env:
      RUSTDOCFLAGS: "-Dwarnings --cfg docsrs"
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@nightly
      - run: cargo doc --no-deps --all-features

  # Single aggregate status: succeeds only if every job listed in `needs`
  # finished as "success" or "skipped".
  conclusion:
    # NOTE(review): `blas-msrv` is not listed here, so its result does not
    # affect this job. Confirm whether that omission is intentional.
    needs:
      - clippy
      - format
      - nostd
      - tests
      - native-backend
      - miri
      - cross_test
      - cargo-careful
      - docs
    # Required: by default a job is skipped when any job it needs fails or is
    # skipped. miri, cargo-careful and docs are skipped on pull_request events
    # by their own `if:`, so without this the check below would never run on
    # PRs, and a failing dependency would skip this job instead of failing it.
    if: always()
    runs-on: ubuntu-latest
    steps:
      - name: Result
        run: |
          jq -C <<< "${needs}"
          # Check if all needs were successful or skipped.
          # jq prints `true` or `false`; the shell then runs that word as a
          # command, so the step's exit status reflects the check.
          "$(jq -r 'all(.result as $result | (["success", "skipped"] | contains([$result])))' <<< "${needs}")"
        env:
          # JSON object with the result of every job in `needs`.
          needs: ${{ toJson(needs) }}