diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml index 46a0ef0c5bb..73fdacc10ae 100644 --- a/.github/workflows/bench.yml +++ b/.github/workflows/bench.yml @@ -18,6 +18,11 @@ jobs: timeout-minutes: 120 steps: - uses: actions/checkout@v6 + - uses: ./.github/actions/setup-rust + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} + - name: Build s3_cat binary + run: cargo build --bin s3_cat --package bench-vortex --release - name: Setup AWS CLI uses: aws-actions/configure-aws-credentials@v5 with: @@ -30,7 +35,7 @@ jobs: sudo apt-get update && sudo apt-get install -y jq bash scripts/commit-json.sh > new-commit.json - bash scripts/cat-s3.sh vortex-benchmark-results-database commits.json new-commit.json + target/release/s3_cat vortex-benchmark-results-database-test commits.json new-commit.json bench: timeout-minutes: 120 @@ -69,7 +74,7 @@ jobs: RUSTFLAGS: "-C target-cpu=native -C force-frame-pointers=yes" # The main difference between this and `bench-pr.yml` is that we add the `lance` feature. run: | - cargo build --bin ${{ matrix.benchmark.id }} --package bench-vortex --profile release_debug --features lance + cargo build --bin ${{ matrix.benchmark.id }} --bin s3_cat --package bench-vortex --profile release_debug --features lance - name: Setup Polar Signals uses: polarsignals/gh-actions-ps-profiling@v0.6.0 @@ -96,7 +101,7 @@ jobs: - name: Upload Benchmark Results shell: bash run: | - bash scripts/cat-s3.sh vortex-benchmark-results-database data.json.gz ${{ matrix.benchmark.id }}.json + target/release_debug/s3_cat vortex-benchmark-results-database-test data.json.gz ${{ matrix.benchmark.id }}.json sql: uses: ./.github/workflows/sql-benchmarks.yml secrets: inherit diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f5d35fc42c0..b165069eb8d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -619,15 +619,23 @@ jobs: - uses: ./.github/actions/setup-rust with: repo-token: ${{ secrets.GITHUB_TOKEN }} - toolchain: nightly-2025-06-26 - targets: "wasm32-wasip1" - components: "rust-src" + targets: "wasm32-wasip1,wasm32-unknown-unknown" + - name: Setup Wasmer uses: wasmerio/setup-wasmer@v3.1 - # there is a compiler bug in nightly (but not in nightly-2025-06-26) - - run: cargo +nightly-2025-06-26 -Zbuild-std=panic_abort,std build --target wasm32-wasip1 + - name: Build WASI binary + run: cargo build --target wasm32-wasip1 + working-directory: ./wasm-test + - name: Run WASI test + run: wasmer run ./target/wasm32-wasip1/debug/wasm-test.wasm working-directory: ./wasm-test - - run: wasmer run ./target/wasm32-wasip1/debug/wasm-test.wasm + + # wasm-bindgen-test with headless browser + # wasm-pack automatically installs matching wasm-bindgen-cli from Cargo.lock + - name: Install wasm-pack + run: curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh + - name: Run wasm-bindgen tests (headless Firefox) + run: wasm-pack test --headless --firefox working-directory: ./wasm-test miri: diff --git a/.github/workflows/sql-benchmarks.yml b/.github/workflows/sql-benchmarks.yml index 949075339ef..63d574a5c6d 100644 --- a/.github/workflows/sql-benchmarks.yml +++ b/.github/workflows/sql-benchmarks.yml @@ -145,7 +145,7 @@ jobs: env: RUSTFLAGS: "-C target-cpu=native -C force-frame-pointers=yes" run: | - cargo build --bin query_bench --package bench-vortex --profile release_debug ${{ matrix.build_args }} + cargo build --bin query_bench --bin s3_cat --package bench-vortex --profile release_debug ${{ matrix.build_args }} - name: Generate data shell: bash @@ 
-265,4 +265,4 @@ jobs: if: inputs.mode == 'develop' shell: bash run: | - bash scripts/cat-s3.sh vortex-benchmark-results-database data.json.gz results.json + target/release_debug/s3_cat vortex-benchmark-results-database-test data.json.gz results.json diff --git a/Cargo.lock b/Cargo.lock index cf5c3a600f7..a206fbb58cd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -245,7 +245,7 @@ dependencies = [ "arrow-schema", "arrow-select", "atoi", - "base64", + "base64 0.22.1", "chrono", "comfy-table", "half", @@ -680,6 +680,7 @@ dependencies = [ "aws-credential-types", "aws-sigv4", "aws-smithy-async", + "aws-smithy-eventstream", "aws-smithy-http", "aws-smithy-runtime", "aws-smithy-runtime-api", @@ -695,6 +696,40 @@ dependencies = [ "uuid", ] +[[package]] +name = "aws-sdk-s3" +version = "1.109.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c6d81b75f8ff78882e70c5909804b44553d56136899fb4015a0a68ecc870e0e" +dependencies = [ + "aws-credential-types", + "aws-runtime", + "aws-sigv4", + "aws-smithy-async", + "aws-smithy-checksums", + "aws-smithy-eventstream", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-smithy-xml", + "aws-types", + "bytes", + "fastrand", + "hex", + "hmac", + "http 0.2.12", + "http 1.3.1", + "http-body 0.4.6", + "lru", + "percent-encoding", + "regex-lite", + "sha2", + "tracing", + "url", +] + [[package]] name = "aws-sdk-sso" version = "1.86.0" @@ -769,19 +804,25 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bffc03068fbb9c8dd5ce1c6fb240678a5cffb86fb2b7b1985c999c4b83c8df68" dependencies = [ "aws-credential-types", + "aws-smithy-eventstream", "aws-smithy-http", "aws-smithy-runtime-api", "aws-smithy-types", "bytes", + "crypto-bigint 0.5.5", "form_urlencoded", "hex", "hmac", "http 0.2.12", "http 1.3.1", + "p256", "percent-encoding", + "ring", "sha2", + "subtle", "time", "tracing", + "zeroize", ] [[package]] @@ -795,12 +836,44 @@ dependencies = [ "tokio", ] +[[package]] +name = "aws-smithy-checksums" +version = "0.63.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "165d8583d8d906e2fb5511d29201d447cc710864f075debcdd9c31c265412806" +dependencies = [ + "aws-smithy-http", + "aws-smithy-types", + "bytes", + "crc-fast", + "hex", + "http 0.2.12", + "http-body 0.4.6", + "md-5", + "pin-project-lite", + "sha1", + "sha2", + "tracing", +] + +[[package]] +name = "aws-smithy-eventstream" +version = "0.60.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9656b85088f8d9dc7ad40f9a6c7228e1e8447cdf4b046c87e152e0805dea02fa" +dependencies = [ + "aws-smithy-types", + "bytes", + "crc32fast", +] + [[package]] name = "aws-smithy-http" version = "0.62.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3feafd437c763db26aa04e0cc7591185d0961e64c61885bece0fb9d50ceac671" dependencies = [ + "aws-smithy-eventstream", "aws-smithy-runtime-api", "aws-smithy-types", "bytes", @@ -824,17 +897,23 @@ dependencies = [ "aws-smithy-async", "aws-smithy-runtime-api", "aws-smithy-types", - "h2", + "h2 0.3.27", + "h2 0.4.12", + "http 0.2.12", "http 1.3.1", - "hyper", - "hyper-rustls", + "http-body 0.4.6", + "hyper 0.14.32", + "hyper 1.7.0", + "hyper-rustls 0.24.2", + "hyper-rustls 0.27.7", "hyper-util", "pin-project-lite", - "rustls", - "rustls-native-certs", + "rustls 0.21.12", + "rustls 0.23.32", + "rustls-native-certs 0.8.1", "rustls-pki-types", "tokio", - "tokio-rustls", + "tokio-rustls 0.26.4", "tower", 
"tracing", ] @@ -917,6 +996,7 @@ dependencies = [ "base64-simd", "bytes", "bytes-utils", + "futures-core", "http 0.2.12", "http 1.3.1", "http-body 0.4.6", @@ -929,6 +1009,8 @@ dependencies = [ "ryu", "serde", "time", + "tokio", + "tokio-util", ] [[package]] @@ -965,6 +1047,18 @@ dependencies = [ "tokio", ] +[[package]] +name = "base16ct" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "349a06037c7bf932dd7e7d1f653678b2038b9ad46a74102f1fc7bd7872678cce" + +[[package]] +name = "base64" +version = "0.21.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" + [[package]] name = "base64" version = "0.22.1" @@ -997,6 +1091,8 @@ dependencies = [ "arrow-schema", "arrow-select", "async-trait", + "aws-config", + "aws-sdk-s3", "bytes", "bzip2", "clap", @@ -1005,8 +1101,10 @@ dependencies = [ "datafusion-physical-plan", "dirs", "erased-serde", + "flate2", "futures", "glob", + "hex", "humansize", "indicatif", "itertools 0.14.0", @@ -1023,6 +1121,7 @@ dependencies = [ "parking_lot", "parquet", "paste", + "phf 0.13.1", "rand 0.9.2", "rayon", "regex", @@ -1397,7 +1496,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a6139a8597ed92cf816dfb33f5dd6cf0bb93a6adc938f11039f371bc5bcd26c3" dependencies = [ "chrono", - "phf", + "phf 0.12.1", ] [[package]] @@ -1479,7 +1578,7 @@ checksum = "af491d569909a7e4dee0ad7db7f5341fef5c614d5b8ec8cf765732aba3cff681" dependencies = [ "serde", "termcolor", - "unicode-width 0.1.14", + "unicode-width 0.2.0", ] [[package]] @@ -1555,7 +1654,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "117725a109d387c937a1533ce01b450cbde6b88abceea8473c4d7a85853cda3c" dependencies = [ "lazy_static", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -1650,6 +1749,16 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "console_error_panic_hook" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a06aeb73f470f66dcdbf7223caeebb85984942f22f1adb2a088cf9668146bbbc" +dependencies = [ + "cfg-if", + "wasm-bindgen", +] + [[package]] name = "const-oid" version = "0.9.6" @@ -1767,6 +1876,19 @@ version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5" +[[package]] +name = "crc-fast" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6bf62af4cc77d8fe1c22dde4e721d87f2f54056139d8c412e1366b740305f56f" +dependencies = [ + "crc", + "digest", + "libc", + "rand 0.9.2", + "regex", +] + [[package]] name = "crc32c" version = "0.6.8" @@ -1877,6 +1999,28 @@ version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" +[[package]] +name = "crypto-bigint" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef2b4b23cddf68b89b8f8069890e8c270d54e2d5fe1b143820234805e4cb17ef" +dependencies = [ + "generic-array", + "rand_core 0.6.4", + "subtle", + "zeroize", +] + +[[package]] +name = "crypto-bigint" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0dc92fb57ca44df6db8059111ab3af99a63d5d0f8375d9972e319a379c6bab76" +dependencies = [ + "rand_core 0.6.4", + "subtle", +] + [[package]] name = "crypto-common" version = "0.1.6" 
@@ -2173,7 +2317,7 @@ dependencies = [ "ahash", "arrow", "arrow-ipc", - "base64", + "base64 0.22.1", "chrono", "half", "hashbrown 0.14.5", @@ -2381,7 +2525,7 @@ checksum = "7de2782136bd6014670fd84fe3b0ca3b3e4106c96403c3ae05c0598577139977" dependencies = [ "arrow", "arrow-buffer", - "base64", + "base64 0.22.1", "blake2", "blake3", "chrono", @@ -2718,6 +2862,16 @@ version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "26bf8fc351c5ed29b5c2f0cbbac1b209b74f60ecd62e675a998df72c49af5204" +[[package]] +name = "der" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1a467a65c5e759bce6e65eaf91cc29f466cdc57cb65777bd646872a8a1fd4de" +dependencies = [ + "const-oid", + "zeroize", +] + [[package]] name = "der" version = "0.7.10" @@ -2801,7 +2955,7 @@ dependencies = [ "libc", "option-ext", "redox_users", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -2862,12 +3016,44 @@ version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813" +[[package]] +name = "ecdsa" +version = "0.14.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413301934810f597c1d19ca71c8710e99a3f1ba28a0d2ebc01551a2daeea3c5c" +dependencies = [ + "der 0.6.1", + "elliptic-curve", + "rfc6979", + "signature 1.6.4", +] + [[package]] name = "either" version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" +[[package]] +name = "elliptic-curve" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7bb888ab5300a19b8e5bceef25ac745ad065f3c9f7efc6de1b91958110891d3" +dependencies = [ + "base16ct", + "crypto-bigint 0.4.9", + "der 0.6.1", + "digest", + "ff", + "generic-array", + "group", + "pkcs8 0.9.0", + "rand_core 0.6.4", + "sec1", + "subtle", + "zeroize", +] + [[package]] name = "encode_unicode" version = "1.0.0" @@ -2970,7 +3156,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] @@ -3070,6 +3256,16 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" +[[package]] +name = "ff" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d013fc25338cc558c5c2cfbad646908fb23591e2404481826742b651c9af7160" +dependencies = [ + "rand_core 0.6.4", + "subtle", +] + [[package]] name = "filetime" version = "0.2.26" @@ -3406,6 +3602,36 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f9e2d4c0a8296178d8802098410ca05d86b17a10bb5ab559b3fb404c1f948220" +[[package]] +name = "group" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5dfbfb3a6cfbd390d5c9564ab283a0349b9b9fcd46a706c1eb10e0db70bfbac7" +dependencies = [ + "ff", + "rand_core 0.6.4", + "subtle", +] + +[[package]] +name = "h2" +version = "0.3.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0beca50380b1fc32983fc1cb4587bfa4bb9e78fc259aad4a0032d2080309222d" +dependencies = [ + "bytes", + "fnv", + "futures-core", + "futures-sink", + "futures-util", + "http 0.2.12", + 
"indexmap", + "slab", + "tokio", + "tokio-util", + "tracing", +] + [[package]] name = "h2" version = "0.4.12" @@ -3475,6 +3701,8 @@ dependencies = [ "allocator-api2", "equivalent", "foldhash 0.2.0", + "serde", + "serde_core", ] [[package]] @@ -3581,6 +3809,12 @@ version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" +[[package]] +name = "httpdate" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" + [[package]] name = "humansize" version = "2.1.3" @@ -3596,6 +3830,30 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "135b12329e5e3ce057a9f972339ea52bc954fe1e9358ef27f95e89716fbc5424" +[[package]] +name = "hyper" +version = "0.14.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41dfc780fdec9373c01bae43289ea34c972e40ee3c9f6b3c8801a35f35586ce7" +dependencies = [ + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "h2 0.3.27", + "http 0.2.12", + "http-body 0.4.6", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "socket2 0.5.10", + "tokio", + "tower-service", + "tracing", + "want", +] + [[package]] name = "hyper" version = "1.7.0" @@ -3606,7 +3864,7 @@ dependencies = [ "bytes", "futures-channel", "futures-core", - "h2", + "h2 0.4.12", "http 1.3.1", "http-body 1.0.1", "httparse", @@ -3618,6 +3876,22 @@ dependencies = [ "want", ] +[[package]] +name = "hyper-rustls" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590" +dependencies = [ + "futures-util", + "http 0.2.12", + "hyper 0.14.32", + "log", + "rustls 0.21.12", + "rustls-native-certs 0.6.3", + "tokio", + "tokio-rustls 0.24.1", +] + [[package]] name = "hyper-rustls" version = "0.27.7" @@ -3625,13 +3899,13 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58" dependencies = [ "http 1.3.1", - "hyper", + "hyper 1.7.0", "hyper-util", - "rustls", - "rustls-native-certs", + "rustls 0.23.32", + "rustls-native-certs 0.8.1", "rustls-pki-types", "tokio", - "tokio-rustls", + "tokio-rustls 0.26.4", "tower-service", "webpki-roots", ] @@ -3642,19 +3916,19 @@ version = "0.1.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c6995591a8f1380fcb4ba966a252a4b29188d51d2b89e3a252f5305be65aea8" dependencies = [ - "base64", + "base64 0.22.1", "bytes", "futures-channel", "futures-core", "futures-util", "http 1.3.1", "http-body 1.0.1", - "hyper", + "hyper 1.7.0", "ipnet", "libc", "percent-encoding", "pin-project-lite", - "socket2", + "socket2 0.6.0", "system-configuration", "tokio", "tower-service", @@ -3835,7 +4109,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4b0f83760fb341a774ed326568e19f5a863af4a952def8c39f9ab92fd95b88e5" dependencies = [ "equivalent", - "hashbrown 0.15.5", + "hashbrown 0.16.1", ] [[package]] @@ -3983,7 +4257,7 @@ dependencies = [ "portable-atomic", "portable-atomic-util", "serde_core", - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] @@ -4082,7 +4356,7 @@ version = "9.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a87cc7a48537badeae96744432de36f4be2b4a34a05a5ef32e9dd8a1c169dde" dependencies = [ - "base64", 
+ "base64 0.22.1", "js-sys", "pem", "ring", @@ -5188,7 +5462,7 @@ version = "0.50.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" dependencies = [ - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -5354,7 +5628,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4c1be0c6c22ec0817cdc77d3842f721a17fd30ab6965001415b5402a74e6b740" dependencies = [ "async-trait", - "base64", + "base64 0.22.1", "bytes", "chrono", "form_urlencoded", @@ -5363,7 +5637,7 @@ dependencies = [ "http-body-util", "httparse", "humantime", - "hyper", + "hyper 1.7.0", "itertools 0.14.0", "md-5", "parking_lot", @@ -5372,7 +5646,7 @@ dependencies = [ "rand 0.9.2", "reqwest", "ring", - "rustls-pemfile", + "rustls-pemfile 2.2.0", "serde", "serde_json", "serde_urlencoded", @@ -5429,7 +5703,7 @@ checksum = "ffb9838d0575c6dbaf3fcec7255af8d5771996d4af900bbb6fa9a314dec00a1a" dependencies = [ "anyhow", "backon", - "base64", + "base64 0.22.1", "bytes", "chrono", "crc32c", @@ -5577,6 +5851,17 @@ dependencies = [ "stable_deref_trait", ] +[[package]] +name = "p256" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51f44edd08f51e2ade572f141051021c5af22677e42b7dd28a88155151c33594" +dependencies = [ + "ecdsa", + "elliptic-curve", + "sha2", +] + [[package]] name = "papergrid" version = "0.17.0" @@ -5659,7 +5944,7 @@ dependencies = [ "arrow-ipc", "arrow-schema", "arrow-select", - "base64", + "base64 0.22.1", "brotli", "bytes", "chrono", @@ -5728,7 +6013,7 @@ version = "3.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d30c53c26bc5b31a98cd02d20f25a7c8567146caf63ed593a9d87b2775291be" dependencies = [ - "base64", + "base64 0.22.1", "serde_core", ] @@ -5781,7 +6066,41 @@ version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "913273894cec178f401a31ec4b656318d95473527be05c0752cc41cdc32be8b7" dependencies = [ - "phf_shared", + "phf_shared 0.12.1", +] + +[[package]] +name = "phf" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1562dc717473dbaa4c1f85a36410e03c047b2e7df7f45ee938fbef64ae7fadf" +dependencies = [ + "phf_macros", + "phf_shared 0.13.1", + "serde", +] + +[[package]] +name = "phf_generator" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "135ace3a761e564ec88c03a77317a7c6b80bb7f7135ef2544dbe054243b89737" +dependencies = [ + "fastrand", + "phf_shared 0.13.1", +] + +[[package]] +name = "phf_macros" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "812f032b54b1e759ccd5f8b6677695d5268c588701effba24601f6932f8269ef" +dependencies = [ + "phf_generator", + "phf_shared 0.13.1", + "proc-macro2", + "quote", + "syn 2.0.106", ] [[package]] @@ -5793,6 +6112,15 @@ dependencies = [ "siphasher", ] +[[package]] +name = "phf_shared" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e57fef6bc5981e38c2ce2d63bfa546861309f875b8a75f092d1d54ae2d64f266" +dependencies = [ + "siphasher", +] + [[package]] name = "pin-project" version = "1.1.10" @@ -5842,9 +6170,9 @@ version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c8ffb9f10fa047879315e6625af03c164b16962a5368d724ed16323b68ace47f" dependencies = [ - "der", - "pkcs8", - "spki", + "der 0.7.10", + "pkcs8 0.10.2", + "spki 
0.7.3", ] [[package]] @@ -5855,11 +6183,21 @@ checksum = "e847e2c91a18bfa887dd028ec33f2fe6f25db77db3619024764914affe8b69a6" dependencies = [ "aes", "cbc", - "der", + "der 0.7.10", "pbkdf2", "scrypt", "sha2", - "spki", + "spki 0.7.3", +] + +[[package]] +name = "pkcs8" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9eca2c590a5f85da82668fa685c09ce2888b9430e83299debf1f34b65fd4a4ba" +dependencies = [ + "der 0.6.1", + "spki 0.6.0", ] [[package]] @@ -5868,10 +6206,10 @@ version = "0.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f950b2377845cebe5cf8b5165cb3cc1a5e0fa5cfa3e1f7f55707d8fd82e0a7b7" dependencies = [ - "der", + "der 0.7.10", "pkcs5", "rand_core 0.6.4", - "spki", + "spki 0.7.3", ] [[package]] @@ -6225,8 +6563,8 @@ dependencies = [ "quinn-proto", "quinn-udp", "rustc-hash", - "rustls", - "socket2", + "rustls 0.23.32", + "socket2 0.6.0", "thiserror 2.0.17", "tokio", "tracing", @@ -6245,7 +6583,7 @@ dependencies = [ "rand 0.9.2", "ring", "rustc-hash", - "rustls", + "rustls 0.23.32", "rustls-pki-types", "slab", "thiserror 2.0.17", @@ -6263,9 +6601,9 @@ dependencies = [ "cfg_aliases", "libc", "once_cell", - "socket2", + "socket2 0.6.0", "tracing", - "windows-sys 0.52.0", + "windows-sys 0.60.2", ] [[package]] @@ -6521,7 +6859,7 @@ checksum = "43451dbf3590a7590684c25fb8d12ecdcc90ed3ac123433e500447c7d77ed701" dependencies = [ "anyhow", "async-trait", - "base64", + "base64 0.22.1", "chrono", "form_urlencoded", "getrandom 0.2.16", @@ -6552,18 +6890,18 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9d0946410b9f7b082a427e4ef5c8ff541a88b357bc6c637c40db3a68ac70a36f" dependencies = [ "async-compression", - "base64", + "base64 0.22.1", "bytes", "encoding_rs", "futures-channel", "futures-core", "futures-util", - "h2", + "h2 0.4.12", "http 1.3.1", "http-body 1.0.1", "http-body-util", - "hyper", - "hyper-rustls", + "hyper 1.7.0", + "hyper-rustls 0.27.7", "hyper-util", "js-sys", "log", @@ -6572,15 +6910,15 @@ dependencies = [ "percent-encoding", "pin-project-lite", "quinn", - "rustls", - "rustls-native-certs", + "rustls 0.23.32", + "rustls-native-certs 0.8.1", "rustls-pki-types", "serde", "serde_json", "serde_urlencoded", "sync_wrapper", "tokio", - "tokio-rustls", + "tokio-rustls 0.26.4", "tokio-util", "tower", "tower-http", @@ -6593,6 +6931,17 @@ dependencies = [ "webpki-roots", ] +[[package]] +name = "rfc6979" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7743f17af12fa0b03b803ba12cd6a8d9483a587e89c69445e3909655c0b9fabb" +dependencies = [ + "crypto-bigint 0.4.9", + "hmac", + "zeroize", +] + [[package]] name = "ring" version = "0.17.14" @@ -6639,11 +6988,11 @@ dependencies = [ "num-integer", "num-traits", "pkcs1", - "pkcs8", + "pkcs8 0.10.2", "rand_core 0.6.4", "sha2", - "signature", - "spki", + "signature 2.2.0", + "spki 0.7.3", "subtle", "zeroize", ] @@ -6739,7 +7088,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys 0.4.15", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -6752,7 +7101,19 @@ dependencies = [ "errno", "libc", "linux-raw-sys 0.11.0", - "windows-sys 0.52.0", + "windows-sys 0.61.2", +] + +[[package]] +name = "rustls" +version = "0.21.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f56a14d1f48b391359b22f731fd4bd7e43c97f3c50eee276f3aa09c94784d3e" +dependencies = [ + "log", + "ring", + "rustls-webpki 0.101.7", + "sct", ] [[package]] @@ -6765,11 +7126,23 @@ dependencies = 
[ "once_cell", "ring", "rustls-pki-types", - "rustls-webpki", + "rustls-webpki 0.103.7", "subtle", "zeroize", ] +[[package]] +name = "rustls-native-certs" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9aace74cb666635c918e9c12bc0d348266037aa8eb599b5cba565709a8dff00" +dependencies = [ + "openssl-probe", + "rustls-pemfile 1.0.4", + "schannel", + "security-framework 2.11.1", +] + [[package]] name = "rustls-native-certs" version = "0.8.1" @@ -6779,7 +7152,16 @@ dependencies = [ "openssl-probe", "rustls-pki-types", "schannel", - "security-framework", + "security-framework 3.5.1", +] + +[[package]] +name = "rustls-pemfile" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c74cae0a4cf6ccbbf5f359f08efdf8ee7e1dc532573bf0db71968cb56b1448c" +dependencies = [ + "base64 0.21.7", ] [[package]] @@ -6801,6 +7183,16 @@ dependencies = [ "zeroize", ] +[[package]] +name = "rustls-webpki" +version = "0.101.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" +dependencies = [ + "ring", + "untrusted", +] + [[package]] name = "rustls-webpki" version = "0.103.7" @@ -6881,6 +7273,43 @@ dependencies = [ "sha2", ] +[[package]] +name = "sct" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414" +dependencies = [ + "ring", + "untrusted", +] + +[[package]] +name = "sec1" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3be24c1842290c45df0a7bf069e0c268a747ad05a192f2fd7dcfdbc1cba40928" +dependencies = [ + "base16ct", + "der 0.6.1", + "generic-array", + "pkcs8 0.9.0", + "subtle", + "zeroize", +] + +[[package]] +name = "security-framework" +version = "2.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" +dependencies = [ + "bitflags", + "core-foundation 0.9.4", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + [[package]] name = "security-framework" version = "3.5.1" @@ -6936,6 +7365,17 @@ dependencies = [ "serde", ] +[[package]] +name = "serde-wasm-bindgen" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8302e169f0eddcc139c70f139d19d6467353af16f9fce27e8c30158036a1e16b" +dependencies = [ + "js-sys", + "serde", + "wasm-bindgen", +] + [[package]] name = "serde_core" version = "1.0.228" @@ -7086,6 +7526,16 @@ dependencies = [ "libc", ] +[[package]] +name = "signature" +version = "1.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74233d3b3b2f6d4b006dc19dee745e73e2a6bfb6f93607cd3b02bd5b00797d7c" +dependencies = [ + "digest", + "rand_core 0.6.4", +] + [[package]] name = "signature" version = "2.2.0" @@ -7231,6 +7681,16 @@ version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" +[[package]] +name = "socket2" +version = "0.5.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + [[package]] name = "socket2" version = "0.6.0" @@ -7247,6 +7707,16 @@ version = "0.9.8" source = "registry+https://github.com/rust-lang/crates.io-index" 
checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" +[[package]] +name = "spki" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67cf02bbac7a337dc36e4f5a693db6c21e7863f45070f7064577eb4367a3212b" +dependencies = [ + "base64ct", + "der 0.6.1", +] + [[package]] name = "spki" version = "0.7.3" @@ -7254,7 +7724,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d91ed6c858b01f942cd56b37a94b3e0a1798290327d1236e4d9cf4eaca44d29d" dependencies = [ "base64ct", - "der", + "der 0.7.10", ] [[package]] @@ -7486,7 +7956,7 @@ checksum = "64a966cb0e76e311f09cf18507c9af192f15d34886ee43d7ba7c7e3803660c43" dependencies = [ "aho-corasick", "arc-swap", - "base64", + "base64 0.22.1", "bitpacking", "bon", "byteorder", @@ -7663,7 +8133,7 @@ dependencies = [ "getrandom 0.3.4", "once_cell", "rustix 1.1.2", - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] @@ -7858,7 +8328,7 @@ dependencies = [ "parking_lot", "pin-project-lite", "signal-hook-registry", - "socket2", + "socket2 0.6.0", "tokio-macros", "windows-sys 0.61.2", ] @@ -7874,13 +8344,23 @@ dependencies = [ "syn 2.0.106", ] +[[package]] +name = "tokio-rustls" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081" +dependencies = [ + "rustls 0.21.12", + "tokio", +] + [[package]] name = "tokio-rustls" version = "0.26.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1729aa945f29d91ba541258c8df89027d5792d85a8841fb65e8bf0f4ede4ef61" dependencies = [ - "rustls", + "rustls 0.23.32", "tokio", ] @@ -7966,7 +8446,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eb7613188ce9f7df5bfe185db26c5814347d110db17920415cf2fbcad85e7203" dependencies = [ "async-trait", - "base64", + "base64 0.22.1", "bytes", "http 1.3.1", "http-body 1.0.1", @@ -8729,6 +9209,7 @@ dependencies = [ name = "vortex-file" version = "0.1.0" dependencies = [ + "async-fs", "async-trait", "bytes", "flatbuffers", @@ -9159,6 +9640,38 @@ dependencies = [ "vortex-mask", ] +[[package]] +name = "vortex-wasm" +version = "0.1.0" +dependencies = [ + "async-fs", + "console_error_panic_hook", + "futures", + "hashbrown 0.16.1", + "hex", + "reqwest", + "serde", + "serde-wasm-bindgen", + "serde_json", + "tempfile", + "tokio", + "tracing", + "vortex", + "vortex-array", + "vortex-buffer", + "vortex-dtype", + "vortex-error", + "vortex-file", + "vortex-io", + "vortex-layout", + "vortex-metrics", + "vortex-scalar", + "vortex-session", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", +] + [[package]] name = "vortex-zigzag" version = "0.1.0" @@ -9355,7 +9868,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 7a676a4006e..343273ed51b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -30,6 +30,7 @@ members = [ "vortex-tui", "vortex-utils", "vortex-vector", + "vortex-wasm", "xtask", ] exclude = ["java/testfiles", "wasm-test"] @@ -125,6 +126,7 @@ glob = "0.3.2" goldenfile = "1" half = { version = "2.6", features = ["std", "num-traits"] } hashbrown = "0.16.0" +hex = "0.4" humansize = "2.1.3" indicatif = "0.18.0" insta = "1.43" @@ -154,6 +156,7 @@ parking_lot = { version = "0.12.3", features = ["nightly"] } parquet 
= "56" paste = "1.0.15" pco = "0.4.4" +phf = { version = "0.13", features = ["macros"] } pin-project-lite = "0.2.15" primitive-types = { version = "0.14.0" } prost = "0.14" @@ -246,6 +249,7 @@ vortex-sparse = { version = "0.1.0", path = "./encodings/sparse", default-featur vortex-tui = { version = "0.1.0", path = "./vortex-tui", default-features = false } vortex-utils = { version = "0.1.0", path = "./vortex-utils", default-features = false } vortex-vector = { version = "0.1.0", path = "./vortex-vector", default-features = false } +vortex-wasm = { version = "0.1.0", path = "./vortex-wasm", default-features = false } vortex-zigzag = { version = "0.1.0", path = "./encodings/zigzag", default-features = false } vortex-zstd = { version = "0.1.0", path = "./encodings/zstd", default-features = false } # END crates published by this project diff --git a/REUSE.toml b/REUSE.toml index 46787ae62e2..b8bd2c9948b 100644 --- a/REUSE.toml +++ b/REUSE.toml @@ -17,6 +17,11 @@ path = "benchmarks-website/**" SPDX-FileCopyrightText = "Copyright the Vortex contributors" SPDX-License-Identifier = "CC-BY-4.0" +[[annotations]] +path = "wasm-test/**" +SPDX-FileCopyrightText = "Copyright the Vortex contributors" +SPDX-License-Identifier = "CC-BY-4.0" + # Golden files are licensed under CC-BY-4.0. [[annotations]] path = "**/goldenfiles/**" diff --git a/bench-vortex/Cargo.toml b/bench-vortex/Cargo.toml index 4bab8b9c093..93ace837333 100644 --- a/bench-vortex/Cargo.toml +++ b/bench-vortex/Cargo.toml @@ -29,6 +29,8 @@ arrow-cast = { workspace = true } arrow-schema = { workspace = true } arrow-select = { workspace = true } async-trait = { workspace = true } +aws-config = "1" +aws-sdk-s3 = "1" bytes = { workspace = true } bzip2 = { workspace = true } clap = { workspace = true, features = ["derive"] } @@ -42,8 +44,10 @@ datafusion-common = { workspace = true } datafusion-physical-plan = { workspace = true } dirs = { workspace = true } erased-serde = { workspace = true } +flate2 = "1" futures = { workspace = true } glob = { workspace = true } +hex = { workspace = true } humansize = { workspace = true } indicatif = { workspace = true, features = ["futures"] } itertools = { workspace = true } @@ -58,6 +62,7 @@ opentelemetry_sdk = { workspace = true } parking_lot = { workspace = true } parquet = { workspace = true, features = ["async"] } paste = { workspace = true } +phf = { workspace = true } rand = { workspace = true } rayon = { workspace = true } regex = { workspace = true } diff --git a/bench-vortex/src/bin/s3_cat.rs b/bench-vortex/src/bin/s3_cat.rs new file mode 100644 index 00000000000..ea79aff44d0 --- /dev/null +++ b/bench-vortex/src/bin/s3_cat.rs @@ -0,0 +1,180 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Appends a local file to an S3 object using optimistic concurrency control via ETags. +//! +//! This binary is a Rust port of `scripts/cat-s3.sh` and handles concurrent appends to S3 objects +//! by using conditional requests with ETags. If the object has been modified by another process +//! between read and write, the operation is retried. 
+ +use std::io::Read; +use std::io::Write; +use std::path::PathBuf; +use std::time::Duration; + +use anyhow::Context; +use anyhow::Result; +use anyhow::bail; +use aws_sdk_s3::Client; +use aws_sdk_s3::error::ProvideErrorMetadata; +use aws_sdk_s3::error::SdkError; +use aws_sdk_s3::primitives::ByteStream; +use clap::Parser; +use flate2::Compression; +use flate2::read::GzDecoder; +use flate2::write::GzEncoder; + +const MAX_RETRIES: u32 = 5; +const RETRY_DELAY: Duration = Duration::from_millis(100); + +#[derive(Parser, Debug)] +#[command( + name = "s3_cat", + about = "Append a local file to an S3 object with optimistic concurrency control" +)] +struct Args { + /// S3 bucket name. + bucket: String, + + /// S3 object key. + key: String, + + /// Path to the local file to append. + local_file: PathBuf, +} + +#[tokio::main] +async fn main() -> Result<()> { + let args = Args::parse(); + + let config = aws_config::load_defaults(aws_config::BehaviorVersion::latest()).await; + let client = Client::new(&config); + + let local_content = + std::fs::read(&args.local_file).context("Failed to read local file to append")?; + + let is_gzipped = args.key.ends_with(".gz"); + + for attempt in 0..MAX_RETRIES { + match try_append(&client, &args.bucket, &args.key, &local_content, is_gzipped).await { + Ok(()) => { + println!("File updated and uploaded successfully."); + return Ok(()); + } + Err(AppendError::EtagMismatch) => { + println!("ETag mismatch on attempt {}. Retrying...", attempt + 1); + tokio::time::sleep(RETRY_DELAY).await; + } + Err(AppendError::Other(e)) => { + return Err(e); + } + } + } + + bail!("Too many failures: {MAX_RETRIES}"); +} + +enum AppendError { + EtagMismatch, + Other(anyhow::Error), +} + +impl From<anyhow::Error> for AppendError { + fn from(e: anyhow::Error) -> Self { + AppendError::Other(e) + } +} + +async fn try_append( + client: &Client, + bucket: &str, + key: &str, + local_content: &[u8], + is_gzipped: bool, +) -> Result<(), AppendError> { + // Get current ETag. + let head = client + .head_object() + .bucket(bucket) + .key(key) + .send() + .await + .context("Failed to get object metadata")?; + + let etag = head + .e_tag() + .context("No ETag returned from head_object")? + .to_string(); + + // Download with if-match. + let get_result = client + .get_object() + .bucket(bucket) + .key(key) + .if_match(&etag) + .send() + .await; + + let get_output = match get_result { + Ok(output) => output, + Err(SdkError::ServiceError(err)) if err.err().code() == Some("PreconditionFailed") => { + return Err(AppendError::EtagMismatch); + } + Err(e) => { + return Err(AppendError::Other( + anyhow::Error::new(e).context("Failed to download object"), + )); + } + }; + + let existing_bytes = get_output + .body + .collect() + .await + .context("Failed to read object body")? + .into_bytes(); + + // Concatenate contents. + let new_content = if is_gzipped { + // Decompress existing content. + let mut decoder = GzDecoder::new(&existing_bytes[..]); + let mut decompressed = Vec::new(); + decoder + .read_to_end(&mut decompressed) + .context("Failed to decompress existing content")?; + + // Append new content. + decompressed.extend_from_slice(local_content); + + // Recompress. + let mut encoder = GzEncoder::new(Vec::new(), Compression::default()); + encoder + .write_all(&decompressed) + .context("Failed to compress concatenated content")?; + encoder.finish().context("Failed to finish compression")?
+ } else { + let mut combined = existing_bytes.to_vec(); + combined.extend_from_slice(local_content); + combined + }; + + // Upload with if-match. + let put_result = client + .put_object() + .bucket(bucket) + .key(key) + .if_match(&etag) + .body(ByteStream::from(new_content)) + .send() + .await; + + match put_result { + Ok(_) => Ok(()), + Err(SdkError::ServiceError(err)) if err.err().code() == Some("PreconditionFailed") => { + Err(AppendError::EtagMismatch) + } + Err(e) => Err(AppendError::Other( + anyhow::Error::new(e).context("Failed to upload object"), + )), + } +} diff --git a/justfile b/justfile new file mode 100644 index 00000000000..dd751b5254b --- /dev/null +++ b/justfile @@ -0,0 +1,21 @@ +data_input := "/Users/connor/spiral/hw25/output/transformed_benchmarks.json" +data_output := "/Users/connor/spiral/vortex-data/vortex/vortex-wasm/data.vortex" +commits_input := "/Users/connor/spiral/hw25/output/transformed_commits.json" +commits_output := "/Users/connor/spiral/vortex-data/vortex/vortex-wasm/commits.vortex" + +# Migrate benchmark data from JSON to Vortex format. +migrate-data: + cargo run -p vortex-wasm --bin migrate_data --release -- {{data_input}} {{data_output}} + +# Migrate commits from JSON to Vortex format. +migrate-commits: + cargo run -p vortex-wasm --bin migrate_commits --release -- {{commits_input}} {{commits_output}} + +# Run both migrations. +migrate-all: migrate-data migrate-commits + +browse file: + cargo run -p vortex-tui -- browse {{file}} + +build-wasm-website: + wasm-pack build vortex-wasm --target web --no-default-features diff --git a/plan.md b/plan.md new file mode 100644 index 00000000000..0d9f2784673 --- /dev/null +++ b/plan.md @@ -0,0 +1,106 @@ + + +# Hack Week + +## Goals + +_Approximately in order of priority:_ + +- Get faster load times of Vortex benchmarks on the web by using Vortex itself to store benchmark measurements instead of JSON +- Make Vortex work reliably on the web via WASM +- Allow addition/removal of different benchmark measurements with schema evolution on Vortex +- Make the benchmarks website easier to read / more understandable +- Rewrite the entire benchmarks website in a WASM framework like Dioxus +- (Stretch) Make the benchmarks website more dynamic +- (Stretch) Add Vortex demo in the browser +- (Stretch) Add Vortex vs. Parquet demo in the browser +- (Stretch) Add wasm-bindgen bindings for Vortex? + +## Plan of Attack + +- Design (at a high level) a better benchmarks website (figure out what components and pages it needs, plus general layout) +- Figure out the minimal API for the current benchmark website +- If the current JavaScript code conflicts with the new design, refactor the architecture of the website so that it is easy to switch out the implementations +- Determine the schema of each of the current benchmarks (and the evolution of each over time) +- Figure out if the current schemas make sense or if they need to change +- Design extensible(?) Vortex schemas for benchmarking +- Migrate all existing data to Vortex files +- Design writer (append-only) interface for adding benchmark measurements that can evolve its schema +- Design reader interface for loading specific columns of Vortex from S3 and parsing data to a format easily read by JavaScript (should probably be streaming over chunks?) +- Implement the reader and writer interfaces with wasm-bindgen +- Migrate the JavaScript code to use the Rust bindings +- Test + +### Ideas + +```rust +/// The 20 byte SHA-1 Git commit ID.
+pub struct CommitId([u8; 20]); + +/// String ID lookup so that we don't have to store the string every time. +pub struct NameId(u32); + +/// A benchmark entry, grouped by benchmark group, then chart name, then series name. +pub struct BenchmarkEntry { // `StructArray` + commit_id: CommitId, // fixed size list of `u8`? + benchmark_group: NameId, // `u16` array + chart_name: NameId, // `u16` array + series_name: NameId, // `u16` array + value: u64, // `u64` array +} + +fn main() { + println!("{}", size_of::<BenchmarkEntry>()); // 64 + println!("{}", align_of::<BenchmarkEntry>()); // 8 +} + +/// Maps [`CommitId`] to benchmark value. +pub type CommitValueMap<'a> = HashMap<&'a CommitId, u64, PassthroughBuildHasher>; + +/// Maps series name to commit values. +pub type SeriesMap<'a> = HashMap<&'a str, CommitValueMap<'a>>; + +/// Series in a chart mapped to their data. +pub type ChartMap<'a> = HashMap<&'a str, SeriesMap<'a>>; + +/// Chart names in a group mapped to their data. +pub type GroupedEntries<'a> = HashMap<&'a str, ChartMap<'a>>; +``` + +A benchmark group should be defined by 1 or more charts and 1 or more series (that always appear on +every chart in the group). + +### Findings + +- There is an insane amount of wasted space in `data.json` +- The amount of actual benchmarking data is very small, and it can easily fit in the memory of + the CI runners +- We can simply read the entire file of all benchmarking data into memory, decompress in memory, add + a new entry, compress, and then write back to S3 + +### 1 file vs many files + +With 1 file, we have to stuff every different kind of benchmark into the same place, which isn't great +for compression, and it means we have to do more work at read time to group data correctly (by benchmark group, chart, then series). + +The seemingly obvious alternative here is to have a different file per "same" data. But what exactly would these be grouped by? We definitely do not want to group by series as that makes it pretty +difficult to add a new series to a chart (maybe it's not terrible with some more engineering). It +also would mean that we would start to approach 1000+ files. + +We could also do a file per chart, as that maps much closer to how we generate these charts. One +program generates all the data for one chart, but that program might also generate data for +multiple charts. This is definitely something we should look into later, but for now having a single +file that has all the data (all with the same schema) is the most flexible. + +### Things to update + +Start with just the random access benchmark + +Generate a bunch of fake data and upload it to S3 + +- Add bindings to read and write `BenchmarkEntry` vortex arrays to and from S3 +- `query_bench` to post directly to S3 +- `random_access` and `compress` to also post directly to S3 diff --git a/vortex-file/Cargo.toml b/vortex-file/Cargo.toml index 032789211db..aa28757d0c3 100644 --- a/vortex-file/Cargo.toml +++ b/vortex-file/Cargo.toml @@ -56,6 +56,11 @@ vortex-utils = { workspace = true, features = ["dashmap"] } vortex-zigzag = { workspace = true } vortex-zstd = { workspace = true, optional = true } + +# async-fs uses std::time which is not available on WASM. It's only used in update.rs which is +# already feature-gated for non-WASM.
+[target.'cfg(not(target_arch = "wasm32"))'.dependencies] +async-fs = { workspace = true } + [dev-dependencies] tokio = { workspace = true, features = ["full"] } vortex-array = { workspace = true, features = ["test-harness"] } diff --git a/vortex-file/src/lib.rs b/vortex-file/src/lib.rs index 8fd03b26470..a76ed2d69f8 100644 --- a/vortex-file/src/lib.rs +++ b/vortex-file/src/lib.rs @@ -99,6 +99,9 @@ pub mod segments; mod strategy; #[cfg(test)] mod tests; +// The update module uses blocking runtimes that are not available in WASM. +#[cfg(not(target_arch = "wasm32"))] +mod update; mod writer; pub use file::*; @@ -106,6 +109,8 @@ pub use footer::*; pub use forever_constant::*; pub use open::*; pub use strategy::*; +#[cfg(not(target_arch = "wasm32"))] +pub use update::*; use vortex_alp::ALPRDVTable; use vortex_alp::ALPVTable; use vortex_array::arrays::DictVTable; diff --git a/vortex-file/src/open.rs b/vortex-file/src/open.rs index 1ee6d08feb2..48164c40afe 100644 --- a/vortex-file/src/open.rs +++ b/vortex-file/src/open.rs @@ -12,6 +12,7 @@ use vortex_dtype::DType; use vortex_error::VortexError; use vortex_error::VortexExpect; use vortex_error::VortexResult; +#[cfg(not(target_arch = "wasm32"))] use vortex_io::InstrumentedReadAt; use vortex_io::VortexReadAt; use vortex_io::file::IntoReadSource; @@ -159,7 +160,12 @@ impl VortexOpenOptions { /// /// This is a low-level API and we strongly recommend using [`VortexOpenOptions::open`]. pub async fn open_read_at<R: VortexReadAt>(self, read: R) -> VortexResult<VortexFile> { - let read = Arc::new(InstrumentedReadAt::new(Arc::new(read), &self.metrics)); + // On WASM, skip instrumentation because it uses std::time which is not available. + #[cfg(target_arch = "wasm32")] + let read: Arc<dyn VortexReadAt> = Arc::new(read); + #[cfg(not(target_arch = "wasm32"))] + let read: Arc<dyn VortexReadAt> = + Arc::new(InstrumentedReadAt::new(Arc::new(read), &self.metrics)); let footer = if let Some(footer) = self.footer { footer diff --git a/vortex-file/src/update.rs b/vortex-file/src/update.rs new file mode 100644 index 00000000000..baa71fc2281 --- /dev/null +++ b/vortex-file/src/update.rs @@ -0,0 +1,176 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Update operations for Vortex files. +//! +//! This module provides functions to read a Vortex file, apply a transformation, and write the +//! result back to a file. The update operation uses atomic file replacement for safety. + +use std::future::Future; +use std::path::Path; + +use vortex_array::ArrayRef; +use vortex_array::expr::session::ExprSession; +use vortex_array::session::ArraySession; +use vortex_array::stream::ArrayStreamExt; +use vortex_error::VortexResult; +use vortex_error::vortex_bail; +use vortex_io::runtime::BlockingRuntime; +use vortex_io::runtime::current::CurrentThreadRuntime; +use vortex_io::session::RuntimeSession; +use vortex_io::session::RuntimeSessionExt; +use vortex_layout::session::LayoutSession; +use vortex_metrics::VortexMetrics; +use vortex_session::VortexSession; + +use crate::OpenOptionsSessionExt; +use crate::WriteOptionsSessionExt; +use crate::WriteSummary; +use crate::register_default_encodings; + +/// Updates a Vortex file by reading it, applying a transformation, and writing the result. +/// +/// This is a blocking convenience wrapper around [`update_file_async`]. It creates a new session +/// with default encodings and a current-thread runtime. +/// +/// # Arguments +/// +/// * `input_path` - Path to the existing Vortex file to read.
+/// * `output_path` - Path to write the updated Vortex file. Can be the same as input. +/// * `update_fn` - An async function that takes the file's array data and returns the updated +/// array. The returned array must have the same dtype as the input. +/// +/// # Returns +/// +/// A [`WriteSummary`] containing information about the written file. +/// +/// # Errors +/// +/// Returns an error if: +/// - The input file cannot be read. +/// - The update function returns an error. +/// - The update function returns an array with a different dtype. +/// - The output file cannot be written. +/// +/// # Atomic Write Guarantee +/// +/// The write operation uses a temporary file and atomic rename to ensure that the output file is +/// never left in a corrupted state, even if the process crashes during the write. +pub fn update_file<F, Fut>( + input_path: impl AsRef<Path>, + output_path: impl AsRef<Path>, + update_fn: F, +) -> VortexResult<WriteSummary> +where + F: FnOnce(ArrayRef) -> Fut, + Fut: Future<Output = VortexResult<ArrayRef>>, +{ + let runtime = CurrentThreadRuntime::new(); + + let session = VortexSession::empty() + .with::<ArraySession>() + .with::<ExprSession>() + .with::<RuntimeSession>() + .with::<LayoutSession>() + .with::<VortexMetrics>() + .with_handle(runtime.handle()); + + register_default_encodings(&session); + + runtime.block_on(update_file_async( + &session, + input_path.as_ref(), + output_path.as_ref(), + update_fn, + )) +} + +/// Updates a Vortex file asynchronously by reading it, applying a transformation, and writing the +/// result. +/// +/// This function: +/// 1. Reads the existing Vortex file into memory. +/// 2. Calls the update function with the array data. +/// 3. Validates the returned array has the same dtype. +/// 4. Writes the updated data to a temporary file. +/// 5. Atomically renames the temporary file to the output path. +/// +/// # Arguments +/// +/// * `session` - The Vortex session to use for reading and writing. +/// * `input_path` - Path to the existing Vortex file to read. +/// * `output_path` - Path to write the updated Vortex file. Can be the same as input. +/// * `update_fn` - An async function that takes the file's array data and returns the updated +/// array. The returned array must have the same dtype as the input. +/// +/// # Returns +/// +/// A [`WriteSummary`] containing information about the written file. +/// +/// # Errors +/// +/// Returns an error if: +/// - The input file cannot be read. +/// - The update function returns an error. +/// - The update function returns an array with a different dtype. +/// - The output file cannot be written. +pub async fn update_file_async<F, Fut>( + session: &VortexSession, + input_path: impl AsRef<Path>, + output_path: impl AsRef<Path>, + update_fn: F, +) -> VortexResult<WriteSummary> +where + F: FnOnce(ArrayRef) -> Fut, + Fut: Future<Output = VortexResult<ArrayRef>>, +{ + let input_path = input_path.as_ref(); + let output_path = output_path.as_ref(); + + // Read the existing file. + let file = session.open_options().open(input_path).await?; + let original_dtype = file.dtype().clone(); + + // Read all existing data into memory. + let existing_array = file.scan()?.into_array_stream()?.read_all().await?; + + // Apply the user's update function. + let updated_array = update_fn(existing_array).await?; + + // Validate that the dtype matches. + if updated_array.dtype() != &original_dtype { + vortex_bail!( + "Update function changed dtype from {} to {}. \ + The updated array must have the same dtype as the input file.", + original_dtype, + updated_array.dtype() + ); + } + + // Generate a temporary file path in the same directory as output. + // This ensures the rename will be atomic (same filesystem).
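+ // For example, updating `out.vortex` first writes a sibling file named like + // `.out.vortex.<uuid>.tmp` (see `generate_temp_path` below), then renames it over the target.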
+ let temp_path = generate_temp_path(output_path); + + // Write to the temporary file. + let temp_file = async_fs::File::create(&temp_path).await?; + let mut writer = session.write_options().writer(temp_file, original_dtype); + writer.push(updated_array).await?; + let summary = writer.finish().await?; + + // Atomically rename the temp file to the output path. + async_fs::rename(&temp_path, output_path).await?; + + Ok(summary) +} + +/// Generates a temporary file path in the same directory as the target path. +fn generate_temp_path(target: &Path) -> std::path::PathBuf { + let parent = target.parent().unwrap_or_else(|| Path::new(".")); + let file_name = target + .file_name() + .map(|s| s.to_string_lossy()) + .unwrap_or_else(|| "file".into()); + + let temp_name = format!(".{}.{}.tmp", file_name, uuid::Uuid::new_v4()); + parent.join(temp_name) +} diff --git a/vortex-io/Cargo.toml b/vortex-io/Cargo.toml index eae3e778009..90d329d6530 100644 --- a/vortex-io/Cargo.toml +++ b/vortex-io/Cargo.toml @@ -17,8 +17,6 @@ version = { workspace = true } all-features = true [dependencies] -async-compat = { workspace = true } -async-fs = { workspace = true } async-stream = { workspace = true } async-trait = { workspace = true } bytes = { workspace = true } @@ -26,15 +24,13 @@ cfg-if = { workspace = true } futures = { workspace = true, features = ["std", "executor"] } # Needed to pickup the "wasm_js" feature for wasm targets from the workspace configuration getrandom_v03 = { workspace = true } +handle = "1.0.2" kanal = { workspace = true } log = { workspace = true } object_store = { workspace = true, optional = true, features = ["fs"] } oneshot = { workspace = true } parking_lot = { workspace = true } pin-project-lite = { workspace = true } -# this is the maximum subset of fetaures that is safe for wasm32 targets -handle = "1.0.2" -tokio = { workspace = true, features = ["io-util", "rt", "sync"] } tracing = { workspace = true } vortex-buffer = { workspace = true } vortex-error = { workspace = true } @@ -42,8 +38,11 @@ vortex-metrics = { workspace = true } vortex-session = { workspace = true } [target.'cfg(not(target_arch = "wasm32"))'.dependencies] -# Smol is our default impl, so we don't want it to be optional, but it cannot be part of wasm +# These deps use std::time which is not available in WASM. +async-compat = { workspace = true } +async-fs = { workspace = true } smol = { workspace = true } +tokio = { workspace = true, features = ["io-util", "rt", "sync"] } [target.'cfg(target_arch = "wasm32")'.dependencies] wasm-bindgen-futures = { workspace = true } diff --git a/vortex-io/src/lib.rs b/vortex-io/src/lib.rs index 6a08c821c8f..de1f9f4882b 100644 --- a/vortex-io/src/lib.rs +++ b/vortex-io/src/lib.rs @@ -11,6 +11,7 @@ //! flags implements the core traits for several common async runtimes and backing stores. 
pub use io_buf::*; +#[cfg(not(target_arch = "wasm32"))] pub use limit::*; #[cfg(feature = "object_store")] pub use object_store::*; @@ -20,6 +21,7 @@ pub use write::*; pub mod file; mod io_buf; pub mod kanal_ext; +#[cfg(not(target_arch = "wasm32"))] mod limit; #[cfg(feature = "object_store")] mod object_store; diff --git a/vortex-io/src/write.rs b/vortex-io/src/write.rs index 57048ce66ef..ce61aadcbf4 100644 --- a/vortex-io/src/write.rs +++ b/vortex-io/src/write.rs @@ -95,6 +95,7 @@ impl VortexWrite for &mut W { } } +#[cfg(not(target_arch = "wasm32"))] impl VortexWrite for async_fs::File { async fn write_all(&mut self, buffer: B) -> io::Result { AsyncWriteExt::write_all(self, buffer.as_slice()).await?; diff --git a/vortex-layout/Cargo.toml b/vortex-layout/Cargo.toml index 4e062f66ba3..4b2f106fe75 100644 --- a/vortex-layout/Cargo.toml +++ b/vortex-layout/Cargo.toml @@ -26,7 +26,6 @@ futures = { workspace = true, features = ["alloc", "async-await", "executor"] } itertools = { workspace = true } kanal = { workspace = true } log = { workspace = true } -moka = { workspace = true, features = ["future"] } once_cell = { workspace = true, features = ["parking_lot"] } oneshot = { workspace = true } parking_lot = { workspace = true } @@ -34,7 +33,6 @@ paste = { workspace = true } pco = { workspace = true } pin-project-lite = { workspace = true } prost = { workspace = true } -rustc-hash = { workspace = true } termtree = { workspace = true } tokio = { workspace = true, features = ["rt"], optional = true } uuid = { workspace = true } @@ -55,6 +53,11 @@ vortex-session = { workspace = true } vortex-utils = { workspace = true, features = ["dashmap"] } vortex-zstd = { workspace = true, optional = true } +# Moka uses std::time::Instant which is not available on WASM. +[target.'cfg(not(target_arch = "wasm32"))'.dependencies] +moka = { workspace = true, features = ["future"] } +rustc-hash = { workspace = true } + [dev-dependencies] futures = { workspace = true, features = ["executor"] } rstest = { workspace = true } diff --git a/vortex-layout/src/segments/cache.rs b/vortex-layout/src/segments/cache.rs index ce023b50f98..fdea8d3b9a3 100644 --- a/vortex-layout/src/segments/cache.rs +++ b/vortex-layout/src/segments/cache.rs @@ -5,12 +5,17 @@ use std::sync::Arc; use async_trait::async_trait; use futures::FutureExt; +#[cfg(not(target_arch = "wasm32"))] use moka::future::Cache; +#[cfg(not(target_arch = "wasm32"))] use moka::future::CacheBuilder; +#[cfg(not(target_arch = "wasm32"))] use moka::policy::EvictionPolicy; +#[cfg(not(target_arch = "wasm32"))] use rustc_hash::FxBuildHasher; use vortex_buffer::BufferHandle; use vortex_buffer::ByteBuffer; +#[cfg(not(target_arch = "wasm32"))] use vortex_error::VortexExpect; use vortex_error::VortexResult; use vortex_metrics::Counter; @@ -41,8 +46,13 @@ impl SegmentCache for NoOpSegmentCache { } /// A [`SegmentCache`] based around an in-memory Moka cache. +/// +/// This cache is not available on WASM targets because moka uses `std::time::Instant` which is not +/// supported. Use [`NoOpSegmentCache`] for WASM targets instead. 
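+/// +/// A minimal construction sketch; the capacity below is illustrative, not a recommended default: +/// +/// ```ignore +/// // Cache up to 256 MiB of segment bytes in memory. +/// let cache = MokaSegmentCache::new(256 * 1024 * 1024); +/// ```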
+#[cfg(not(target_arch = "wasm32"))]
 pub struct MokaSegmentCache(Cache<SegmentId, ByteBuffer, FxBuildHasher>);
 
+#[cfg(not(target_arch = "wasm32"))]
 impl MokaSegmentCache {
     pub fn new(max_capacity_bytes: u64) -> Self {
         Self(
@@ -61,6 +71,7 @@ impl MokaSegmentCache {
     }
 }
 
+#[cfg(not(target_arch = "wasm32"))]
 #[async_trait]
 impl SegmentCache for MokaSegmentCache {
     async fn get(&self, id: SegmentId) -> VortexResult<Option<ByteBuffer>> {
diff --git a/vortex-wasm/Cargo.toml b/vortex-wasm/Cargo.toml
new file mode 100644
index 00000000000..f077c2b4290
--- /dev/null
+++ b/vortex-wasm/Cargo.toml
@@ -0,0 +1,87 @@
+[package]
+name = "vortex-wasm"
+description = "WASM bindings for Vortex"
+authors.workspace = true
+categories.workspace = true
+edition.workspace = true
+homepage.workspace = true
+include.workspace = true
+keywords.workspace = true
+license.workspace = true
+readme.workspace = true
+repository.workspace = true
+rust-version.workspace = true
+version.workspace = true
+
+[lib]
+crate-type = ["cdylib", "rlib"]
+
+[features]
+default = ["native"]
+# Native feature for binaries: includes tokio runtime and file system dependencies.
+native = [
+    "dep:async-fs",
+    "dep:tempfile",
+    "dep:tokio",
+    "dep:tracing",
+    "vortex/tokio",
+]
+
+[dependencies]
+# Core dependencies for both WASM and native.
+console_error_panic_hook = "0.1"
+futures = { workspace = true }
+hex = { workspace = true }
+reqwest = { workspace = true }
+serde = { workspace = true, features = ["derive"] }
+serde_json = { workspace = true }
+serde-wasm-bindgen = "0.6"
+vortex = { workspace = true }
+wasm-bindgen = "0.2"
+wasm-bindgen-futures = { workspace = true }
+hashbrown = { workspace = true, features = ["serde"] }
+
+# These are needed by binaries - accessed via vortex re-exports for library code.
+vortex-array = { workspace = true }
+vortex-buffer = { workspace = true }
+vortex-dtype = { workspace = true }
+vortex-error = { workspace = true }
+vortex-file = { workspace = true }
+vortex-io = { workspace = true }
+vortex-layout = { workspace = true }
+vortex-metrics = { workspace = true }
+vortex-scalar = { workspace = true }
+vortex-session = { workspace = true }
+
+# Native-only dependencies (for binaries).
+async-fs = { workspace = true, optional = true }
+tempfile = { workspace = true, optional = true }
+tokio = { workspace = true, features = ["full"], optional = true }
+tracing = { workspace = true, optional = true }
+
+[dependencies.web-sys]
+features = ["console"]
+version = "0.3"
+
+[[bin]]
+name = "test_s3_read"
+path = "src/bin/test_s3_read.rs"
+required-features = ["native"]
+
+[[bin]]
+name = "test_s3_update"
+path = "src/bin/test_s3_update.rs"
+required-features = ["native"]
+
+[[bin]]
+name = "migrate_data"
+path = "src/bin/migrate_data.rs"
+required-features = ["native"]
+
+[[bin]]
+name = "migrate_commits"
+path = "src/bin/migrate_commits.rs"
+required-features = ["native"]
+
+[lints]
+workspace = true
diff --git a/vortex-wasm/src/bin/migrate_commits.rs b/vortex-wasm/src/bin/migrate_commits.rs
new file mode 100644
index 00000000000..b6c4919e738
--- /dev/null
+++ b/vortex-wasm/src/bin/migrate_commits.rs
@@ -0,0 +1,146 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+//! Binary to migrate a JSON array of [`CommitInfo`] objects to a Vortex file.
+//!
+//! # Usage
+//!
+//! ```bash
+//! cargo run -p vortex-wasm --bin migrate_commits -- <input.json> <output.vortex>
+//! ```
+
+// TODO(connor): We don't use the `TemporalArray` right now because it doesn't have easy interop yet
+// for the chrono `DateTime` type, and bringing in arrow for just this is too heavyweight.
+
+#![allow(clippy::expect_used, clippy::panic)]
+
+use std::env;
+use std::fs;
+
+use vortex::VortexSessionDefault;
+use vortex::array::IntoArray;
+use vortex::array::arrays::FixedSizeListArray;
+use vortex::array::arrays::PrimitiveArray;
+use vortex::array::arrays::StructArray;
+use vortex::array::arrays::VarBinArray;
+use vortex::array::validity::Validity;
+use vortex::buffer::Buffer;
+use vortex::compressor::CompactCompressor;
+use vortex::dtype::DType;
+use vortex::dtype::FieldNames;
+use vortex::dtype::Nullability;
+use vortex::file::WriteOptionsSessionExt;
+use vortex::file::WriteStrategyBuilder;
+use vortex::session::VortexSession;
+use vortex_wasm::website::commit_info::CommitInfo;
+
+#[tokio::main]
+async fn main() {
+    let session = VortexSession::default();
+
+    let args: Vec<String> = env::args().collect();
+    let input_path = args
+        .get(1)
+        .expect("Usage: migrate_commits <input.json> <output.vortex>");
+    let output_path = args
+        .get(2)
+        .map(String::as_str)
+        .expect("Usage: migrate_commits <input.json> <output.vortex>");
+
+    let contents = fs::read_to_string(input_path).expect("Failed to read file");
+    let commits: Vec<CommitInfo> = serde_json::from_str(&contents).expect("Failed to parse JSON");
+
+    let num_commits = commits.len();
+    println!("Parsed {num_commits} commits from JSON");
+
+    // Extract fields into columnar vectors.
+    let mut timestamps: Vec<i64> = Vec::with_capacity(num_commits);
+    let mut author_names: Vec<&str> = Vec::with_capacity(num_commits);
+    let mut author_emails: Vec<&str> = Vec::with_capacity(num_commits);
+    let mut messages: Vec<&str> = Vec::with_capacity(num_commits);
+    let mut commit_id_bytes: Vec<u8> = Vec::with_capacity(num_commits * 20);
+
+    for commit in &commits {
+        timestamps.push(commit.timestamp());
+        author_names.push(commit.author().name());
+        author_emails.push(commit.author().email());
+        messages.push(commit.message());
+        commit_id_bytes.extend_from_slice(&commit.commit_id().0);
+    }
+
+    // Build Vortex arrays.
+
+    // Timestamp array.
+    let timestamp_array = PrimitiveArray::new(Buffer::from(timestamps), Validity::NonNullable);
+
+    // Author struct array (nested).
+    let author_name_array = VarBinArray::from_iter(
+        author_names.iter().map(|s| Some(*s)),
+        DType::Utf8(Nullability::NonNullable),
+    );
+    let author_email_array = VarBinArray::from_iter(
+        author_emails.iter().map(|s| Some(*s)),
+        DType::Utf8(Nullability::NonNullable),
+    );
+    let author_array = StructArray::try_new(
+        FieldNames::from(["name", "email"]),
+        vec![
+            author_name_array.into_array(),
+            author_email_array.into_array(),
+        ],
+        num_commits,
+        Validity::NonNullable,
+    )
+    .expect("Failed to create author struct array");
+
+    // Message array.
+    let message_array = VarBinArray::from_iter(
+        messages.iter().map(|s| Some(*s)),
+        DType::Utf8(Nullability::NonNullable),
+    );
+
+    // Commit ID array (FixedSizeList).
+    let commit_id_elements =
+        PrimitiveArray::new(Buffer::from(commit_id_bytes), Validity::NonNullable);
+    let commit_id_array = FixedSizeListArray::try_new(
+        commit_id_elements.into_array(),
+        20,
+        Validity::NonNullable,
+        num_commits,
+    )
+    .expect("Failed to create commit_id array");
+
+    // Outer struct array with field order: timestamp, author, message, commit_id.
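+    // This order must stay in sync with `CommitInfo::dtype()`, which declares the same four
+    // fields, so readers can rely on one stable schema.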
+    let struct_array = StructArray::try_new(
+        FieldNames::from(["timestamp", "author", "message", "commit_id"]),
+        vec![
+            timestamp_array.into_array(),
+            author_array.into_array(),
+            message_array.into_array(),
+            commit_id_array.into_array(),
+        ],
+        num_commits,
+        Validity::NonNullable,
+    )
+    .expect("Failed to create struct array");
+
+    println!("Schema: {}", struct_array.dtype());
+
+    // Write to Vortex file with compression.
+    let file = tokio::fs::File::create(output_path)
+        .await
+        .expect("Failed to create output file");
+
+    session
+        .write_options()
+        .with_strategy(
+            WriteStrategyBuilder::new()
+                .with_compressor(CompactCompressor::default())
+                .build(),
+        )
+        .write(file, struct_array.to_array_stream())
+        .await
+        .expect("Failed to write Vortex file");
+
+    println!("Wrote {num_commits} commits to {output_path}");
+}
diff --git a/vortex-wasm/src/bin/migrate_data.rs b/vortex-wasm/src/bin/migrate_data.rs
new file mode 100644
index 00000000000..1eb5100183a
--- /dev/null
+++ b/vortex-wasm/src/bin/migrate_data.rs
@@ -0,0 +1,133 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+//! Binary to migrate a JSON array of [`BenchmarkEntry`] objects to a Vortex file.
+//!
+//! # Usage
+//!
+//! ```bash
+//! cargo run -p vortex-wasm --bin migrate_data -- <input.json> <output.vortex>
+//! ```
+
+#![allow(clippy::expect_used, clippy::panic)]
+
+use std::env;
+use std::fs;
+
+use vortex::VortexSessionDefault;
+use vortex::array::IntoArray;
+use vortex::array::arrays::FixedSizeListArray;
+use vortex::array::arrays::PrimitiveArray;
+use vortex::array::arrays::StructArray;
+use vortex::array::arrays::VarBinArray;
+use vortex::array::validity::Validity;
+use vortex::buffer::Buffer;
+use vortex::compressor::CompactCompressor;
+use vortex::dtype::DType;
+use vortex::dtype::FieldNames;
+use vortex::dtype::Nullability;
+use vortex::file::WriteOptionsSessionExt;
+use vortex::file::WriteStrategyBuilder;
+use vortex::session::VortexSession;
+use vortex_wasm::website::entry::BenchmarkEntry;
+
+#[tokio::main]
+async fn main() {
+    let session = VortexSession::default();
+
+    let args: Vec<String> = env::args().collect();
+    let input_path = args
+        .get(1)
+        .expect("Usage: migrate_data <input.json> <output.vortex>");
+    let output_path = args
+        .get(2)
+        .map(String::as_str)
+        .expect("Usage: migrate_data <input.json> <output.vortex>");
+
+    let contents = fs::read_to_string(input_path).expect("Failed to read file");
+    let entries: Vec<BenchmarkEntry> =
+        serde_json::from_str(&contents).expect("Failed to parse JSON");
+
+    let num_entries = entries.len();
+    println!("Parsed {num_entries} entries from JSON");
+
+    // Extract fields into columnar vectors.
+    let mut commit_id_bytes: Vec<u8> = Vec::with_capacity(num_entries * 20);
+    let mut group_names: Vec<&str> = Vec::with_capacity(num_entries);
+    let mut chart_names: Vec<&str> = Vec::with_capacity(num_entries);
+    let mut series_names: Vec<&str> = Vec::with_capacity(num_entries);
+    let mut values: Vec<u64> = Vec::with_capacity(num_entries);
+
+    for entry in &entries {
+        commit_id_bytes.extend_from_slice(&entry.commit_id.0);
+        group_names.push(&entry.group_name);
+        chart_names.push(&entry.chart_name);
+        series_names.push(&entry.series_name);
+        values.push(entry.value);
+    }
+
+    // Build Vortex arrays.
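+    // The commit IDs were concatenated into one flat `u8` buffer above, so a FixedSizeList with
+    // list_size 20 recovers the per-row boundaries without needing an offsets array.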
+    let commit_id_elements =
+        PrimitiveArray::new(Buffer::from(commit_id_bytes), Validity::NonNullable);
+    let commit_id_array = FixedSizeListArray::try_new(
+        commit_id_elements.into_array(),
+        20,
+        Validity::NonNullable,
+        num_entries,
+    )
+    .expect("Failed to create commit_id array");
+
+    let group_name_array = VarBinArray::from_iter(
+        group_names.iter().map(|s| Some(*s)),
+        DType::Utf8(Nullability::NonNullable),
+    );
+    let chart_name_array = VarBinArray::from_iter(
+        chart_names.iter().map(|s| Some(*s)),
+        DType::Utf8(Nullability::NonNullable),
+    );
+    let series_name_array = VarBinArray::from_iter(
+        series_names.iter().map(|s| Some(*s)),
+        DType::Utf8(Nullability::NonNullable),
+    );
+    let value_array = PrimitiveArray::new(Buffer::from(values), Validity::NonNullable);
+
+    let struct_array = StructArray::try_new(
+        FieldNames::from([
+            "commit_id",
+            "group_name",
+            "chart_name",
+            "series_name",
+            "value",
+        ]),
+        vec![
+            commit_id_array.into_array(),
+            group_name_array.into_array(),
+            chart_name_array.into_array(),
+            series_name_array.into_array(),
+            value_array.into_array(),
+        ],
+        num_entries,
+        Validity::NonNullable,
+    )
+    .expect("Failed to create struct array");
+
+    println!("Schema: {}", struct_array.dtype());
+
+    // Write to Vortex file with compression.
+    let file = tokio::fs::File::create(output_path)
+        .await
+        .expect("Failed to create output file");
+
+    session
+        .write_options()
+        .with_strategy(
+            WriteStrategyBuilder::new()
+                .with_compressor(CompactCompressor::default())
+                .build(),
+        )
+        .write(file, struct_array.to_array_stream())
+        .await
+        .expect("Failed to write Vortex file");
+
+    println!("Wrote {num_entries} entries to {output_path}");
+}
diff --git a/vortex-wasm/src/lib.rs b/vortex-wasm/src/lib.rs
new file mode 100644
index 00000000000..29fb32b7bc2
--- /dev/null
+++ b/vortex-wasm/src/lib.rs
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+//! WASM bindings for the Vortex benchmark website.
+//!
+//! This crate provides functions for fetching and processing benchmark data from S3, returning it
+//! in a format ready for JavaScript to render.
+
+pub mod website;
+
+#[cfg(target_arch = "wasm32")]
+mod wasm_init {
+    use wasm_bindgen::prelude::*;
+
+    /// Initialize the WASM module.
+    #[wasm_bindgen(start)]
+    pub fn init() {
+        console_error_panic_hook::set_once();
+    }
+
+    /// Get version information.
+    #[wasm_bindgen]
+    pub fn get_version() -> String {
+        format!("vortex-wasm v{}", env!("CARGO_PKG_VERSION"))
+    }
+}
diff --git a/vortex-wasm/src/website/charts.rs b/vortex-wasm/src/website/charts.rs
new file mode 100644
index 00000000000..9ae5d302e17
--- /dev/null
+++ b/vortex-wasm/src/website/charts.rs
@@ -0,0 +1,183 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+use std::num::NonZeroU64;
+
+use serde::Serialize;
+use vortex::utils::aliases::hash_map::HashMap;
+use vortex_error::VortexResult;
+use vortex_error::vortex_bail;
+
+use crate::website::commit_info::CommitInfo;
+use crate::website::entry::BenchmarkEntry;
+use crate::website::entry::CommitValueMap;
+
+/// The complete response containing benchmarks and commit metadata.
+#[derive(Debug, Clone, Serialize)]
+pub struct BenchmarkResponse {
+    /// Benchmarks grouped by group name, chart name, and series name.
+    pub benchmarks: Benchmarks,
+    /// Sorted list of commits (by timestamp).
+    pub commits: Vec<CommitInfo>,
+}
+
+/// A map of group names to their benchmark data.
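+///
+/// Nesting sketch: group name -> [`BenchmarkGroupData`] -> chart name -> [`ChartData`] ->
+/// series name -> one `Option` value per commit, aligned with the sorted commit list.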
+pub type Benchmarks = HashMap<String, BenchmarkGroupData>;
+
+/// Benchmark group data.
+#[derive(Debug, Clone, Serialize)]
+pub struct BenchmarkGroupData {
+    /// The name of a chart and its associated data.
+    pub charts: HashMap<String, ChartData>,
+}
+
+/// Chart data.
+#[derive(Debug, Clone, Serialize)]
+pub struct ChartData {
+    /// The name of a series and its associated data.
+    pub aligned_series: HashMap<String, Vec<Option<NonZeroU64>>>,
+}
+
+// ============================================================================
+// Summary data structures (for fast initial load)
+// ============================================================================
+
+/// Summary of all benchmarks (metadata only, no series values).
+#[derive(Debug, Clone, Serialize)]
+pub struct BenchmarkSummary {
+    /// Sorted list of commits.
+    pub commits: Vec<CommitInfo>,
+    /// Groups with their chart and series metadata.
+    pub groups: HashMap<String, GroupSummary>,
+}
+
+/// Summary of a benchmark group (metadata only).
+#[derive(Debug, Clone, Serialize)]
+pub struct GroupSummary {
+    /// Charts in this group with their series names.
+    pub charts: HashMap<String, ChartSummary>,
+}
+
+/// Summary of a chart (metadata only).
+#[derive(Debug, Clone, Serialize)]
+pub struct ChartSummary {
+    /// Names of series in this chart.
+    pub series_names: Vec<String>,
+}
+
+/// Processes benchmark entries into a structured format aligned with commits.
+///
+/// Series, charts, and groups with no data are automatically pruned from the result.
+///
+/// # Errors
+///
+/// Returns an error if:
+/// - Commits are not sorted by timestamp.
+/// - Any group, chart, or series name is empty.
+/// - Series lengths don't match the number of commits (internal error).
+pub fn process_benchmarks(
+    entries: &[BenchmarkEntry],
+    sorted_commits: &[CommitInfo],
+) -> VortexResult<Benchmarks> {
+    if !sorted_commits.is_sorted() {
+        vortex_bail!("Commits must be sorted by timestamp");
+    }
+
+    let num_commits = sorted_commits.len();
+    let grouped_entries = BenchmarkEntry::group(entries);
+
+    let mut benchmarks = HashMap::with_capacity(grouped_entries.keys().len());
+    for (group_name, group_data) in grouped_entries {
+        if group_name.is_empty() {
+            vortex_bail!("Group name cannot be empty");
+        }
+
+        let mut charts = HashMap::with_capacity(group_data.keys().len());
+        for (chart_name, chart_data) in group_data {
+            if chart_name.is_empty() {
+                vortex_bail!("Chart name cannot be empty in group '{group_name}'");
+            }
+
+            let mut aligned_series = HashMap::with_capacity(chart_data.keys().len());
+            for (series_name, series_data) in chart_data {
+                if series_name.is_empty() {
+                    vortex_bail!(
+                        "Series name cannot be empty in group '{group_name}', chart '{chart_name}'",
+                    );
+                }
+
+                let aligned_series_data = create_aligned_series_data(series_data, sorted_commits);
+
+                if aligned_series_data.len() != num_commits {
+                    vortex_bail!(
+                        "Series '{series_name}' in group '{group_name}', chart '{chart_name}' has \
+                         {} elements, expected {num_commits}",
+                        aligned_series_data.len(),
+                    );
+                }
+
+                // Skip series with no data points.
+                if !aligned_series_data.iter().any(|v| v.is_some()) {
+                    continue;
+                }
+
+                // Convert to owned String key.
+                aligned_series.insert(series_name.to_string(), aligned_series_data);
+            }
+
+            if aligned_series.is_empty() {
+                continue;
+            }
+
+            charts.insert(chart_name.to_string(), ChartData { aligned_series });
+        }
+
+        if charts.is_empty() {
+            continue;
+        }
+
+        benchmarks.insert(group_name.to_string(), BenchmarkGroupData { charts });
+    }
+
+    Ok(benchmarks)
+}
+
+fn create_aligned_series_data(
+    commits_and_values: CommitValueMap<'_>,
+    sorted_commits: &[CommitInfo],
+) -> Vec<Option<NonZeroU64>> {
+    sorted_commits
+        .iter()
+        .map(|commit_info| {
+            commits_and_values
+                .get(commit_info.commit_id())
+                .map(|&value| {
+                    NonZeroU64::new(value).unwrap_or_else(|| {
+                        eprintln!("Warning: benchmark value of 0 encountered, converting to 1");
+                        NonZeroU64::MIN
+                    })
+                })
+        })
+        .collect()
+}
+
+/// Extracts summary metadata from benchmarks.
+pub fn extract_summary(benchmarks: &Benchmarks, commits: Vec<CommitInfo>) -> BenchmarkSummary {
+    let groups = benchmarks
+        .iter()
+        .map(|(group_name, group_data)| {
+            let charts = group_data
+                .charts
+                .iter()
+                .map(|(chart_name, chart_data)| {
+                    let series_names: Vec<String> =
+                        chart_data.aligned_series.keys().cloned().collect();
+                    (chart_name.clone(), ChartSummary { series_names })
+                })
+                .collect();
+            (group_name.clone(), GroupSummary { charts })
+        })
+        .collect();
+
+    BenchmarkSummary { commits, groups }
+}
diff --git a/vortex-wasm/src/website/commit_id.rs b/vortex-wasm/src/website/commit_id.rs
new file mode 100644
index 00000000000..1ee36c12420
--- /dev/null
+++ b/vortex-wasm/src/website/commit_id.rs
@@ -0,0 +1,116 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+//! Git commit ID type with passthrough hashing.
+
+use std::fmt;
+use std::hash::BuildHasher;
+use std::hash::Hash;
+use std::hash::Hasher;
+
+use serde::Deserialize;
+use serde::Serialize;
+
+/// The 20-byte binary SHA-1 Git commit ID.
+///
+/// Note that the ordering of commit IDs is not meaningful; [`Ord`] is implemented only for
+/// convenience.
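+///
+/// A round-trip sketch using the serde impls below (the hex string is a made-up example):
+///
+/// ```ignore
+/// let id: CommitId = serde_json::from_str("\"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\"")?;
+/// assert_eq!(id.to_string(), "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
+/// ```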
+#[derive(Clone, PartialEq, Eq, PartialOrd, Ord)]
+pub struct CommitId(pub [u8; 20]);
+
+impl Serialize for CommitId {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: serde::Serializer,
+    {
+        serializer.serialize_str(&hex::encode(self.0))
+    }
+}
+
+impl<'de> Deserialize<'de> for CommitId {
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: serde::Deserializer<'de>,
+    {
+        struct CommitIdVisitor;
+
+        impl<'de> serde::de::Visitor<'de> for CommitIdVisitor {
+            type Value = CommitId;
+
+            fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+                formatter.write_str("a 40-character hexadecimal string")
+            }
+
+            fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
+            where
+                E: serde::de::Error,
+            {
+                if value.len() != 40 {
+                    return Err(E::custom(format!(
+                        "expected 40 hex characters, got {}",
+                        value.len()
+                    )));
+                }
+
+                let bytes = hex::decode(value)
+                    .map_err(|e| E::custom(format!("invalid hexadecimal: {}", e)))?;
+
+                let mut arr = [0u8; 20];
+                arr.copy_from_slice(&bytes);
+                Ok(CommitId(arr))
+            }
+        }
+
+        deserializer.deserialize_str(CommitIdVisitor)
+    }
+}
+
+impl Hash for CommitId {
+    fn hash<H: Hasher>(&self, state: &mut H) {
+        state.write(&self.0);
+    }
+}
+
+impl fmt::Display for CommitId {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "{}", hex::encode(self.0))
+    }
+}
+
+impl fmt::Debug for CommitId {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "CommitId(\"{}\")", hex::encode(self.0))
+    }
+}
+
+/// A hasher that passes through bytes directly without additional hashing.
+///
+/// This is useful for types like [`CommitId`] that are already cryptographic hashes.
+#[derive(Default)]
+pub struct PassthroughHasher(u64);
+
+impl Hasher for PassthroughHasher {
+    fn finish(&self) -> u64 {
+        self.0
+    }
+
+    fn write(&mut self, bytes: &[u8]) {
+        // Use the first 8 bytes (or fewer) as the hash value.
+        let len = bytes.len().min(8);
+        let mut buf = [0u8; 8];
+        buf[..len].copy_from_slice(&bytes[..len]);
+        self.0 = u64::from_le_bytes(buf);
+    }
+}
+
+/// A [`BuildHasher`] that creates [`PassthroughHasher`] instances.
+#[derive(Default, Clone)]
+pub struct PassthroughBuildHasher;
+
+impl BuildHasher for PassthroughBuildHasher {
+    type Hasher = PassthroughHasher;
+
+    fn build_hasher(&self) -> Self::Hasher {
+        PassthroughHasher::default()
+    }
+}
diff --git a/vortex-wasm/src/website/commit_info.rs b/vortex-wasm/src/website/commit_info.rs
new file mode 100644
index 00000000000..046f8e1b622
--- /dev/null
+++ b/vortex-wasm/src/website/commit_info.rs
@@ -0,0 +1,244 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+// TODO(connor): We don't use the `TemporalArray` right now because it doesn't have easy interop yet
+// for the chrono `DateTime` type, and bringing in arrow for just this is too heavyweight.
+
+use std::sync::Arc;
+
+use serde::Deserialize;
+use serde::Serialize;
+use vortex::dtype::DType;
+use vortex::dtype::FieldNames;
+use vortex::dtype::Nullability::NonNullable;
+use vortex::dtype::PType;
+use vortex::dtype::StructFields;
+use vortex::scalar::Scalar;
+use vortex_array::Array;
+use vortex_array::ToCanonical;
+use vortex_array::arrays::FixedSizeListArray;
+use vortex_array::arrays::PrimitiveArray;
+use vortex_array::arrays::StructArray;
+use vortex_error::VortexResult;
+use vortex_error::vortex_bail;
+
+use crate::website::commit_id::CommitId;
+
+/// Commit information including author, message, timestamp, and commit ID.
+///
+/// The field order determines the derived [`Ord`] implementation: timestamp first, then author,
+/// message, and finally commit_id.
+#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
+pub struct CommitInfo {
+    /// Unix timestamp in seconds.
+    timestamp: i64,
+    author: Author,
+    message: String,
+    commit_id: CommitId,
+}
+
+impl CommitInfo {
+    /// Creates a new [`CommitInfo`].
+    pub fn new(timestamp: i64, author: Author, message: String, commit_id: CommitId) -> Self {
+        Self {
+            timestamp,
+            author,
+            message,
+            commit_id,
+        }
+    }
+
+    /// Returns the commit timestamp as a Unix timestamp in seconds.
+    pub fn timestamp(&self) -> i64 {
+        self.timestamp
+    }
+
+    /// Returns the commit author.
+    pub fn author(&self) -> &Author {
+        &self.author
+    }
+
+    /// Returns the commit message.
+    pub fn message(&self) -> &str {
+        &self.message
+    }
+
+    /// Returns the commit ID.
+    pub fn commit_id(&self) -> &CommitId {
+        &self.commit_id
+    }
+
+    /// Returns the [`DType`] for a [`CommitInfo`].
+    ///
+    /// The schema is:
+    /// - `timestamp`: `i64` (Unix timestamp in seconds)
+    /// - `author`: Struct (name: Utf8, email: Utf8)
+    /// - `message`: `Utf8`
+    /// - `commit_id`: `FixedSizeList` (20-byte binary SHA-1)
+    pub fn dtype() -> DType {
+        DType::Struct(
+            StructFields::new(
+                FieldNames::from(["timestamp", "author", "message", "commit_id"]),
+                vec![
+                    DType::Primitive(PType::I64, NonNullable),
+                    Author::dtype(),
+                    DType::Utf8(NonNullable),
+                    DType::FixedSizeList(
+                        Arc::new(DType::Primitive(PType::U8, NonNullable)),
+                        20,
+                        NonNullable,
+                    ),
+                ],
+            ),
+            NonNullable,
+        )
+    }
+
+    /// Converts a [`CommitInfo`] to a [`Scalar`].
+    pub fn into_scalar(&self) -> Scalar {
+        let u8_dtype = DType::Primitive(PType::U8, NonNullable);
+
+        // Convert the 20-byte commit_id to a FixedSizeList scalar.
+        let commit_id_bytes: Vec<Scalar> = self
+            .commit_id
+            .0
+            .iter()
+            .map(|&b| Scalar::primitive(b, NonNullable))
+            .collect();
+        let commit_id_scalar = Scalar::fixed_size_list(u8_dtype, commit_id_bytes, NonNullable);
+
+        Scalar::struct_(
+            Self::dtype(),
+            vec![
+                Scalar::primitive(self.timestamp, NonNullable),
+                self.author.into_scalar(),
+                Scalar::utf8(self.message.as_str(), NonNullable),
+                commit_id_scalar,
+            ],
+        )
+    }
+
+    /// Converts a Vortex array (expected to be a struct array) into a vector of [`CommitInfo`].
+    ///
+    /// The array must have the following schema:
+    /// - `timestamp`: `i64`
+    /// - `author`: Struct (name: Utf8, email: Utf8)
+    /// - `message`: `Utf8`
+    /// - `commit_id`: `FixedSizeList`
+    pub fn vec_from_array(array: &dyn Array) -> VortexResult<Vec<Self>> {
+        let struct_array: StructArray = array.to_struct();
+        let len = struct_array.len();
+        let mut entries = Vec::with_capacity(len);
+
+        // Extract each field.
+        let timestamp_field = struct_array.field_by_name("timestamp")?;
+        let author_field = struct_array.field_by_name("author")?;
+        let message_field = struct_array.field_by_name("message")?;
+        let commit_id_field = struct_array.field_by_name("commit_id")?;
+
+        // Convert timestamp to primitive array.
+        let timestamp_prim: PrimitiveArray = timestamp_field.to_primitive();
+        let timestamps: &[i64] = timestamp_prim.as_slice();
+
+        // Convert author struct to its components.
+ let author_struct: StructArray = author_field.to_struct(); + let author_name_field = author_struct.field_by_name("name")?; + let author_email_field = author_struct.field_by_name("email")?; + let author_name_vbv = author_name_field.to_varbinview(); + let author_email_vbv = author_email_field.to_varbinview(); + + // Convert message to varbinview. + let message_vbv = message_field.to_varbinview(); + + // Convert commit_id to canonical fixed-size list and get the underlying bytes. + let commit_id_fsl: FixedSizeListArray = commit_id_field.to_fixed_size_list(); + if commit_id_fsl.list_size() != 20 { + vortex_bail!( + "Expected commit_id to have list_size 20, got {}", + commit_id_fsl.list_size() + ); + } + let commit_id_elements: PrimitiveArray = commit_id_fsl.elements().to_primitive(); + let commit_id_bytes: &[u8] = commit_id_elements.as_slice(); + + // Build the entries. + for i in 0..len { + // Extract author fields. + let name = std::str::from_utf8(author_name_vbv.bytes_at(i).as_ref()) + .map_err(|e| vortex_error::vortex_err!("Invalid UTF-8 in author name: {}", e))? + .to_string(); + let email = std::str::from_utf8(author_email_vbv.bytes_at(i).as_ref()) + .map_err(|e| vortex_error::vortex_err!("Invalid UTF-8 in author email: {}", e))? + .to_string(); + + // Extract message. + let message = std::str::from_utf8(message_vbv.bytes_at(i).as_ref()) + .map_err(|e| vortex_error::vortex_err!("Invalid UTF-8 in message: {}", e))? + .to_string(); + + // Extract the 20-byte commit_id for this row. + let start = i * 20; + let end = start + 20; + let mut commit_id_arr = [0u8; 20]; + commit_id_arr.copy_from_slice(&commit_id_bytes[start..end]); + + entries.push(CommitInfo { + timestamp: timestamps[i], + author: Author::new(name, email), + message, + commit_id: CommitId(commit_id_arr), + }); + } + + Ok(entries) + } +} + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)] +pub struct Author { + name: String, + email: String, +} + +impl Author { + /// Creates a new [`Author`]. + pub fn new(name: String, email: String) -> Self { + Self { name, email } + } + + /// Returns the author's name. + pub fn name(&self) -> &str { + &self.name + } + + /// Returns the author's email. + pub fn email(&self) -> &str { + &self.email + } + + /// Returns the [`DType`] for an [`Author`]. + /// + /// The schema is: + /// - `name`: `Utf8` + /// - `email`: `Utf8` + pub fn dtype() -> DType { + DType::Struct( + StructFields::new( + FieldNames::from(["name", "email"]), + vec![DType::Utf8(NonNullable), DType::Utf8(NonNullable)], + ), + NonNullable, + ) + } + + /// Converts an [`Author`] to a [`Scalar`]. 
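+    ///
+    /// A sketch (the name and email are placeholders); the resulting scalar's dtype matches
+    /// [`Author::dtype`]:
+    ///
+    /// ```ignore
+    /// let author = Author::new("Jane Doe".into(), "jane@example.com".into());
+    /// let scalar = author.into_scalar();
+    /// ```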
+ pub fn into_scalar(&self) -> Scalar { + Scalar::struct_( + Self::dtype(), + vec![ + Scalar::utf8(self.name.as_str(), NonNullable), + Scalar::utf8(self.email.as_str(), NonNullable), + ], + ) + } +} diff --git a/vortex-wasm/src/website/entry.rs b/vortex-wasm/src/website/entry.rs new file mode 100644 index 00000000000..0ae55c998de --- /dev/null +++ b/vortex-wasm/src/website/entry.rs @@ -0,0 +1,214 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use std::sync::Arc; + +use serde::Deserialize; +use serde::Serialize; +use vortex::dtype::DType; +use vortex::dtype::FieldNames; +use vortex::dtype::Nullability::NonNullable; +use vortex::dtype::PType; +use vortex::dtype::StructFields; +use vortex::scalar::Scalar; +use vortex::utils::aliases::hash_map::HashMap; +use vortex_array::Array; +use vortex_array::ToCanonical; +use vortex_array::arrays::FixedSizeListArray; +use vortex_array::arrays::PrimitiveArray; +use vortex_array::arrays::StructArray; +use vortex_error::VortexResult; +use vortex_error::vortex_bail; + +use crate::website::commit_id::CommitId; +use crate::website::commit_id::PassthroughBuildHasher; + +/// Maps [`CommitId`] to benchmark value. +pub type CommitValueMap<'a> = HashMap<&'a CommitId, u64, PassthroughBuildHasher>; + +/// Maps series name to commit values. +pub type SeriesMap<'a> = HashMap<&'a str, CommitValueMap<'a>>; + +/// Maps chart name to series. +pub type ChartMap<'a> = HashMap<&'a str, SeriesMap<'a>>; + +/// Maps benchmark group to charts. +pub type GroupedEntries<'a> = HashMap<&'a str, ChartMap<'a>>; + +/// A benchmark entry, grouped by benchmark group, then chart name, then series name. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct BenchmarkEntry { + pub commit_id: CommitId, + pub group_name: String, + pub chart_name: String, + pub series_name: String, + pub value: u64, +} + +impl BenchmarkEntry { + pub fn new( + commit_id: CommitId, + group_name: String, + chart_name: String, + series_name: String, + value: u64, + ) -> Self { + Self { + commit_id, + group_name, + chart_name, + series_name, + value, + } + } + + /// Returns the [`DType`] for a [`BenchmarkEntry`]. + /// + /// The schema is: + /// - `commit_id`: `FixedSizeList` (20-byte binary SHA-1) + /// - `group_name`: `Utf8` + /// - `chart_name`: `Utf8` + /// - `series_name`: `Utf8` + /// - `value`: `u64` + pub fn dtype() -> DType { + DType::Struct( + StructFields::new( + FieldNames::from([ + "commit_id", + "group_name", + "chart_name", + "series_name", + "value", + ]), + vec![ + DType::FixedSizeList( + Arc::new(DType::Primitive(PType::U8, NonNullable)), + 20, + NonNullable, + ), + DType::Utf8(NonNullable), + DType::Utf8(NonNullable), + DType::Utf8(NonNullable), + DType::Primitive(PType::U64, NonNullable), + ], + ), + NonNullable, + ) + } + + /// Converts a [`BenchmarkEntry`] to a [`Scalar`]. + pub fn into_scalar(&self) -> Scalar { + let u8_dtype = DType::Primitive(PType::U8, NonNullable); + + // Convert the 20-byte commit_id to a FixedSizeList scalar. 
+        let commit_id_bytes: Vec<Scalar> = self
+            .commit_id
+            .0
+            .iter()
+            .map(|&b| Scalar::primitive(b, NonNullable))
+            .collect();
+        let commit_id_scalar = Scalar::fixed_size_list(u8_dtype, commit_id_bytes, NonNullable);
+
+        Scalar::struct_(
+            BenchmarkEntry::dtype(),
+            vec![
+                commit_id_scalar,
+                Scalar::utf8(self.group_name.as_str(), NonNullable),
+                Scalar::utf8(self.chart_name.as_str(), NonNullable),
+                Scalar::utf8(self.series_name.as_str(), NonNullable),
+                Scalar::primitive(self.value, NonNullable),
+            ],
+        )
+    }
+
+    /// Converts a Vortex array (expected to be a struct array) into a vector of [`BenchmarkEntry`].
+    ///
+    /// The array must have the following schema:
+    /// - `commit_id`: FixedSizeList
+    /// - `group_name`: Utf8
+    /// - `chart_name`: Utf8
+    /// - `series_name`: Utf8
+    /// - `value`: u64
+    pub fn vec_from_array(array: &dyn Array) -> VortexResult<Vec<Self>> {
+        // Convert to canonical struct array.
+        let struct_array: StructArray = array.to_struct();
+
+        let len = struct_array.len();
+        let mut entries = Vec::with_capacity(len);
+
+        // Extract each field.
+        let commit_id_field = struct_array.field_by_name("commit_id")?;
+        let group_name_field = struct_array.field_by_name("group_name")?;
+        let chart_name_field = struct_array.field_by_name("chart_name")?;
+        let series_name_field = struct_array.field_by_name("series_name")?;
+        let value_field = struct_array.field_by_name("value")?;
+
+        // Convert commit_id to canonical fixed-size list and get the underlying bytes.
+        let commit_id_fsl: FixedSizeListArray = commit_id_field.to_fixed_size_list();
+        if commit_id_fsl.list_size() != 20 {
+            vortex_bail!(
+                "Expected commit_id to have list_size 20, got {}",
+                commit_id_fsl.list_size()
+            );
+        }
+
+        // Get the elements as a primitive array of u8.
+        let commit_id_elements: PrimitiveArray = commit_id_fsl.elements().to_primitive();
+        let commit_id_bytes: &[u8] = commit_id_elements.as_slice();
+
+        // Convert string fields to canonical varbinview arrays.
+        let group_name_vbv = group_name_field.to_varbinview();
+        let chart_name_vbv = chart_name_field.to_varbinview();
+        let series_name_vbv = series_name_field.to_varbinview();
+
+        // Convert value field to primitive array.
+        let value_prim: PrimitiveArray = value_field.to_primitive();
+        let values: &[u64] = value_prim.as_slice();
+
+        // Build the entries.
+        for i in 0..len {
+            // Extract the 20-byte commit_id for this row.
+            let start = i * 20;
+            let end = start + 20;
+            let mut commit_id_arr = [0u8; 20];
+            commit_id_arr.copy_from_slice(&commit_id_bytes[start..end]);
+
+            // Read strings using bytes_at() and convert to String.
+            let group_name = std::str::from_utf8(group_name_vbv.bytes_at(i).as_ref())
+                .map_err(|e| vortex_error::vortex_err!("Invalid UTF-8 in group_name: {}", e))?
+                .to_string();
+            let chart_name = std::str::from_utf8(chart_name_vbv.bytes_at(i).as_ref())
+                .map_err(|e| vortex_error::vortex_err!("Invalid UTF-8 in chart_name: {}", e))?
+                .to_string();
+            let series_name = std::str::from_utf8(series_name_vbv.bytes_at(i).as_ref())
+                .map_err(|e| vortex_error::vortex_err!("Invalid UTF-8 in series_name: {}", e))?
+                .to_string();
+
+            entries.push(BenchmarkEntry {
+                commit_id: CommitId(commit_id_arr),
+                group_name,
+                chart_name,
+                series_name,
+                value: values[i],
+            });
+        }
+
+        Ok(entries)
+    }
+
+    /// Groups benchmark entries by benchmark group, chart name, series name, and commit ID.
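+    ///
+    /// A lookup sketch over the grouped result (the group, chart, and series names are
+    /// placeholders):
+    ///
+    /// ```ignore
+    /// let grouped = BenchmarkEntry::group(&entries);
+    /// let value: Option<&u64> = grouped["tpch"]["q1"]["vortex"].get(&commit_id);
+    /// ```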
+    pub fn group(entries: &[BenchmarkEntry]) -> GroupedEntries<'_> {
+        let mut result: GroupedEntries<'_> = HashMap::new();
+        for entry in entries {
+            result
+                .entry(entry.group_name.as_str())
+                .or_default()
+                .entry(entry.chart_name.as_str())
+                .or_default()
+                .entry(entry.series_name.as_str())
+                .or_insert_with(|| HashMap::with_hasher(PassthroughBuildHasher))
+                .insert(&entry.commit_id, entry.value);
+        }
+        result
+    }
+}
diff --git a/vortex-wasm/src/website/mod.rs b/vortex-wasm/src/website/mod.rs
new file mode 100644
index 00000000000..3d69dcea90b
--- /dev/null
+++ b/vortex-wasm/src/website/mod.rs
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+pub mod charts;
+pub mod commit_id;
+pub mod commit_info;
+pub mod entry;
+pub mod read_s3;
+
+// `update_s3` uses `tokio` and `std::process::Command`, which are not available in WASM.
+#[cfg(feature = "native")]
+pub mod update_s3;
+
+/// S3 key for the benchmark data Vortex file.
+pub const DATA_KEY: &str = "data.vortex";
+
+/// S3 key for the commits metadata Vortex file.
+pub const COMMITS_KEY: &str = "commits.vortex";
+
+#[cfg(target_arch = "wasm32")]
+mod wasm_bindings {
+    use std::sync::LazyLock;
+
+    use vortex::VortexSessionDefault;
+    use vortex::io::runtime::wasm::WasmRuntime;
+    use vortex::io::session::RuntimeSessionExt;
+    use vortex::session::VortexSession;
+    use wasm_bindgen::prelude::*;
+
+    use super::COMMITS_KEY;
+    use super::DATA_KEY;
+    use super::read_s3::get_benchmark_summary;
+    use super::read_s3::get_chart_data;
+
+    /// Cached Vortex session configured with the WASM runtime.
+    static SESSION: LazyLock<VortexSession> =
+        LazyLock::new(|| VortexSession::default().with_handle(WasmRuntime::handle()));
+
+    /// Load benchmark summary (metadata only, fast).
+    ///
+    /// This function fetches data from S3 (cached after first call), processes it, and returns
+    /// a summary containing:
+    /// - `commits`: Array of commit objects
+    /// - `groups`: Object mapping group names to chart metadata (no values)
+    ///
+    /// Use this for fast initial load, then call `load_chart_data` for specific charts.
+    ///
+    /// # Returns
+    ///
+    /// A JSON string that must be parsed with `JSON.parse()` in JavaScript.
+    #[wasm_bindgen]
+    pub async fn load_benchmark_summary() -> Result<String, JsValue> {
+        get_benchmark_summary(&SESSION, COMMITS_KEY, DATA_KEY)
+            .await
+            .map_err(|e| JsValue::from_str(&format!("Failed to load benchmark summary: {}", e)))
+    }
+
+    /// Load chart data for a specific group and chart.
+    ///
+    /// This function returns the aligned series data for a single chart. Data is cached after
+    /// the first call to any load function, so subsequent calls are fast.
+    ///
+    /// # Arguments
+    ///
+    /// * `group` - The group name (e.g., "random-access", "tpch")
+    /// * `chart` - The chart name within the group (e.g., "latency", "q1-sf1000-nvme")
+    ///
+    /// # Returns
+    ///
+    /// A JSON string containing `{ aligned_series: { series_name: [values...] } }`.
+    /// Values are in nanoseconds (u64). Parse with `JSON.parse()` in JavaScript.
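+    ///
+    /// A JavaScript-side sketch (the group and chart names echo the examples above):
+    ///
+    /// ```ignore
+    /// const chart = JSON.parse(await load_chart_data("tpch", "q1-sf1000-nvme"));
+    /// ```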
+    #[wasm_bindgen]
+    pub async fn load_chart_data(group: &str, chart: &str) -> Result<String, JsValue> {
+        get_chart_data(&SESSION, COMMITS_KEY, DATA_KEY, group, chart)
+            .await
+            .map_err(|e| JsValue::from_str(&format!("Failed to load chart data: {}", e)))
+    }
+}
diff --git a/vortex-wasm/src/website/read_s3.rs b/vortex-wasm/src/website/read_s3.rs
new file mode 100644
index 00000000000..5cf4da2728f
--- /dev/null
+++ b/vortex-wasm/src/website/read_s3.rs
@@ -0,0 +1,170 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+//! Functions for reading benchmark data from S3.
+
+use std::sync::OnceLock;
+
+use vortex::array::stream::ArrayStreamExt;
+use vortex::error::VortexResult;
+use vortex::error::vortex_bail;
+use vortex::error::vortex_err;
+use vortex::file::OpenOptionsSessionExt;
+use vortex::session::VortexSession;
+use vortex_array::ArrayRef;
+use vortex_error::VortexExpect;
+
+use super::entry::BenchmarkEntry;
+use crate::website::charts::Benchmarks;
+use crate::website::charts::extract_summary;
+use crate::website::charts::process_benchmarks;
+use crate::website::commit_info::CommitInfo;
+
+/// Base URL for the S3 bucket containing benchmark data.
+const S3_BASE_URL: &str = "https://vortex-benchmark-results-database-test.s3.amazonaws.com";
+
+// ============================================================================
+// Static caches for data (fetched/processed once, reused across calls)
+// ============================================================================
+
+/// Processed benchmark data ready for serialization.
+pub struct ProcessedData {
+    /// Sorted commits.
+    pub commits: Vec<CommitInfo>,
+    /// All benchmarks.
+    pub benchmarks: Benchmarks,
+}
+
+/// Global cache for processed data.
+static PROCESSED_DATA: OnceLock<ProcessedData> = OnceLock::new();
+
+/// Ensures data is loaded and processed, returning a reference to the cached data.
+///
+/// This function fetches data from S3 and processes it on the first call, then returns the
+/// cached result on subsequent calls.
+///
+/// # Implementation Note
+///
+/// We use a manual `get()` check followed by `set()` instead of `OnceLock::get_or_init` because
+/// `get_or_init` requires a synchronous closure, but our initialization is async (fetching from
+/// S3). There is no async-compatible `get_or_init` in the standard library.
+pub async fn ensure_data_loaded(
+    session: &VortexSession,
+    commits_key: &str,
+    data_key: &str,
+) -> VortexResult<&'static ProcessedData> {
+    if let Some(data) = PROCESSED_DATA.get() {
+        return Ok(data);
+    }
+
+    let (data_array, commits_array) = futures::try_join!(
+        read_s3_array(session, data_key),
+        read_s3_array(session, commits_key)
+    )?;
+
+    let entries = BenchmarkEntry::vec_from_array(&data_array)?;
+    let mut commits = CommitInfo::vec_from_array(&commits_array)?;
+    commits.sort_unstable();
+
+    // Process benchmarks.
+    let benchmarks = process_benchmarks(&entries, &commits)?;
+
+    let processed = ProcessedData {
+        commits,
+        benchmarks,
+    };
+
+    // Store in cache (ignore error if another thread beat us to it).
+    drop(PROCESSED_DATA.set(processed));
+
+    Ok(PROCESSED_DATA.get().vortex_expect("just set"))
+}
+
+/// Returns the benchmark summary (metadata only, fast serialization).
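+///
+/// A sketch of the serialized shape (field names come from [`BenchmarkSummary`] and its nested
+/// types; the group and chart names are placeholders):
+///
+/// ```ignore
+/// { "commits": [...], "groups": { "tpch": { "charts": { "q1": { "series_names": ["vortex"] } } } } }
+/// ```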
+pub async fn get_benchmark_summary(
+    session: &VortexSession,
+    commits_key: &str,
+    data_key: &str,
+) -> VortexResult<String> {
+    let data = ensure_data_loaded(session, commits_key, data_key).await?;
+    let summary = extract_summary(&data.benchmarks, data.commits.clone());
+    serde_json::to_string(&summary).map_err(|e| vortex_err!("Failed to serialize summary: {}", e))
+}
+
+/// Returns chart data for a specific group and chart.
+pub async fn get_chart_data(
+    session: &VortexSession,
+    commits_key: &str,
+    data_key: &str,
+    group: &str,
+    chart: &str,
+) -> VortexResult<String> {
+    let data = ensure_data_loaded(session, commits_key, data_key).await?;
+
+    let group_data = data
+        .benchmarks
+        .get(group)
+        .ok_or_else(|| vortex_err!("Group '{}' not found", group))?;
+
+    let chart_data = group_data
+        .charts
+        .get(chart)
+        .ok_or_else(|| vortex_err!("Chart '{}' not found in group '{}'", chart, group))?;
+
+    serde_json::to_string(chart_data)
+        .map_err(|e| vortex_err!("Failed to serialize chart data: {}", e))
+}
+
+// ============================================================================
+// S3 reading functions
+// ============================================================================
+
+/// Reads a Vortex array from an S3 object.
+///
+/// This function downloads the Vortex file from S3 using HTTP (the bucket is public) and
+/// returns the parsed array.
+///
+/// # Arguments
+///
+/// * `session` - The Vortex session for reading files.
+/// * `key` - The S3 object key (e.g., "test/random_access.vortex").
+///
+/// # Errors
+///
+/// Returns an error if:
+/// - The HTTP request fails.
+/// - The file is not a valid Vortex file.
+pub async fn read_s3_array(session: &VortexSession, key: &str) -> VortexResult<ArrayRef> {
+    let url = format!("{}/{}", S3_BASE_URL, key);
+
+    let response = reqwest::get(&url)
+        .await
+        .map_err(|e| vortex_err!("Failed to fetch {}: {}", url, e))?;
+
+    if !response.status().is_success() {
+        vortex_bail!(
+            "HTTP error fetching {}: {} {}",
+            url,
+            response.status().as_u16(),
+            response.status().as_str()
+        );
+    }
+
+    let bytes = response
+        .bytes()
+        .await
+        .map_err(|e| vortex_err!("Failed to read response body: {}", e))?;
+
+    // Parse as Vortex file and read all data.
+    // Note: We use `open_read_at` directly instead of `open_buffer` because `open_buffer` uses
+    // `futures::executor::block_on` which requires `std::time` (not available in WASM).
+    let buffer: vortex::buffer::ByteBuffer = bytes.to_vec().into();
+    let file = session
+        .open_options()
+        .with_initial_read_size(0)
+        .without_segment_cache()
+        .open_read_at(buffer)
+        .await?;
+
+    file.scan()?.into_array_stream()?.read_all().await
+}
diff --git a/vortex-wasm/src/website/update_s3.rs b/vortex-wasm/src/website/update_s3.rs
new file mode 100644
index 00000000000..48c0156c2bd
--- /dev/null
+++ b/vortex-wasm/src/website/update_s3.rs
@@ -0,0 +1,329 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+//! Atomic S3 update operations for Vortex files using the AWS CLI.
+//!
+//! This module provides functions to read a Vortex file from S3, apply a transformation, and write
+//! the result back atomically using optimistic concurrency control via ETags.
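+//!
+//! Flow sketch (names from this module): `get_etag` -> `download_object` with `--if-match` ->
+//! transform locally -> `upload_object` with `--if-match`. A `PreconditionFailed`/412 response
+//! from the CLI maps to an ETag-mismatch error and triggers a retry with exponential backoff, up
+//! to `MAX_RETRIES` attempts.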
+
+use std::fs;
+use std::io::Write;
+use std::process::Command;
+use std::time::Duration;
+
+use tempfile::NamedTempFile;
+use vortex::array::ArrayRef;
+use vortex::array::builders::builder_with_capacity;
+use vortex::array::stream::ArrayStreamExt;
+use vortex::error::VortexResult;
+use vortex::error::vortex_bail;
+use vortex::error::vortex_err;
+use vortex::file::OpenOptionsSessionExt;
+use vortex::file::WriteOptionsSessionExt;
+use vortex::session::VortexSession;
+
+use super::entry::BenchmarkEntry;
+
+const MAX_RETRIES: u32 = 8;
+
+/// Internal error type for retry control.
+enum UpdateError {
+    /// The ETag has changed since we read the object. The operation should be retried.
+    EtagMismatch,
+    /// A non-retryable error occurred.
+    Other(String),
+}
+
+/// Builds AWS CLI arguments, optionally including a profile.
+fn aws_args(base_args: &[&str], profile: Option<&str>) -> Vec<String> {
+    let mut args: Vec<String> = base_args.iter().map(|s| s.to_string()).collect();
+    if let Some(p) = profile {
+        args.push("--profile".to_string());
+        args.push(p.to_string());
+    }
+    args
+}
+
+/// Gets the current ETag of an S3 object using the AWS CLI.
+fn get_etag(bucket: &str, key: &str, profile: Option<&str>) -> Result<String, UpdateError> {
+    let base_args = [
+        "s3api",
+        "head-object",
+        "--bucket",
+        bucket,
+        "--key",
+        key,
+        "--query",
+        "ETag",
+        "--output",
+        "text",
+    ];
+    let args = aws_args(&base_args, profile);
+
+    let output = Command::new("aws")
+        .args(&args)
+        .output()
+        .map_err(|e| UpdateError::Other(format!("Failed to run aws CLI: {}", e)))?;
+
+    if !output.status.success() {
+        return Err(UpdateError::Other(format!(
+            "aws s3api head-object failed: {}",
+            String::from_utf8_lossy(&output.stderr)
+        )));
+    }
+
+    let etag = String::from_utf8_lossy(&output.stdout).trim().to_string();
+    if etag.is_empty() || etag == "null" {
+        return Err(UpdateError::Other("Failed to retrieve ETag".to_string()));
+    }
+
+    Ok(etag)
+}
+
+/// Downloads an S3 object to a local file using the AWS CLI with ETag matching.
+fn download_object(
+    bucket: &str,
+    key: &str,
+    etag: &str,
+    dest_path: &str,
+    profile: Option<&str>,
+) -> Result<(), UpdateError> {
+    let base_args = [
+        "s3api",
+        "get-object",
+        "--bucket",
+        bucket,
+        "--key",
+        key,
+        "--if-match",
+        etag,
+        dest_path,
+    ];
+    let args = aws_args(&base_args, profile);
+
+    let output = Command::new("aws")
+        .args(&args)
+        .output()
+        .map_err(|e| UpdateError::Other(format!("Failed to run aws CLI: {}", e)))?;
+
+    if !output.status.success() {
+        let stderr = String::from_utf8_lossy(&output.stderr);
+        if stderr.contains("PreconditionFailed") || stderr.contains("412") {
+            return Err(UpdateError::EtagMismatch);
+        }
+        return Err(UpdateError::Other(format!(
+            "aws s3api get-object failed: {}",
+            stderr
+        )));
+    }
+
+    Ok(())
+}
+
+/// Uploads a local file to S3 using the AWS CLI with ETag matching.
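+///
+/// Roughly equivalent to the following invocation (sketch; angle brackets mark placeholders):
+///
+/// ```text
+/// aws s3api put-object --bucket <bucket> --key <key> --if-match <etag> --body <src_path>
+/// ```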
+fn upload_object(
+    bucket: &str,
+    key: &str,
+    etag: &str,
+    src_path: &str,
+    profile: Option<&str>,
+) -> Result<(), UpdateError> {
+    let base_args = [
+        "s3api",
+        "put-object",
+        "--bucket",
+        bucket,
+        "--key",
+        key,
+        "--if-match",
+        etag,
+        "--body",
+        src_path,
+    ];
+    let args = aws_args(&base_args, profile);
+
+    let output = Command::new("aws")
+        .args(&args)
+        .output()
+        .map_err(|e| UpdateError::Other(format!("Failed to run aws CLI: {}", e)))?;
+
+    if !output.status.success() {
+        let stderr = String::from_utf8_lossy(&output.stderr);
+        if stderr.contains("PreconditionFailed") || stderr.contains("412") {
+            return Err(UpdateError::EtagMismatch);
+        }
+        return Err(UpdateError::Other(format!(
+            "aws s3api put-object failed: {}",
+            stderr
+        )));
+    }
+
+    Ok(())
+}
+
+/// Updates a Vortex file stored in S3 atomically using optimistic concurrency control.
+///
+/// This function reads the existing file from S3, applies a transformation, and writes it back
+/// using conditional puts with ETags. If another process modifies the file between read and write,
+/// the operation is automatically retried.
+///
+/// # Arguments
+///
+/// * `session` - The Vortex session for reading and writing files.
+/// * `bucket` - The S3 bucket name.
+/// * `key` - The S3 object key.
+/// * `profile` - Optional AWS CLI profile name (e.g., from `aws sso login`).
+/// * `update_fn` - A function that takes the file's array data and returns the updated array.
+///   The returned array must have the same dtype as the input. This function may be called
+///   multiple times if retries are needed.
+///
+/// # Errors
+///
+/// Returns an error if:
+/// - The S3 object does not exist.
+/// - The update function returns an error.
+/// - The update function returns an array with a different dtype.
+/// - The retry limit is reached without success.
+/// - An S3 operation fails with a non-retryable error.
+pub fn update_s3_object<F>(
+    session: &VortexSession,
+    bucket: &str,
+    key: &str,
+    profile: Option<&str>,
+    mut update_fn: F,
+) -> VortexResult<()>
+where
+    F: FnMut(ArrayRef) -> VortexResult<ArrayRef>,
+{
+    let runtime = tokio::runtime::Runtime::new()
+        .map_err(|e| vortex_err!("Failed to create tokio runtime: {}", e))?;
+
+    for attempt in 0..MAX_RETRIES {
+        match try_update_s3_object(session, bucket, key, profile, &mut update_fn, &runtime) {
+            Ok(()) => return Ok(()),
+            Err(UpdateError::EtagMismatch) => {
+                eprintln!("ETag mismatch on attempt {}. Retrying...", attempt + 1);
+                std::thread::sleep(Duration::from_millis(100 * (1 << attempt)));
+            }
+            Err(UpdateError::Other(e)) => {
+                vortex_bail!("S3 update failed: {}", e);
+            }
+        }
+    }
+
+    vortex_bail!("Failed to update S3 object after {} attempts", MAX_RETRIES)
+}
+
+/// Attempts a single update of an S3 object.
+fn try_update_s3_object<F>(
+    session: &VortexSession,
+    bucket: &str,
+    key: &str,
+    profile: Option<&str>,
+    update_fn: &mut F,
+    runtime: &tokio::runtime::Runtime,
+) -> Result<(), UpdateError>
+where
+    F: FnMut(ArrayRef) -> VortexResult<ArrayRef>,
+{
+    // Get current ETag.
+    let etag = get_etag(bucket, key, profile)?;
+
+    // Download to temp file.
+    let download_file = NamedTempFile::new()
+        .map_err(|e| UpdateError::Other(format!("Failed to create temp file: {}", e)))?;
+    let download_path = download_file.path().to_string_lossy().to_string();
+
+    download_object(bucket, key, &etag, &download_path, profile)?;
+
+    // Read and parse.
+ let existing_bytes = fs::read(&download_path) + .map_err(|e| UpdateError::Other(format!("Failed to read downloaded file: {}", e)))?; + + let file = session + .open_options() + .open_buffer(existing_bytes) + .map_err(|e| UpdateError::Other(format!("Failed to open Vortex file: {}", e)))?; + + let original_dtype = file.dtype().clone(); + + let existing_array = runtime + .block_on(async { file.scan()?.into_array_stream()?.read_all().await }) + .map_err(|e| UpdateError::Other(format!("Failed to read array: {}", e)))?; + + // Apply the user's update function. + let updated_array = update_fn(existing_array) + .map_err(|e| UpdateError::Other(format!("Update function failed: {}", e)))?; + + // Validate that the dtype matches. + if updated_array.dtype() != &original_dtype { + return Err(UpdateError::Other(format!( + "Update function changed dtype from {} to {}. \ + The updated array must have the same dtype as the input file.", + original_dtype, + updated_array.dtype() + ))); + } + + // Serialize updated array to Vortex file bytes. + let mut buffer = Vec::new(); + runtime + .block_on(async { + session + .write_options() + .write(&mut buffer, updated_array.to_array_stream()) + .await + }) + .map_err(|e| UpdateError::Other(format!("Failed to serialize array: {}", e)))?; + + // Write to temp file for upload. + let mut upload_file = NamedTempFile::new() + .map_err(|e| UpdateError::Other(format!("Failed to create temp file: {}", e)))?; + upload_file + .write_all(&buffer) + .map_err(|e| UpdateError::Other(format!("Failed to write temp file: {}", e)))?; + upload_file + .flush() + .map_err(|e| UpdateError::Other(format!("Failed to flush temp file: {}", e)))?; + + let upload_path = upload_file.path().to_string_lossy().to_string(); + + // Upload with if-match. + upload_object(bucket, key, &etag, &upload_path, profile)?; + + Ok(()) +} + +/// Appends a single [`BenchmarkEntry`] to a Vortex file stored in S3. +/// +/// This function uses [`update_s3_object`] with optimistic concurrency control to atomically +/// append the entry to the existing data. If concurrent modifications are detected, the operation +/// is automatically retried. +/// +/// # Arguments +/// +/// * `session` - The Vortex session for reading and writing files. +/// * `bucket` - The S3 bucket name. +/// * `key` - The S3 object key. +/// * `profile` - Optional AWS CLI profile name. +/// * `entry` - The benchmark entry to append. 
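+///
+/// A usage sketch (the bucket, key, and entry are placeholders):
+///
+/// ```ignore
+/// append_benchmark_entry_s3(&session, "my-bucket", "data.vortex", None, &entry)?;
+/// ```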
+pub fn append_benchmark_entry_s3( + session: &VortexSession, + bucket: &str, + key: &str, + profile: Option<&str>, + entry: &BenchmarkEntry, +) -> VortexResult<()> { + let scalar = entry.into_scalar(); + + update_s3_object(session, bucket, key, profile, |existing_array| { + let existing_len = existing_array.len(); + let dtype = existing_array.dtype().clone(); + + let mut builder = builder_with_capacity(&dtype, existing_len + 1); + builder.extend_from_array(&existing_array); + builder.append_scalar(&scalar)?; + + Ok(builder.finish()) + }) +} diff --git a/vortex-wasm/wasm-test.css b/vortex-wasm/wasm-test.css new file mode 100644 index 00000000000..8cf60e80c4f --- /dev/null +++ b/vortex-wasm/wasm-test.css @@ -0,0 +1,585 @@ +/* CSS Variables for consistent theming */ +:root { + /* Vortex Brand Colors */ + --vortex-black: #101010; + --vortex-gray: #ececec; + --vortex-green: #cee562; + --vortex-blue: #5971fd; + --vortex-pink: #eeb3e1; + + /* Series Colors */ + --series-vortex: #101010; + --series-parquet: #5dade2; + --series-lance: #ef7f1d; + + /* Theme Colors */ + --primary-color: var(--vortex-blue); + --primary-hover: #4a5fe5; + --bg-color: #ffffff; + --bg-secondary: #fafafa; + --text-color: var(--vortex-black); + --text-secondary: #666666; + --border-color: var(--vortex-gray); + + /* Shadows */ + --shadow-sm: 0 1px 3px rgba(16, 16, 16, 0.08); + --shadow-md: 0 4px 8px rgba(16, 16, 16, 0.08); + --shadow-lg: 0 12px 24px rgba(16, 16, 16, 0.12); + + /* Border Radius */ + --radius-sm: 4px; + --radius-md: 8px; + --radius-lg: 12px; +} + +/* Reset and base styles */ +* { + box-sizing: border-box; + margin: 0; + padding: 0; +} + +html { + font-family: + -apple-system, BlinkMacSystemFont, "Segoe UI", "SF Pro Display", Roboto, + sans-serif; + -webkit-font-smoothing: antialiased; + -moz-osx-font-smoothing: grayscale; + font-size: 16px; + scroll-behavior: smooth; +} + +body { + color: var(--text-color); + background: var(--bg-secondary); + min-height: 100vh; + padding: 2rem; + padding-bottom: 12rem; + line-height: 1.6; + letter-spacing: -0.01em; +} + +/* Container */ +.container { + max-width: 1400px; + margin: 0 auto; +} + +/* Page Header */ +.page-header { + margin-bottom: 2rem; +} + +.page-header h1 { + font-family: + "Funnel Display", + -apple-system, + BlinkMacSystemFont, + sans-serif; + font-size: 2rem; + font-weight: 600; + color: var(--text-color); + margin-bottom: 0.5rem; + letter-spacing: -0.02em; +} + +.subtitle { + color: var(--text-secondary); + font-size: 1rem; +} + +/* Status */ +.status { + background: var(--bg-color); + border-radius: var(--radius-md); + padding: 1rem 1.25rem; + margin-bottom: 1.5rem; + box-shadow: var(--shadow-sm); + display: flex; + align-items: center; + gap: 0.75rem; + border: 1px solid var(--border-color); +} + +.status.loading { + border-left: 4px solid var(--vortex-blue); +} + +.status.success { + border-left: 4px solid var(--series-vortex); +} + +.status.error { + border-left: 4px solid #e74c3c; +} + +.spinner { + display: inline-block; + width: 16px; + height: 16px; + border: 2px solid var(--border-color); + border-top-color: var(--vortex-blue); + border-radius: 50%; + animation: spin 1s linear infinite; +} + +@keyframes spin { + to { + transform: rotate(360deg); + } +} + +/* Benchmarks Container - full width sections */ +.benchmarks-grid { + display: flex; + flex-direction: column; + gap: 24px; +} + +/* Benchmark Set */ +.benchmark-set { + background: var(--bg-color); + border-radius: var(--radius-lg); + border: 1px solid var(--border-color); + overflow: 
visible; + transition: all 0.3s ease; + box-shadow: var(--shadow-sm); +} + +.benchmark-set:hover { + box-shadow: var(--shadow-md); +} + +/* Benchmark Header (clickable) */ +.benchmark-header { + display: flex; + align-items: center; + justify-content: space-between; + padding: 16px 24px; + background: var(--bg-secondary); + border-bottom: 1px solid var(--border-color); + cursor: pointer; + user-select: none; + transition: background-color 0.2s; +} + +.benchmark-header:hover { + background: #f0f0f0; +} + +.title-wrapper { + display: flex; + align-items: center; + gap: 12px; +} + +.collapse-icon { + font-size: 0.875rem; + transition: transform 0.3s ease; + color: var(--text-secondary); +} + +.benchmark-set.collapsed .collapse-icon { + transform: rotate(-90deg); +} + +.benchmark-title { + font-family: + "Funnel Display", + -apple-system, + BlinkMacSystemFont, + sans-serif; + font-size: 1.25rem; + font-weight: 600; + color: var(--text-color); + margin: 0; + letter-spacing: -0.02em; +} + +.benchmark-meta { + font-size: 0.75rem; + font-weight: 500; + color: var(--text-secondary); + text-transform: uppercase; + letter-spacing: 0.02em; +} + +/* Summary Section (always visible) */ +.summary-section { + padding: 16px 24px; + background: var(--bg-secondary); + border-bottom: 1px solid var(--border-color); +} + +/* Scores List - 2 column layout */ +.scores-list { + column-count: 2; + column-gap: 20px; +} + +/* Single column for smaller screens */ +@media (max-width: 780px) { + .scores-list { + column-count: 1; + } +} + +/* Score Item - card style */ +.score-item { + display: flex; + align-items: center; + background: var(--bg-color); + padding: 10px 12px; + margin-bottom: 8px; + border-radius: var(--radius-sm); + border: 1px solid var(--border-color); + transition: all 0.2s ease; + font-size: 13px; + break-inside: avoid; +} + +.score-item:hover { + border-color: var(--primary-color); + background: var(--bg-secondary); +} + +.score-rank { + font-weight: 600; + color: var(--primary-color); + min-width: 30px; + font-size: 14px; +} + +.score-series { + flex: 1; + font-weight: 500; + color: var(--text-color); + margin: 0 8px; + font-size: 14px; + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; +} + +.score-metrics { + display: flex; + gap: 6px; + align-items: center; +} + +.score-runtime { + font-family: "SF Mono", "Monaco", "Inconsolata", monospace; + font-weight: 600; + color: var(--primary-color); + background: rgba(89, 113, 253, 0.1); + padding: 3px 10px; + border-radius: var(--radius-sm); + font-size: 13px; +} + +.score-ratio { + font-family: "SF Mono", "Monaco", "Inconsolata", monospace; + font-weight: 600; + color: var(--text-secondary); + background: var(--bg-secondary); + padding: 3px 10px; + border-radius: var(--radius-sm); + font-size: 13px; +} + +.scores-explanation { + margin-top: 12px; + font-size: 11px; + color: var(--text-secondary); + font-style: italic; + text-align: center; +} + +/* Benchmark Graphs (collapsible) */ +.benchmark-graphs { + padding: 24px; + background: var(--bg-color); +} + +.benchmark-set.collapsed .benchmark-graphs { + display: none; +} + +/* Chart Container */ +.chart-container { + background: var(--bg-color); + border: 1px solid var(--border-color); + border-radius: var(--radius-md); + padding: 20px; + transition: all 0.3s ease; +} + +.chart-container:hover { + box-shadow: var(--shadow-md); + border-color: var(--primary-color); +} + +/* Disable transitions during group expand to avoid jank with many charts. 
*/ +.benchmark-set.expanding .chart-container { + transition: none !important; +} + +.chart-header { + display: flex; + justify-content: space-between; + align-items: center; + margin-bottom: 1rem; +} + +.chart-title { + font-family: + "Funnel Display", + -apple-system, + BlinkMacSystemFont, + sans-serif; + font-size: 1rem; + font-weight: 500; + color: var(--text-color); + margin: 0; + letter-spacing: -0.01em; +} + +.chart-controls { + display: flex; + align-items: center; + gap: 12px; +} + +.control-info-compact { + color: var(--text-secondary); + font-size: 0.75rem; +} + +.zoom-controls { + display: flex; + gap: 4px; +} + +.zoom-btn { + width: 28px; + height: 28px; + border: 1px solid var(--border-color); + border-radius: var(--radius-sm); + background: var(--bg-color); + color: var(--text-color); + font-size: 16px; + font-weight: 600; + cursor: pointer; + display: flex; + align-items: center; + justify-content: center; + transition: all 0.2s; +} + +.zoom-btn:hover { + background: var(--bg-secondary); + border-color: var(--primary-color); +} + +.chart-wrapper { + position: relative; + height: 400px; +} + +.x-axis-label { + text-align: center; + color: var(--text-secondary); + font-size: 0.625rem; + text-transform: uppercase; + letter-spacing: 0.05em; + margin-top: 0.25rem; + margin-bottom: 0.25rem; +} + +/* Custom Tooltip */ +.chartjs-tooltip { + position: absolute; + pointer-events: none; + opacity: 0; + transition: opacity 0.2s ease; + background: rgba(16, 16, 16, 0.9); + color: white; + border-radius: var(--radius-md); + padding: 12px; + font-size: 12px; + z-index: 1000; + box-shadow: var(--shadow-lg); +} + +.chartjs-tooltip.active { + opacity: 1; +} + +.chartjs-tooltip::before { + content: ""; + position: absolute; + top: -8px; + left: 50%; + transform: translateX(-50%); + border-left: 8px solid transparent; + border-right: 8px solid transparent; + border-bottom: 8px solid rgba(16, 16, 16, 0.9); +} + +.chartjs-tooltip-body { + display: flex; + flex-direction: column; + gap: 6px; +} + +.chartjs-tooltip-item { + display: flex; + align-items: center; + gap: 8px; +} + +.chartjs-tooltip-color { + width: 12px; + height: 12px; + border-radius: 2px; +} + +.chartjs-tooltip-footer { + margin-top: 10px; + padding-top: 10px; + border-top: 1px solid rgba(255, 255, 255, 0.2); + font-size: 11px; + opacity: 0.9; +} + +/* Timeline Scrollbar */ +.timeline-scrollbar-container { + overflow-x: auto; + overflow-y: hidden; + height: 16px; + background: transparent; + border-radius: var(--radius-md); + cursor: pointer; + margin-top: 0.5rem; +} + +.timeline-scrollbar-container::-webkit-scrollbar { + height: 16px; +} + +.timeline-scrollbar-container::-webkit-scrollbar-track { + background: rgba(0, 0, 0, 0.05); + border-radius: var(--radius-md); +} + +.timeline-scrollbar-container::-webkit-scrollbar-thumb { + background: rgba(89, 113, 253, 0.3); + border-radius: var(--radius-md); + border: 3px solid transparent; + background-clip: padding-box; +} + +.timeline-scrollbar-container::-webkit-scrollbar-thumb:hover { + background: rgba(89, 113, 253, 0.5); + background-clip: padding-box; +} + +/* Dynamic scrollbar content elements */ +[id$="scrollbar-content"] { + height: 1px; + pointer-events: none; +} + +/* Chart Section - container for each chart within a group */ +.chart-section { + margin-bottom: 32px; + padding-bottom: 32px; + border-bottom: 1px solid var(--border-color); +} + +.chart-section:last-child { + margin-bottom: 0; + padding-bottom: 0; + border-bottom: none; +} + +/* Summary section within a chart 
section (not at group level) */ +.chart-section .summary-section { + padding: 0; + background: transparent; + border-bottom: none; + margin-bottom: 16px; +} + +/* Utility Classes */ +.hidden { + display: none !important; +} + +/* Responsive */ +@media (max-width: 768px) { + body { + padding: 1rem; + } + + .page-header h1 { + font-size: 1.5rem; + } + + .benchmark-header { + padding: 12px 16px; + } + + .benchmark-title { + font-size: 1.125rem; + } + + .summary-section { + padding: 12px 16px; + } + + .score-item { + padding: 8px 10px; + margin-bottom: 6px; + font-size: 12px; + } + + .score-rank { + font-size: 13px; + min-width: 28px; + } + + .score-series { + font-size: 13px; + margin: 0 6px; + } + + .score-runtime, + .score-ratio { + font-size: 12px; + padding: 2px 8px; + } + + .scores-explanation { + font-size: 10px; + margin-top: 8px; + } + + .benchmark-graphs { + padding: 16px; + } + + .chart-wrapper { + height: 300px; + } +} + +@media (min-width: 1600px) { + .container { + max-width: 1600px; + } + + body { + padding: 3rem 4rem; + } +} diff --git a/vortex-wasm/wasm-test.html b/vortex-wasm/wasm-test.html new file mode 100644 index 00000000000..80c31c689f2 --- /dev/null +++ b/vortex-wasm/wasm-test.html @@ -0,0 +1,38 @@ + + + + + + Vortex WASM Benchmarks + + + + + + + +
+  <body>
+    <div class="container">
+      <div id="status" class="status loading">
+        <span class="spinner"></span>
+        <span id="status-text">Initializing WASM module...</span>
+      </div>
+
+      <div id="benchmarks-container" class="benchmarks-grid"></div>
+    </div>
+ + + + diff --git a/vortex-wasm/wasm-test.js b/vortex-wasm/wasm-test.js new file mode 100644 index 00000000000..a5f3ebf875f --- /dev/null +++ b/vortex-wasm/wasm-test.js @@ -0,0 +1,1361 @@ +// ============================================================================ +// Configuration +// ============================================================================ + +/** + * Global configuration settings. + */ +const CONFIG = { + wasmModulePath: "./pkg/vortex_wasm.js", + githubRepo: "https://github.com/spiraldb/vortex", + defaultVisibleCommits: 50, + minWindowSize: 10, + yAxisLabel: "Time (ms)", +}; + +/** + * Centralized series colors. Each series name maps to its display color. + */ +const SERIES_COLORS = { + // Simple series names (random-access). + vortex: "#101010", + parquet: "#5DADE2", + lance: "#ef7f1d", + // DuckDB series. + "duckdb:vortex-compact": "#101010", + "duckdb:vortex-file-compressed": "#6b7280", + "duckdb:parquet": "#5DADE2", + "duckdb:duckdb": "#f59e0b", + // DataFusion series. + "datafusion:vortex-compact": "#059669", + "datafusion:vortex-file-compressed": "#10b981", + "datafusion:parquet": "#8b5cf6", + "datafusion:lance": "#ef7f1d", + "datafusion:arrow": "#dc2626", +}; + +/** + * Generates an array of chart names following a pattern. + * + * @param {string} prefix - Prefix before the number (e.g., "q"). + * @param {string} suffix - Suffix after the number (e.g., "-sf1-nvme"). + * @param {number} start - Starting number (inclusive). + * @param {number} end - Ending number (inclusive). + * @param {number} [pad=2] - Number of digits to pad to. + * @returns {string[]} Array of chart names. + */ +function generateCharts(prefix, suffix, start, end, pad = 2) { + return Array.from( + { length: end - start + 1 }, + (_, i) => `${prefix}${String(start + i).padStart(pad, "0")}${suffix}`, + ); +} + +/** + * Benchmark group configurations. Each group contains multiple charts. 
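+ *
+ * Chart names double as element-ID fragments: generateCharts zero-pads them
+ * (e.g. "q01") and makeId combines them with the group ID (illustrative:
+ * makeId("tpcds", "q01-sf1", "canvas") -> "tpcds-q01-sf1-canvas").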
+ */ +const BENCHMARK_GROUPS = { + "random-access": { + title: "Random Access", + charts: ["latency"], + seriesNames: ["vortex", "parquet", "lance"], + }, + clickbench: { + title: "ClickBench", + charts: generateCharts("q", "", 0, 42), + seriesNames: [ + "datafusion:lance", + "datafusion:parquet", + "datafusion:vortex-compact", + "datafusion:vortex-file-compressed", + "duckdb:duckdb", + "duckdb:parquet", + "duckdb:vortex-compact", + "duckdb:vortex-file-compressed", + ], + }, + "tpch-sf1-nvme": { + title: "TPC-H NVMe (Scale Factor 1)", + charts: generateCharts("q", "", 1, 22), + seriesNames: [ + "datafusion:arrow", + "datafusion:lance", + "datafusion:parquet", + "datafusion:vortex-compact", + "datafusion:vortex-file-compressed", + "duckdb:duckdb", + "duckdb:parquet", + "duckdb:vortex-compact", + "duckdb:vortex-file-compressed", + ], + }, + "tpch-sf1-s3": { + title: "TPC-H S3 (Scale Factor 1)", + charts: generateCharts("q", "", 1, 22), + seriesNames: [ + "datafusion:lance", + "datafusion:parquet", + "datafusion:vortex-compact", + "datafusion:vortex-file-compressed", + "duckdb:parquet", + "duckdb:vortex-compact", + "duckdb:vortex-file-compressed", + ], + }, + "tpch-sf10-nvme": { + title: "TPC-H NVMe (Scale Factor 10)", + charts: generateCharts("q", "", 1, 22), + seriesNames: [ + "datafusion:arrow", + "datafusion:lance", + "datafusion:parquet", + "datafusion:vortex-compact", + "datafusion:vortex-file-compressed", + "duckdb:duckdb", + "duckdb:parquet", + "duckdb:vortex-compact", + "duckdb:vortex-file-compressed", + ], + }, + "tpch-sf10-s3": { + title: "TPC-H S3 (Scale Factor 10)", + charts: generateCharts("q", "", 1, 22), + seriesNames: [ + "datafusion:lance", + "datafusion:parquet", + "datafusion:vortex-compact", + "datafusion:vortex-file-compressed", + "duckdb:parquet", + "duckdb:vortex-compact", + "duckdb:vortex-file-compressed", + ], + }, + "tpch-sf100-nvme": { + title: "TPC-H NVMe (Scale Factor 100)", + charts: generateCharts("q", "", 1, 22), + seriesNames: [ + "datafusion:parquet", + "duckdb:parquet", + "duckdb:vortex-file-compressed", + ], + }, + "tpch-sf100-s3": { + title: "TPC-H S3 (Scale Factor 100)", + charts: generateCharts("q", "", 1, 22), + seriesNames: [ + "datafusion:parquet", + "duckdb:parquet", + "duckdb:vortex-file-compressed", + ], + }, + tpcds: { + title: "TPC-DS", + charts: generateCharts("q", "-sf1", 1, 99), + seriesNames: [ + "datafusion:parquet", + "datafusion:vortex-compact", + "datafusion:vortex-file-compressed", + "duckdb:duckdb", + "duckdb:parquet", + "duckdb:vortex-compact", + "duckdb:vortex-file-compressed", + ], + }, + statpopgen: { + title: "Statistical and Population Genetics", + charts: generateCharts("q", "", 0, 10), + seriesNames: [ + "duckdb:parquet", + "duckdb:vortex-compact", + "duckdb:vortex-file-compressed", + ], + }, +}; + +// ============================================================================ +// Utility Functions +// ============================================================================ + +/** + * Updates the status display with a message and type. + */ +function setStatus(message, type = "loading") { + const status = document.getElementById("status"); + const text = document.getElementById("status-text"); + status.className = `status ${type}`; + text.textContent = message; + + const spinner = status.querySelector(".spinner"); + if (spinner) { + spinner.style.display = type === "loading" ? "inline-block" : "none"; + } +} + +/** + * Formats time in human-readable format with appropriate units. 
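+ *
+ * Illustrative: formatTime(0.5) -> "500μs", formatTime(12.34) -> "12.3ms",
+ * formatTime(2500) -> "2.50s".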
+ */ +function formatTime(ms) { + if (ms < 1) return `${(ms * 1000).toFixed(0)}μs`; + if (ms < 1000) return `${ms.toFixed(1)}ms`; + return `${(ms / 1000).toFixed(2)}s`; +} + +/** + * Creates a unique ID for chart elements. + */ +function makeId(groupId, chartName, suffix) { + return `${groupId}-${chartName}-${suffix}`.replace(/[^a-zA-Z0-9-]/g, "-"); +} + +// ============================================================================ +// Data Loading Functions +// ============================================================================ + +/** + * Loads and initializes the WASM module with timeout. + */ +async function loadWasmModule() { + setStatus("Loading WASM module..."); + console.log("[loadWasmModule] Starting import..."); + + const timeout = (ms) => + new Promise((_, reject) => + setTimeout(() => reject(new Error(`Timeout after ${ms}ms`)), ms), + ); + + try { + // Race import against timeout. + const wasm = await Promise.race([ + import(CONFIG.wasmModulePath), + timeout(10000), + ]); + console.log("[loadWasmModule] Import complete, initializing..."); + + // Race init against timeout. + await Promise.race([wasm.default(), timeout(10000)]); + console.log("[loadWasmModule] Initialized:", wasm.get_version()); + + return wasm; + } catch (error) { + console.error("[loadWasmModule] Failed:", error); + throw error; + } +} + +/** + * Loads benchmark summary from WASM (commits + group/chart metadata, no values). + */ +async function loadBenchmarkSummary(wasm) { + setStatus("Loading benchmark summary..."); + const json = await wasm.load_benchmark_summary(); + const summary = JSON.parse(json); + console.log( + `Loaded summary with ${summary.commits.length} commits, ${Object.keys(summary.groups).length} groups`, + ); + return summary; +} + +/** + * Loads chart data for a specific group and chart. + */ +async function loadChartData(wasm, group, chart) { + const json = await wasm.load_chart_data(group, chart); + return JSON.parse(json); +} + +// ============================================================================ +// Data Processing Functions +// ============================================================================ + +/** + * Processes chart data into chart-ready format. + */ +function processChartData(chartData, commits, groupConfig) { + const alignedSeries = chartData.aligned_series; + + const processedCommits = commits.map((commit, index) => ({ + ...commit, + id: commit.commit_id, + url: `${CONFIG.githubRepo}/commit/${commit.commit_id}`, + sortedIndex: index, + })); + + const seriesData = new Map(); + for (const name of groupConfig.seriesNames) { + const rawData = alignedSeries[name]; + if (rawData) { + const msData = rawData.map((v) => + v !== null ? { value: Number(v) / 1_000_000 } : null, + ); + seriesData.set(name, msData); + } else { + seriesData.set(name, new Array(commits.length).fill(null)); + } + } + + // Find the range of data. 
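+  // A series that only recently started reporting has leading nulls, so the
+  // chart is trimmed to the span between the first and last non-null points.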
+ let firstDataIndex = commits.length; + let lastDataIndex = -1; + for (const data of seriesData.values()) { + for (let i = 0; i < data.length; i++) { + if (data[i] !== null) { + firstDataIndex = Math.min(firstDataIndex, i); + lastDataIndex = Math.max(lastDataIndex, i); + } + } + } + + if (lastDataIndex < 0) { + firstDataIndex = 0; + lastDataIndex = commits.length - 1; + } + + const startIndex = Math.max(0, firstDataIndex); + const endIndex = lastDataIndex + 1; + const chartCommits = processedCommits.slice(startIndex, endIndex); + const slicedSeriesData = new Map(); + + for (const [name, data] of seriesData.entries()) { + slicedSeriesData.set(name, data.slice(startIndex, endIndex)); + } + + return { seriesData: slicedSeriesData, chartCommits }; +} + +/** + * Calculates summary statistics for the latest data point. + */ +function calculateSummary(seriesData) { + const latestResults = new Map(); + + for (const [seriesName, data] of seriesData.entries()) { + for (let i = data.length - 1; i >= 0; i--) { + if (data[i] !== null) { + latestResults.set(seriesName, data[i].value); + break; + } + } + } + + if (latestResults.size === 0) { + return { results: [], fastestTime: 0 }; + } + + const fastestTime = Math.min(...latestResults.values()); + const sortedResults = Array.from(latestResults.entries()).sort( + (a, b) => a[1] - b[1], + ); + + return { results: sortedResults, fastestTime }; +} + +// ============================================================================ +// Group Summary Data Storage +// ============================================================================ + +/** + * Storage for per-chart latest values. Used for calculating group summaries. + * Structure: groupId -> chartName -> Map + */ +const groupChartData = new Map(); + +/** + * Stores the latest value for each series in a chart. + */ +function storeChartData(groupId, chartName, seriesData) { + if (!groupChartData.has(groupId)) { + groupChartData.set(groupId, new Map()); + } + const groupData = groupChartData.get(groupId); + + const latestValues = new Map(); + for (const [seriesName, data] of seriesData.entries()) { + for (let i = data.length - 1; i >= 0; i--) { + if (data[i] !== null) { + latestValues.set(seriesName, data[i].value); + break; + } + } + } + groupData.set(chartName, latestValues); +} + +/** + * Calculates group-level summary for multi-chart groups (like clickbench). + * + * For clickbench scoring: + * - Score: Geometric mean of (query_time + 10ms) / (fastest_time + 10ms) across all queries + * - Total: Sum of all query times + */ +function calculateGroupSummary(groupId, groupConfig) { + const groupData = groupChartData.get(groupId); + if (!groupData || groupData.size === 0) { + return { results: [], isMultiChart: false }; + } + + const isMultiChart = groupConfig.charts.length > 1; + + if (!isMultiChart) { + // Single chart group: use simple summary (latest values). + const chartData = groupData.values().next().value; + if (!chartData || chartData.size === 0) { + return { results: [], isMultiChart: false }; + } + + const fastestTime = Math.min(...chartData.values()); + const sortedResults = Array.from(chartData.entries()) + .sort((a, b) => a[1] - b[1]) + .map(([name, time]) => ({ + name, + time, + ratio: time / fastestTime, + })); + + return { results: sortedResults, isMultiChart: false }; + } + + // Multi-chart group: calculate geometric mean score and total time. + const SHIFT_MS = 10; // Constant shift to avoid division issues with small values. 
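+  // Worked example (illustrative): if the fastest series runs a query in 90ms
+  // and another takes 190ms, that query's ratio is (190 + 10) / (90 + 10) = 2.0;
+  // ratios of 2.0 and 1.0 across two queries give a score of sqrt(2.0) ≈ 1.41.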
+  const seriesStats = new Map(); // seriesName -> { ratios: [], total: 0 }
+
+  // Initialize stats for all series.
+  for (const seriesName of groupConfig.seriesNames) {
+    seriesStats.set(seriesName, { ratios: [], total: 0 });
+  }
+
+  // Process each chart (query).
+  for (const [chartName, chartValues] of groupData.entries()) {
+    // Find fastest time for this query.
+    let fastestTime = Infinity;
+    for (const time of chartValues.values()) {
+      if (time < fastestTime) fastestTime = time;
+    }
+
+    // Calculate ratio for each series.
+    for (const seriesName of groupConfig.seriesNames) {
+      const stats = seriesStats.get(seriesName);
+      const time = chartValues.get(seriesName);
+
+      if (time !== undefined) {
+        const ratio = (time + SHIFT_MS) / (fastestTime + SHIFT_MS);
+        stats.ratios.push(ratio);
+        stats.total += time;
+      }
+    }
+  }
+
+  // Calculate geometric mean and build results.
+  const results = [];
+  for (const [seriesName, stats] of seriesStats.entries()) {
+    if (stats.ratios.length === 0) continue;
+
+    // Geometric mean = exp(mean(log(ratios))).
+    const logSum = stats.ratios.reduce((sum, r) => sum + Math.log(r), 0);
+    const geometricMean = Math.exp(logSum / stats.ratios.length);
+
+    results.push({
+      name: seriesName,
+      score: geometricMean,
+      total: stats.total,
+      queryCount: stats.ratios.length,
+    });
+  }
+
+  // Sort by score (lower is better).
+  results.sort((a, b) => a.score - b.score);
+
+  return { results, isMultiChart: true };
+}
+
+// ============================================================================
+// HTML Generation Functions
+// ============================================================================
+
+/**
+ * Creates HTML for a benchmark group container.
+ */
+function createGroupHTML(groupId, groupConfig) {
+  return `
+    <div class="benchmark-set" id="${groupId}">
+      <div class="benchmark-header">
+        <div class="title-wrapper">
+          <span class="collapse-icon">▼</span>
+          <h2 class="benchmark-title">${groupConfig.title}</h2>
+        </div>
+        <span class="benchmark-meta" id="${groupId}-status">Loading...</span>
+      </div>
+      <div class="summary-section">
+        <div class="scores-list" id="${groupId}-summary"></div>
+        <div class="scores-explanation" id="${groupId}-summary-explanation"></div>
+      </div>
+      <div class="benchmark-graphs" id="${groupId}-charts"></div>
+    </div>
+  `;
+}
+
+/**
+ * Creates HTML for a single chart within a group.
+ */
+function createChartHTML(groupId, chartName) {
+  const prefix = makeId(groupId, chartName, "");
+  return `
+    <div class="chart-section">
+      <div class="chart-container">
+        <div class="chart-header">
+          <h3 class="chart-title">${chartName}</h3>
+          <div class="chart-controls">
+            <span class="control-info-compact" id="${prefix}info">Loading...</span>
+            <div class="zoom-controls">
+              <button class="zoom-btn" id="${prefix}zoom-in-large">++</button>
+              <button class="zoom-btn" id="${prefix}zoom-in-small">+</button>
+              <button class="zoom-btn" id="${prefix}zoom-out-small">−</button>
+              <button class="zoom-btn" id="${prefix}zoom-out-large">−−</button>
+            </div>
+          </div>
+        </div>
+        <div class="chart-wrapper">
+          <canvas id="${prefix}canvas"></canvas>
+        </div>
+        <div class="x-axis-label">Commit</div>
+        <div class="timeline-scrollbar-container" id="${prefix}scrollbar-container">
+          <div id="${prefix}scrollbar-content"></div>
+        </div>
+        <div class="chartjs-tooltip" id="${prefix}tooltip"></div>
+      </div>
+    </div>
+ `; +} + +// ============================================================================ +// Rendering Functions +// ============================================================================ + +/** + * Renders the group-level summary. + */ +function renderGroupSummary(groupId, groupConfig) { + const summaryList = document.getElementById(`${groupId}-summary`); + const explanationEl = document.getElementById( + `${groupId}-summary-explanation`, + ); + if (!summaryList) return; + + const summary = calculateGroupSummary(groupId, groupConfig); + + summaryList.innerHTML = ""; + + if (summary.results.length === 0) { + summaryList.innerHTML = '
<div class="score-item">No data available</div>
';
+    if (explanationEl) explanationEl.textContent = "";
+    return;
+  }
+
+  if (summary.isMultiChart) {
+    // Multi-chart group (clickbench): show score and total.
+    summary.results.forEach((result, index) => {
+      const item = document.createElement("div");
+      item.className = "score-item";
+      item.innerHTML = `
+        <span class="score-rank">#${index + 1}</span>
+        <span class="score-series">${result.name}</span>
+        <div class="score-metrics">
+          <span class="score-runtime">${result.score.toFixed(2)}x</span>
+          <span class="score-ratio">${formatTime(result.total)}</span>
+        </div>
+      `;
+      summaryList.appendChild(item);
+    });
+
+    if (explanationEl) {
+      explanationEl.textContent =
+        "Score: geometric mean of query time ratio to fastest with 10ms constant shift | Total: sum of all query times (lower is better)";
+    }
+  } else {
+    // Single-chart group: show time and ratio.
+    summary.results.forEach((result, index) => {
+      const item = document.createElement("div");
+      item.className = "score-item";
+      item.innerHTML = `
+        <span class="score-rank">#${index + 1}</span>
+        <span class="score-series">${result.name}</span>
+        <div class="score-metrics">
+          <span class="score-runtime">${formatTime(result.time)}</span>
+          <span class="score-ratio">${result.ratio.toFixed(2)}x</span>
+        </div>
+ `; + summaryList.appendChild(item); + }); + + if (explanationEl) { + explanationEl.textContent = + "Query time | Ratio to fastest (lower is better)"; + } + } +} + +/** + * Sets up collapsible benchmark sections. + * + * Chart.js automatically detects visibility changes and resizes when charts become visible. + */ +function setupCollapsibleBenchmarks() { + document.querySelectorAll(".benchmark-header").forEach((header) => { + header.addEventListener("click", () => { + header.closest(".benchmark-set").classList.toggle("collapsed"); + }); + }); +} + +/** + * Creates Chart.js datasets from series data. + * + * @param {Map|null} seriesData - Series data map, or null for empty datasets. + * @param {Object} groupConfig - Group configuration with seriesNames. + */ +function createDatasets(seriesData, groupConfig) { + return groupConfig.seriesNames.map((name) => { + const rawData = seriesData?.get(name); + return { + label: name, + data: rawData ? rawData.map((d) => d?.value ?? null) : [], + borderColor: SERIES_COLORS[name], + backgroundColor: SERIES_COLORS[name], + borderWidth: 1.5, + borderJoinStyle: "round", + pointRadius: 2, + tension: 0, + spanGaps: true, + }; + }); +} + +// ============================================================================ +// Chart Configuration Functions +// ============================================================================ + +/** + * Creates the vertical line plugin for Chart.js. + */ +function createVerticalLinePlugin() { + return { + id: "verticalLine", + afterDatasetsDraw(chart) { + if (chart.tooltip?._active?.length) { + const activePoint = chart.tooltip._active[0]; + const ctx = chart.ctx; + const x = activePoint.element.x; + const topY = chart.scales.y.top; + const bottomY = chart.scales.y.bottom; + + ctx.save(); + ctx.beginPath(); + ctx.moveTo(x, topY); + ctx.lineTo(x, bottomY); + ctx.lineWidth = 2; + ctx.strokeStyle = "rgba(89, 113, 253, 0.5)"; + ctx.stroke(); + ctx.restore(); + } + }, + }; +} + +/** + * Creates tooltip configuration for Chart.js. + */ +function createTooltipConfig(chartCommits, tooltipElementId) { + return { + enabled: false, + external: (context) => renderExternalTooltip(context, tooltipElementId), + callbacks: { + footer: (tooltipItems) => getTooltipFooter(tooltipItems, chartCommits), + }, + }; +} + +/** + * Gets tooltip footer content with commit details. + */ +function getTooltipFooter(tooltipItems, chartCommits) { + if (tooltipItems.length === 0) return []; + const commit = chartCommits[tooltipItems[0].dataIndex]; + if (!commit) return []; + + const date = new Date(commit.timestamp * 1000).toLocaleDateString(); + + return [ + commit.message.split("\n")[0].slice(0, 60), + `${commit.author.name} - ${date}`, + ]; +} + +/** + * Renders the external tooltip element. + */ +function renderExternalTooltip(context, tooltipElementId) { + const tooltipEl = document.getElementById(tooltipElementId); + if (!tooltipEl) return; + + const tooltipModel = context.tooltip; + + if (tooltipModel.opacity === 0) { + tooltipEl.classList.remove("active"); + return; + } + + if (tooltipModel.body) { + tooltipEl.innerHTML = buildTooltipHTML(tooltipModel); + positionTooltip(tooltipEl, context, tooltipModel); + } +} + +/** + * Builds HTML content for the tooltip. + */ +function buildTooltipHTML(tooltipModel) { + const titleLines = tooltipModel.title || []; + const footerLines = tooltipModel.footer || []; + + let html = '
<div class="chartjs-tooltip-body">';
+
+  titleLines.forEach((title) => {
+    html += `<div>${title}</div>`;
+  });
+
+  const sortedItems = [...tooltipModel.dataPoints].sort(
+    (a, b) => b.parsed.y - a.parsed.y,
+  );
+  sortedItems.forEach((item) => {
+    const color = item.dataset.borderColor;
+    const value = item.formattedValue;
+    const label = item.dataset.label;
+    html += `
+      <div class="chartjs-tooltip-item">
+        <div class="chartjs-tooltip-color" style="background: ${color}"></div>
+        <span>${label}: ${value}ms</span>
+      </div>
+    `;
+  });
+
+  if (footerLines.length > 0) {
+    html += '<div class="chartjs-tooltip-footer">' + footerLines.join("<br>") + "</div>";
+  }
+
+  html += "</div>
"; + return html; +} + +/** + * Positions the tooltip below the chart. + */ +function positionTooltip(tooltipEl, context, tooltipModel) { + const canvas = context.chart.canvas; + const canvasRect = canvas.getBoundingClientRect(); + + tooltipEl.classList.add("active"); + tooltipEl.style.left = + canvasRect.left + window.pageXOffset + tooltipModel.caretX + "px"; + tooltipEl.style.top = canvasRect.bottom + window.pageYOffset + 10 + "px"; + tooltipEl.style.transform = "translateX(-50%)"; +} + +/** + * Creates chart options configuration. + */ +function createChartOptions( + chartCommits, + tooltipElementId, + groupId, + groupConfig, +) { + const legendConfig = { + position: "top", + }; + + // Add group-linked legend click handler if groupId and groupConfig are provided. + if (groupId && groupConfig) { + legendConfig.onClick = createGroupLegendClickHandler(groupId, groupConfig); + } + + return { + responsive: true, + maintainAspectRatio: false, + animation: false, + layout: { + padding: { left: 0, right: 0, top: 0, bottom: 0 }, + }, + interaction: { + intersect: false, + mode: "index", + }, + scales: { + x: { + title: { display: false }, + ticks: { + maxRotation: 45, + minRotation: 45, + autoSkipPadding: 10, + }, + min: Math.max(0, chartCommits.length - CONFIG.defaultVisibleCommits), + }, + y: { + title: { + display: true, + text: CONFIG.yAxisLabel, + }, + beginAtZero: true, + }, + }, + plugins: { + verticalLine: {}, + legend: legendConfig, + tooltip: createTooltipConfig(chartCommits, tooltipElementId), + }, + onClick: (event, elements) => handleChartClick(elements, chartCommits), + }; +} + +/** + * Handles click events on chart data points. + */ +function handleChartClick(elements, chartCommits) { + if (elements.length > 0) { + const index = elements[0].index; + const commit = chartCommits[index]; + if (commit?.url) { + window.open(commit.url, "_blank"); + } + } +} + +/** + * Creates a Chart.js instance. + */ +function createChartInstance( + canvasId, + chartCommits, + seriesData, + groupId, + groupConfig, + tooltipElementId, +) { + const canvas = document.getElementById(canvasId); + if (!canvas) return null; + + const ctx = canvas.getContext("2d"); + const datasets = createDatasets(seriesData, groupConfig); + + return new Chart(ctx, { + type: "line", + data: { + labels: chartCommits.map((c) => c.id.slice(0, 7)), + datasets: datasets, + }, + options: createChartOptions( + chartCommits, + tooltipElementId, + groupId, + groupConfig, + ), + }); +} + +// ============================================================================ +// Timeline Control Functions +// ============================================================================ + +/** + * Creates chart context for timeline state management. + */ +function createChartContext(totalCommits) { + return { + totalCommits, + minWindowSize: CONFIG.minWindowSize, + maxWindowSize: totalCommits, + defaultWindowSize: CONFIG.defaultVisibleCommits, + currentWindowSize: Math.min(CONFIG.defaultVisibleCommits, totalCommits), + currentPosition: totalCommits, + }; +} + +/** + * Updates scrollbar dimensions to match current window size. + */ +function updateScrollbarDimensions(elements, chartContext) { + const containerWidth = elements.scrollbarContainer.clientWidth; + const ratio = chartContext.totalCommits / chartContext.currentWindowSize; + const contentWidth = Math.max(containerWidth * ratio, containerWidth * 1.01); + elements.scrollbarContent.style.width = `${contentWidth}px`; +} + +/** + * Updates chart view and UI to reflect current state. 
+ */ +function updateChartView( + elements, + chartContext, + chartInstance, + updateScrollbar, +) { + const windowSize = chartContext.currentWindowSize; + const position = chartContext.currentPosition; + + const endIndex = Math.min(position, chartContext.totalCommits); + const startIndex = Math.max(0, endIndex - windowSize); + + chartInstance.options.scales.x.min = startIndex; + chartInstance.options.scales.x.max = endIndex - 1; + chartInstance.update("none"); + + elements.controlInfoText.textContent = `Showing commits ${startIndex + 1}-${endIndex} of ${chartContext.totalCommits} (${windowSize} visible)`; + + if (updateScrollbar) { + updateScrollbarDimensions(elements, chartContext); + const scrollPercentage = + (endIndex - windowSize) / (chartContext.totalCommits - windowSize); + elements.scrollbarContainer.scrollLeft = + scrollPercentage * + (elements.scrollbarContent.clientWidth - + elements.scrollbarContainer.clientWidth); + } +} + +/** + * Sets up scrollbar event handler. + */ +function setupScrollbarHandler(elements, chartContext, chartInstance) { + elements.scrollbarContainer.addEventListener("scroll", () => { + const scrollLeft = elements.scrollbarContainer.scrollLeft; + const maxScroll = + elements.scrollbarContent.clientWidth - + elements.scrollbarContainer.clientWidth; + const scrollPercentage = maxScroll > 0 ? scrollLeft / maxScroll : 0; + + const windowSize = chartContext.currentWindowSize; + const newPosition = Math.round( + windowSize + scrollPercentage * (chartContext.totalCommits - windowSize), + ); + chartContext.currentPosition = Math.min( + chartContext.totalCommits, + Math.max(windowSize, newPosition), + ); + + updateChartView(elements, chartContext, chartInstance, false); + }); +} + +/** + * Sets up zoom button click handlers. + */ +function setupZoomButtons(elements, chartContext, chartInstance) { + const zoom = (step, direction) => { + const currentWindowSize = chartContext.currentWindowSize; + + let newWindowSize; + if (direction > 0) { + newWindowSize = Math.ceil((currentWindowSize + 1) / step) * step; + } else { + newWindowSize = Math.floor((currentWindowSize - 1) / step) * step; + } + newWindowSize = Math.max( + chartContext.minWindowSize, + Math.min(chartContext.maxWindowSize, newWindowSize), + ); + + const currentStart = chartContext.currentPosition - currentWindowSize; + const currentCenter = currentStart + currentWindowSize / 2; + chartContext.currentWindowSize = newWindowSize; + chartContext.currentPosition = Math.min( + chartContext.totalCommits, + Math.max(newWindowSize, Math.round(currentCenter + newWindowSize / 2)), + ); + + updateChartView(elements, chartContext, chartInstance, true); + }; + + elements.zoomInSmallBtn?.addEventListener("click", () => zoom(50, -1)); + elements.zoomInLargeBtn?.addEventListener("click", () => zoom(500, -1)); + elements.zoomOutSmallBtn?.addEventListener("click", () => zoom(50, 1)); + elements.zoomOutLargeBtn?.addEventListener("click", () => zoom(500, 1)); +} + +/** + * Sets up mouse wheel pan handler. 
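+ *
+ * Scrolling pans by roughly 10% of the visible window (e.g. 5 commits for a
+ * 50-commit window, never less than 1); scrolling down moves toward newer
+ * commits.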
+ */ +function setupWheelPanHandler(elements, chartContext, chartInstance) { + elements.chartCanvas.addEventListener("wheel", (e) => { + e.preventDefault(); + + const delta = Math.sign(e.deltaY); + const panAmount = Math.max( + 1, + Math.round(chartContext.currentWindowSize * 0.1), + ); + + chartContext.currentPosition = Math.min( + chartContext.totalCommits, + Math.max( + chartContext.currentWindowSize, + chartContext.currentPosition + delta * panAmount, + ), + ); + + updateChartView(elements, chartContext, chartInstance, true); + }); +} + +/** + * Initializes timeline controls for a chart. + */ +function initializeTimelineControls(chartInstance, chartCommits, prefix) { + const elements = { + scrollbarContainer: document.getElementById(`${prefix}scrollbar-container`), + scrollbarContent: document.getElementById(`${prefix}scrollbar-content`), + controlInfoText: document.getElementById(`${prefix}info`), + chartCanvas: document.getElementById(`${prefix}canvas`), + zoomInSmallBtn: document.getElementById(`${prefix}zoom-in-small`), + zoomInLargeBtn: document.getElementById(`${prefix}zoom-in-large`), + zoomOutSmallBtn: document.getElementById(`${prefix}zoom-out-small`), + zoomOutLargeBtn: document.getElementById(`${prefix}zoom-out-large`), + }; + + const chartContext = createChartContext(chartCommits.length); + + if (chartContext.totalCommits === 0) { + if (elements.scrollbarContainer) + elements.scrollbarContainer.style.display = "none"; + if (elements.controlInfoText) + elements.controlInfoText.textContent = "No data available"; + return; + } + + if (chartContext.totalCommits === 1) { + if (elements.scrollbarContainer) + elements.scrollbarContainer.style.display = "none"; + chartContext.maxWindowSize = 1; + } + + setupScrollbarHandler(elements, chartContext, chartInstance); + setupZoomButtons(elements, chartContext, chartInstance); + setupWheelPanHandler(elements, chartContext, chartInstance); + + updateChartView(elements, chartContext, chartInstance, true); +} + +// ============================================================================ +// Chart Instance Storage +// ============================================================================ + +/** + * Global storage for chart instances. Key is the chart prefix (groupId-chartName-). + */ +const chartInstances = new Map(); + +/** + * Tracks hidden series per group. When a legend item is clicked, all charts in the group update. + * Structure: groupId -> Set + */ +const hiddenSeries = new Map(); + +/** + * Creates a legend click handler that syncs visibility across all charts in a group. + */ +function createGroupLegendClickHandler(groupId, groupConfig) { + return function (e, legendItem, legend) { + const seriesName = legendItem.text; + const hidden = hiddenSeries.get(groupId) || new Set(); + + // Toggle visibility. + if (hidden.has(seriesName)) { + hidden.delete(seriesName); + } else { + hidden.add(seriesName); + } + hiddenSeries.set(groupId, hidden); + + // Update all charts in this group (no animation for performance with many charts). 
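+    // The dataset index is resolved by name because every chart in the group
+    // builds its datasets in the same seriesNames order (see createDatasets).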
+ for (const [prefix, chart] of chartInstances.entries()) { + if (prefix.startsWith(groupId + "-")) { + const datasetIndex = groupConfig.seriesNames.indexOf(seriesName); + if (datasetIndex >= 0) { + chart.setDatasetVisibility(datasetIndex, !hidden.has(seriesName)); + chart.update("none"); + } + } + } + }; +} + +// ============================================================================ +// Main Rendering Functions +// ============================================================================ + +/** + * Creates an empty Chart.js instance (structure only, no data). + * This allows the chart to be visible immediately while data loads. + */ +function createEmptyChart(groupId, chartName, groupConfig) { + const prefix = makeId(groupId, chartName, ""); + const canvas = document.getElementById(`${prefix}canvas`); + if (!canvas) return null; + + const ctx = canvas.getContext("2d"); + const datasets = createDatasets(null, groupConfig); + + const chart = new Chart(ctx, { + type: "line", + data: { + labels: [], + datasets: datasets, + }, + options: { + responsive: true, + maintainAspectRatio: false, + animation: false, + scales: { + x: { display: true }, + y: { + display: true, + title: { display: true, text: CONFIG.yAxisLabel }, + beginAtZero: true, + }, + }, + plugins: { + legend: { position: "top" }, + }, + }, + }); + + chartInstances.set(prefix, chart); + return chart; +} + +/** + * Updates an existing chart with data. + */ +async function renderChart(wasm, groupId, chartName, groupConfig, commits) { + const prefix = makeId(groupId, chartName, ""); + + try { + const chartData = await loadChartData(wasm, groupId, chartName); + const { seriesData, chartCommits } = processChartData( + chartData, + commits, + groupConfig, + ); + + // Store chart data for group summary calculation. + storeChartData(groupId, chartName, seriesData); + + // Get the existing chart instance. + let chartInstance = chartInstances.get(prefix); + + if (chartInstance) { + // Update existing chart with new data. + chartInstance.data.labels = chartCommits.map((c) => c.id.slice(0, 7)); + chartInstance.data.datasets = createDatasets(seriesData, groupConfig); + + // Update options for tooltips, click handling, and linked legend. + chartInstance.options = createChartOptions( + chartCommits, + `${prefix}tooltip`, + groupId, + groupConfig, + ); + + chartInstance.update("none"); + } else { + // Fallback: create new chart if empty one wasn't created. + chartInstance = createChartInstance( + `${prefix}canvas`, + chartCommits, + seriesData, + groupId, + groupConfig, + `${prefix}tooltip`, + ); + if (chartInstance) { + chartInstances.set(prefix, chartInstance); + } + } + + if (chartInstance) { + initializeTimelineControls(chartInstance, chartCommits, prefix); + } + + return true; + } catch (error) { + console.error(`Failed to render chart ${groupId}/${chartName}:`, error); + const infoEl = document.getElementById(`${prefix}info`); + if (infoEl) infoEl.textContent = `Error: ${error.message}`; + return false; + } +} + +/** + * Creates chart placeholders for a group (immediate, no data loading). + */ +function createChartPlaceholders(groupId, groupConfig) { + const chartsContainer = document.getElementById(`${groupId}-charts`); + if (!chartsContainer) return; + + chartsContainer.innerHTML = groupConfig.charts + .map((chartName) => createChartHTML(groupId, chartName)) + .join(""); +} + +/** + * Creates empty Chart.js instances for all charts in a group. + * This makes charts visible immediately (with legend but no data). 
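+ * Each instance is registered in chartInstances under its prefix so that
+ * renderChart can later swap real data into the existing canvas instead of
+ * recreating it.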
+ */ +function createEmptyCharts(groupId, groupConfig) { + for (const chartName of groupConfig.charts) { + createEmptyChart(groupId, chartName, groupConfig); + } +} + +/** + * Waits one frame (16ms) for the browser to paint. + * + * This is used to yield control back to the browser between chart renders, so each chart appears + * progressively rather than all at once. Skips the delay when the tab is hidden to avoid browser + * throttling (browsers throttle setTimeout to 1s minimum in background tabs). + */ +function waitForPaint() { + if (document.hidden) { + return Promise.resolve(); + } + return new Promise((resolve) => setTimeout(resolve, 16)); +} + +// ============================================================================ +// Main Orchestration +// ============================================================================ + +/** + * Initializes the UI with group containers and empty charts. + * + * This happens before loading any data so users see the UI instantly. + * + * @returns {Array<[string, Object]>} Array of [groupId, groupConfig] entries. + */ +function initializeUI() { + Chart.register(createVerticalLinePlugin()); + + const container = document.getElementById("benchmarks-container"); + if (!container) { + throw new Error("Benchmarks container not found"); + } + + const allGroups = Object.entries(BENCHMARK_GROUPS); + + // Create group containers. + container.innerHTML = allGroups + .map(([groupId, groupConfig]) => createGroupHTML(groupId, groupConfig)) + .join(""); + + // Create chart placeholders and empty Chart.js instances. + for (const [groupId, groupConfig] of allGroups) { + createChartPlaceholders(groupId, groupConfig); + createEmptyCharts(groupId, groupConfig); + } + + setupCollapsibleBenchmarks(); + + return allGroups; +} + +/** + * Renders all charts with data from the server. + * + * @param {Object} wasm - WASM module. + * @param {Array<[string, Object]>} activeGroups - Groups to render. + * @param {Object} summary - Benchmark summary with commits. + * @returns {Promise} Number of successfully loaded charts. + */ +async function renderAllCharts(wasm, activeGroups, summary) { + let loadedCharts = 0; + + for (const [groupId, groupConfig] of activeGroups) { + const statusEl = document.getElementById(`${groupId}-status`); + const groupTotal = groupConfig.charts.length; + let groupLoaded = 0; + + for (const chartName of groupConfig.charts) { + const success = await renderChart( + wasm, + groupId, + chartName, + groupConfig, + summary.commits, + ); + if (success) { + groupLoaded++; + loadedCharts++; + } + + if (statusEl) { + statusEl.textContent = `${groupLoaded}/${groupTotal} charts`; + } + + await waitForPaint(); + } + + renderGroupSummary(groupId, groupConfig); + console.log(`Rendered ${groupLoaded}/${groupTotal} charts in ${groupId}`); + } + + return loadedCharts; +} + +/** + * Main function that orchestrates the benchmark page initialization. + */ +async function main() { + try { + // Initialize UI (instant, no data loading). + const allGroups = initializeUI(); + + const totalCharts = allGroups.reduce((n, [_, c]) => n + c.charts.length, 0); + setStatus(`Loading ${totalCharts} charts...`, "loading"); + + await waitForPaint(); + + // Load data from server. + const wasm = await loadWasmModule(); + const summary = await loadBenchmarkSummary(wasm); + + console.log("Available groups from server:", Object.keys(summary.groups)); + console.log("Configured groups:", Object.keys(BENCHMARK_GROUPS)); + + // Filter to groups that exist on server. 
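+    // A configured group missing from the server data (e.g. no results
+    // uploaded yet) is logged and skipped rather than rendered empty.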
+ const activeGroups = allGroups.filter(([groupId]) => { + const exists = !!summary.groups[groupId]; + if (!exists) { + console.warn(`Group '${groupId}' not found in server data, skipping`); + } + return exists; + }); + + // Render charts with data. + const loadedCharts = await renderAllCharts(wasm, activeGroups, summary); + + setStatus( + `Loaded ${loadedCharts} charts across ${activeGroups.length} groups`, + "success", + ); + } catch (error) { + console.error("Error:", error); + setStatus(`Error: ${error.message}`, "error"); + } +} + +main(); diff --git a/wasm-test/.gitignore b/wasm-test/.gitignore new file mode 100644 index 00000000000..bcc4bfec848 --- /dev/null +++ b/wasm-test/.gitignore @@ -0,0 +1,6 @@ +# WASM build artifacts. +/target +/pkg + +# Cargo lock file for binary projects. +Cargo.lock diff --git a/wasm-test/Cargo.lock b/wasm-test/Cargo.lock index e3ec17002e9..adf49aaaff8 100644 --- a/wasm-test/Cargo.lock +++ b/wasm-test/Cargo.lock @@ -462,18 +462,6 @@ dependencies = [ "crossbeam-utils", ] -[[package]] -name = "console" -version = "0.15.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "054ccb5b10f9f2cbf51eb355ca1d05c2d279ce1804688d0db74b4733a5aeafd8" -dependencies = [ - "encode_unicode", - "libc", - "once_cell", - "windows-sys 0.59.0", -] - [[package]] name = "const-random" version = "0.1.18" @@ -515,16 +503,6 @@ dependencies = [ "crossbeam-utils", ] -[[package]] -name = "crossbeam-deque" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" -dependencies = [ - "crossbeam-epoch", - "crossbeam-utils", -] - [[package]] name = "crossbeam-epoch" version = "0.9.18" @@ -534,15 +512,6 @@ dependencies = [ "crossbeam-utils", ] -[[package]] -name = "crossbeam-queue" -version = "0.3.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f58bbc28f91df819d0aa2a2c00cd19754769c2fad90579b3592b1c9ba7a3115" -dependencies = [ - "crossbeam-utils", -] - [[package]] name = "crossbeam-utils" version = "0.8.21" @@ -555,6 +524,16 @@ version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" +[[package]] +name = "cudarc" +version = "0.18.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef0cfc5e22a6b6f7d04ee45b0151232ca236ede8ca3534210fd4072bdead0d60" +dependencies = [ + "half", + "libloading", +] + [[package]] name = "dashmap" version = "6.1.0" @@ -586,24 +565,12 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d3a5ccdfd6c5e7e2fea9c5cf256f2a08216047fab19c621c3da64e9ae4a1462d" -[[package]] -name = "dyn-hash" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15401da73a9ed8c80e3b2d4dc05fe10e7b72d7243b9f614e516a44fa99986e88" - [[package]] name = "either" version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" -[[package]] -name = "encode_unicode" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" - [[package]] name = "enum-iterator" version = "2.3.0" @@ -624,6 +591,26 @@ dependencies = [ "syn 2.0.106", ] +[[package]] +name = "enum-map" +version = "2.7.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "6866f3bfdf8207509a033af1a75a7b08abda06bbaaeae6669323fd5a097df2e9" +dependencies = [ + "enum-map-derive", +] + +[[package]] +name = "enum-map-derive" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f282cfdfe92516eb26c2af8589c274c7c17681f5ecc03c18255fe741c6aa64eb" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.106", +] + [[package]] name = "equivalent" version = "1.0.2" @@ -702,9 +689,9 @@ dependencies = [ [[package]] name = "fastlanes" -version = "0.4.0" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38b79094ac813a000b92adff4231a6d291d063dc6312b3b88c154a5ea0a195d0" +checksum = "414cb755aee48ff7b0907995d2949c68c8c17900970076dff6a808e18e592d71" dependencies = [ "arrayref", "const_for", @@ -752,9 +739,9 @@ dependencies = [ [[package]] name = "fsst-rs" -version = "0.5.4" +version = "0.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab195789b87bb56fce91b3617e44d36dbba68a4c8d736ef48767187932a5161b" +checksum = "561f2458a3407836ab8f1acc9113b8cda91b9d6378ba8dad13b2fe1a1d3af5ce" [[package]] name = "funty" @@ -904,6 +891,8 @@ dependencies = [ "cfg-if", "crunchy", "num-traits", + "rand", + "rand_distr", "zerocopy", ] @@ -1076,17 +1065,6 @@ dependencies = [ "icu_properties", ] -[[package]] -name = "insta" -version = "1.43.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46fdb647ebde000f43b5b53f773c30cf9b0cb4300453208713fa38b2c70935a0" -dependencies = [ - "console", - "once_cell", - "similar", -] - [[package]] name = "inventory" version = "0.3.21" @@ -1159,9 +1137,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.81" +version = "0.3.82" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec48937a97411dcb524a265206ccd4c90bb711fca92b2792c407f268825b9305" +checksum = "b011eec8cc36da2aab2d5cff675ec18454fad408585853910a202391cf9f8e65" dependencies = [ "once_cell", "wasm-bindgen", @@ -1208,6 +1186,16 @@ version = "0.2.177" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" +[[package]] +name = "libloading" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55" +dependencies = [ + "cfg-if", + "windows-link", +] + [[package]] name = "libm" version = "0.2.15" @@ -1263,6 +1251,16 @@ version = "2.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" +[[package]] +name = "minicov" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f27fe9f1cc3c22e1687f9446c2083c4c5fc7f0bcf1c7a86bdbded14985895b4b" +dependencies = [ + "cc", + "walkdir", +] + [[package]] name = "miniz_oxide" version = "0.8.9" @@ -1543,26 +1541,6 @@ version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" -[[package]] -name = "pin-project" -version = "1.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "677f1add503faace112b9f1373e43e9e054bfdd22ff1a63c1bc485eaec6a6a8a" -dependencies = [ - "pin-project-internal", -] - -[[package]] -name = "pin-project-internal" -version = "1.1.10" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.106", -] - [[package]] name = "pin-project-lite" version = "0.2.16" @@ -1736,6 +1714,16 @@ dependencies = [ "getrandom 0.3.3", ] +[[package]] +name = "rand_distr" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a8615d50dcf34fa31f7ab52692afec947c4dd0ab803cc87cb3b0b4570ff7463" +dependencies = [ + "num-traits", + "rand", +] + [[package]] name = "rand_xoshiro" version = "0.6.0" @@ -1823,6 +1811,15 @@ version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + [[package]] name = "scopeguard" version = "1.2.0" @@ -1902,12 +1899,6 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" -[[package]] -name = "similar" -version = "2.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbbb5d9659141646ae647b42fe094daf6c6192d1620870b449d9557f748b2daa" - [[package]] name = "sketches-ddsketch" version = "0.3.0" @@ -2122,6 +2113,8 @@ checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" name = "vortex" version = "0.1.0" dependencies = [ + "fastlanes", + "rand", "vortex-alp", "vortex-array", "vortex-btrblocks", @@ -2129,10 +2122,8 @@ dependencies = [ "vortex-bytebool", "vortex-datetime-parts", "vortex-decimal-byte-parts", - "vortex-dict", "vortex-dtype", "vortex-error", - "vortex-expr", "vortex-fastlanes", "vortex-flatbuffers", "vortex-fsst", @@ -2147,6 +2138,7 @@ dependencies = [ "vortex-scalar", "vortex-scan", "vortex-sequence", + "vortex-session", "vortex-sparse", "vortex-utils", "vortex-zigzag", @@ -2187,11 +2179,11 @@ dependencies = [ "bitvec", "cfg-if", "enum-iterator", + "enum-map", "flatbuffers", "futures", "getrandom 0.3.3", "humansize", - "insta", "inventory", "itertools", "log", @@ -2200,7 +2192,7 @@ dependencies = [ "num_enum", "parking_lot", "paste", - "pin-project", + "pin-project-lite", "prost", "rand", "rustc-hash", @@ -2208,20 +2200,24 @@ dependencies = [ "static_assertions", "termtree", "vortex-buffer", + "vortex-compute", "vortex-dtype", "vortex-error", "vortex-flatbuffers", + "vortex-io", "vortex-mask", "vortex-metrics", + "vortex-proto", "vortex-scalar", + "vortex-session", "vortex-utils", + "vortex-vector", ] [[package]] name = "vortex-btrblocks" version = "0.1.0" dependencies = [ - "arrow-buffer", "getrandom 0.3.3", "itertools", "log", @@ -2233,7 +2229,6 @@ dependencies = [ "vortex-buffer", "vortex-datetime-parts", "vortex-decimal-byte-parts", - "vortex-dict", "vortex-dtype", "vortex-error", "vortex-fastlanes", @@ -2252,7 +2247,9 @@ name = "vortex-buffer" version = "0.1.0" dependencies = [ "arrow-buffer", + "bitvec", "bytes", + "cudarc", "itertools", "num-traits", "simdutf8", @@ -2263,7 +2260,6 @@ dependencies = [ name = "vortex-bytebool" version = "0.1.0" dependencies = [ - "arrow-buffer", "num-traits", "vortex-array", "vortex-buffer", @@ -2274,21 +2270,25 @@ dependencies = [ ] [[package]] -name = "vortex-datetime-parts" +name = "vortex-compute" 
version = "0.1.0" dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-schema", + "log", + "multiversion", "num-traits", - "prost", - "vortex-array", + "paste", "vortex-buffer", "vortex-dtype", "vortex-error", "vortex-mask", - "vortex-scalar", + "vortex-vector", ] [[package]] -name = "vortex-decimal-byte-parts" +name = "vortex-datetime-parts" version = "0.1.0" dependencies = [ "num-traits", @@ -2302,27 +2302,24 @@ dependencies = [ ] [[package]] -name = "vortex-dict" +name = "vortex-decimal-byte-parts" version = "0.1.0" dependencies = [ - "arrow-array", - "arrow-buffer", "num-traits", "prost", - "rustc-hash", "vortex-array", "vortex-buffer", "vortex-dtype", "vortex-error", "vortex-mask", "vortex-scalar", - "vortex-utils", ] [[package]] name = "vortex-dtype" version = "0.1.0" dependencies = [ + "arrow-buffer", "arrow-schema", "flatbuffers", "half", @@ -2352,29 +2349,6 @@ dependencies = [ "url", ] -[[package]] -name = "vortex-expr" -version = "0.1.0" -dependencies = [ - "arcref", - "async-trait", - "dyn-hash", - "futures", - "itertools", - "parking_lot", - "paste", - "prost", - "termtree", - "vortex-array", - "vortex-buffer", - "vortex-dtype", - "vortex-error", - "vortex-mask", - "vortex-proto", - "vortex-scalar", - "vortex-utils", -] - [[package]] name = "vortex-fastlanes" version = "0.1.0" @@ -2387,13 +2361,16 @@ dependencies = [ "log", "num-traits", "prost", + "static_assertions", "vortex-array", "vortex-buffer", + "vortex-compute", "vortex-dtype", "vortex-error", "vortex-mask", "vortex-scalar", "vortex-utils", + "vortex-vector", ] [[package]] @@ -2417,6 +2394,7 @@ dependencies = [ "vortex-error", "vortex-mask", "vortex-scalar", + "vortex-vector", ] [[package]] @@ -2424,6 +2402,7 @@ name = "vortex-io" version = "0.1.0" dependencies = [ "async-compat", + "async-fs", "async-stream", "async-trait", "bytes", @@ -2442,6 +2421,7 @@ dependencies = [ "vortex-buffer", "vortex-error", "vortex-metrics", + "vortex-session", "wasm-bindgen-futures", ] @@ -2471,7 +2451,6 @@ dependencies = [ "async-trait", "flatbuffers", "futures", - "getrandom 0.3.3", "itertools", "kanal", "log", @@ -2484,16 +2463,14 @@ dependencies = [ "pin-project-lite", "prost", "rustc-hash", - "tracing", + "termtree", "uuid", "vortex-array", "vortex-btrblocks", "vortex-buffer", "vortex-decimal-byte-parts", - "vortex-dict", "vortex-dtype", "vortex-error", - "vortex-expr", "vortex-flatbuffers", "vortex-io", "vortex-mask", @@ -2501,6 +2478,7 @@ dependencies = [ "vortex-pco", "vortex-scalar", "vortex-sequence", + "vortex-session", "vortex-utils", ] @@ -2508,8 +2486,8 @@ dependencies = [ name = "vortex-mask" version = "0.1.0" dependencies = [ - "arrow-buffer", "itertools", + "vortex-buffer", "vortex-error", ] @@ -2519,6 +2497,7 @@ version = "0.1.0" dependencies = [ "getrandom 0.3.3", "parking_lot", + "vortex-session", "witchcraft-metrics", ] @@ -2526,14 +2505,17 @@ dependencies = [ name = "vortex-pco" version = "0.1.0" dependencies = [ + "itertools", "pco", "prost", "vortex-array", "vortex-buffer", + "vortex-compute", "vortex-dtype", "vortex-error", "vortex-mask", "vortex-scalar", + "vortex-vector", ] [[package]] @@ -2566,7 +2548,6 @@ name = "vortex-scalar" version = "0.1.0" dependencies = [ "arrow-array", - "arrow-buffer", "bytes", "itertools", "num-traits", @@ -2575,8 +2556,10 @@ dependencies = [ "vortex-buffer", "vortex-dtype", "vortex-error", + "vortex-mask", "vortex-proto", "vortex-utils", + "vortex-vector", ] [[package]] @@ -2586,8 +2569,6 @@ dependencies = [ "arrow-array", "arrow-schema", "bit-vec", - "crossbeam-deque", - 
"crossbeam-queue", "futures", "itertools", "log", @@ -2597,11 +2578,11 @@ dependencies = [ "vortex-buffer", "vortex-dtype", "vortex-error", - "vortex-expr", "vortex-io", "vortex-layout", "vortex-mask", "vortex-metrics", + "vortex-session", ] [[package]] @@ -2618,6 +2599,16 @@ dependencies = [ "vortex-mask", "vortex-proto", "vortex-scalar", + "vortex-vector", +] + +[[package]] +name = "vortex-session" +version = "0.1.0" +dependencies = [ + "dashmap", + "vortex-error", + "vortex-utils", ] [[package]] @@ -2633,6 +2624,7 @@ dependencies = [ "vortex-error", "vortex-mask", "vortex-scalar", + "vortex-vector", ] [[package]] @@ -2643,10 +2635,23 @@ dependencies = [ "hashbrown 0.16.0", ] +[[package]] +name = "vortex-vector" +version = "0.1.0" +dependencies = [ + "paste", + "static_assertions", + "vortex-buffer", + "vortex-dtype", + "vortex-error", + "vortex-mask", +] + [[package]] name = "vortex-zigzag" version = "0.1.0" dependencies = [ + "itertools", "vortex-array", "vortex-buffer", "vortex-dtype", @@ -2656,6 +2661,16 @@ dependencies = [ "zigzag", ] +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + [[package]] name = "wasi" version = "0.11.1+wasi-snapshot-preview1" @@ -2682,9 +2697,9 @@ dependencies = [ [[package]] name = "wasm-bindgen" -version = "0.2.104" +version = "0.2.105" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1da10c01ae9f1ae40cbfac0bac3b1e724b320abfcf52229f80b547c0d250e2d" +checksum = "da95793dfc411fbbd93f5be7715b0578ec61fe87cb1a42b12eb625caa5c5ea60" dependencies = [ "cfg-if", "once_cell", @@ -2693,25 +2708,11 @@ dependencies = [ "wasm-bindgen-shared", ] -[[package]] -name = "wasm-bindgen-backend" -version = "0.2.104" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "671c9a5a66f49d8a47345ab942e2cb93c7d1d0339065d4f8139c486121b43b19" -dependencies = [ - "bumpalo", - "log", - "proc-macro2", - "quote", - "syn 2.0.106", - "wasm-bindgen-shared", -] - [[package]] name = "wasm-bindgen-futures" -version = "0.4.54" +version = "0.4.55" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e038d41e478cc73bae0ff9b36c60cff1c98b8f38f8d7e8061e79ee63608ac5c" +checksum = "551f88106c6d5e7ccc7cd9a16f312dd3b5d36ea8b4954304657d5dfba115d4a0" dependencies = [ "cfg-if", "js-sys", @@ -2722,9 +2723,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.104" +version = "0.2.105" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ca60477e4c59f5f2986c50191cd972e3a50d8a95603bc9434501cf156a9a119" +checksum = "04264334509e04a7bf8690f2384ef5265f05143a4bff3889ab7a3269adab59c2" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -2732,43 +2733,79 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.104" +version = "0.2.105" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f07d2f20d4da7b26400c9f4a0511e6e0345b040694e8a75bd41d578fa4421d7" +checksum = "420bc339d9f322e562942d52e115d57e950d12d88983a14c79b86859ee6c7ebc" dependencies = [ + "bumpalo", "proc-macro2", "quote", "syn 2.0.106", - "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.104" +version = "0.2.105" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"bad67dc8b2a1a6e5448428adec4c3e84c43e561d8c9ee8a9e5aabeb193ec41d1" +checksum = "76f218a38c84bcb33c25ec7059b07847d465ce0e0a76b995e134a45adcb6af76" dependencies = [ "unicode-ident", ] +[[package]] +name = "wasm-bindgen-test" +version = "0.3.55" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfc379bfb624eb59050b509c13e77b4eb53150c350db69628141abce842f2373" +dependencies = [ + "js-sys", + "minicov", + "wasm-bindgen", + "wasm-bindgen-futures", + "wasm-bindgen-test-macro", +] + +[[package]] +name = "wasm-bindgen-test-macro" +version = "0.3.55" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "085b2df989e1e6f9620c1311df6c996e83fe16f57792b272ce1e024ac16a90f1" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.106", +] + [[package]] name = "wasm-test" version = "0.0.1" dependencies = [ "vortex", + "wasm-bindgen", + "wasm-bindgen-test", + "web-sys", ] [[package]] name = "web-sys" -version = "0.3.81" +version = "0.3.82" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9367c417a924a74cae129e6a2ae3b47fabb1f8995595ab474029da749a8be120" +checksum = "3a1f95c0d03a47f4ae1f7a64643a6bb97465d9b740f0fa8f90ea33915c99a9a1" dependencies = [ "js-sys", "wasm-bindgen", ] +[[package]] +name = "winapi-util" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" +dependencies = [ + "windows-sys 0.61.2", +] + [[package]] name = "windows-core" version = "0.62.2" diff --git a/wasm-test/Cargo.toml b/wasm-test/Cargo.toml index b6494bf0e2c..1b3802986e0 100644 --- a/wasm-test/Cargo.toml +++ b/wasm-test/Cargo.toml @@ -1,16 +1,27 @@ [package] name = "wasm-test" -description = "Small integration smoketest for wasm32-unknown-unknown target support" +description = "Integration smoketest for WASM target support (wasm32-unknown-unknown and wasm32-wasip1)" repository = "https://github.com/spiraldb/vortex" authors = ["Vortex Authors "] license = "Apache-2.0" version = "0.0.1" publish = false edition = "2024" -rust-version = "1.86" -#[lib] -#crate-type = ["cdylib"] +[lib] +crate-type = ["cdylib"] + +[[bin]] +name = "wasm-test" +path = "src/main.rs" [dependencies] vortex = { path = "../vortex", default-features = false } +wasm-bindgen = "0.2" + +[dependencies.web-sys] +features = ["console"] +version = "0.3" + +[dev-dependencies] +wasm-bindgen-test = "0.3" diff --git a/wasm-test/README.md b/wasm-test/README.md new file mode 100644 index 00000000000..4fbfd539dc1 --- /dev/null +++ b/wasm-test/README.md @@ -0,0 +1,96 @@ +# Vortex WASM Test + +Integration test for Vortex library compiled to WebAssembly. + +## Building + +### Prerequisites + +1. Install the WASM target: + +```bash +rustup target add wasm32-unknown-unknown +``` + +2. Install wasm-pack: + +```bash +cargo install wasm-pack +``` + +### Build Steps + +1. Navigate to the wasm-test directory: + +```bash +cd wasm-test +``` + +2. Build for web: + +```bash +wasm-pack build --target web +``` + +This creates the `pkg/` directory with JS bindings automatically. + +## Testing + +### In Browser + +1. Start a local web server (required for WASM loading): + +Using Python: + +```bash +python3 -m http.server 8000 +``` + +2. Open your browser to `http://localhost:8000` + +3. Click the test buttons: + - **Test Basic Function** - Tests simple `add()` function. + - **Get Version** - Gets version string. + - **Test Vortex Arrays** - Tests PrimitiveArray, compute operations, and encodings. 
+   - **Test Compression** - Tests BtrBlocksCompressor compression.
+   - **Test Array Types** - Tests different array types (ConstantArray, StructArray, etc.).
+   - **Test Compute Operations** - Tests comparison operations (>, >=, ==).
+
+Console output from the WASM module will be displayed in the output area.
+
+### Headless Tests (wasm-bindgen)
+
+Run the wasm-bindgen tests in headless Chrome:
+
+```bash
+wasm-pack test --headless --chrome
+```
+
+Or in headless Firefox:
+
+```bash
+wasm-pack test --headless --firefox
+```
+
+The general shape of such a test is sketched under "Example wasm-bindgen Test" at the end of this README.
+
+### WASI Tests (Wasmer)
+
+1. Install the WASI target and Wasmer:
+
+```bash
+rustup target add wasm32-wasip1
+curl https://get.wasmer.io -sSfL | sh
+```
+
+2. Build and run:
+
+```bash
+cargo build --target wasm32-wasip1
+wasmer run ./target/wasm32-wasip1/debug/wasm-test.wasm
+```
+
+## Project Structure
+
+- `src/lib.rs` - WASM library with wasm-bindgen exports.
+- `src/main.rs` - WASI binary for integration testing via Wasmer.
+- `index.html` - Browser test page.
+- `pkg/` - Generated JS bindings (created by wasm-pack).
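+
+### Example wasm-bindgen Test
+
+For orientation, this is the general shape of a test that `wasm-pack test` compiles to WASM and runs in the browser. It is an illustrative sketch only (the function name and assertion are placeholders); the real suite lives in `src/lib.rs`:
+
+```rust
+use wasm_bindgen_test::*;
+
+// Run these tests inside a browser (headless Chrome/Firefox) rather than Node.
+wasm_bindgen_test_configure!(run_in_browser);
+
+#[wasm_bindgen_test]
+fn illustrative_smoke_test() {
+    // Placeholder assertion; the real tests exercise Vortex arrays,
+    // compute kernels, and compression (see src/lib.rs).
+    assert_eq!(2 + 2, 4);
+}
+```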

diff --git a/wasm-test/index.html b/wasm-test/index.html
new file mode 100644
index 00000000000..04d11fc4888
--- /dev/null
+++ b/wasm-test/index.html
@@ -0,0 +1,182 @@
[index.html markup lost in extraction. Recoverable content: a browser test page titled "Vortex WASM Test", the intro text "Test the Vortex library compiled to WebAssembly.", one button per test function listed in the README above, and an output area that mirrors console output from the WASM module.]
diff --git a/wasm-test/src/lib.rs b/wasm-test/src/lib.rs
new file mode 100644
index 00000000000..f457f8708ff
--- /dev/null
+++ b/wasm-test/src/lib.rs
@@ -0,0 +1,288 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+use wasm_bindgen::prelude::*;
+
+// Helper macro for logging to the browser console.
+macro_rules! log {
+    ($($t:tt)*) => {
+        web_sys::console::log_1(&format!($($t)*).into());
+    }
+}
+
+#[wasm_bindgen]
+pub struct VortexBenchmark {
+    size: usize,
+}
+
+#[wasm_bindgen]
+impl VortexBenchmark {
+    /// Create a new benchmark instance.
+    #[wasm_bindgen(constructor)]
+    pub fn new(size: usize) -> VortexBenchmark {
+        VortexBenchmark { size }
+    }
+
+    /// Test Vortex arrays to ensure the library is linked.
+    pub fn test_vortex(&self) -> Result<(), JsValue> {
+        use vortex::arrays::PrimitiveArray;
+        use vortex::buffer::Buffer;
+        use vortex::validity::Validity;
+
+        // Create a simple integer array.
+        let data: Vec<i32> = (0..self.size as i32).collect();
+        let buffer = Buffer::from(data);
+        let _array = PrimitiveArray::new(buffer, Validity::NonNullable);
+
+        log!("Created Vortex PrimitiveArray with {} elements", self.size);
+
+        // Test compute functions.
+        use vortex::IntoArray;
+        use vortex::arrays::ConstantArray;
+        use vortex::compute::{Operator, compare, take};
+        use vortex::scalar::Scalar;
+
+        let data: Vec<i32> = vec![1, 2, 3, 4, 5];
+        let buffer = Buffer::from(data.clone());
+        let array = PrimitiveArray::new(buffer, Validity::NonNullable).into_array();
+
+        // Create a constant array for comparison.
+        let threshold_array = ConstantArray::new(Scalar::from(3i32), 5).into_array();
+        let _comparison = compare(&array, &threshold_array, Operator::Gt)
+            .map_err(|e| JsValue::from_str(&e.to_string()))?;
+
+        // Test the take operation.
+        let indices: Vec<u64> = vec![0, 2, 4];
+        let indices_buffer = Buffer::from(indices);
+        let indices_array = PrimitiveArray::new(indices_buffer, Validity::NonNullable).into_array();
+        let _taken = take(&array, &indices_array).map_err(|e| JsValue::from_str(&e.to_string()))?;
+
+        log!("Tested Vortex compute operations");
+
+        // Test that various encodings exist.
+        use vortex::encodings;
+        let _ = std::mem::size_of::<encodings::ALPEncoding>();
+        let _ = std::mem::size_of::<encodings::BitPackedEncoding>();
+        let _ = std::mem::size_of::<encodings::RunEndEncoding>();
+        let _ = std::mem::size_of::<encodings::ZigZagEncoding>();
+
+        log!("Verified Vortex encodings are included");
+
+        Ok(())
+    }
+
+    /// Test compression.
+    pub fn test_compression(&self) -> Result<(), JsValue> {
+        use vortex::Array;
+        use vortex::arrays::PrimitiveArray;
+        use vortex::buffer::buffer;
+        use vortex::compressor::BtrBlocksCompressor;
+        use vortex::validity::Validity;
+
+        log!("Testing compression with BtrBlocksCompressor...");
+
+        // Create an array with repeated values (good for compression).
+        let array = PrimitiveArray::new(buffer![1i32; 1024], Validity::AllValid).to_array();
+        let original_len = array.len();
+
+        // Compress the array.
+        let compressed = BtrBlocksCompressor::default()
+            .compress(&array)
+            .map_err(|e| JsValue::from_str(&e.to_string()))?;
+
+        log!(
+            "Compressed array from {} to {} elements",
+            original_len,
+            compressed.len()
+        );
+
+        Ok(())
+    }
+
+    /// Test different array types.
+    pub fn test_array_types(&self) -> Result<(), JsValue> {
+        use vortex::IntoArray;
+        use vortex::arrays::{ConstantArray, PrimitiveArray, StructArray};
+        use vortex::buffer::Buffer;
+        use vortex::scalar::Scalar;
+        use vortex::validity::Validity;
+
+        log!("Testing different array types...");
+
+        // Test ConstantArray.
+        let _const_array = ConstantArray::new(Scalar::from(42i32), 100);
+        log!("Created ConstantArray with 100 elements of value 42");
+
+        // Test StructArray.
+        let field1 = PrimitiveArray::new(Buffer::from(vec![1i32, 2, 3]), Validity::NonNullable);
+        let field2 = PrimitiveArray::new(Buffer::from(vec![4i32, 5, 6]), Validity::NonNullable);
+
+        let _struct_array =
+            StructArray::from_fields(&[("a", field1.into_array()), ("b", field2.into_array())])
+                .map_err(|e| JsValue::from_str(&e.to_string()))?;
+        log!("Created StructArray with 2 fields");
+
+        // Test PrimitiveArray with different types.
+        let _int_array =
+            PrimitiveArray::new(Buffer::from(vec![1i64, 2, 3, 4]), Validity::NonNullable);
+        let _float_array =
+            PrimitiveArray::new(Buffer::from(vec![1.0f64, 2.0, 3.0]), Validity::NonNullable);
+        log!("Created PrimitiveArrays with different numeric types");
+
+        Ok(())
+    }
+
+    /// Test more compute operations.
+    pub fn test_compute_ops(&self) -> Result<(), JsValue> {
+        use vortex::IntoArray;
+        use vortex::arrays::{ConstantArray, PrimitiveArray};
+        use vortex::buffer::Buffer;
+        use vortex::compute::{Operator, compare};
+        use vortex::scalar::Scalar;
+        use vortex::validity::Validity;
+
+        log!("Testing additional compute operations...");
+
+        let data: Vec<i32> = vec![10, 20, 30, 40, 50];
+        let buffer = Buffer::from(data);
+        let array = PrimitiveArray::new(buffer, Validity::NonNullable).into_array();
+
+        // Test comparison operations with a scalar converted to an array.
+        let scalar_array = ConstantArray::new(Scalar::from(25i32), 5).into_array();
+        let _gt_result = compare(&array, &scalar_array, Operator::Gt)
+            .map_err(|e| JsValue::from_str(&e.to_string()))?;
+        log!("Compared array elements > 25");
+
+        // Test comparison with another array.
+        let threshold_array = ConstantArray::new(Scalar::from(30i32), 5).into_array();
+        let _comparison = compare(&array, &threshold_array, Operator::Gte)
+            .map_err(|e| JsValue::from_str(&e.to_string()))?;
+        log!("Compared array elements >= 30");
+
+        // Test equality comparison.
+        let eq_array = ConstantArray::new(Scalar::from(30i32), 5).into_array();
+        let _eq_result = compare(&array, &eq_array, Operator::Eq)
+            .map_err(|e| JsValue::from_str(&e.to_string()))?;
+        log!("Compared array elements == 30");
+
+        Ok(())
+    }
+
+    /// Get size info.
+    pub fn get_size(&self) -> usize {
+        self.size
+    }
+}
+
+/// Initialize the WASM module.
+#[wasm_bindgen(start)]
+pub fn init() {
+    log!("Vortex WASM module initialized");
+}
+
+/// Get version information.
+#[wasm_bindgen]
+pub fn get_version() -> String {
+    format!("vortex-wasm-test v{}", env!("CARGO_PKG_VERSION"))
+}
+
+/// A simple test function to verify WASM is working.
+#[wasm_bindgen]
+pub fn add(a: i32, b: i32) -> i32 {
+    a + b
+}
+
+#[cfg(test)]
+mod tests {
+    use wasm_bindgen_test::*;
+
+    wasm_bindgen_test_configure!(run_in_browser);
+
+    use vortex::Array;
+    use vortex::IntoArray;
+    use vortex::arrays::{ConstantArray, PrimitiveArray, StructArray};
+    use vortex::buffer::{Buffer, buffer};
+    use vortex::compressor::BtrBlocksCompressor;
+    use vortex::compute::{Operator, compare, take};
+    use vortex::scalar::Scalar;
+    use vortex::validity::Validity;
+
+    #[wasm_bindgen_test]
+    fn test_primitive_array() {
+        let data: Vec<i32> = (0..1000).collect();
+        let buffer = Buffer::from(data);
+        let array = PrimitiveArray::new(buffer, Validity::NonNullable);
+        assert_eq!(array.len(), 1000);
+    }
+
+    #[wasm_bindgen_test]
+    fn test_compute_operations() {
+        let data: Vec<i32> = vec![1, 2, 3, 4, 5];
+        let buffer = Buffer::from(data);
+        let array = PrimitiveArray::new(buffer, Validity::NonNullable).into_array();
+
+        // Test comparison.
+        let threshold_array = ConstantArray::new(Scalar::from(3i32), 5).into_array();
+        let comparison = compare(&array, &threshold_array, Operator::Gt).expect("compare failed");
+        assert_eq!(comparison.len(), 5);
+
+        // Test take.
+        let indices: Vec<u64> = vec![0, 2, 4];
+        let indices_buffer = Buffer::from(indices);
+        let indices_array =
+            PrimitiveArray::new(indices_buffer, Validity::NonNullable).into_array();
+        let taken = take(&array, &indices_array).expect("take failed");
+        assert_eq!(taken.len(), 3);
+    }
+
+    #[wasm_bindgen_test]
+    fn test_encodings() {
+        use vortex::encodings;
+
+        // Verify encodings are linked by checking their sizes.
+        let alp_size = std::mem::size_of::<encodings::ALPEncoding>();
+        let bitpacked_size = std::mem::size_of::<encodings::BitPackedEncoding>();
+        let runend_size = std::mem::size_of::<encodings::RunEndEncoding>();
+        let zigzag_size = std::mem::size_of::<encodings::ZigZagEncoding>();
+
+        assert!(alp_size > 0);
+        assert!(bitpacked_size > 0);
+        assert!(runend_size > 0);
+        assert!(zigzag_size > 0);
+    }
+
+    #[wasm_bindgen_test]
+    fn test_compression() {
+        let array = PrimitiveArray::new(buffer![1i32; 1024], Validity::AllValid).to_array();
+        let original_len = array.len();
+
+        let compressed = BtrBlocksCompressor::default()
+            .compress(&array)
+            .expect("compression failed");
+
+        assert_eq!(compressed.len(), original_len);
+    }
+
+    #[wasm_bindgen_test]
+    fn test_array_types() {
+        // ConstantArray.
+        let const_array = ConstantArray::new(Scalar::from(42i32), 100);
+        assert_eq!(const_array.len(), 100);
+
+        // StructArray.
+        let field1 = PrimitiveArray::new(Buffer::from(vec![1i32, 2, 3]), Validity::NonNullable);
+        let field2 = PrimitiveArray::new(Buffer::from(vec![4i32, 5, 6]), Validity::NonNullable);
+        let struct_array =
+            StructArray::from_fields(&[("a", field1.into_array()), ("b", field2.into_array())])
+                .expect("StructArray creation failed");
+        assert_eq!(struct_array.len(), 3);
+
+        // Different numeric types.
+        let int_array =
+            PrimitiveArray::new(Buffer::from(vec![1i64, 2, 3, 4]), Validity::NonNullable);
+        let float_array =
+            PrimitiveArray::new(Buffer::from(vec![1.0f64, 2.0, 3.0]), Validity::NonNullable);
+        assert_eq!(int_array.len(), 4);
+        assert_eq!(float_array.len(), 3);
+    }
+}
diff --git a/wasm-test/src/main.rs b/wasm-test/src/main.rs
index 8be0ac82570..015f3ab3df4 100644
--- a/wasm-test/src/main.rs
+++ b/wasm-test/src/main.rs
@@ -1,19 +1,129 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright the Vortex contributors
 
-use vortex::array::arrays::PrimitiveArray;
-use vortex::array::validity::Validity;
-use vortex::array::Array;
+//! WASI integration test for Vortex.
+//!
+//! This binary is compiled to `wasm32-wasip1` and executed via Wasmer to verify that Vortex works
+//! correctly in a WASI environment.
+
+use vortex::Array;
+use vortex::IntoArray;
+use vortex::arrays::ConstantArray;
+use vortex::arrays::PrimitiveArray;
+use vortex::arrays::StructArray;
+use vortex::buffer::Buffer;
 use vortex::buffer::buffer;
 use vortex::compressor::BtrBlocksCompressor;
+use vortex::compute::Operator;
+use vortex::compute::compare;
+use vortex::compute::take;
+use vortex::scalar::Scalar;
+use vortex::validity::Validity;
+
+fn main() {
+    println!("Running Vortex WASI integration tests...\n");
+
+    test_primitive_array();
+    test_compute_operations();
+    test_encodings();
+    test_compression();
+    test_array_types();
+
+    println!("\nAll WASI integration tests passed!");
+}
+
+fn test_primitive_array() {
+    println!("Testing PrimitiveArray creation...");
+
+    let data: Vec<i32> = (0..1000).collect();
+    let buffer = Buffer::from(data);
+    let array = PrimitiveArray::new(buffer, Validity::NonNullable);
+
+    assert_eq!(array.len(), 1000);
+    println!(" Created PrimitiveArray with {} elements", array.len());
+}
+
+fn test_compute_operations() {
+    println!("Testing compute operations...");
+
+    let data: Vec<i32> = vec![1, 2, 3, 4, 5];
+    let buffer = Buffer::from(data);
+    let array = PrimitiveArray::new(buffer, Validity::NonNullable).into_array();
+
+    // Test comparison.
+    let threshold_array = ConstantArray::new(Scalar::from(3i32), 5).into_array();
+    let comparison = compare(&array, &threshold_array, Operator::Gt).expect("compare failed");
+    assert_eq!(comparison.len(), 5);
+    println!(" Comparison operation succeeded");
+
+    // Test take.
+    let indices: Vec<u64> = vec![0, 2, 4];
+    let indices_buffer = Buffer::from(indices);
+    let indices_array = PrimitiveArray::new(indices_buffer, Validity::NonNullable).into_array();
+    let taken = take(&array, &indices_array).expect("take failed");
+    assert_eq!(taken.len(), 3);
+    println!(" Take operation succeeded");
+}
+
+fn test_encodings() {
+    println!("Testing encoding types...");
+
+    use vortex::encodings;
+
+    // Verify encodings are linked by checking their sizes.
+    let alp_size = std::mem::size_of::<encodings::ALPEncoding>();
+    let bitpacked_size = std::mem::size_of::<encodings::BitPackedEncoding>();
+    let runend_size = std::mem::size_of::<encodings::RunEndEncoding>();
+    let zigzag_size = std::mem::size_of::<encodings::ZigZagEncoding>();
+
+    assert!(alp_size > 0);
+    assert!(bitpacked_size > 0);
+    assert!(runend_size > 0);
+    assert!(zigzag_size > 0);
+
+    println!(" ALP, BitPacked, RunEnd, ZigZag encodings are linked");
+}

-//use wasm_bindgen::prelude::*;

+fn test_compression() {
+    println!("Testing compression...");

-pub fn main() {
-    // Extremely simple test of compression/decompression and a few compute functions.
+    // Create an array with repeated values (good for compression).
     let array = PrimitiveArray::new(buffer![1i32; 1024], Validity::AllValid).to_array();
+    let original_len = array.len();
+
+    let compressed = BtrBlocksCompressor::default()
+        .compress(&array)
+        .expect("compression failed");
+
+    println!(
+        " Compressed array: {} -> {} elements",
+        original_len,
+        compressed.len()
+    );
+}
+
+fn test_array_types() {
+    println!("Testing array types...");
+
+    // ConstantArray.
+    let const_array = ConstantArray::new(Scalar::from(42i32), 100);
+    assert_eq!(const_array.len(), 100);
+    println!(" ConstantArray created");
+
+    // StructArray.
+ let field1 = PrimitiveArray::new(Buffer::from(vec![1i32, 2, 3]), Validity::NonNullable); + let field2 = PrimitiveArray::new(Buffer::from(vec![4i32, 5, 6]), Validity::NonNullable); + let struct_array = + StructArray::from_fields(&[("a", field1.into_array()), ("b", field2.into_array())]) + .expect("StructArray creation failed"); + assert_eq!(struct_array.len(), 3); + println!(" StructArray created with 2 fields"); - let compressed = BtrBlocksCompressor::default().compress(&array).unwrap(); - println!("Compressed size: {}", compressed.len()); - println!("Tree view: {}", compressed.display_tree()); + // Different numeric types. + let int_array = PrimitiveArray::new(Buffer::from(vec![1i64, 2, 3, 4]), Validity::NonNullable); + let float_array = + PrimitiveArray::new(Buffer::from(vec![1.0f64, 2.0, 3.0]), Validity::NonNullable); + assert_eq!(int_array.len(), 4); + assert_eq!(float_array.len(), 3); + println!(" PrimitiveArrays with i64 and f64 created"); }