From 5268dbc8482a041b53c210f5d151d6eb907cdb47 Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Mon, 24 Nov 2025 17:39:28 -0500 Subject: [PATCH 01/30] a bit more interesting wasm demo Signed-off-by: Connor Tsui --- .github/workflows/ci.yml | 20 ++- REUSE.toml | 5 + wasm-test/.gitignore | 6 + wasm-test/Cargo.lock | 361 +++++++++++++++++++++------------------ wasm-test/Cargo.toml | 19 ++- wasm-test/README.md | 96 +++++++++++ wasm-test/index.html | 182 ++++++++++++++++++++ wasm-test/src/lib.rs | 288 +++++++++++++++++++++++++++++++ wasm-test/src/main.rs | 128 +++++++++++++- 9 files changed, 924 insertions(+), 181 deletions(-) create mode 100644 wasm-test/.gitignore create mode 100644 wasm-test/README.md create mode 100644 wasm-test/index.html create mode 100644 wasm-test/src/lib.rs diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f5d35fc42c0..b165069eb8d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -619,15 +619,23 @@ jobs: - uses: ./.github/actions/setup-rust with: repo-token: ${{ secrets.GITHUB_TOKEN }} - toolchain: nightly-2025-06-26 - targets: "wasm32-wasip1" - components: "rust-src" + targets: "wasm32-wasip1,wasm32-unknown-unknown" + - name: Setup Wasmer uses: wasmerio/setup-wasmer@v3.1 - # there is a compiler bug in nightly (but not in nightly-2025-06-26) - - run: cargo +nightly-2025-06-26 -Zbuild-std=panic_abort,std build --target wasm32-wasip1 + - name: Build WASI binary + run: cargo build --target wasm32-wasip1 + working-directory: ./wasm-test + - name: Run WASI test + run: wasmer run ./target/wasm32-wasip1/debug/wasm-test.wasm working-directory: ./wasm-test - - run: wasmer run ./target/wasm32-wasip1/debug/wasm-test.wasm + + # wasm-bindgen-test with headless browser + # wasm-pack automatically installs matching wasm-bindgen-cli from Cargo.lock + - name: Install wasm-pack + run: curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh + - name: Run wasm-bindgen tests (headless Firefox) + run: wasm-pack 
test --headless --firefox working-directory: ./wasm-test miri: diff --git a/REUSE.toml b/REUSE.toml index 46787ae62e2..b8bd2c9948b 100644 --- a/REUSE.toml +++ b/REUSE.toml @@ -17,6 +17,11 @@ path = "benchmarks-website/**" SPDX-FileCopyrightText = "Copyright the Vortex contributors" SPDX-License-Identifier = "CC-BY-4.0" +[[annotations]] +path = "wasm-test/**" +SPDX-FileCopyrightText = "Copyright the Vortex contributors" +SPDX-License-Identifier = "CC-BY-4.0" + # Golden files are licensed under CC-BY-4.0. [[annotations]] path = "**/goldenfiles/**" diff --git a/wasm-test/.gitignore b/wasm-test/.gitignore new file mode 100644 index 00000000000..bcc4bfec848 --- /dev/null +++ b/wasm-test/.gitignore @@ -0,0 +1,6 @@ +# WASM build artifacts. +/target +/pkg + +# Cargo lock file for binary projects. +Cargo.lock diff --git a/wasm-test/Cargo.lock b/wasm-test/Cargo.lock index e3ec17002e9..adf49aaaff8 100644 --- a/wasm-test/Cargo.lock +++ b/wasm-test/Cargo.lock @@ -462,18 +462,6 @@ dependencies = [ "crossbeam-utils", ] -[[package]] -name = "console" -version = "0.15.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "054ccb5b10f9f2cbf51eb355ca1d05c2d279ce1804688d0db74b4733a5aeafd8" -dependencies = [ - "encode_unicode", - "libc", - "once_cell", - "windows-sys 0.59.0", -] - [[package]] name = "const-random" version = "0.1.18" @@ -515,16 +503,6 @@ dependencies = [ "crossbeam-utils", ] -[[package]] -name = "crossbeam-deque" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" -dependencies = [ - "crossbeam-epoch", - "crossbeam-utils", -] - [[package]] name = "crossbeam-epoch" version = "0.9.18" @@ -534,15 +512,6 @@ dependencies = [ "crossbeam-utils", ] -[[package]] -name = "crossbeam-queue" -version = "0.3.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"0f58bbc28f91df819d0aa2a2c00cd19754769c2fad90579b3592b1c9ba7a3115" -dependencies = [ - "crossbeam-utils", -] - [[package]] name = "crossbeam-utils" version = "0.8.21" @@ -555,6 +524,16 @@ version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" +[[package]] +name = "cudarc" +version = "0.18.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef0cfc5e22a6b6f7d04ee45b0151232ca236ede8ca3534210fd4072bdead0d60" +dependencies = [ + "half", + "libloading", +] + [[package]] name = "dashmap" version = "6.1.0" @@ -586,24 +565,12 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d3a5ccdfd6c5e7e2fea9c5cf256f2a08216047fab19c621c3da64e9ae4a1462d" -[[package]] -name = "dyn-hash" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15401da73a9ed8c80e3b2d4dc05fe10e7b72d7243b9f614e516a44fa99986e88" - [[package]] name = "either" version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" -[[package]] -name = "encode_unicode" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" - [[package]] name = "enum-iterator" version = "2.3.0" @@ -624,6 +591,26 @@ dependencies = [ "syn 2.0.106", ] +[[package]] +name = "enum-map" +version = "2.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6866f3bfdf8207509a033af1a75a7b08abda06bbaaeae6669323fd5a097df2e9" +dependencies = [ + "enum-map-derive", +] + +[[package]] +name = "enum-map-derive" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f282cfdfe92516eb26c2af8589c274c7c17681f5ecc03c18255fe741c6aa64eb" +dependencies = [ + 
"proc-macro2", + "quote", + "syn 2.0.106", +] + [[package]] name = "equivalent" version = "1.0.2" @@ -702,9 +689,9 @@ dependencies = [ [[package]] name = "fastlanes" -version = "0.4.0" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38b79094ac813a000b92adff4231a6d291d063dc6312b3b88c154a5ea0a195d0" +checksum = "414cb755aee48ff7b0907995d2949c68c8c17900970076dff6a808e18e592d71" dependencies = [ "arrayref", "const_for", @@ -752,9 +739,9 @@ dependencies = [ [[package]] name = "fsst-rs" -version = "0.5.4" +version = "0.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab195789b87bb56fce91b3617e44d36dbba68a4c8d736ef48767187932a5161b" +checksum = "561f2458a3407836ab8f1acc9113b8cda91b9d6378ba8dad13b2fe1a1d3af5ce" [[package]] name = "funty" @@ -904,6 +891,8 @@ dependencies = [ "cfg-if", "crunchy", "num-traits", + "rand", + "rand_distr", "zerocopy", ] @@ -1076,17 +1065,6 @@ dependencies = [ "icu_properties", ] -[[package]] -name = "insta" -version = "1.43.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46fdb647ebde000f43b5b53f773c30cf9b0cb4300453208713fa38b2c70935a0" -dependencies = [ - "console", - "once_cell", - "similar", -] - [[package]] name = "inventory" version = "0.3.21" @@ -1159,9 +1137,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.81" +version = "0.3.82" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec48937a97411dcb524a265206ccd4c90bb711fca92b2792c407f268825b9305" +checksum = "b011eec8cc36da2aab2d5cff675ec18454fad408585853910a202391cf9f8e65" dependencies = [ "once_cell", "wasm-bindgen", @@ -1208,6 +1186,16 @@ version = "0.2.177" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" +[[package]] +name = "libloading" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55" +dependencies = [ + "cfg-if", + "windows-link", +] + [[package]] name = "libm" version = "0.2.15" @@ -1263,6 +1251,16 @@ version = "2.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" +[[package]] +name = "minicov" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f27fe9f1cc3c22e1687f9446c2083c4c5fc7f0bcf1c7a86bdbded14985895b4b" +dependencies = [ + "cc", + "walkdir", +] + [[package]] name = "miniz_oxide" version = "0.8.9" @@ -1543,26 +1541,6 @@ version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" -[[package]] -name = "pin-project" -version = "1.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "677f1add503faace112b9f1373e43e9e054bfdd22ff1a63c1bc485eaec6a6a8a" -dependencies = [ - "pin-project-internal", -] - -[[package]] -name = "pin-project-internal" -version = "1.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.106", -] - [[package]] name = "pin-project-lite" version = "0.2.16" @@ -1736,6 +1714,16 @@ dependencies = [ "getrandom 0.3.3", ] +[[package]] +name = "rand_distr" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a8615d50dcf34fa31f7ab52692afec947c4dd0ab803cc87cb3b0b4570ff7463" +dependencies = [ + "num-traits", + "rand", +] + [[package]] name = "rand_xoshiro" version = "0.6.0" @@ -1823,6 +1811,15 @@ version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" +[[package]] +name = "same-file" +version = 
"1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + [[package]] name = "scopeguard" version = "1.2.0" @@ -1902,12 +1899,6 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" -[[package]] -name = "similar" -version = "2.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbbb5d9659141646ae647b42fe094daf6c6192d1620870b449d9557f748b2daa" - [[package]] name = "sketches-ddsketch" version = "0.3.0" @@ -2122,6 +2113,8 @@ checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" name = "vortex" version = "0.1.0" dependencies = [ + "fastlanes", + "rand", "vortex-alp", "vortex-array", "vortex-btrblocks", @@ -2129,10 +2122,8 @@ dependencies = [ "vortex-bytebool", "vortex-datetime-parts", "vortex-decimal-byte-parts", - "vortex-dict", "vortex-dtype", "vortex-error", - "vortex-expr", "vortex-fastlanes", "vortex-flatbuffers", "vortex-fsst", @@ -2147,6 +2138,7 @@ dependencies = [ "vortex-scalar", "vortex-scan", "vortex-sequence", + "vortex-session", "vortex-sparse", "vortex-utils", "vortex-zigzag", @@ -2187,11 +2179,11 @@ dependencies = [ "bitvec", "cfg-if", "enum-iterator", + "enum-map", "flatbuffers", "futures", "getrandom 0.3.3", "humansize", - "insta", "inventory", "itertools", "log", @@ -2200,7 +2192,7 @@ dependencies = [ "num_enum", "parking_lot", "paste", - "pin-project", + "pin-project-lite", "prost", "rand", "rustc-hash", @@ -2208,20 +2200,24 @@ dependencies = [ "static_assertions", "termtree", "vortex-buffer", + "vortex-compute", "vortex-dtype", "vortex-error", "vortex-flatbuffers", + "vortex-io", "vortex-mask", "vortex-metrics", + "vortex-proto", "vortex-scalar", + "vortex-session", "vortex-utils", + "vortex-vector", ] [[package]] name = "vortex-btrblocks" version = 
"0.1.0" dependencies = [ - "arrow-buffer", "getrandom 0.3.3", "itertools", "log", @@ -2233,7 +2229,6 @@ dependencies = [ "vortex-buffer", "vortex-datetime-parts", "vortex-decimal-byte-parts", - "vortex-dict", "vortex-dtype", "vortex-error", "vortex-fastlanes", @@ -2252,7 +2247,9 @@ name = "vortex-buffer" version = "0.1.0" dependencies = [ "arrow-buffer", + "bitvec", "bytes", + "cudarc", "itertools", "num-traits", "simdutf8", @@ -2263,7 +2260,6 @@ dependencies = [ name = "vortex-bytebool" version = "0.1.0" dependencies = [ - "arrow-buffer", "num-traits", "vortex-array", "vortex-buffer", @@ -2274,21 +2270,25 @@ dependencies = [ ] [[package]] -name = "vortex-datetime-parts" +name = "vortex-compute" version = "0.1.0" dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-schema", + "log", + "multiversion", "num-traits", - "prost", - "vortex-array", + "paste", "vortex-buffer", "vortex-dtype", "vortex-error", "vortex-mask", - "vortex-scalar", + "vortex-vector", ] [[package]] -name = "vortex-decimal-byte-parts" +name = "vortex-datetime-parts" version = "0.1.0" dependencies = [ "num-traits", @@ -2302,27 +2302,24 @@ dependencies = [ ] [[package]] -name = "vortex-dict" +name = "vortex-decimal-byte-parts" version = "0.1.0" dependencies = [ - "arrow-array", - "arrow-buffer", "num-traits", "prost", - "rustc-hash", "vortex-array", "vortex-buffer", "vortex-dtype", "vortex-error", "vortex-mask", "vortex-scalar", - "vortex-utils", ] [[package]] name = "vortex-dtype" version = "0.1.0" dependencies = [ + "arrow-buffer", "arrow-schema", "flatbuffers", "half", @@ -2352,29 +2349,6 @@ dependencies = [ "url", ] -[[package]] -name = "vortex-expr" -version = "0.1.0" -dependencies = [ - "arcref", - "async-trait", - "dyn-hash", - "futures", - "itertools", - "parking_lot", - "paste", - "prost", - "termtree", - "vortex-array", - "vortex-buffer", - "vortex-dtype", - "vortex-error", - "vortex-mask", - "vortex-proto", - "vortex-scalar", - "vortex-utils", -] - [[package]] name = 
"vortex-fastlanes" version = "0.1.0" @@ -2387,13 +2361,16 @@ dependencies = [ "log", "num-traits", "prost", + "static_assertions", "vortex-array", "vortex-buffer", + "vortex-compute", "vortex-dtype", "vortex-error", "vortex-mask", "vortex-scalar", "vortex-utils", + "vortex-vector", ] [[package]] @@ -2417,6 +2394,7 @@ dependencies = [ "vortex-error", "vortex-mask", "vortex-scalar", + "vortex-vector", ] [[package]] @@ -2424,6 +2402,7 @@ name = "vortex-io" version = "0.1.0" dependencies = [ "async-compat", + "async-fs", "async-stream", "async-trait", "bytes", @@ -2442,6 +2421,7 @@ dependencies = [ "vortex-buffer", "vortex-error", "vortex-metrics", + "vortex-session", "wasm-bindgen-futures", ] @@ -2471,7 +2451,6 @@ dependencies = [ "async-trait", "flatbuffers", "futures", - "getrandom 0.3.3", "itertools", "kanal", "log", @@ -2484,16 +2463,14 @@ dependencies = [ "pin-project-lite", "prost", "rustc-hash", - "tracing", + "termtree", "uuid", "vortex-array", "vortex-btrblocks", "vortex-buffer", "vortex-decimal-byte-parts", - "vortex-dict", "vortex-dtype", "vortex-error", - "vortex-expr", "vortex-flatbuffers", "vortex-io", "vortex-mask", @@ -2501,6 +2478,7 @@ dependencies = [ "vortex-pco", "vortex-scalar", "vortex-sequence", + "vortex-session", "vortex-utils", ] @@ -2508,8 +2486,8 @@ dependencies = [ name = "vortex-mask" version = "0.1.0" dependencies = [ - "arrow-buffer", "itertools", + "vortex-buffer", "vortex-error", ] @@ -2519,6 +2497,7 @@ version = "0.1.0" dependencies = [ "getrandom 0.3.3", "parking_lot", + "vortex-session", "witchcraft-metrics", ] @@ -2526,14 +2505,17 @@ dependencies = [ name = "vortex-pco" version = "0.1.0" dependencies = [ + "itertools", "pco", "prost", "vortex-array", "vortex-buffer", + "vortex-compute", "vortex-dtype", "vortex-error", "vortex-mask", "vortex-scalar", + "vortex-vector", ] [[package]] @@ -2566,7 +2548,6 @@ name = "vortex-scalar" version = "0.1.0" dependencies = [ "arrow-array", - "arrow-buffer", "bytes", "itertools", "num-traits", @@ 
-2575,8 +2556,10 @@ dependencies = [ "vortex-buffer", "vortex-dtype", "vortex-error", + "vortex-mask", "vortex-proto", "vortex-utils", + "vortex-vector", ] [[package]] @@ -2586,8 +2569,6 @@ dependencies = [ "arrow-array", "arrow-schema", "bit-vec", - "crossbeam-deque", - "crossbeam-queue", "futures", "itertools", "log", @@ -2597,11 +2578,11 @@ dependencies = [ "vortex-buffer", "vortex-dtype", "vortex-error", - "vortex-expr", "vortex-io", "vortex-layout", "vortex-mask", "vortex-metrics", + "vortex-session", ] [[package]] @@ -2618,6 +2599,16 @@ dependencies = [ "vortex-mask", "vortex-proto", "vortex-scalar", + "vortex-vector", +] + +[[package]] +name = "vortex-session" +version = "0.1.0" +dependencies = [ + "dashmap", + "vortex-error", + "vortex-utils", ] [[package]] @@ -2633,6 +2624,7 @@ dependencies = [ "vortex-error", "vortex-mask", "vortex-scalar", + "vortex-vector", ] [[package]] @@ -2643,10 +2635,23 @@ dependencies = [ "hashbrown 0.16.0", ] +[[package]] +name = "vortex-vector" +version = "0.1.0" +dependencies = [ + "paste", + "static_assertions", + "vortex-buffer", + "vortex-dtype", + "vortex-error", + "vortex-mask", +] + [[package]] name = "vortex-zigzag" version = "0.1.0" dependencies = [ + "itertools", "vortex-array", "vortex-buffer", "vortex-dtype", @@ -2656,6 +2661,16 @@ dependencies = [ "zigzag", ] +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + [[package]] name = "wasi" version = "0.11.1+wasi-snapshot-preview1" @@ -2682,9 +2697,9 @@ dependencies = [ [[package]] name = "wasm-bindgen" -version = "0.2.104" +version = "0.2.105" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1da10c01ae9f1ae40cbfac0bac3b1e724b320abfcf52229f80b547c0d250e2d" +checksum = "da95793dfc411fbbd93f5be7715b0578ec61fe87cb1a42b12eb625caa5c5ea60" dependencies 
= [ "cfg-if", "once_cell", @@ -2693,25 +2708,11 @@ dependencies = [ "wasm-bindgen-shared", ] -[[package]] -name = "wasm-bindgen-backend" -version = "0.2.104" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "671c9a5a66f49d8a47345ab942e2cb93c7d1d0339065d4f8139c486121b43b19" -dependencies = [ - "bumpalo", - "log", - "proc-macro2", - "quote", - "syn 2.0.106", - "wasm-bindgen-shared", -] - [[package]] name = "wasm-bindgen-futures" -version = "0.4.54" +version = "0.4.55" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e038d41e478cc73bae0ff9b36c60cff1c98b8f38f8d7e8061e79ee63608ac5c" +checksum = "551f88106c6d5e7ccc7cd9a16f312dd3b5d36ea8b4954304657d5dfba115d4a0" dependencies = [ "cfg-if", "js-sys", @@ -2722,9 +2723,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.104" +version = "0.2.105" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ca60477e4c59f5f2986c50191cd972e3a50d8a95603bc9434501cf156a9a119" +checksum = "04264334509e04a7bf8690f2384ef5265f05143a4bff3889ab7a3269adab59c2" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -2732,43 +2733,79 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.104" +version = "0.2.105" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f07d2f20d4da7b26400c9f4a0511e6e0345b040694e8a75bd41d578fa4421d7" +checksum = "420bc339d9f322e562942d52e115d57e950d12d88983a14c79b86859ee6c7ebc" dependencies = [ + "bumpalo", "proc-macro2", "quote", "syn 2.0.106", - "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.104" +version = "0.2.105" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bad67dc8b2a1a6e5448428adec4c3e84c43e561d8c9ee8a9e5aabeb193ec41d1" +checksum = "76f218a38c84bcb33c25ec7059b07847d465ce0e0a76b995e134a45adcb6af76" dependencies = [ "unicode-ident", ] +[[package]] +name = 
"wasm-bindgen-test" +version = "0.3.55" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfc379bfb624eb59050b509c13e77b4eb53150c350db69628141abce842f2373" +dependencies = [ + "js-sys", + "minicov", + "wasm-bindgen", + "wasm-bindgen-futures", + "wasm-bindgen-test-macro", +] + +[[package]] +name = "wasm-bindgen-test-macro" +version = "0.3.55" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "085b2df989e1e6f9620c1311df6c996e83fe16f57792b272ce1e024ac16a90f1" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.106", +] + [[package]] name = "wasm-test" version = "0.0.1" dependencies = [ "vortex", + "wasm-bindgen", + "wasm-bindgen-test", + "web-sys", ] [[package]] name = "web-sys" -version = "0.3.81" +version = "0.3.82" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9367c417a924a74cae129e6a2ae3b47fabb1f8995595ab474029da749a8be120" +checksum = "3a1f95c0d03a47f4ae1f7a64643a6bb97465d9b740f0fa8f90ea33915c99a9a1" dependencies = [ "js-sys", "wasm-bindgen", ] +[[package]] +name = "winapi-util" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" +dependencies = [ + "windows-sys 0.61.2", +] + [[package]] name = "windows-core" version = "0.62.2" diff --git a/wasm-test/Cargo.toml b/wasm-test/Cargo.toml index b6494bf0e2c..1b3802986e0 100644 --- a/wasm-test/Cargo.toml +++ b/wasm-test/Cargo.toml @@ -1,16 +1,27 @@ [package] name = "wasm-test" -description = "Small integration smoketest for wasm32-unknown-unknown target support" +description = "Integration smoketest for WASM target support (wasm32-unknown-unknown and wasm32-wasip1)" repository = "https://github.com/spiraldb/vortex" authors = ["Vortex Authors "] license = "Apache-2.0" version = "0.0.1" publish = false edition = "2024" -rust-version = "1.86" -#[lib] -#crate-type = ["cdylib"] +[lib] +crate-type = ["cdylib"] + +[[bin]] 
+name = "wasm-test" +path = "src/main.rs" [dependencies] vortex = { path = "../vortex", default-features = false } +wasm-bindgen = "0.2" + +[dependencies.web-sys] +features = ["console"] +version = "0.3" + +[dev-dependencies] +wasm-bindgen-test = "0.3" diff --git a/wasm-test/README.md b/wasm-test/README.md new file mode 100644 index 00000000000..4fbfd539dc1 --- /dev/null +++ b/wasm-test/README.md @@ -0,0 +1,96 @@ +# Vortex WASM Test + +Integration test for Vortex library compiled to WebAssembly. + +## Building + +### Prerequisites + +1. Install the WASM target: + +```bash +rustup target add wasm32-unknown-unknown +``` + +2. Install wasm-pack: + +```bash +cargo install wasm-pack +``` + +### Build Steps + +1. Navigate to the wasm-test directory: + +```bash +cd wasm-test +``` + +2. Build for web: + +```bash +wasm-pack build --target web +``` + +This creates the `pkg/` directory with JS bindings automatically. + +## Testing + +### In Browser + +1. Start a local web server (required for WASM loading): + +Using Python: + +```bash +python3 -m http.server 8000 +``` + +2. Open your browser to `http://localhost:8000` + +3. Click the test buttons: + - **Test Basic Function** - Tests simple `add()` function. + - **Get Version** - Gets version string. + - **Test Vortex Arrays** - Tests PrimitiveArray, compute operations, and encodings. + - **Test Compression** - Tests BtrBlocksCompressor compression. + - **Test Array Types** - Tests different array types (ConstantArray, StructArray, etc.). + - **Test Compute Operations** - Tests comparison operations (>, >=, ==). + +Console output from the WASM module will be displayed in the output area. + +### Headless Tests (wasm-bindgen) + +Run wasm-bindgen tests in headless Chrome: + +```bash +wasm-pack test --headless --chrome +``` + +Or Firefox: + +```bash +wasm-pack test --headless --firefox +``` + +### WASI Tests (Wasmer) + +1. 
Install the WASI target and Wasmer: + +```bash +rustup target add wasm32-wasip1 +curl https://get.wasmer.io -sSfL | sh +``` + +2. Build and run: + +```bash +cargo build --target wasm32-wasip1 +wasmer run ./target/wasm32-wasip1/debug/wasm-test.wasm +``` + +## Project Structure + +- `src/lib.rs` - WASM library with wasm-bindgen exports. +- `src/main.rs` - WASI binary for integration testing via Wasmer. +- `index.html` - Browser test page. +- `pkg/` - Generated JS bindings (created by wasm-pack). diff --git a/wasm-test/index.html b/wasm-test/index.html new file mode 100644 index 00000000000..04d11fc4888 --- /dev/null +++ b/wasm-test/index.html @@ -0,0 +1,182 @@ + + + + + Vortex WASM Test + + + +

Vortex WASM Test

+

Test the Vortex library compiled to WebAssembly.

+ +
+ + + + + + +
+ +
+
+ + + + diff --git a/wasm-test/src/lib.rs b/wasm-test/src/lib.rs new file mode 100644 index 00000000000..f457f8708ff --- /dev/null +++ b/wasm-test/src/lib.rs @@ -0,0 +1,288 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use wasm_bindgen::prelude::*; + +// Helper macro for logging to browser console. +macro_rules! log { + ($($t:tt)*) => { + web_sys::console::log_1(&format!($($t)*).into()); + } +} + +#[wasm_bindgen] +pub struct VortexBenchmark { + size: usize, +} + +#[wasm_bindgen] +impl VortexBenchmark { + /// Create a new benchmark instance. + #[wasm_bindgen(constructor)] + pub fn new(size: usize) -> VortexBenchmark { + VortexBenchmark { size } + } + + /// Test Vortex arrays to ensure the library is linked. + pub fn test_vortex(&self) -> Result<(), JsValue> { + use vortex::arrays::PrimitiveArray; + use vortex::buffer::Buffer; + use vortex::validity::Validity; + + // Create a simple integer array. + let data: Vec = (0..self.size as i32).collect(); + let buffer = Buffer::from(data); + let _array = PrimitiveArray::new(buffer, Validity::NonNullable); + + log!("Created Vortex PrimitiveArray with {} elements", self.size); + + // Test compute functions. + use vortex::IntoArray; + use vortex::arrays::ConstantArray; + use vortex::compute::{Operator, compare, take}; + use vortex::scalar::Scalar; + + let data: Vec = vec![1, 2, 3, 4, 5]; + let buffer = Buffer::from(data.clone()); + let array = PrimitiveArray::new(buffer, Validity::NonNullable).into_array(); + + // Create a constant array for comparison. + let threshold_array = ConstantArray::new(Scalar::from(3i32), 5).into_array(); + let _comparison = compare(&array, &threshold_array, Operator::Gt) + .map_err(|e| JsValue::from_str(&e.to_string()))?; + + // Test take operation. 
+ let indices: Vec = vec![0, 2, 4]; + let indices_buffer = Buffer::from(indices); + let indices_array = PrimitiveArray::new(indices_buffer, Validity::NonNullable).into_array(); + let _taken = take(&array, &indices_array).map_err(|e| JsValue::from_str(&e.to_string()))?; + + log!("Tested Vortex compute operations"); + + // Test various encodings exist. + use vortex::encodings; + let _ = std::mem::size_of::(); + let _ = std::mem::size_of::(); + let _ = std::mem::size_of::(); + let _ = std::mem::size_of::(); + + log!("Verified Vortex encodings are included"); + + Ok(()) + } + + /// Test compression and decompression. + pub fn test_compression(&self) -> Result<(), JsValue> { + use vortex::Array; + use vortex::arrays::PrimitiveArray; + use vortex::buffer::buffer; + use vortex::compressor::BtrBlocksCompressor; + use vortex::validity::Validity; + + log!("Testing compression with BtrBlocksCompressor..."); + + // Create an array with repeated values (good for compression). + let array = PrimitiveArray::new(buffer![1i32; 1024], Validity::AllValid).to_array(); + let original_len = array.len(); + + // Compress the array. + let compressed = BtrBlocksCompressor::default() + .compress(&array) + .map_err(|e| JsValue::from_str(&e.to_string()))?; + + log!( + "Compressed array from {} to {} elements", + original_len, + compressed.len() + ); + + Ok(()) + } + + /// Test different array types. + pub fn test_array_types(&self) -> Result<(), JsValue> { + use vortex::IntoArray; + use vortex::arrays::{ConstantArray, PrimitiveArray, StructArray}; + use vortex::buffer::Buffer; + use vortex::scalar::Scalar; + use vortex::validity::Validity; + + log!("Testing different array types..."); + + // Test ConstantArray. + let _const_array = ConstantArray::new(Scalar::from(42i32), 100); + log!("Created ConstantArray with 100 elements of value 42"); + + // Test StructArray. 
+ let field1 = PrimitiveArray::new(Buffer::from(vec![1i32, 2, 3]), Validity::NonNullable); + let field2 = PrimitiveArray::new(Buffer::from(vec![4i32, 5, 6]), Validity::NonNullable); + + let _struct_array = + StructArray::from_fields(&[("a", field1.into_array()), ("b", field2.into_array())]) + .map_err(|e| JsValue::from_str(&e.to_string()))?; + log!("Created StructArray with 2 fields"); + + // Test PrimitiveArray with different types. + let _int_array = + PrimitiveArray::new(Buffer::from(vec![1i64, 2, 3, 4]), Validity::NonNullable); + let _float_array = + PrimitiveArray::new(Buffer::from(vec![1.0f64, 2.0, 3.0]), Validity::NonNullable); + log!("Created PrimitiveArrays with different numeric types"); + + Ok(()) + } + + /// Test more compute operations. + pub fn test_compute_ops(&self) -> Result<(), JsValue> { + use vortex::IntoArray; + use vortex::arrays::{ConstantArray, PrimitiveArray}; + use vortex::buffer::Buffer; + use vortex::compute::{Operator, compare}; + use vortex::scalar::Scalar; + use vortex::validity::Validity; + + log!("Testing additional compute operations..."); + + let data: Vec = vec![10, 20, 30, 40, 50]; + let buffer = Buffer::from(data); + let array = PrimitiveArray::new(buffer, Validity::NonNullable).into_array(); + + // Test comparison operations with a scalar converted to array. + let scalar_array = ConstantArray::new(Scalar::from(25i32), 5).into_array(); + let _gt_result = compare(&array, &scalar_array, Operator::Gt) + .map_err(|e| JsValue::from_str(&e.to_string()))?; + log!("Compared array elements > 25"); + + // Test comparison with another array. + let threshold_array = ConstantArray::new(Scalar::from(30i32), 5).into_array(); + let _comparison = compare(&array, &threshold_array, Operator::Gte) + .map_err(|e| JsValue::from_str(&e.to_string()))?; + log!("Compared array elements >= 30"); + + // Test equality comparison. 
+ let eq_array = ConstantArray::new(Scalar::from(30i32), 5).into_array(); + let _eq_result = compare(&array, &eq_array, Operator::Eq) + .map_err(|e| JsValue::from_str(&e.to_string()))?; + log!("Compared array elements == 30"); + + Ok(()) + } + + /// Get size info. + pub fn get_size(&self) -> usize { + self.size + } +} + +/// Initialize the WASM module. +#[wasm_bindgen(start)] +pub fn init() { + log!("Vortex WASM module initialized"); +} + +/// Get version information. +#[wasm_bindgen] +pub fn get_version() -> String { + format!("vortex-wasm-test v{}", env!("CARGO_PKG_VERSION")) +} + +/// A simple test function to verify WASM is working. +#[wasm_bindgen] +pub fn add(a: i32, b: i32) -> i32 { + a + b +} + +#[cfg(test)] +mod tests { + use wasm_bindgen_test::*; + + wasm_bindgen_test_configure!(run_in_browser); + + use vortex::Array; + use vortex::IntoArray; + use vortex::arrays::{ConstantArray, PrimitiveArray, StructArray}; + use vortex::buffer::{Buffer, buffer}; + use vortex::compressor::BtrBlocksCompressor; + use vortex::compute::{Operator, compare, take}; + use vortex::scalar::Scalar; + use vortex::validity::Validity; + + #[wasm_bindgen_test] + fn test_primitive_array() { + let data: Vec = (0..1000).collect(); + let buffer = Buffer::from(data); + let array = PrimitiveArray::new(buffer, Validity::NonNullable); + assert_eq!(array.len(), 1000); + } + + #[wasm_bindgen_test] + fn test_compute_operations() { + let data: Vec = vec![1, 2, 3, 4, 5]; + let buffer = Buffer::from(data); + let array = PrimitiveArray::new(buffer, Validity::NonNullable).into_array(); + + // Test comparison. + let threshold_array = ConstantArray::new(Scalar::from(3i32), 5).into_array(); + let comparison = compare(&array, &threshold_array, Operator::Gt).expect("compare failed"); + assert_eq!(comparison.len(), 5); + + // Test take. 
+ let indices: Vec = vec![0, 2, 4]; + let indices_buffer = Buffer::from(indices); + let indices_array = + PrimitiveArray::new(indices_buffer, Validity::NonNullable).into_array(); + let taken = take(&array, &indices_array).expect("take failed"); + assert_eq!(taken.len(), 3); + } + + #[wasm_bindgen_test] + fn test_encodings() { + use vortex::encodings; + + // Verify encodings are linked by checking their sizes. + let alp_size = std::mem::size_of::(); + let bitpacked_size = std::mem::size_of::(); + let runend_size = std::mem::size_of::(); + let zigzag_size = std::mem::size_of::(); + + assert!(alp_size > 0); + assert!(bitpacked_size > 0); + assert!(runend_size > 0); + assert!(zigzag_size > 0); + } + + #[wasm_bindgen_test] + fn test_compression() { + let array = PrimitiveArray::new(buffer![1i32; 1024], Validity::AllValid).to_array(); + let original_len = array.len(); + + let compressed = BtrBlocksCompressor::default() + .compress(&array) + .expect("compression failed"); + + assert_eq!(compressed.len(), original_len); + } + + #[wasm_bindgen_test] + fn test_array_types() { + // ConstantArray. + let const_array = ConstantArray::new(Scalar::from(42i32), 100); + assert_eq!(const_array.len(), 100); + + // StructArray. + let field1 = PrimitiveArray::new(Buffer::from(vec![1i32, 2, 3]), Validity::NonNullable); + let field2 = PrimitiveArray::new(Buffer::from(vec![4i32, 5, 6]), Validity::NonNullable); + let struct_array = + StructArray::from_fields(&[("a", field1.into_array()), ("b", field2.into_array())]) + .expect("StructArray creation failed"); + assert_eq!(struct_array.len(), 3); + + // Different numeric types. 
+ let int_array = + PrimitiveArray::new(Buffer::from(vec![1i64, 2, 3, 4]), Validity::NonNullable); + let float_array = + PrimitiveArray::new(Buffer::from(vec![1.0f64, 2.0, 3.0]), Validity::NonNullable); + assert_eq!(int_array.len(), 4); + assert_eq!(float_array.len(), 3); + } +} diff --git a/wasm-test/src/main.rs b/wasm-test/src/main.rs index 8be0ac82570..015f3ab3df4 100644 --- a/wasm-test/src/main.rs +++ b/wasm-test/src/main.rs @@ -1,19 +1,129 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use vortex::array::arrays::PrimitiveArray; -use vortex::array::validity::Validity; -use vortex::array::Array; +//! WASI integration test for Vortex. +//! +//! This binary is compiled to `wasm32-wasip1` and executed via Wasmer to verify that Vortex works +//! correctly in a WASI environment. + +use vortex::Array; +use vortex::IntoArray; +use vortex::arrays::ConstantArray; +use vortex::arrays::PrimitiveArray; +use vortex::arrays::StructArray; +use vortex::buffer::Buffer; use vortex::buffer::buffer; use vortex::compressor::BtrBlocksCompressor; +use vortex::compute::Operator; +use vortex::compute::compare; +use vortex::compute::take; +use vortex::scalar::Scalar; +use vortex::validity::Validity; + +fn main() { + println!("Running Vortex WASI integration tests...\n"); + + test_primitive_array(); + test_compute_operations(); + test_encodings(); + test_compression(); + test_array_types(); + + println!("\nAll WASI integration tests passed!"); +} + +fn test_primitive_array() { + println!("Testing PrimitiveArray creation..."); + + let data: Vec = (0..1000).collect(); + let buffer = Buffer::from(data); + let array = PrimitiveArray::new(buffer, Validity::NonNullable); + + assert_eq!(array.len(), 1000); + println!(" Created PrimitiveArray with {} elements", array.len()); +} + +fn test_compute_operations() { + println!("Testing compute operations..."); + + let data: Vec = vec![1, 2, 3, 4, 5]; + let buffer = Buffer::from(data); + let 
array = PrimitiveArray::new(buffer, Validity::NonNullable).into_array(); + + // Test comparison. + let threshold_array = ConstantArray::new(Scalar::from(3i32), 5).into_array(); + let comparison = compare(&array, &threshold_array, Operator::Gt).expect("compare failed"); + assert_eq!(comparison.len(), 5); + println!(" Comparison operation succeeded"); + + // Test take. + let indices: Vec = vec![0, 2, 4]; + let indices_buffer = Buffer::from(indices); + let indices_array = PrimitiveArray::new(indices_buffer, Validity::NonNullable).into_array(); + let taken = take(&array, &indices_array).expect("take failed"); + assert_eq!(taken.len(), 3); + println!(" Take operation succeeded"); +} + +fn test_encodings() { + println!("Testing encoding types..."); + + use vortex::encodings; + + // Verify encodings are linked by checking their sizes. + let alp_size = std::mem::size_of::(); + let bitpacked_size = std::mem::size_of::(); + let runend_size = std::mem::size_of::(); + let zigzag_size = std::mem::size_of::(); + + assert!(alp_size > 0); + assert!(bitpacked_size > 0); + assert!(runend_size > 0); + assert!(zigzag_size > 0); + + println!(" ALP, BitPacked, RunEnd, ZigZag encodings are linked"); +} -//use wasm_bindgen::prelude::*; +fn test_compression() { + println!("Testing compression..."); -pub fn main() { - // Extremely simple test of compression/decompression and a few compute functions. + // Create an array with repeated values (good for compression). let array = PrimitiveArray::new(buffer![1i32; 1024], Validity::AllValid).to_array(); + let original_len = array.len(); + + let compressed = BtrBlocksCompressor::default() + .compress(&array) + .expect("compression failed"); + + println!( + " Compressed array: {} -> {} elements", + original_len, + compressed.len() + ); +} + +fn test_array_types() { + println!("Testing array types..."); + + // ConstantArray. 
+ let const_array = ConstantArray::new(Scalar::from(42i32), 100); + assert_eq!(const_array.len(), 100); + println!(" ConstantArray created"); + + // StructArray. + let field1 = PrimitiveArray::new(Buffer::from(vec![1i32, 2, 3]), Validity::NonNullable); + let field2 = PrimitiveArray::new(Buffer::from(vec![4i32, 5, 6]), Validity::NonNullable); + let struct_array = + StructArray::from_fields(&[("a", field1.into_array()), ("b", field2.into_array())]) + .expect("StructArray creation failed"); + assert_eq!(struct_array.len(), 3); + println!(" StructArray created with 2 fields"); - let compressed = BtrBlocksCompressor::default().compress(&array).unwrap(); - println!("Compressed size: {}", compressed.len()); - println!("Tree view: {}", compressed.display_tree()); + // Different numeric types. + let int_array = PrimitiveArray::new(Buffer::from(vec![1i64, 2, 3, 4]), Validity::NonNullable); + let float_array = + PrimitiveArray::new(Buffer::from(vec![1.0f64, 2.0, 3.0]), Validity::NonNullable); + assert_eq!(int_array.len(), 4); + assert_eq!(float_array.len(), 3); + println!(" PrimitiveArrays with i64 and f64 created"); } From 53aea802071d7340338a8a02266aeffae742dd06 Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Mon, 1 Dec 2025 15:38:08 -0500 Subject: [PATCH 02/30] add naive vortex append Signed-off-by: Connor Tsui --- vortex-wasm/Cargo.toml | 40 ++++++++++ vortex-wasm/src/benchmark_website.rs | 109 +++++++++++++++++++++++++++ vortex-wasm/src/bin/append.rs | 70 +++++++++++++++++ vortex-wasm/src/bin/migrate.rs | 104 +++++++++++++++++++++++++ vortex-wasm/src/lib.rs | 4 + 5 files changed, 327 insertions(+) create mode 100644 vortex-wasm/Cargo.toml create mode 100644 vortex-wasm/src/benchmark_website.rs create mode 100644 vortex-wasm/src/bin/append.rs create mode 100644 vortex-wasm/src/bin/migrate.rs create mode 100644 vortex-wasm/src/lib.rs diff --git a/vortex-wasm/Cargo.toml b/vortex-wasm/Cargo.toml new file mode 100644 index 00000000000..09de0017784 --- /dev/null +++ 
b/vortex-wasm/Cargo.toml @@ -0,0 +1,40 @@ +[package] +name = "vortex-wasm" +description = "WASM bindings for Vortex" +authors.workspace = true +categories.workspace = true +edition.workspace = true +homepage.workspace = true +include.workspace = true +keywords.workspace = true +license.workspace = true +readme.workspace = true +repository.workspace = true +rust-version.workspace = true +version.workspace = true + +[dependencies] +async-fs = { workspace = true } +serde = { workspace = true, features = ["derive"] } +serde_json = { workspace = true } +vortex-array = { workspace = true } +vortex-buffer = { workspace = true } +vortex-dtype = { workspace = true } +vortex-error = { workspace = true } +vortex-file = { workspace = true } +vortex-io = { workspace = true } +vortex-layout = { workspace = true } +vortex-metrics = { workspace = true } +vortex-scalar = { workspace = true } +vortex-session = { workspace = true } + +[[bin]] +name = "migrate" +path = "src/bin/migrate.rs" + +[[bin]] +name = "append" +path = "src/bin/append.rs" + +[lints] +workspace = true diff --git a/vortex-wasm/src/benchmark_website.rs b/vortex-wasm/src/benchmark_website.rs new file mode 100644 index 00000000000..4b3b7d7f551 --- /dev/null +++ b/vortex-wasm/src/benchmark_website.rs @@ -0,0 +1,109 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Utilities for the Vortex benchmark website. 
+ +use std::path::Path; + +use vortex_array::builders::builder_with_capacity; +use vortex_array::expr::session::ExprSession; +use vortex_array::session::ArraySession; +use vortex_array::stream::ArrayStreamExt; +use vortex_error::VortexResult; +use vortex_file::OpenOptionsSessionExt; +use vortex_file::WriteOptionsSessionExt; +use vortex_io::runtime::BlockingRuntime; +use vortex_io::runtime::current::CurrentThreadRuntime; +use vortex_io::session::RuntimeSession; +use vortex_io::session::RuntimeSessionExt; +use vortex_layout::session::LayoutSession; +use vortex_metrics::VortexMetrics; +use vortex_scalar::Scalar; +use vortex_session::VortexSession; + +/// Reads a Vortex file and appends a single scalar entry, writing the result back. +/// +/// This function: +/// 1. Reads the existing Vortex file (using the scalar's dtype) +/// 2. Appends the new scalar to the existing data using a builder +/// 3. Writes the combined data back to the output path +/// +/// The input and output paths can be the same to overwrite the existing file. +/// +/// # Arguments +/// +/// * `input_path` - Path to the existing Vortex file to read. +/// * `output_path` - Path to write the updated Vortex file (can be same as input). +/// * `new_entry` - The scalar to append. Its dtype is used for reading/writing the file. +/// +/// # Returns +/// +/// The total number of entries in the resulting file. +pub fn append_entry( + input_path: impl AsRef, + output_path: impl AsRef, + new_entry: Scalar, +) -> VortexResult { + let runtime = CurrentThreadRuntime::new(); + + let session = VortexSession::empty() + .with::() + .with::() + .with::() + .with::() + .with::() + .with_handle(runtime.handle()); + + vortex_file::register_default_encodings(&session); + + runtime.block_on(naive_append_entry_async( + &session, + input_path.as_ref(), + output_path.as_ref(), + new_entry, + )) +} + +/// SUPER NAIVE append to a Vortex file. 
+async fn naive_append_entry_async( + session: &VortexSession, + input_path: &Path, + output_path: &Path, + new_entry: Scalar, +) -> VortexResult { + let dtype = new_entry.dtype().clone(); + + // Read the existing file. + let file = session + .open_options() + .with_dtype(dtype.clone()) + .open(input_path) + .await?; + + // Read all existing data. + let existing_array = file.scan()?.into_array_stream()?.read_all().await?; + let existing_len = existing_array.len(); + + // Create a builder and extend with existing data, then append the new entry. + let total_capacity = existing_len + 1; + let mut builder = builder_with_capacity(&dtype, total_capacity); + + // Add existing data. + builder.extend_from_array(&existing_array); + + // Append the new entry. + builder.append_scalar(&new_entry)?; + + let combined = builder.finish(); + let total_len = combined.len(); + + // Write to output file. + let file = async_fs::File::create(output_path).await?; + + let mut writer = session.write_options().writer(file, dtype); + + writer.push(combined).await?; + writer.finish().await?; + + Ok(total_len) +} diff --git a/vortex-wasm/src/bin/append.rs b/vortex-wasm/src/bin/append.rs new file mode 100644 index 00000000000..0fc4b1405fb --- /dev/null +++ b/vortex-wasm/src/bin/append.rs @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Binary to append a single benchmark entry to a Vortex file. + +#![allow(clippy::expect_used)] + +use std::env; + +use vortex_dtype::DType; +use vortex_dtype::FieldNames; +use vortex_dtype::Nullability; +use vortex_dtype::PType; +use vortex_dtype::StructFields; +use vortex_scalar::Scalar; +use vortex_wasm::benchmark_website::append_entry; + +/// Returns the expected DType for the benchmark data file. 
+/// +/// The schema is a struct with two fields: +/// - `value`: u64 (non-nullable) +/// - `commit_id`: utf8 string (non-nullable) +fn benchmark_dtype() -> DType { + DType::Struct( + StructFields::new( + FieldNames::from(["value", "commit_id"]), + vec![ + DType::Primitive(PType::U64, Nullability::NonNullable), + DType::Utf8(Nullability::NonNullable), + ], + ), + Nullability::NonNullable, + ) +} + +/// Creates a benchmark scalar from a value and commit ID. +fn benchmark_scalar(value: u64, commit_id: &str) -> Scalar { + Scalar::struct_( + benchmark_dtype(), + vec![ + Scalar::primitive(value, Nullability::NonNullable), + Scalar::utf8(commit_id, Nullability::NonNullable), + ], + ) +} + +fn main() { + let args: Vec = env::args().collect(); + + if args.len() < 4 { + eprintln!("Usage: append_entries "); + eprintln!(); + eprintln!("Appends a single benchmark entry to the Vortex file."); + return; + } + + let vortex_path = &args[1]; + let value: u64 = args[2].parse().expect("Failed to parse value as u64"); + let commit_id = &args[3]; + + let scalar = benchmark_scalar(value, commit_id); + + let total = append_entry(vortex_path, vortex_path, scalar) + .expect("Failed to append entry to Vortex file"); + + println!( + "Appended entry (value={}, commit_id={}) to {} (total: {} entries)", + value, commit_id, vortex_path, total + ); +} diff --git a/vortex-wasm/src/bin/migrate.rs b/vortex-wasm/src/bin/migrate.rs new file mode 100644 index 00000000000..58516a8ff66 --- /dev/null +++ b/vortex-wasm/src/bin/migrate.rs @@ -0,0 +1,104 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +#![allow(clippy::expect_used)] + +use std::env; +use std::fs; + +use serde::Deserialize; +use vortex_array::IntoArray; +use vortex_array::arrays::PrimitiveArray; +use vortex_array::arrays::StructArray; +use vortex_array::arrays::VarBinArray; +use vortex_array::expr::session::ExprSession; +use vortex_array::session::ArraySession; +use 
vortex_array::validity::Validity; +use vortex_buffer::Buffer; +use vortex_dtype::DType; +use vortex_dtype::Nullability; +use vortex_file::WriteOptionsSessionExt; +use vortex_io::runtime::BlockingRuntime; +use vortex_io::runtime::current::CurrentThreadRuntime; +use vortex_io::session::RuntimeSession; +use vortex_io::session::RuntimeSessionExt; +use vortex_layout::session::LayoutSession; +use vortex_metrics::VortexMetrics; +use vortex_session::VortexSession; + +/// Represents a benchmark entry with value and commit ID. +#[derive(Debug, Deserialize)] +struct BenchmarkEntry { + value: u64, + commit_id: String, +} + +fn main() { + let runtime = CurrentThreadRuntime::new(); + + let session = VortexSession::empty() + .with::() + .with::() + .with::() + .with::() + .with::() + .with_handle(runtime.handle()); + + vortex_file::register_default_encodings(&session); + + runtime.block_on(async_main(session)); +} + +async fn async_main(session: VortexSession) { + let args: Vec = env::args().collect(); + let input_path = args + .get(1) + .expect("Usage: migrate "); + let output_path = args + .get(2) + .expect("Usage: migrate "); + + // Parse JSON. + let contents = fs::read_to_string(input_path).expect("Failed to read file"); + let entries: Vec = + serde_json::from_str(&contents).expect("Failed to parse JSON"); + + // Extract values and commit_ids into separate vectors. + let values: Vec = entries.iter().map(|e| e.value).collect(); + let commit_ids: Vec<&str> = entries.iter().map(|e| e.commit_id.as_str()).collect(); + let num_entries = entries.len(); + + // Create primitive array for values. + let values_array = PrimitiveArray::new(Buffer::from(values), Validity::NonNullable); + + // Create VarBin array for commit_ids (UTF8 strings). + let commit_ids_array = VarBinArray::from_iter( + commit_ids.into_iter().map(Some), + DType::Utf8(Nullability::NonNullable), + ); + + // Create struct array with both fields. 
+ let struct_array = StructArray::from_fields(&[ + ("value", values_array.into_array()), + ("commit_id", commit_ids_array.into_array()), + ]) + .expect("Failed to create struct array"); + + // Write to Vortex file using push-based API. + let file = async_fs::File::create(output_path) + .await + .expect("Failed to create output file"); + + let mut writer = session + .write_options() + .writer(file, struct_array.dtype().clone()); + + writer + .push(struct_array.into_array()) + .await + .expect("Failed to push array"); + + writer.finish().await.expect("Failed to finish writing"); + + println!("Wrote {} entries to {}", num_entries, output_path); +} diff --git a/vortex-wasm/src/lib.rs b/vortex-wasm/src/lib.rs new file mode 100644 index 00000000000..0e5a76b95be --- /dev/null +++ b/vortex-wasm/src/lib.rs @@ -0,0 +1,4 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +pub mod benchmark_website; From f631c0db83a4bc6d3e15fcfa7225c1f2afc1eb4c Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Mon, 1 Dec 2025 16:59:41 -0500 Subject: [PATCH 03/30] poc rust binary for updating S3 Signed-off-by: Connor Tsui --- .github/workflows/bench.yml | 11 +- .github/workflows/sql-benchmarks.yml | 4 +- Cargo.lock | 542 +++++++++++++++++++++++---- bench-vortex/Cargo.toml | 3 + bench-vortex/src/bin/s3_cat.rs | 180 +++++++++ 5 files changed, 670 insertions(+), 70 deletions(-) create mode 100644 bench-vortex/src/bin/s3_cat.rs diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml index 46a0ef0c5bb..73fdacc10ae 100644 --- a/.github/workflows/bench.yml +++ b/.github/workflows/bench.yml @@ -18,6 +18,11 @@ jobs: timeout-minutes: 120 steps: - uses: actions/checkout@v6 + - uses: ./.github/actions/setup-rust + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} + - name: Build s3_cat binary + run: cargo build --bin s3_cat --package bench-vortex --release - name: Setup AWS CLI uses: aws-actions/configure-aws-credentials@v5 with: @@ -30,7 
+35,7 @@ jobs: sudo apt-get update && sudo apt-get install -y jq bash scripts/commit-json.sh > new-commit.json - bash scripts/cat-s3.sh vortex-benchmark-results-database commits.json new-commit.json + target/release/s3_cat vortex-benchmark-results-database-test commits.json new-commit.json bench: timeout-minutes: 120 @@ -69,7 +74,7 @@ jobs: RUSTFLAGS: "-C target-cpu=native -C force-frame-pointers=yes" # The main difference between this and `bench-pr.yml` is that we add the `lance` feature. run: | - cargo build --bin ${{ matrix.benchmark.id }} --package bench-vortex --profile release_debug --features lance + cargo build --bin ${{ matrix.benchmark.id }} --bin s3_cat --package bench-vortex --profile release_debug --features lance - name: Setup Polar Signals uses: polarsignals/gh-actions-ps-profiling@v0.6.0 @@ -96,7 +101,7 @@ jobs: - name: Upload Benchmark Results shell: bash run: | - bash scripts/cat-s3.sh vortex-benchmark-results-database data.json.gz ${{ matrix.benchmark.id }}.json + target/release_debug/s3_cat vortex-benchmark-results-database-test data.json.gz ${{ matrix.benchmark.id }}.json sql: uses: ./.github/workflows/sql-benchmarks.yml secrets: inherit diff --git a/.github/workflows/sql-benchmarks.yml b/.github/workflows/sql-benchmarks.yml index 949075339ef..63d574a5c6d 100644 --- a/.github/workflows/sql-benchmarks.yml +++ b/.github/workflows/sql-benchmarks.yml @@ -145,7 +145,7 @@ jobs: env: RUSTFLAGS: "-C target-cpu=native -C force-frame-pointers=yes" run: | - cargo build --bin query_bench --package bench-vortex --profile release_debug ${{ matrix.build_args }} + cargo build --bin query_bench --bin s3_cat --package bench-vortex --profile release_debug ${{ matrix.build_args }} - name: Generate data shell: bash @@ -265,4 +265,4 @@ jobs: if: inputs.mode == 'develop' shell: bash run: | - bash scripts/cat-s3.sh vortex-benchmark-results-database data.json.gz results.json + target/release_debug/s3_cat vortex-benchmark-results-database-test data.json.gz results.json 
diff --git a/Cargo.lock b/Cargo.lock index cf5c3a600f7..4b269171394 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -245,7 +245,7 @@ dependencies = [ "arrow-schema", "arrow-select", "atoi", - "base64", + "base64 0.22.1", "chrono", "comfy-table", "half", @@ -680,6 +680,7 @@ dependencies = [ "aws-credential-types", "aws-sigv4", "aws-smithy-async", + "aws-smithy-eventstream", "aws-smithy-http", "aws-smithy-runtime", "aws-smithy-runtime-api", @@ -695,6 +696,40 @@ dependencies = [ "uuid", ] +[[package]] +name = "aws-sdk-s3" +version = "1.109.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c6d81b75f8ff78882e70c5909804b44553d56136899fb4015a0a68ecc870e0e" +dependencies = [ + "aws-credential-types", + "aws-runtime", + "aws-sigv4", + "aws-smithy-async", + "aws-smithy-checksums", + "aws-smithy-eventstream", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-smithy-xml", + "aws-types", + "bytes", + "fastrand", + "hex", + "hmac", + "http 0.2.12", + "http 1.3.1", + "http-body 0.4.6", + "lru", + "percent-encoding", + "regex-lite", + "sha2", + "tracing", + "url", +] + [[package]] name = "aws-sdk-sso" version = "1.86.0" @@ -769,19 +804,25 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bffc03068fbb9c8dd5ce1c6fb240678a5cffb86fb2b7b1985c999c4b83c8df68" dependencies = [ "aws-credential-types", + "aws-smithy-eventstream", "aws-smithy-http", "aws-smithy-runtime-api", "aws-smithy-types", "bytes", + "crypto-bigint 0.5.5", "form_urlencoded", "hex", "hmac", "http 0.2.12", "http 1.3.1", + "p256", "percent-encoding", + "ring", "sha2", + "subtle", "time", "tracing", + "zeroize", ] [[package]] @@ -795,12 +836,44 @@ dependencies = [ "tokio", ] +[[package]] +name = "aws-smithy-checksums" +version = "0.63.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "165d8583d8d906e2fb5511d29201d447cc710864f075debcdd9c31c265412806" +dependencies 
= [ + "aws-smithy-http", + "aws-smithy-types", + "bytes", + "crc-fast", + "hex", + "http 0.2.12", + "http-body 0.4.6", + "md-5", + "pin-project-lite", + "sha1", + "sha2", + "tracing", +] + +[[package]] +name = "aws-smithy-eventstream" +version = "0.60.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9656b85088f8d9dc7ad40f9a6c7228e1e8447cdf4b046c87e152e0805dea02fa" +dependencies = [ + "aws-smithy-types", + "bytes", + "crc32fast", +] + [[package]] name = "aws-smithy-http" version = "0.62.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3feafd437c763db26aa04e0cc7591185d0961e64c61885bece0fb9d50ceac671" dependencies = [ + "aws-smithy-eventstream", "aws-smithy-runtime-api", "aws-smithy-types", "bytes", @@ -824,17 +897,23 @@ dependencies = [ "aws-smithy-async", "aws-smithy-runtime-api", "aws-smithy-types", - "h2", + "h2 0.3.27", + "h2 0.4.12", + "http 0.2.12", "http 1.3.1", - "hyper", - "hyper-rustls", + "http-body 0.4.6", + "hyper 0.14.32", + "hyper 1.7.0", + "hyper-rustls 0.24.2", + "hyper-rustls 0.27.7", "hyper-util", "pin-project-lite", - "rustls", - "rustls-native-certs", + "rustls 0.21.12", + "rustls 0.23.32", + "rustls-native-certs 0.8.1", "rustls-pki-types", "tokio", - "tokio-rustls", + "tokio-rustls 0.26.4", "tower", "tracing", ] @@ -917,6 +996,7 @@ dependencies = [ "base64-simd", "bytes", "bytes-utils", + "futures-core", "http 0.2.12", "http 1.3.1", "http-body 0.4.6", @@ -929,6 +1009,8 @@ dependencies = [ "ryu", "serde", "time", + "tokio", + "tokio-util", ] [[package]] @@ -965,6 +1047,18 @@ dependencies = [ "tokio", ] +[[package]] +name = "base16ct" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "349a06037c7bf932dd7e7d1f653678b2038b9ad46a74102f1fc7bd7872678cce" + +[[package]] +name = "base64" +version = "0.21.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" + [[package]] name = "base64" version = "0.22.1" @@ -997,6 +1091,8 @@ dependencies = [ "arrow-schema", "arrow-select", "async-trait", + "aws-config", + "aws-sdk-s3", "bytes", "bzip2", "clap", @@ -1005,6 +1101,7 @@ dependencies = [ "datafusion-physical-plan", "dirs", "erased-serde", + "flate2", "futures", "glob", "humansize", @@ -1479,7 +1576,7 @@ checksum = "af491d569909a7e4dee0ad7db7f5341fef5c614d5b8ec8cf765732aba3cff681" dependencies = [ "serde", "termcolor", - "unicode-width 0.1.14", + "unicode-width 0.2.0", ] [[package]] @@ -1555,7 +1652,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "117725a109d387c937a1533ce01b450cbde6b88abceea8473c4d7a85853cda3c" dependencies = [ "lazy_static", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -1767,6 +1864,19 @@ version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5" +[[package]] +name = "crc-fast" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6bf62af4cc77d8fe1c22dde4e721d87f2f54056139d8c412e1366b740305f56f" +dependencies = [ + "crc", + "digest", + "libc", + "rand 0.9.2", + "regex", +] + [[package]] name = "crc32c" version = "0.6.8" @@ -1877,6 +1987,28 @@ version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" +[[package]] +name = "crypto-bigint" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef2b4b23cddf68b89b8f8069890e8c270d54e2d5fe1b143820234805e4cb17ef" +dependencies = [ + "generic-array", + "rand_core 0.6.4", + "subtle", + "zeroize", +] + +[[package]] +name = "crypto-bigint" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"0dc92fb57ca44df6db8059111ab3af99a63d5d0f8375d9972e319a379c6bab76" +dependencies = [ + "rand_core 0.6.4", + "subtle", +] + [[package]] name = "crypto-common" version = "0.1.6" @@ -2173,7 +2305,7 @@ dependencies = [ "ahash", "arrow", "arrow-ipc", - "base64", + "base64 0.22.1", "chrono", "half", "hashbrown 0.14.5", @@ -2381,7 +2513,7 @@ checksum = "7de2782136bd6014670fd84fe3b0ca3b3e4106c96403c3ae05c0598577139977" dependencies = [ "arrow", "arrow-buffer", - "base64", + "base64 0.22.1", "blake2", "blake3", "chrono", @@ -2718,6 +2850,16 @@ version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "26bf8fc351c5ed29b5c2f0cbbac1b209b74f60ecd62e675a998df72c49af5204" +[[package]] +name = "der" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1a467a65c5e759bce6e65eaf91cc29f466cdc57cb65777bd646872a8a1fd4de" +dependencies = [ + "const-oid", + "zeroize", +] + [[package]] name = "der" version = "0.7.10" @@ -2801,7 +2943,7 @@ dependencies = [ "libc", "option-ext", "redox_users", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -2862,12 +3004,44 @@ version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813" +[[package]] +name = "ecdsa" +version = "0.14.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413301934810f597c1d19ca71c8710e99a3f1ba28a0d2ebc01551a2daeea3c5c" +dependencies = [ + "der 0.6.1", + "elliptic-curve", + "rfc6979", + "signature 1.6.4", +] + [[package]] name = "either" version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" +[[package]] +name = "elliptic-curve" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7bb888ab5300a19b8e5bceef25ac745ad065f3c9f7efc6de1b91958110891d3" 
+dependencies = [ + "base16ct", + "crypto-bigint 0.4.9", + "der 0.6.1", + "digest", + "ff", + "generic-array", + "group", + "pkcs8 0.9.0", + "rand_core 0.6.4", + "sec1", + "subtle", + "zeroize", +] + [[package]] name = "encode_unicode" version = "1.0.0" @@ -2970,7 +3144,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] @@ -3070,6 +3244,16 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" +[[package]] +name = "ff" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d013fc25338cc558c5c2cfbad646908fb23591e2404481826742b651c9af7160" +dependencies = [ + "rand_core 0.6.4", + "subtle", +] + [[package]] name = "filetime" version = "0.2.26" @@ -3406,6 +3590,36 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f9e2d4c0a8296178d8802098410ca05d86b17a10bb5ab559b3fb404c1f948220" +[[package]] +name = "group" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5dfbfb3a6cfbd390d5c9564ab283a0349b9b9fcd46a706c1eb10e0db70bfbac7" +dependencies = [ + "ff", + "rand_core 0.6.4", + "subtle", +] + +[[package]] +name = "h2" +version = "0.3.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0beca50380b1fc32983fc1cb4587bfa4bb9e78fc259aad4a0032d2080309222d" +dependencies = [ + "bytes", + "fnv", + "futures-core", + "futures-sink", + "futures-util", + "http 0.2.12", + "indexmap", + "slab", + "tokio", + "tokio-util", + "tracing", +] + [[package]] name = "h2" version = "0.4.12" @@ -3581,6 +3795,12 @@ version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" +[[package]] +name = "httpdate" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" + [[package]] name = "humansize" version = "2.1.3" @@ -3596,6 +3816,30 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "135b12329e5e3ce057a9f972339ea52bc954fe1e9358ef27f95e89716fbc5424" +[[package]] +name = "hyper" +version = "0.14.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41dfc780fdec9373c01bae43289ea34c972e40ee3c9f6b3c8801a35f35586ce7" +dependencies = [ + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "h2 0.3.27", + "http 0.2.12", + "http-body 0.4.6", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "socket2 0.5.10", + "tokio", + "tower-service", + "tracing", + "want", +] + [[package]] name = "hyper" version = "1.7.0" @@ -3606,7 +3850,7 @@ dependencies = [ "bytes", "futures-channel", "futures-core", - "h2", + "h2 0.4.12", "http 1.3.1", "http-body 1.0.1", "httparse", @@ -3618,6 +3862,22 @@ dependencies = [ "want", ] +[[package]] +name = "hyper-rustls" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590" +dependencies = [ + "futures-util", + "http 0.2.12", + "hyper 0.14.32", + "log", + "rustls 0.21.12", + "rustls-native-certs 0.6.3", + "tokio", + "tokio-rustls 0.24.1", +] + [[package]] name = "hyper-rustls" version = "0.27.7" @@ -3625,13 +3885,13 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58" dependencies = [ "http 1.3.1", - "hyper", + "hyper 1.7.0", "hyper-util", - "rustls", - "rustls-native-certs", + "rustls 0.23.32", + "rustls-native-certs 0.8.1", "rustls-pki-types", "tokio", 
- "tokio-rustls", + "tokio-rustls 0.26.4", "tower-service", "webpki-roots", ] @@ -3642,19 +3902,19 @@ version = "0.1.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c6995591a8f1380fcb4ba966a252a4b29188d51d2b89e3a252f5305be65aea8" dependencies = [ - "base64", + "base64 0.22.1", "bytes", "futures-channel", "futures-core", "futures-util", "http 1.3.1", "http-body 1.0.1", - "hyper", + "hyper 1.7.0", "ipnet", "libc", "percent-encoding", "pin-project-lite", - "socket2", + "socket2 0.6.0", "system-configuration", "tokio", "tower-service", @@ -3835,7 +4095,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4b0f83760fb341a774ed326568e19f5a863af4a952def8c39f9ab92fd95b88e5" dependencies = [ "equivalent", - "hashbrown 0.15.5", + "hashbrown 0.16.1", ] [[package]] @@ -3983,7 +4243,7 @@ dependencies = [ "portable-atomic", "portable-atomic-util", "serde_core", - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] @@ -4082,7 +4342,7 @@ version = "9.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a87cc7a48537badeae96744432de36f4be2b4a34a05a5ef32e9dd8a1c169dde" dependencies = [ - "base64", + "base64 0.22.1", "js-sys", "pem", "ring", @@ -5188,7 +5448,7 @@ version = "0.50.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" dependencies = [ - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -5354,7 +5614,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4c1be0c6c22ec0817cdc77d3842f721a17fd30ab6965001415b5402a74e6b740" dependencies = [ "async-trait", - "base64", + "base64 0.22.1", "bytes", "chrono", "form_urlencoded", @@ -5363,7 +5623,7 @@ dependencies = [ "http-body-util", "httparse", "humantime", - "hyper", + "hyper 1.7.0", "itertools 0.14.0", "md-5", "parking_lot", @@ -5372,7 +5632,7 @@ dependencies = [ "rand 0.9.2", "reqwest", "ring", - 
"rustls-pemfile", + "rustls-pemfile 2.2.0", "serde", "serde_json", "serde_urlencoded", @@ -5429,7 +5689,7 @@ checksum = "ffb9838d0575c6dbaf3fcec7255af8d5771996d4af900bbb6fa9a314dec00a1a" dependencies = [ "anyhow", "backon", - "base64", + "base64 0.22.1", "bytes", "chrono", "crc32c", @@ -5577,6 +5837,17 @@ dependencies = [ "stable_deref_trait", ] +[[package]] +name = "p256" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51f44edd08f51e2ade572f141051021c5af22677e42b7dd28a88155151c33594" +dependencies = [ + "ecdsa", + "elliptic-curve", + "sha2", +] + [[package]] name = "papergrid" version = "0.17.0" @@ -5659,7 +5930,7 @@ dependencies = [ "arrow-ipc", "arrow-schema", "arrow-select", - "base64", + "base64 0.22.1", "brotli", "bytes", "chrono", @@ -5728,7 +5999,7 @@ version = "3.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d30c53c26bc5b31a98cd02d20f25a7c8567146caf63ed593a9d87b2775291be" dependencies = [ - "base64", + "base64 0.22.1", "serde_core", ] @@ -5842,9 +6113,9 @@ version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c8ffb9f10fa047879315e6625af03c164b16962a5368d724ed16323b68ace47f" dependencies = [ - "der", - "pkcs8", - "spki", + "der 0.7.10", + "pkcs8 0.10.2", + "spki 0.7.3", ] [[package]] @@ -5855,11 +6126,21 @@ checksum = "e847e2c91a18bfa887dd028ec33f2fe6f25db77db3619024764914affe8b69a6" dependencies = [ "aes", "cbc", - "der", + "der 0.7.10", "pbkdf2", "scrypt", "sha2", - "spki", + "spki 0.7.3", +] + +[[package]] +name = "pkcs8" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9eca2c590a5f85da82668fa685c09ce2888b9430e83299debf1f34b65fd4a4ba" +dependencies = [ + "der 0.6.1", + "spki 0.6.0", ] [[package]] @@ -5868,10 +6149,10 @@ version = "0.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f950b2377845cebe5cf8b5165cb3cc1a5e0fa5cfa3e1f7f55707d8fd82e0a7b7" 
dependencies = [ - "der", + "der 0.7.10", "pkcs5", "rand_core 0.6.4", - "spki", + "spki 0.7.3", ] [[package]] @@ -6225,8 +6506,8 @@ dependencies = [ "quinn-proto", "quinn-udp", "rustc-hash", - "rustls", - "socket2", + "rustls 0.23.32", + "socket2 0.6.0", "thiserror 2.0.17", "tokio", "tracing", @@ -6245,7 +6526,7 @@ dependencies = [ "rand 0.9.2", "ring", "rustc-hash", - "rustls", + "rustls 0.23.32", "rustls-pki-types", "slab", "thiserror 2.0.17", @@ -6263,9 +6544,9 @@ dependencies = [ "cfg_aliases", "libc", "once_cell", - "socket2", + "socket2 0.6.0", "tracing", - "windows-sys 0.52.0", + "windows-sys 0.60.2", ] [[package]] @@ -6521,7 +6802,7 @@ checksum = "43451dbf3590a7590684c25fb8d12ecdcc90ed3ac123433e500447c7d77ed701" dependencies = [ "anyhow", "async-trait", - "base64", + "base64 0.22.1", "chrono", "form_urlencoded", "getrandom 0.2.16", @@ -6552,18 +6833,18 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9d0946410b9f7b082a427e4ef5c8ff541a88b357bc6c637c40db3a68ac70a36f" dependencies = [ "async-compression", - "base64", + "base64 0.22.1", "bytes", "encoding_rs", "futures-channel", "futures-core", "futures-util", - "h2", + "h2 0.4.12", "http 1.3.1", "http-body 1.0.1", "http-body-util", - "hyper", - "hyper-rustls", + "hyper 1.7.0", + "hyper-rustls 0.27.7", "hyper-util", "js-sys", "log", @@ -6572,15 +6853,15 @@ dependencies = [ "percent-encoding", "pin-project-lite", "quinn", - "rustls", - "rustls-native-certs", + "rustls 0.23.32", + "rustls-native-certs 0.8.1", "rustls-pki-types", "serde", "serde_json", "serde_urlencoded", "sync_wrapper", "tokio", - "tokio-rustls", + "tokio-rustls 0.26.4", "tokio-util", "tower", "tower-http", @@ -6593,6 +6874,17 @@ dependencies = [ "webpki-roots", ] +[[package]] +name = "rfc6979" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7743f17af12fa0b03b803ba12cd6a8d9483a587e89c69445e3909655c0b9fabb" +dependencies = [ + "crypto-bigint 0.4.9", + "hmac", + 
"zeroize", +] + [[package]] name = "ring" version = "0.17.14" @@ -6639,11 +6931,11 @@ dependencies = [ "num-integer", "num-traits", "pkcs1", - "pkcs8", + "pkcs8 0.10.2", "rand_core 0.6.4", "sha2", - "signature", - "spki", + "signature 2.2.0", + "spki 0.7.3", "subtle", "zeroize", ] @@ -6739,7 +7031,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys 0.4.15", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -6752,7 +7044,19 @@ dependencies = [ "errno", "libc", "linux-raw-sys 0.11.0", - "windows-sys 0.52.0", + "windows-sys 0.61.2", +] + +[[package]] +name = "rustls" +version = "0.21.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f56a14d1f48b391359b22f731fd4bd7e43c97f3c50eee276f3aa09c94784d3e" +dependencies = [ + "log", + "ring", + "rustls-webpki 0.101.7", + "sct", ] [[package]] @@ -6765,11 +7069,23 @@ dependencies = [ "once_cell", "ring", "rustls-pki-types", - "rustls-webpki", + "rustls-webpki 0.103.7", "subtle", "zeroize", ] +[[package]] +name = "rustls-native-certs" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9aace74cb666635c918e9c12bc0d348266037aa8eb599b5cba565709a8dff00" +dependencies = [ + "openssl-probe", + "rustls-pemfile 1.0.4", + "schannel", + "security-framework 2.11.1", +] + [[package]] name = "rustls-native-certs" version = "0.8.1" @@ -6779,7 +7095,16 @@ dependencies = [ "openssl-probe", "rustls-pki-types", "schannel", - "security-framework", + "security-framework 3.5.1", +] + +[[package]] +name = "rustls-pemfile" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c74cae0a4cf6ccbbf5f359f08efdf8ee7e1dc532573bf0db71968cb56b1448c" +dependencies = [ + "base64 0.21.7", ] [[package]] @@ -6801,6 +7126,16 @@ dependencies = [ "zeroize", ] +[[package]] +name = "rustls-webpki" +version = "0.101.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" +dependencies = [ + "ring", + "untrusted", +] + [[package]] name = "rustls-webpki" version = "0.103.7" @@ -6881,6 +7216,43 @@ dependencies = [ "sha2", ] +[[package]] +name = "sct" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414" +dependencies = [ + "ring", + "untrusted", +] + +[[package]] +name = "sec1" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3be24c1842290c45df0a7bf069e0c268a747ad05a192f2fd7dcfdbc1cba40928" +dependencies = [ + "base16ct", + "der 0.6.1", + "generic-array", + "pkcs8 0.9.0", + "subtle", + "zeroize", +] + +[[package]] +name = "security-framework" +version = "2.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" +dependencies = [ + "bitflags", + "core-foundation 0.9.4", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + [[package]] name = "security-framework" version = "3.5.1" @@ -7086,6 +7458,16 @@ dependencies = [ "libc", ] +[[package]] +name = "signature" +version = "1.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74233d3b3b2f6d4b006dc19dee745e73e2a6bfb6f93607cd3b02bd5b00797d7c" +dependencies = [ + "digest", + "rand_core 0.6.4", +] + [[package]] name = "signature" version = "2.2.0" @@ -7231,6 +7613,16 @@ version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" +[[package]] +name = "socket2" +version = "0.5.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + [[package]] name = "socket2" version = "0.6.0" @@ 
-7247,6 +7639,16 @@ version = "0.9.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" +[[package]] +name = "spki" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67cf02bbac7a337dc36e4f5a693db6c21e7863f45070f7064577eb4367a3212b" +dependencies = [ + "base64ct", + "der 0.6.1", +] + [[package]] name = "spki" version = "0.7.3" @@ -7254,7 +7656,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d91ed6c858b01f942cd56b37a94b3e0a1798290327d1236e4d9cf4eaca44d29d" dependencies = [ "base64ct", - "der", + "der 0.7.10", ] [[package]] @@ -7486,7 +7888,7 @@ checksum = "64a966cb0e76e311f09cf18507c9af192f15d34886ee43d7ba7c7e3803660c43" dependencies = [ "aho-corasick", "arc-swap", - "base64", + "base64 0.22.1", "bitpacking", "bon", "byteorder", @@ -7663,7 +8065,7 @@ dependencies = [ "getrandom 0.3.4", "once_cell", "rustix 1.1.2", - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] @@ -7858,7 +8260,7 @@ dependencies = [ "parking_lot", "pin-project-lite", "signal-hook-registry", - "socket2", + "socket2 0.6.0", "tokio-macros", "windows-sys 0.61.2", ] @@ -7874,13 +8276,23 @@ dependencies = [ "syn 2.0.106", ] +[[package]] +name = "tokio-rustls" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081" +dependencies = [ + "rustls 0.21.12", + "tokio", +] + [[package]] name = "tokio-rustls" version = "0.26.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1729aa945f29d91ba541258c8df89027d5792d85a8841fb65e8bf0f4ede4ef61" dependencies = [ - "rustls", + "rustls 0.23.32", "tokio", ] @@ -7966,7 +8378,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eb7613188ce9f7df5bfe185db26c5814347d110db17920415cf2fbcad85e7203" dependencies = [ "async-trait", - 
"base64", + "base64 0.22.1", "bytes", "http 1.3.1", "http-body 1.0.1", @@ -9355,7 +9767,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] diff --git a/bench-vortex/Cargo.toml b/bench-vortex/Cargo.toml index 4bab8b9c093..1e0dfa49f0e 100644 --- a/bench-vortex/Cargo.toml +++ b/bench-vortex/Cargo.toml @@ -29,6 +29,8 @@ arrow-cast = { workspace = true } arrow-schema = { workspace = true } arrow-select = { workspace = true } async-trait = { workspace = true } +aws-config = "1" +aws-sdk-s3 = "1" bytes = { workspace = true } bzip2 = { workspace = true } clap = { workspace = true, features = ["derive"] } @@ -42,6 +44,7 @@ datafusion-common = { workspace = true } datafusion-physical-plan = { workspace = true } dirs = { workspace = true } erased-serde = { workspace = true } +flate2 = "1" futures = { workspace = true } glob = { workspace = true } humansize = { workspace = true } diff --git a/bench-vortex/src/bin/s3_cat.rs b/bench-vortex/src/bin/s3_cat.rs new file mode 100644 index 00000000000..ea79aff44d0 --- /dev/null +++ b/bench-vortex/src/bin/s3_cat.rs @@ -0,0 +1,180 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Appends a local file to an S3 object using optimistic concurrency control via ETags. +//! +//! This binary is a Rust port of `scripts/cat-s3.sh` and handles concurrent appends to S3 objects +//! by using conditional requests with ETags. If the object has been modified by another process +//! between read and write, the operation is retried. 
+ +use std::io::Read; +use std::io::Write; +use std::path::PathBuf; +use std::time::Duration; + +use anyhow::Context; +use anyhow::Result; +use anyhow::bail; +use aws_sdk_s3::Client; +use aws_sdk_s3::error::ProvideErrorMetadata; +use aws_sdk_s3::error::SdkError; +use aws_sdk_s3::primitives::ByteStream; +use clap::Parser; +use flate2::Compression; +use flate2::read::GzDecoder; +use flate2::write::GzEncoder; + +const MAX_RETRIES: u32 = 5; +const RETRY_DELAY: Duration = Duration::from_millis(100); + +#[derive(Parser, Debug)] +#[command( + name = "s3_cat", + about = "Append a local file to an S3 object with optimistic concurrency control" +)] +struct Args { + /// S3 bucket name. + bucket: String, + + /// S3 object key. + key: String, + + /// Path to the local file to append. + local_file: PathBuf, +} + +#[tokio::main] +async fn main() -> Result<()> { + let args = Args::parse(); + + let config = aws_config::load_defaults(aws_config::BehaviorVersion::latest()).await; + let client = Client::new(&config); + + let local_content = + std::fs::read(&args.local_file).context("Failed to read local file to append")?; + + let is_gzipped = args.key.ends_with(".gz"); + + for attempt in 0..MAX_RETRIES { + match try_append(&client, &args.bucket, &args.key, &local_content, is_gzipped).await { + Ok(()) => { + println!("File updated and uploaded successfully."); + return Ok(()); + } + Err(AppendError::EtagMismatch) => { + println!("ETag mismatch on attempt {}. Retrying...", attempt + 1); + tokio::time::sleep(RETRY_DELAY).await; + } + Err(AppendError::Other(e)) => { + return Err(e); + } + } + } + + bail!("Too many failures: {MAX_RETRIES}"); +} + +enum AppendError { + EtagMismatch, + Other(anyhow::Error), +} + +impl From for AppendError { + fn from(e: anyhow::Error) -> Self { + AppendError::Other(e) + } +} + +async fn try_append( + client: &Client, + bucket: &str, + key: &str, + local_content: &[u8], + is_gzipped: bool, +) -> Result<(), AppendError> { + // Get current ETag. 
+ let head = client + .head_object() + .bucket(bucket) + .key(key) + .send() + .await + .context("Failed to get object metadata")?; + + let etag = head + .e_tag() + .context("No ETag returned from head_object")? + .to_string(); + + // Download with if-match. + let get_result = client + .get_object() + .bucket(bucket) + .key(key) + .if_match(&etag) + .send() + .await; + + let get_output = match get_result { + Ok(output) => output, + Err(SdkError::ServiceError(err)) if err.err().code() == Some("PreconditionFailed") => { + return Err(AppendError::EtagMismatch); + } + Err(e) => { + return Err(AppendError::Other( + anyhow::Error::new(e).context("Failed to download object"), + )); + } + }; + + let existing_bytes = get_output + .body + .collect() + .await + .context("Failed to read object body")? + .into_bytes(); + + // Concatenate contents. + let new_content = if is_gzipped { + // Decompress existing content. + let mut decoder = GzDecoder::new(&existing_bytes[..]); + let mut decompressed = Vec::new(); + decoder + .read_to_end(&mut decompressed) + .context("Failed to decompress existing content")?; + + // Append new content. + decompressed.extend_from_slice(local_content); + + // Recompress. + let mut encoder = GzEncoder::new(Vec::new(), Compression::default()); + encoder + .write_all(&decompressed) + .context("Failed to compress concatenated content")?; + encoder.finish().context("Failed to finish compression")? + } else { + let mut combined = existing_bytes.to_vec(); + combined.extend_from_slice(local_content); + combined + }; + + // Upload with if-match. 
+ let put_result = client + .put_object() + .bucket(bucket) + .key(key) + .if_match(&etag) + .body(ByteStream::from(new_content)) + .send() + .await; + + match put_result { + Ok(_) => Ok(()), + Err(SdkError::ServiceError(err)) if err.err().code() == Some("PreconditionFailed") => { + Err(AppendError::EtagMismatch) + } + Err(e) => Err(AppendError::Other( + anyhow::Error::new(e).context("Failed to upload object"), + )), + } +} From 8ac29c5806307406bbb845398c1a4cdf00d3d8b9 Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Mon, 1 Dec 2025 18:19:05 -0500 Subject: [PATCH 04/30] clean up Signed-off-by: Connor Tsui --- Cargo.lock | 20 +++ Cargo.toml | 2 + vortex-file/Cargo.toml | 1 + vortex-file/src/lib.rs | 2 + vortex-file/src/update.rs | 176 +++++++++++++++++++++++++++ vortex-wasm/src/benchmark_website.rs | 109 ----------------- vortex-wasm/src/bin/append.rs | 50 +++++++- vortex-wasm/src/lib.rs | 2 - 8 files changed, 250 insertions(+), 112 deletions(-) create mode 100644 vortex-file/src/update.rs delete mode 100644 vortex-wasm/src/benchmark_website.rs diff --git a/Cargo.lock b/Cargo.lock index 4b269171394..ee8301efbdb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9141,6 +9141,7 @@ dependencies = [ name = "vortex-file" version = "0.1.0" dependencies = [ + "async-fs", "async-trait", "bytes", "flatbuffers", @@ -9571,6 +9572,25 @@ dependencies = [ "vortex-mask", ] +[[package]] +name = "vortex-wasm" +version = "0.1.0" +dependencies = [ + "async-fs", + "serde", + "serde_json", + "vortex-array", + "vortex-buffer", + "vortex-dtype", + "vortex-error", + "vortex-file", + "vortex-io", + "vortex-layout", + "vortex-metrics", + "vortex-scalar", + "vortex-session", +] + [[package]] name = "vortex-zigzag" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index 7a676a4006e..b5adc580779 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -30,6 +30,7 @@ members = [ "vortex-tui", "vortex-utils", "vortex-vector", + "vortex-wasm", "xtask", ] exclude = ["java/testfiles", "wasm-test"] @@ -246,6 
+247,7 @@ vortex-sparse = { version = "0.1.0", path = "./encodings/sparse", default-featur vortex-tui = { version = "0.1.0", path = "./vortex-tui", default-features = false } vortex-utils = { version = "0.1.0", path = "./vortex-utils", default-features = false } vortex-vector = { version = "0.1.0", path = "./vortex-vector", default-features = false } +vortex-wasm = { version = "0.1.0", path = "./vortex-wasm", default-features = false } vortex-zigzag = { version = "0.1.0", path = "./encodings/zigzag", default-features = false } vortex-zstd = { version = "0.1.0", path = "./encodings/zstd", default-features = false } # END crates published by this project diff --git a/vortex-file/Cargo.toml b/vortex-file/Cargo.toml index 032789211db..11c5c9e7b6f 100644 --- a/vortex-file/Cargo.toml +++ b/vortex-file/Cargo.toml @@ -17,6 +17,7 @@ version = { workspace = true } all-features = true [dependencies] +async-fs = { workspace = true } async-trait = { workspace = true } bytes = { workspace = true } flatbuffers = { workspace = true } diff --git a/vortex-file/src/lib.rs b/vortex-file/src/lib.rs index 8fd03b26470..eba5571aef8 100644 --- a/vortex-file/src/lib.rs +++ b/vortex-file/src/lib.rs @@ -99,6 +99,7 @@ pub mod segments; mod strategy; #[cfg(test)] mod tests; +mod update; mod writer; pub use file::*; @@ -106,6 +107,7 @@ pub use footer::*; pub use forever_constant::*; pub use open::*; pub use strategy::*; +pub use update::*; use vortex_alp::ALPRDVTable; use vortex_alp::ALPVTable; use vortex_array::arrays::DictVTable; diff --git a/vortex-file/src/update.rs b/vortex-file/src/update.rs new file mode 100644 index 00000000000..baa71fc2281 --- /dev/null +++ b/vortex-file/src/update.rs @@ -0,0 +1,176 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Update operations for Vortex files. +//! +//! This module provides functions to read a Vortex file, apply a transformation, and write the +//! result back to a file. 
The update operation uses atomic file replacement for safety. + +use std::future::Future; +use std::path::Path; + +use vortex_array::ArrayRef; +use vortex_array::expr::session::ExprSession; +use vortex_array::session::ArraySession; +use vortex_array::stream::ArrayStreamExt; +use vortex_error::VortexResult; +use vortex_error::vortex_bail; +use vortex_io::runtime::BlockingRuntime; +use vortex_io::runtime::current::CurrentThreadRuntime; +use vortex_io::session::RuntimeSession; +use vortex_io::session::RuntimeSessionExt; +use vortex_layout::session::LayoutSession; +use vortex_metrics::VortexMetrics; +use vortex_session::VortexSession; + +use crate::OpenOptionsSessionExt; +use crate::WriteOptionsSessionExt; +use crate::WriteSummary; +use crate::register_default_encodings; + +/// Updates a Vortex file by reading it, applying a transformation, and writing the result. +/// +/// This is a blocking convenience wrapper around [`update_file_async`]. It creates a new session +/// with default encodings and a current-thread runtime. +/// +/// # Arguments +/// +/// * `input_path` - Path to the existing Vortex file to read. +/// * `output_path` - Path to write the updated Vortex file. Can be the same as input. +/// * `update_fn` - An async function that takes the file's array data and returns the updated +/// array. The returned array must have the same dtype as the input. +/// +/// # Returns +/// +/// A [`WriteSummary`] containing information about the written file. +/// +/// # Errors +/// +/// Returns an error if: +/// - The input file cannot be read. +/// - The update function returns an error. +/// - The update function returns an array with a different dtype. +/// - The output file cannot be written. +/// +/// # Atomic Write Guarantee +/// +/// The write operation uses a temporary file and atomic rename to ensure that the output file is +/// never left in a corrupted state, even if the process crashes during the write. 
+pub fn update_file( + input_path: impl AsRef, + output_path: impl AsRef, + update_fn: F, +) -> VortexResult +where + F: FnOnce(ArrayRef) -> Fut, + Fut: Future>, +{ + let runtime = CurrentThreadRuntime::new(); + + let session = VortexSession::empty() + .with::() + .with::() + .with::() + .with::() + .with::() + .with_handle(runtime.handle()); + + register_default_encodings(&session); + + runtime.block_on(update_file_async( + &session, + input_path.as_ref(), + output_path.as_ref(), + update_fn, + )) +} + +/// Updates a Vortex file asynchronously by reading it, applying a transformation, and writing the +/// result. +/// +/// This function: +/// 1. Reads the existing Vortex file into memory. +/// 2. Calls the update function with the array data. +/// 3. Validates the returned array has the same dtype. +/// 4. Writes the updated data to a temporary file. +/// 5. Atomically renames the temporary file to the output path. +/// +/// # Arguments +/// +/// * `session` - The Vortex session to use for reading and writing. +/// * `input_path` - Path to the existing Vortex file to read. +/// * `output_path` - Path to write the updated Vortex file. Can be the same as input. +/// * `update_fn` - An async function that takes the file's array data and returns the updated +/// array. The returned array must have the same dtype as the input. +/// +/// # Returns +/// +/// A [`WriteSummary`] containing information about the written file. +/// +/// # Errors +/// +/// Returns an error if: +/// - The input file cannot be read. +/// - The update function returns an error. +/// - The update function returns an array with a different dtype. +/// - The output file cannot be written. 
+pub async fn update_file_async( + session: &VortexSession, + input_path: impl AsRef, + output_path: impl AsRef, + update_fn: F, +) -> VortexResult +where + F: FnOnce(ArrayRef) -> Fut, + Fut: Future>, +{ + let input_path = input_path.as_ref(); + let output_path = output_path.as_ref(); + + // Read the existing file. + let file = session.open_options().open(input_path).await?; + let original_dtype = file.dtype().clone(); + + // Read all existing data into memory. + let existing_array = file.scan()?.into_array_stream()?.read_all().await?; + + // Apply the user's update function. + let updated_array = update_fn(existing_array).await?; + + // Validate that the dtype matches. + if updated_array.dtype() != &original_dtype { + vortex_bail!( + "Update function changed dtype from {} to {}. \ + The updated array must have the same dtype as the input file.", + original_dtype, + updated_array.dtype() + ); + } + + // Generate a temporary file path in the same directory as output. + // This ensures the rename will be atomic (same filesystem). + let temp_path = generate_temp_path(output_path); + + // Write to the temporary file. + let temp_file = async_fs::File::create(&temp_path).await?; + let mut writer = session.write_options().writer(temp_file, original_dtype); + writer.push(updated_array).await?; + let summary = writer.finish().await?; + + // Atomically rename the temp file to the output path. + async_fs::rename(&temp_path, output_path).await?; + + Ok(summary) +} + +/// Generates a temporary file path in the same directory as the target path. 
+fn generate_temp_path(target: &Path) -> std::path::PathBuf { + let parent = target.parent().unwrap_or_else(|| Path::new(".")); + let file_name = target + .file_name() + .map(|s| s.to_string_lossy()) + .unwrap_or_else(|| "file".into()); + + let temp_name = format!(".{}.{}.tmp", file_name, uuid::Uuid::new_v4()); + parent.join(temp_name) +} diff --git a/vortex-wasm/src/benchmark_website.rs b/vortex-wasm/src/benchmark_website.rs deleted file mode 100644 index 4b3b7d7f551..00000000000 --- a/vortex-wasm/src/benchmark_website.rs +++ /dev/null @@ -1,109 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -//! Utilities for the Vortex benchmark website. - -use std::path::Path; - -use vortex_array::builders::builder_with_capacity; -use vortex_array::expr::session::ExprSession; -use vortex_array::session::ArraySession; -use vortex_array::stream::ArrayStreamExt; -use vortex_error::VortexResult; -use vortex_file::OpenOptionsSessionExt; -use vortex_file::WriteOptionsSessionExt; -use vortex_io::runtime::BlockingRuntime; -use vortex_io::runtime::current::CurrentThreadRuntime; -use vortex_io::session::RuntimeSession; -use vortex_io::session::RuntimeSessionExt; -use vortex_layout::session::LayoutSession; -use vortex_metrics::VortexMetrics; -use vortex_scalar::Scalar; -use vortex_session::VortexSession; - -/// Reads a Vortex file and appends a single scalar entry, writing the result back. -/// -/// This function: -/// 1. Reads the existing Vortex file (using the scalar's dtype) -/// 2. Appends the new scalar to the existing data using a builder -/// 3. Writes the combined data back to the output path -/// -/// The input and output paths can be the same to overwrite the existing file. -/// -/// # Arguments -/// -/// * `input_path` - Path to the existing Vortex file to read. -/// * `output_path` - Path to write the updated Vortex file (can be same as input). -/// * `new_entry` - The scalar to append. 
Its dtype is used for reading/writing the file. -/// -/// # Returns -/// -/// The total number of entries in the resulting file. -pub fn append_entry( - input_path: impl AsRef, - output_path: impl AsRef, - new_entry: Scalar, -) -> VortexResult { - let runtime = CurrentThreadRuntime::new(); - - let session = VortexSession::empty() - .with::() - .with::() - .with::() - .with::() - .with::() - .with_handle(runtime.handle()); - - vortex_file::register_default_encodings(&session); - - runtime.block_on(naive_append_entry_async( - &session, - input_path.as_ref(), - output_path.as_ref(), - new_entry, - )) -} - -/// SUPER NAIVE append to a Vortex file. -async fn naive_append_entry_async( - session: &VortexSession, - input_path: &Path, - output_path: &Path, - new_entry: Scalar, -) -> VortexResult { - let dtype = new_entry.dtype().clone(); - - // Read the existing file. - let file = session - .open_options() - .with_dtype(dtype.clone()) - .open(input_path) - .await?; - - // Read all existing data. - let existing_array = file.scan()?.into_array_stream()?.read_all().await?; - let existing_len = existing_array.len(); - - // Create a builder and extend with existing data, then append the new entry. - let total_capacity = existing_len + 1; - let mut builder = builder_with_capacity(&dtype, total_capacity); - - // Add existing data. - builder.extend_from_array(&existing_array); - - // Append the new entry. - builder.append_scalar(&new_entry)?; - - let combined = builder.finish(); - let total_len = combined.len(); - - // Write to output file. 
- let file = async_fs::File::create(output_path).await?; - - let mut writer = session.write_options().writer(file, dtype); - - writer.push(combined).await?; - writer.finish().await?; - - Ok(total_len) -} diff --git a/vortex-wasm/src/bin/append.rs b/vortex-wasm/src/bin/append.rs index 0fc4b1405fb..35fa7bc2b8a 100644 --- a/vortex-wasm/src/bin/append.rs +++ b/vortex-wasm/src/bin/append.rs @@ -6,14 +6,17 @@ #![allow(clippy::expect_used)] use std::env; +use std::path::Path; +use vortex_array::builders::builder_with_capacity; use vortex_dtype::DType; use vortex_dtype::FieldNames; use vortex_dtype::Nullability; use vortex_dtype::PType; use vortex_dtype::StructFields; +use vortex_error::VortexResult; +use vortex_file::update_file; use vortex_scalar::Scalar; -use vortex_wasm::benchmark_website::append_entry; /// Returns the expected DType for the benchmark data file. /// @@ -68,3 +71,48 @@ fn main() { value, commit_id, vortex_path, total ); } + +/// Reads a Vortex file and appends a single scalar entry, writing the result back. +/// +/// This function: +/// 1. Reads the existing Vortex file (using the scalar's dtype) +/// 2. Appends the new scalar to the existing data using a builder +/// 3. Writes the combined data back to the output path +/// +/// The input and output paths can be the same to overwrite the existing file. +/// +/// # Arguments +/// +/// * `input_path` - Path to the existing Vortex file to read. +/// * `output_path` - Path to write the updated Vortex file (can be same as input). +/// * `new_entry` - The scalar to append. Its dtype is used for reading/writing the file. +/// +/// # Returns +/// +/// The total number of entries in the resulting file. 
+pub fn append_entry( + input_path: impl AsRef, + output_path: impl AsRef, + new_entry: Scalar, +) -> VortexResult { + let dtype = new_entry.dtype().clone(); + + let summary = update_file(input_path, output_path, |existing_array| async move { + let existing_len = existing_array.len(); + + // Create a builder and extend with existing data, then append the new entry. + let total_capacity = existing_len + 1; + let mut builder = builder_with_capacity(&dtype, total_capacity); + + // Add existing data. + builder.extend_from_array(&existing_array); + + // Append the new entry. + builder.append_scalar(&new_entry)?; + + Ok(builder.finish()) + })?; + + #[expect(clippy::cast_possible_truncation)] + Ok(summary.row_count() as usize) +} diff --git a/vortex-wasm/src/lib.rs b/vortex-wasm/src/lib.rs index 0e5a76b95be..0d735177e5d 100644 --- a/vortex-wasm/src/lib.rs +++ b/vortex-wasm/src/lib.rs @@ -1,4 +1,2 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors - -pub mod benchmark_website; From dd78a93ca2b5381f54ee8fcfc080cc4256524d5a Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Tue, 2 Dec 2025 14:44:32 -0500 Subject: [PATCH 05/30] add read and write bindings for benchmark entries Signed-off-by: Connor Tsui --- Cargo.lock | 49 +++- Cargo.toml | 2 + bench-vortex/Cargo.toml | 2 + bench-vortex/src/bin/migrate_random_access.rs | 172 +++++++++++ bench-vortex/src/bin/test_s3_read.rs | 74 +++++ bench-vortex/src/bin/test_s3_update.rs | 277 ++++++++++++++++++ bench-vortex/src/lib.rs | 1 + bench-vortex/src/website/entry.rs | 85 ++++++ bench-vortex/src/website/mod.rs | 7 + bench-vortex/src/website/names.rs | 15 + bench-vortex/src/website/read_s3.rs | 137 +++++++++ bench-vortex/src/website/update_s3.rs | 218 ++++++++++++++ plan.md | 81 +++++ 13 files changed, 1118 insertions(+), 2 deletions(-) create mode 100644 bench-vortex/src/bin/migrate_random_access.rs create mode 100644 bench-vortex/src/bin/test_s3_read.rs create mode 100644 
bench-vortex/src/bin/test_s3_update.rs create mode 100644 bench-vortex/src/website/entry.rs create mode 100644 bench-vortex/src/website/mod.rs create mode 100644 bench-vortex/src/website/names.rs create mode 100644 bench-vortex/src/website/read_s3.rs create mode 100644 bench-vortex/src/website/update_s3.rs create mode 100644 plan.md diff --git a/Cargo.lock b/Cargo.lock index ee8301efbdb..2737b17cb60 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1104,6 +1104,7 @@ dependencies = [ "flate2", "futures", "glob", + "hex", "humansize", "indicatif", "itertools 0.14.0", @@ -1120,6 +1121,7 @@ dependencies = [ "parking_lot", "parquet", "paste", + "phf 0.13.1", "rand 0.9.2", "rayon", "regex", @@ -1494,7 +1496,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a6139a8597ed92cf816dfb33f5dd6cf0bb93a6adc938f11039f371bc5bcd26c3" dependencies = [ "chrono", - "phf", + "phf 0.12.1", ] [[package]] @@ -6052,7 +6054,41 @@ version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "913273894cec178f401a31ec4b656318d95473527be05c0752cc41cdc32be8b7" dependencies = [ - "phf_shared", + "phf_shared 0.12.1", +] + +[[package]] +name = "phf" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1562dc717473dbaa4c1f85a36410e03c047b2e7df7f45ee938fbef64ae7fadf" +dependencies = [ + "phf_macros", + "phf_shared 0.13.1", + "serde", +] + +[[package]] +name = "phf_generator" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "135ace3a761e564ec88c03a77317a7c6b80bb7f7135ef2544dbe054243b89737" +dependencies = [ + "fastrand", + "phf_shared 0.13.1", +] + +[[package]] +name = "phf_macros" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "812f032b54b1e759ccd5f8b6677695d5268c588701effba24601f6932f8269ef" +dependencies = [ + "phf_generator", + "phf_shared 0.13.1", + "proc-macro2", + "quote", + "syn 2.0.106", ] 
[[package]] @@ -6064,6 +6100,15 @@ dependencies = [ "siphasher", ] +[[package]] +name = "phf_shared" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e57fef6bc5981e38c2ce2d63bfa546861309f875b8a75f092d1d54ae2d64f266" +dependencies = [ + "siphasher", +] + [[package]] name = "pin-project" version = "1.1.10" diff --git a/Cargo.toml b/Cargo.toml index b5adc580779..343273ed51b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -126,6 +126,7 @@ glob = "0.3.2" goldenfile = "1" half = { version = "2.6", features = ["std", "num-traits"] } hashbrown = "0.16.0" +hex = "0.4" humansize = "2.1.3" indicatif = "0.18.0" insta = "1.43" @@ -155,6 +156,7 @@ parking_lot = { version = "0.12.3", features = ["nightly"] } parquet = "56" paste = "1.0.15" pco = "0.4.4" +phf = { version = "0.13", features = ["macros"] } pin-project-lite = "0.2.15" primitive-types = { version = "0.14.0" } prost = "0.14" diff --git a/bench-vortex/Cargo.toml b/bench-vortex/Cargo.toml index 1e0dfa49f0e..93ace837333 100644 --- a/bench-vortex/Cargo.toml +++ b/bench-vortex/Cargo.toml @@ -47,6 +47,7 @@ erased-serde = { workspace = true } flate2 = "1" futures = { workspace = true } glob = { workspace = true } +hex = { workspace = true } humansize = { workspace = true } indicatif = { workspace = true, features = ["futures"] } itertools = { workspace = true } @@ -61,6 +62,7 @@ opentelemetry_sdk = { workspace = true } parking_lot = { workspace = true } parquet = { workspace = true, features = ["async"] } paste = { workspace = true } +phf = { workspace = true } rand = { workspace = true } rayon = { workspace = true } regex = { workspace = true } diff --git a/bench-vortex/src/bin/migrate_random_access.rs b/bench-vortex/src/bin/migrate_random_access.rs new file mode 100644 index 00000000000..52500ae50a5 --- /dev/null +++ b/bench-vortex/src/bin/migrate_random_access.rs @@ -0,0 +1,172 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex 
contributors + +//! Binary to migrate the random-access JSON benchmark data to a Vortex file. +//! +//! This reads the JSON file from `organized_data/random-access/random_access.json` and converts +//! it to a Vortex file with the [`BenchmarkEntry`] schema. + +#![allow(clippy::expect_used, clippy::panic)] + +use std::env; +use std::fs; + +use serde::Deserialize; +use vortex::VortexSessionDefault; +use vortex::array::IntoArray; +use vortex::array::arrays::FixedSizeListArray; +use vortex::array::arrays::PrimitiveArray; +use vortex::array::arrays::StructArray; +use vortex::array::validity::Validity; +use vortex::buffer::Buffer; +use vortex::compressor::CompactCompressor; +use vortex::dtype::FieldNames; +use vortex::file::WriteOptionsSessionExt; +use vortex::file::WriteStrategyBuilder; +use vortex::session::VortexSession; + +/// Name ID constants from `bench-vortex/src/website/names.rs`. +mod name_ids { + pub const RANDOM_ACCESS: u32 = 2; + pub const VORTEX_NVME: u32 = 3; + pub const PARQUET_NVME: u32 = 4; + pub const LANCE_NVME: u32 = 5; +} + +/// Represents a benchmark entry from the JSON file. +#[derive(Debug, Deserialize)] +struct JsonEntry { + name: String, + value: u64, + commit_id: String, + // Ignore other fields from JSON. + #[serde(flatten)] + _extra: serde_json::Value, +} + +/// Maps the JSON `name` field to a series name ID. 
+fn series_name_id(name: &str) -> u32 { + match name { + "random-access/vortex-tokio-local-disk" => name_ids::VORTEX_NVME, + "random-access/parquet-tokio-local-disk" => name_ids::PARQUET_NVME, + "random-access/lance-tokio-local-disk" => name_ids::LANCE_NVME, + _ => panic!("Unknown benchmark name: {}", name), + } +} + +fn main() { + let runtime = tokio::runtime::Runtime::new().expect("Failed to create tokio runtime"); + + runtime.block_on(async_main()); +} + +async fn async_main() { + let session = VortexSession::default(); + + let args: Vec = env::args().collect(); + let input_path = args + .get(1) + .expect("Usage: migrate_random_access "); + let output_path = args + .get(2) + .map(String::as_str) + .expect("Usage: migrate_random_access "); + + // Parse JSON. + let contents = fs::read_to_string(input_path).expect("Failed to read file"); + let entries: Vec = serde_json::from_str(&contents).expect("Failed to parse JSON"); + + let num_entries = entries.len(); + println!("Parsing {} entries from JSON...", num_entries); + + // Extract fields into separate vectors. + let mut commit_id_bytes: Vec = Vec::with_capacity(num_entries * 20); + let mut benchmark_groups: Vec = Vec::with_capacity(num_entries); + let mut chart_names: Vec = Vec::with_capacity(num_entries); + let mut series_names: Vec = Vec::with_capacity(num_entries); + let mut values: Vec = Vec::with_capacity(num_entries); + + for entry in &entries { + // Decode hex commit_id to 20 binary bytes. + let bytes = hex::decode(&entry.commit_id).expect("Invalid hex in commit_id"); + assert_eq!(bytes.len(), 20, "commit_id must decode to 20 bytes"); + commit_id_bytes.extend_from_slice(&bytes); + + // All entries have the same benchmark_group and chart_name. + benchmark_groups.push(name_ids::RANDOM_ACCESS); + chart_names.push(name_ids::RANDOM_ACCESS); + + // Map name to series_name ID. + series_names.push(series_name_id(&entry.name)); + + values.push(entry.value); + } + + // Create arrays. 
+ + // commit_id: FixedSizeList + let commit_id_elements = + PrimitiveArray::new(Buffer::from(commit_id_bytes), Validity::NonNullable); + let commit_id_array = FixedSizeListArray::try_new( + commit_id_elements.into_array(), + 20, + Validity::NonNullable, + num_entries, + ) + .expect("Failed to create commit_id array"); + + // benchmark_group: u32 + let benchmark_group_array = + PrimitiveArray::new(Buffer::from(benchmark_groups), Validity::NonNullable); + + // chart_name: u32 + let chart_name_array = PrimitiveArray::new(Buffer::from(chart_names), Validity::NonNullable); + + // series_name: u32 + let series_name_array = PrimitiveArray::new(Buffer::from(series_names), Validity::NonNullable); + + // value: u64 + let value_array = PrimitiveArray::new(Buffer::from(values), Validity::NonNullable); + + // Create struct array with all fields. + let struct_array = StructArray::try_new( + FieldNames::from([ + "commit_id", + "benchmark_group", + "chart_name", + "series_name", + "value", + ]), + vec![ + commit_id_array.into_array(), + benchmark_group_array.into_array(), + chart_name_array.into_array(), + series_name_array.into_array(), + value_array.into_array(), + ], + num_entries, + Validity::NonNullable, + ) + .expect("Failed to create struct array"); + + println!("Created struct array with {} entries", num_entries); + println!("Schema: {}", struct_array.dtype()); + + // Write to Vortex file with compression. 
+ let file = tokio::fs::File::create(output_path) + .await + .expect("Failed to create output file"); + + session + .write_options() + .with_strategy( + WriteStrategyBuilder::new() + .with_compressor(CompactCompressor::default()) + .build(), + ) + .write(file, struct_array.to_array_stream()) + .await + .expect("Failed to write Vortex file"); + + println!("Wrote {} entries to {}", num_entries, output_path); +} diff --git a/bench-vortex/src/bin/test_s3_read.rs b/bench-vortex/src/bin/test_s3_read.rs new file mode 100644 index 00000000000..d4e79812499 --- /dev/null +++ b/bench-vortex/src/bin/test_s3_read.rs @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Test binary for reading benchmark entries from S3 and printing them. + +#![allow(clippy::expect_used)] + +use aws_config::BehaviorVersion; +use aws_sdk_s3::Client; +use bench_vortex::website::names::NAMES; +use bench_vortex::website::read_s3::read_benchmark_entries; +use vortex::VortexSessionDefault; +use vortex::session::VortexSession; + +const BUCKET: &str = "vortex-benchmark-results-database"; +const KEY: &str = "test/random_access.vortex"; + +fn main() { + let runtime = tokio::runtime::Runtime::new().expect("Failed to create tokio runtime"); + runtime.block_on(async_main()); +} + +async fn async_main() { + let session = VortexSession::default(); + + // Load AWS config with SSO profile. + let config = aws_config::defaults(BehaviorVersion::latest()) + .profile_name("PowerUserAccess-375504701696") + .load() + .await; + let client = Client::new(&config); + + println!( + "Reading benchmark entries from s3://{}/{}...\n", + BUCKET, KEY + ); + + let entries = read_benchmark_entries(&client, &session, BUCKET, KEY) + .await + .expect("Failed to read benchmark entries"); + + println!("Read {} entries:\n", entries.len()); + + // Print header. 
+ println!( + "{:<44} {:>15} {:>15} {:>15} {:>12}", + "commit_id", "benchmark_group", "chart_name", "series_name", "value" + ); + println!("{}", "-".repeat(105)); + + // Print entries (limit to first 20 and last 5 for readability). + let show_first = 20; + let show_last = 5; + + for (i, entry) in entries.iter().enumerate() { + if i < show_first || i >= entries.len() - show_last { + let benchmark_group = NAMES.get(&entry.benchmark_group.0).unwrap_or(&"unknown"); + let chart_name = NAMES.get(&entry.chart_name.0).unwrap_or(&"unknown"); + let series_name = NAMES.get(&entry.series_name.0).unwrap_or(&"unknown"); + + println!( + "{} {:>15} {:>15} {:>15} {:>12}", + entry.commit_id, benchmark_group, chart_name, series_name, entry.value + ); + } else if i == show_first { + println!( + "... ({} more entries) ...", + entries.len() - show_first - show_last + ); + } + } + + println!("\nTotal: {} entries", entries.len()); +} diff --git a/bench-vortex/src/bin/test_s3_update.rs b/bench-vortex/src/bin/test_s3_update.rs new file mode 100644 index 00000000000..e855890aad2 --- /dev/null +++ b/bench-vortex/src/bin/test_s3_update.rs @@ -0,0 +1,277 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Test binary for uploading a Vortex file to S3 and testing the `update_s3_object` function. 
+ +#![allow(clippy::expect_used)] + +use std::env; +use std::fs; + +use aws_config::BehaviorVersion; +use aws_sdk_s3::Client; +use aws_sdk_s3::primitives::ByteStream; +use bench_vortex::website::update_s3::update_s3_object; +use vortex::VortexSessionDefault; +use vortex::array::Array; +use vortex::array::builders::builder_with_capacity; +use vortex::array::stream::ArrayStreamExt; +use vortex::dtype::FieldNames; +use vortex::file::OpenOptionsSessionExt; +use vortex::scalar::Scalar; +use vortex::session::VortexSession; + +const BUCKET: &str = "vortex-benchmark-results-database"; +const KEY: &str = "test/random_access.vortex"; + +fn main() { + let runtime = tokio::runtime::Runtime::new().expect("Failed to create tokio runtime"); + runtime.block_on(async_main()); +} + +async fn async_main() { + let session = VortexSession::default(); + + // Load AWS config with SSO profile. + let config = aws_config::defaults(BehaviorVersion::latest()) + .profile_name("PowerUserAccess-375504701696") + .load() + .await; + let client = Client::new(&config); + + // Check for --upload flag. + let args: Vec = env::args().collect(); + if args.iter().any(|a| a == "--upload") { + println!("Uploading random_access.vortex to S3..."); + let local_path = "/Users/connor/spiral/vortex-data/vortex/vortex-wasm/random_access.vortex"; + let file_bytes = fs::read(local_path).expect("Failed to read local file"); + let size = file_bytes.len(); + + client + .put_object() + .bucket(BUCKET) + .key(KEY) + .body(ByteStream::from(file_bytes)) + .send() + .await + .expect("Failed to upload to S3"); + + println!("Uploaded {} bytes to s3://{}/{}", size, BUCKET, KEY); + } + + // Check for --concurrent flag to test atomicity with multiple concurrent updates. 
+ let concurrent = args.iter().any(|a| a == "--concurrent"); + let num_concurrent: usize = args + .iter() + .position(|a| a == "--concurrent") + .and_then(|i| args.get(i + 1)) + .and_then(|s| s.parse().ok()) + .unwrap_or(10); + + if concurrent { + println!( + "\nTesting concurrent updates with {} tasks...", + num_concurrent + ); + + // Get initial count. + let initial_count = get_entry_count(&client, &session).await; + println!("Initial entry count: {}", initial_count); + + // Spawn concurrent update tasks. + let mut handles = Vec::new(); + for i in 0..num_concurrent { + let client = client.clone(); + let session = session.clone(); + handles.push(tokio::spawn(async move { + let result = update_s3_object( + &client, + &session, + BUCKET, + KEY, + |existing_array| async move { + let existing_len = existing_array.len(); + + // Create a new entry to append. + let new_entry = create_test_entry(); + + // Build a new array with existing data + new entry. + let dtype = existing_array.dtype().clone(); + let mut builder = builder_with_capacity(&dtype, existing_len + 1); + builder.extend_from_array(&existing_array); + builder.append_scalar(&new_entry)?; + + Ok(builder.finish()) + }, + ) + .await; + + match result { + Ok(()) => { + println!(" Task {} succeeded", i); + true + } + Err(e) => { + println!(" Task {} failed: {}", i, e); + false + } + } + })); + } + + // Wait for all tasks. + let mut successes = 0; + let mut failures = 0; + for handle in handles { + if handle.await.unwrap_or(false) { + successes += 1; + } else { + failures += 1; + } + } + + // Verify final count. 
+ let final_count = get_entry_count(&client, &session).await; + println!("\nResults:"); + println!(" Successes: {}", successes); + println!(" Failures: {}", failures); + println!(" Initial count: {}", initial_count); + println!(" Final count: {}", final_count); + println!(" Expected count: {}", initial_count + successes); + + if final_count == initial_count + successes { + println!( + "\n✓ Atomicity verified! All {} successful updates were applied.", + successes + ); + } else { + println!("\n✗ Atomicity FAILED! Count mismatch."); + } + } else { + // Single update test. + println!("\nTesting update_s3_object..."); + + let result = update_s3_object( + &client, + &session, + BUCKET, + KEY, + |existing_array| async move { + let existing_len = existing_array.len(); + println!(" Existing array has {} entries", existing_len); + + // Create a new entry to append. + let new_entry = create_test_entry(); + + // Build a new array with existing data + new entry. + let dtype = existing_array.dtype().clone(); + let mut builder = builder_with_capacity(&dtype, existing_len + 1); + builder.extend_from_array(&existing_array); + builder.append_scalar(&new_entry)?; + + let result = builder.finish(); + println!(" New array has {} entries", result.len()); + + Ok(result) + }, + ) + .await; + + match result { + Ok(()) => { + println!("update_s3_object succeeded!"); + let count = get_entry_count(&client, &session).await; + println!("Verified: updated file has {} entries", count); + } + Err(e) => { + println!("update_s3_object failed: {}", e); + } + } + } + + println!("Done!"); +} + +async fn get_entry_count(client: &Client, session: &VortexSession) -> usize { + let get_result = client + .get_object() + .bucket(BUCKET) + .key(KEY) + .send() + .await + .expect("Failed to download file"); + + let bytes = get_result + .body + .collect() + .await + .expect("Failed to read body") + .into_bytes(); + + let file = session + .open_options() + .open_buffer(bytes) + .expect("Failed to open buffer"); + 
+ let array = file + .scan() + .expect("Failed to scan") + .into_array_stream() + .expect("Failed to get stream") + .read_all() + .await + .expect("Failed to read all"); + + array.len() +} + +/// Creates a test entry matching the BenchmarkEntry schema. +fn create_test_entry() -> Scalar { + use std::sync::Arc; + + use vortex::dtype::DType; + use vortex::dtype::Nullability::NonNullable; + use vortex::dtype::PType; + + let u8_dtype = DType::Primitive(PType::U8, NonNullable); + + // Build the dtype to match the schema: + // {commit_id=fixed_size_list(u8)[20], benchmark_group=u32, chart_name=u32, series_name=u32, value=u64} + let dtype = DType::Struct( + vortex::dtype::StructFields::new( + FieldNames::from([ + "commit_id", + "benchmark_group", + "chart_name", + "series_name", + "value", + ]), + vec![ + DType::FixedSizeList(Arc::new(u8_dtype.clone()), 20, NonNullable), + DType::Primitive(PType::U32, NonNullable), + DType::Primitive(PType::U32, NonNullable), + DType::Primitive(PType::U32, NonNullable), + DType::Primitive(PType::U64, NonNullable), + ], + ), + NonNullable, + ); + + // Create a test commit_id (20 bytes of 'x'). 
+ let commit_id_bytes: Vec = b"xxxxxxxxxxxxxxxxxxxx" + .iter() + .map(|&b| Scalar::primitive(b, NonNullable)) + .collect(); + let commit_id_scalar = Scalar::fixed_size_list(u8_dtype, commit_id_bytes, NonNullable); + + Scalar::struct_( + dtype, + vec![ + commit_id_scalar, + Scalar::primitive(2u32, NonNullable), // benchmark_group: random-access + Scalar::primitive(2u32, NonNullable), // chart_name: random-access + Scalar::primitive(3u32, NonNullable), // series_name: vortex-nvme + Scalar::primitive(999999u64, NonNullable), // value: test value + ], + ) +} diff --git a/bench-vortex/src/lib.rs b/bench-vortex/src/lib.rs index 2ec25d14030..f22fb3e7de1 100644 --- a/bench-vortex/src/lib.rs +++ b/bench-vortex/src/lib.rs @@ -42,6 +42,7 @@ pub mod statpopgen; pub mod tpcds; pub mod tpch; pub mod utils; +pub mod website; pub use datasets::BenchmarkDataset; pub use datasets::file; diff --git a/bench-vortex/src/website/entry.rs b/bench-vortex/src/website/entry.rs new file mode 100644 index 00000000000..159cb9cd75d --- /dev/null +++ b/bench-vortex/src/website/entry.rs @@ -0,0 +1,85 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use std::fmt; + +use serde::Deserialize; +use serde::Serialize; + +/// A benchmark entry, grouped by benchmark group, then chart name, then series name. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct BenchmarkEntry { + // `StructArray` + pub commit_id: CommitId, // fixed size list of `u8` (20 bytes) + pub benchmark_group: NameId, // `u32` array + pub chart_name: NameId, // `u32` array + pub series_name: NameId, // `u32` array + pub value: u64, // `u64` array +} + +/// String ID lookup so that we don't have to store the string every time. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct NameId(pub u32); + +/// The 20-byte binary SHA-1 Git commit ID. 
+#[derive(Clone, PartialEq, Eq)] +pub struct CommitId(pub [u8; 20]); + +impl fmt::Display for CommitId { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", hex::encode(self.0)) + } +} + +impl fmt::Debug for CommitId { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "CommitId(\"{}\")", hex::encode(self.0)) + } +} + +impl Serialize for CommitId { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + serializer.serialize_str(&hex::encode(self.0)) + } +} + +impl<'de> Deserialize<'de> for CommitId { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + struct CommitIdVisitor; + + impl<'de> serde::de::Visitor<'de> for CommitIdVisitor { + type Value = CommitId; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("a 40-character hexadecimal string") + } + + fn visit_str(self, value: &str) -> Result + where + E: serde::de::Error, + { + if value.len() != 40 { + return Err(E::custom(format!( + "expected 40 hex characters, got {}", + value.len() + ))); + } + + let bytes = hex::decode(value) + .map_err(|e| E::custom(format!("invalid hexadecimal: {}", e)))?; + + let mut arr = [0u8; 20]; + arr.copy_from_slice(&bytes); + Ok(CommitId(arr)) + } + } + + deserializer.deserialize_str(CommitIdVisitor) + } +} diff --git a/bench-vortex/src/website/mod.rs b/bench-vortex/src/website/mod.rs new file mode 100644 index 00000000000..b9fbb134503 --- /dev/null +++ b/bench-vortex/src/website/mod.rs @@ -0,0 +1,7 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +pub mod entry; +pub mod names; +pub mod read_s3; +pub mod update_s3; diff --git a/bench-vortex/src/website/names.rs b/bench-vortex/src/website/names.rs new file mode 100644 index 00000000000..86f34733147 --- /dev/null +++ b/bench-vortex/src/website/names.rs @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: Apache-2.0 +// 
SPDX-FileCopyrightText: Copyright the Vortex contributors + +use phf::Map; +use phf::phf_map; + +// TODO(connor): This should probably be generated smarter. +pub static NAMES: Map = phf_map! { + 0 => "null", + 1 => "invalid", + 2 => "random-access", + 3 => "vortex-nvme", + 4 => "parquet-nvme", + 5 => "lance-nvme", +}; diff --git a/bench-vortex/src/website/read_s3.rs b/bench-vortex/src/website/read_s3.rs new file mode 100644 index 00000000000..6f68afbc437 --- /dev/null +++ b/bench-vortex/src/website/read_s3.rs @@ -0,0 +1,137 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Functions for reading benchmark data from S3. + +use aws_sdk_s3::Client; +use vortex::array::Array; +use vortex::array::ToCanonical; +use vortex::array::arrays::FixedSizeListArray; +use vortex::array::arrays::PrimitiveArray; +use vortex::array::arrays::StructArray; +use vortex::array::stream::ArrayStreamExt; +use vortex::error::VortexResult; +use vortex::error::vortex_bail; +use vortex::error::vortex_err; +use vortex::file::OpenOptionsSessionExt; +use vortex::session::VortexSession; + +use super::entry::BenchmarkEntry; +use super::entry::CommitId; +use super::entry::NameId; + +/// Reads benchmark entries from an S3 object containing a Vortex file. +/// +/// This function downloads the Vortex file from S3, parses the columnar struct array, and converts +/// it to a vector of row-wise [`BenchmarkEntry`] structs. +/// +/// # Arguments +/// +/// * `client` - The AWS S3 client to use for operations. +/// * `session` - The Vortex session for reading files. +/// * `bucket` - The S3 bucket name. +/// * `key` - The S3 object key. +/// +/// # Errors +/// +/// Returns an error if: +/// - The S3 object does not exist or cannot be downloaded. +/// - The file is not a valid Vortex file. +/// - The schema does not match the expected [`BenchmarkEntry`] schema. 
+pub async fn read_benchmark_entries( + client: &Client, + session: &VortexSession, + bucket: &str, + key: &str, +) -> VortexResult> { + // Download the file from S3. + let get_result = client + .get_object() + .bucket(bucket) + .key(key) + .send() + .await + .map_err(|e| vortex_err!("Failed to download S3 object: {}", e))?; + + let bytes = get_result + .body + .collect() + .await + .map_err(|e| vortex_err!("Failed to read S3 object body: {}", e))? + .into_bytes(); + + // Parse as Vortex file and read all data. + let file = session.open_options().open_buffer(bytes)?; + let array = file.scan()?.into_array_stream()?.read_all().await?; + + // Convert the array to benchmark entries. + array_to_benchmark_entries(&array) +} + +/// Converts a Vortex array (expected to be a struct array) into a vector of [`BenchmarkEntry`]. +/// +/// The array must have the following schema: +/// - `commit_id`: FixedSizeList +/// - `benchmark_group`: u32 +/// - `chart_name`: u32 +/// - `series_name`: u32 +/// - `value`: u64 +pub fn array_to_benchmark_entries(array: &dyn Array) -> VortexResult> { + // Convert to canonical struct array. + let struct_array: StructArray = array.to_struct(); + + let len = struct_array.len(); + let mut entries = Vec::with_capacity(len); + + // Extract each field. + let commit_id_field = struct_array.field_by_name("commit_id")?; + let benchmark_group_field = struct_array.field_by_name("benchmark_group")?; + let chart_name_field = struct_array.field_by_name("chart_name")?; + let series_name_field = struct_array.field_by_name("series_name")?; + let value_field = struct_array.field_by_name("value")?; + + // Convert commit_id to canonical fixed-size list and get the underlying bytes. + let commit_id_fsl: FixedSizeListArray = commit_id_field.to_fixed_size_list(); + if commit_id_fsl.list_size() != 20 { + vortex_bail!( + "Expected commit_id to have list_size 20, got {}", + commit_id_fsl.list_size() + ); + } + + // Get the elements as a primitive array of u8. 
+ let commit_id_elements: PrimitiveArray = commit_id_fsl.elements().to_primitive(); + let commit_id_bytes: &[u8] = commit_id_elements.as_slice(); + + // Convert primitive fields. + let benchmark_group_prim: PrimitiveArray = benchmark_group_field.to_primitive(); + let benchmark_groups: &[u32] = benchmark_group_prim.as_slice(); + + let chart_name_prim: PrimitiveArray = chart_name_field.to_primitive(); + let chart_names: &[u32] = chart_name_prim.as_slice(); + + let series_name_prim: PrimitiveArray = series_name_field.to_primitive(); + let series_names: &[u32] = series_name_prim.as_slice(); + + let value_prim: PrimitiveArray = value_field.to_primitive(); + let values: &[u64] = value_prim.as_slice(); + + // Build the entries. + for i in 0..len { + // Extract the 20-byte commit_id for this row. + let start = i * 20; + let end = start + 20; + let mut commit_id_arr = [0u8; 20]; + commit_id_arr.copy_from_slice(&commit_id_bytes[start..end]); + + entries.push(BenchmarkEntry { + commit_id: CommitId(commit_id_arr), + benchmark_group: NameId(benchmark_groups[i]), + chart_name: NameId(chart_names[i]), + series_name: NameId(series_names[i]), + value: values[i], + }); + } + + Ok(entries) +} diff --git a/bench-vortex/src/website/update_s3.rs b/bench-vortex/src/website/update_s3.rs new file mode 100644 index 00000000000..bd195667678 --- /dev/null +++ b/bench-vortex/src/website/update_s3.rs @@ -0,0 +1,218 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Atomic S3 update operations for Vortex files. +//! +//! This module provides functions to read a Vortex file from S3, apply a transformation, and write +//! the result back atomically using optimistic concurrency control via ETags. 
+ +use std::future::Future; +use std::time::Duration; + +use aws_sdk_s3::Client; +use aws_sdk_s3::error::ProvideErrorMetadata; +use aws_sdk_s3::error::SdkError; +use aws_sdk_s3::primitives::ByteStream; +use vortex::array::ArrayRef; +use vortex::array::stream::ArrayStreamExt; +use vortex::error::VortexError; +use vortex::error::VortexResult; +use vortex::error::vortex_bail; +use vortex::error::vortex_err; +use vortex::file::OpenOptionsSessionExt; +use vortex::file::WriteOptionsSessionExt; +use vortex::session::VortexSession; + +const INITIAL_DELAY: Duration = Duration::from_millis(100); +const MAX_DELAY: Duration = Duration::from_secs(60); + +/// Internal error type for retry control. +enum UpdateError { + /// The ETag has changed since we read the object. The operation should be retried. + EtagMismatch, + /// A non-retryable error occurred. + Other(VortexError), +} + +impl From for UpdateError { + fn from(e: VortexError) -> Self { + UpdateError::Other(e) + } +} + +/// Updates a Vortex file stored in S3 atomically using optimistic concurrency control. +/// +/// This function reads the existing file from S3, applies a transformation, and writes it back +/// using conditional puts with ETags. If another process modifies the file between read and write, +/// the operation is automatically retried with exponential backoff. +/// +/// # Arguments +/// +/// * `client` - The AWS S3 client to use for operations. +/// * `session` - The Vortex session for reading and writing files. +/// * `bucket` - The S3 bucket name. +/// * `key` - The S3 object key. +/// * `update_fn` - An async function that takes the file's array data and returns the updated +/// array. The returned array must have the same dtype as the input. This function may be called +/// multiple times if retries are needed. +/// +/// # Errors +/// +/// Returns an error if: +/// - The S3 object does not exist. +/// - The update function returns an error. 
+/// - The update function returns an array with a different dtype. +/// - The retry delay reaches the maximum (60 seconds) without success. +/// - An S3 operation fails with a non-retryable error. +#[expect(clippy::use_debug)] +pub async fn update_s3_object( + client: &Client, + session: &VortexSession, + bucket: &str, + key: &str, + mut update_fn: F, +) -> VortexResult<()> +where + F: FnMut(ArrayRef) -> Fut, + Fut: Future>, +{ + let mut delay = INITIAL_DELAY; + let mut attempt = 0; + + loop { + match try_update_s3_object(client, session, bucket, key, &mut update_fn).await { + Ok(()) => return Ok(()), + Err(UpdateError::EtagMismatch) => { + attempt += 1; + tracing::debug!( + "ETag mismatch on attempt {}. Retrying after {:?}...", + attempt, + delay + ); + } + Err(UpdateError::Other(e)) => { + attempt += 1; + eprintln!( + "Error on attempt {}: {}. Retrying after {:?}...", + attempt, e, delay + ); + } + } + + // If we've reached max delay, fail. + if delay >= MAX_DELAY { + vortex_bail!( + "Failed to update S3 object after {} attempts (delay reached {:?})", + attempt, + MAX_DELAY + ); + } + + tokio::time::sleep(delay).await; + + // Exponential backoff: double the delay, capped at MAX_DELAY. + delay = (delay * 2).min(MAX_DELAY); + } +} + +/// Attempts a single update of an S3 object. +async fn try_update_s3_object( + client: &Client, + session: &VortexSession, + bucket: &str, + key: &str, + update_fn: &mut F, +) -> Result<(), UpdateError> +where + F: FnMut(ArrayRef) -> Fut, + Fut: Future>, +{ + // Get current ETag. + let head = client + .head_object() + .bucket(bucket) + .key(key) + .send() + .await + .map_err(|e| vortex_err!("Failed to get object metadata: {}", e))?; + + let etag = head + .e_tag() + .ok_or_else(|| vortex_err!("No ETag returned from head_object"))? + .to_string(); + + // Download with if-match. 
+ let get_result = client + .get_object() + .bucket(bucket) + .key(key) + .if_match(&etag) + .send() + .await; + + let get_output = match get_result { + Ok(output) => output, + Err(SdkError::ServiceError(err)) if err.err().code() == Some("PreconditionFailed") => { + return Err(UpdateError::EtagMismatch); + } + Err(e) => { + return Err(UpdateError::Other(vortex_err!( + "Failed to download object: {}", + e + ))); + } + }; + + let existing_bytes = get_output + .body + .collect() + .await + .map_err(|e| vortex_err!("Failed to read object body: {}", e))? + .into_bytes(); + + // Parse as Vortex file and read all data. + let file = session.open_options().open_buffer(existing_bytes)?; + let original_dtype = file.dtype().clone(); + let existing_array = file.scan()?.into_array_stream()?.read_all().await?; + + // Apply the user's update function. + let updated_array = update_fn(existing_array).await?; + + // Validate that the dtype matches. + if updated_array.dtype() != &original_dtype { + return Err(UpdateError::Other(vortex_err!( + "Update function changed dtype from {} to {}. \ + The updated array must have the same dtype as the input file.", + original_dtype, + updated_array.dtype() + ))); + } + + // Serialize updated array to Vortex file bytes. + let mut buffer = Vec::new(); + session + .write_options() + .write(&mut buffer, updated_array.to_array_stream()) + .await?; + + // Upload with if-match. 
+ let put_result = client + .put_object() + .bucket(bucket) + .key(key) + .if_match(&etag) + .body(ByteStream::from(buffer)) + .send() + .await; + + match put_result { + Ok(_) => Ok(()), + Err(SdkError::ServiceError(err)) if err.err().code() == Some("PreconditionFailed") => { + Err(UpdateError::EtagMismatch) + } + Err(e) => Err(UpdateError::Other(vortex_err!( + "Failed to upload object: {}", + e + ))), + } +} diff --git a/plan.md b/plan.md new file mode 100644 index 00000000000..5ac44435071 --- /dev/null +++ b/plan.md @@ -0,0 +1,81 @@ + + +# Hack Week + +## Goals + +_Approximately in order of priority:_ + +- Get faster load times of Vortex benchmarks on the web by using Vortex itself to store benchmarks measurements instead of JSON +- Make Vortex work reliably on the web via WASM +- Allow addition/removal of different benchmark measurements with schema evolution on Vortex +- Make the benchmarks website easier to read / more understandable +- Rewrite the entire benchmarks website to a WASM framework like Dioxus +- (Stretch) Make benchmarks website more dynamic +- (Stretch) Add Vortex demo in the browser +- (Stretch) Add Vortex vs. Parquet demo in the browser +- (Stretch) Add wasm-bindgen bindings for Vortex? + +## Plan of Attack + +- Design (at a high level) a better benchmarks website (figure out what components and pages it needs, plus general layout) +- Figure out the minimal API for the current benchmark website +- If the current JavaScript code conflicts than the new design, refactor the architecture of the website so that it is easy to switch out the implementations +- Determine the schema of each of the current benchmark (and the evolution of each over time) +- Figure out if the current schemas make sense or if they need to change +- Design extensible(?) 
Vortex schemas for benchmarking
+- Migrate all existing data to Vortex files
+- Design writer (append-only) interface for adding benchmark measurements that can evolve its schema
+- Design reader interface for loading specific columns of Vortex from S3 and parsing data to a format easily read by JavaScript (should probably be streaming over chunks?)
+- Implement the reader and writer interfaces with wasm-bindgen
+- Migrate the JavaScript code to use the Rust bindings
+- Test
+
+
+### Ideas
+
+```rust
+/// The 20 byte SHA-1 Git commit ID.
+pub struct CommitId([u8; 20]);
+
+/// String ID lookup so that we don't have to store the string every time.
+pub struct NameId(u32);
+
+/// A benchmark entry, grouped by benchmark group, then chart name, then series name.
+pub struct BenchmarkEntry { // `StructArray`
+    commit_id: CommitId, // fixed size list of `u8`?
+    benchmark_group: NameId, // `u16` array
+    chart_name: NameId, // `u16` array
+    series_name: NameId, // `u16` array
+    value: u64, // `u64` array
+}
+
+fn main() {
+    println!("{}", size_of::<BenchmarkEntry>()); // 64
+    println!("{}", align_of::<BenchmarkEntry>()); // 8
+}
+```
+
+### Findings
+
+- There is an insane amount of wasted space in `data.json`
+- The amount of actual benchmarking data is actually very small, and it can easily fit in memory of
+  the CI runners
+- We can simply read the entire file of all benchmarking data into memory, decompress in memory, add
+  a new entry, compress, and then write back to S3
+
+
+### Things to update
+
+Start with just the random access benchmark
+
+
+generate a bunch of fake data and upload it to S3
+
+- Add bindings to read and write `BenchmarkEntry` vortex arrays to and from S3
+USE IPC FORMAT INSTEAD
+- `query_bench` to post directly to S3
+- `random_access` and `compress` to also post directly to S3

From a40cfc834c2eb62095eb48905f100fb20d37a104 Mon Sep 17 00:00:00 2001
From: Connor Tsui
Date: Tue, 2 Dec 2025 15:10:59 -0500
Subject: [PATCH 06/30] add helper functions

Signed-off-by: Connor Tsui
---
bench-vortex/src/website/entry.rs | 95 +++++++++++++++++++++++++-- bench-vortex/src/website/update_s3.rs | 41 ++++++++++++ 2 files changed, 130 insertions(+), 6 deletions(-) diff --git a/bench-vortex/src/website/entry.rs b/bench-vortex/src/website/entry.rs index 159cb9cd75d..3c0ccb1caa4 100644 --- a/bench-vortex/src/website/entry.rs +++ b/bench-vortex/src/website/entry.rs @@ -2,19 +2,102 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors use std::fmt; +use std::sync::Arc; use serde::Deserialize; use serde::Serialize; +use vortex::dtype::DType; +use vortex::dtype::FieldNames; +use vortex::dtype::Nullability::NonNullable; +use vortex::dtype::PType; +use vortex::dtype::StructFields; +use vortex::scalar::Scalar; /// A benchmark entry, grouped by benchmark group, then chart name, then series name. #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct BenchmarkEntry { - // `StructArray` - pub commit_id: CommitId, // fixed size list of `u8` (20 bytes) - pub benchmark_group: NameId, // `u32` array - pub chart_name: NameId, // `u32` array - pub series_name: NameId, // `u32` array - pub value: u64, // `u64` array + pub commit_id: CommitId, + pub benchmark_group: NameId, + pub chart_name: NameId, + pub series_name: NameId, + pub value: u64, +} + +impl BenchmarkEntry { + pub fn new( + commit_id: CommitId, + benchmark_group: NameId, + chart_name: NameId, + series_name: NameId, + value: u64, + ) -> Self { + Self { + commit_id, + benchmark_group, + chart_name, + series_name, + value, + } + } + + /// Returns the [`DType`] for a [`BenchmarkEntry`]. 
+ /// + /// The schema is: + /// - `commit_id`: `FixedSizeList` (20-byte binary SHA-1) + /// - `benchmark_group`: `u32` + /// - `chart_name`: `u32` + /// - `series_name`: `u32` + /// - `value`: `u64` + pub fn dtype() -> DType { + DType::Struct( + StructFields::new( + FieldNames::from([ + "commit_id", + "benchmark_group", + "chart_name", + "series_name", + "value", + ]), + vec![ + DType::FixedSizeList( + Arc::new(DType::Primitive(PType::U8, NonNullable)), + 20, + NonNullable, + ), + DType::Primitive(PType::U32, NonNullable), + DType::Primitive(PType::U32, NonNullable), + DType::Primitive(PType::U32, NonNullable), + DType::Primitive(PType::U64, NonNullable), + ], + ), + NonNullable, + ) + } + + /// Converts a [`BenchmarkEntry`] to a [`Scalar`]. + pub fn into_scalar(&self) -> Scalar { + let u8_dtype = DType::Primitive(PType::U8, NonNullable); + + // Convert the 20-byte commit_id to a FixedSizeList scalar. + let commit_id_bytes: Vec = self + .commit_id + .0 + .iter() + .map(|&b| Scalar::primitive(b, NonNullable)) + .collect(); + let commit_id_scalar = Scalar::fixed_size_list(u8_dtype, commit_id_bytes, NonNullable); + + Scalar::struct_( + BenchmarkEntry::dtype(), + vec![ + commit_id_scalar, + Scalar::primitive(self.benchmark_group.0, NonNullable), + Scalar::primitive(self.chart_name.0, NonNullable), + Scalar::primitive(self.series_name.0, NonNullable), + Scalar::primitive(self.value, NonNullable), + ], + ) + } } /// String ID lookup so that we don't have to store the string every time. 
diff --git a/bench-vortex/src/website/update_s3.rs b/bench-vortex/src/website/update_s3.rs index bd195667678..0ffd259ecfc 100644 --- a/bench-vortex/src/website/update_s3.rs +++ b/bench-vortex/src/website/update_s3.rs @@ -14,6 +14,7 @@ use aws_sdk_s3::error::ProvideErrorMetadata; use aws_sdk_s3::error::SdkError; use aws_sdk_s3::primitives::ByteStream; use vortex::array::ArrayRef; +use vortex::array::builders::builder_with_capacity; use vortex::array::stream::ArrayStreamExt; use vortex::error::VortexError; use vortex::error::VortexResult; @@ -23,6 +24,8 @@ use vortex::file::OpenOptionsSessionExt; use vortex::file::WriteOptionsSessionExt; use vortex::session::VortexSession; +use super::entry::BenchmarkEntry; + const INITIAL_DELAY: Duration = Duration::from_millis(100); const MAX_DELAY: Duration = Duration::from_secs(60); @@ -216,3 +219,41 @@ where ))), } } + +/// Appends a single [`BenchmarkEntry`] to a Vortex file stored in S3. +/// +/// This function uses [`update_s3_object`] with optimistic concurrency control to atomically +/// append the entry to the existing data. If concurrent modifications are detected, the operation +/// is automatically retried. +/// +/// # Arguments +/// +/// * `client` - The AWS S3 client. +/// * `session` - The Vortex session for reading and writing files. +/// * `bucket` - The S3 bucket name. +/// * `key` - The S3 object key. +/// * `entry` - The benchmark entry to append. 
+pub async fn append_benchmark_entry( + client: &Client, + session: &VortexSession, + bucket: &str, + key: &str, + entry: &BenchmarkEntry, +) -> VortexResult<()> { + let scalar = entry.into_scalar(); + + update_s3_object(client, session, bucket, key, |existing_array| { + let scalar = scalar.clone(); + async move { + let existing_len = existing_array.len(); + let dtype = existing_array.dtype().clone(); + + let mut builder = builder_with_capacity(&dtype, existing_len + 1); + builder.extend_from_array(&existing_array); + builder.append_scalar(&scalar)?; + + Ok(builder.finish()) + } + }) + .await +} From 09ca0e5a6785ea99719da7d2a3d9f52716285592 Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Tue, 2 Dec 2025 15:59:30 -0500 Subject: [PATCH 07/30] rip out aws sdk and move to vortex-wasm Signed-off-by: Connor Tsui --- Cargo.lock | 23 ++ bench-vortex/src/bin/test_s3_update.rs | 277 ---------------- bench-vortex/src/lib.rs | 1 - bench-vortex/src/website/update_s3.rs | 259 --------------- vortex-wasm/Cargo.toml | 32 ++ .../src/bin/migrate_random_access.rs | 0 .../src/bin/test_s3_read.rs | 21 +- vortex-wasm/src/bin/test_s3_update.rs | 127 ++++++++ vortex-wasm/src/lib.rs | 82 +++++ .../src/website/entry.rs | 0 .../src/website/mod.rs | 0 .../src/website/names.rs | 3 + .../src/website/read_s3.rs | 46 +-- vortex-wasm/src/website/update_s3.rs | 298 ++++++++++++++++++ 14 files changed, 593 insertions(+), 576 deletions(-) delete mode 100644 bench-vortex/src/bin/test_s3_update.rs delete mode 100644 bench-vortex/src/website/update_s3.rs rename {bench-vortex => vortex-wasm}/src/bin/migrate_random_access.rs (100%) rename {bench-vortex => vortex-wasm}/src/bin/test_s3_read.rs (74%) create mode 100644 vortex-wasm/src/bin/test_s3_update.rs rename {bench-vortex => vortex-wasm}/src/website/entry.rs (100%) rename {bench-vortex => vortex-wasm}/src/website/mod.rs (100%) rename {bench-vortex => vortex-wasm}/src/website/names.rs (79%) rename {bench-vortex => vortex-wasm}/src/website/read_s3.rs 
(80%) create mode 100644 vortex-wasm/src/website/update_s3.rs diff --git a/Cargo.lock b/Cargo.lock index 2737b17cb60..977f6ef927c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7353,6 +7353,17 @@ dependencies = [ "serde", ] +[[package]] +name = "serde-wasm-bindgen" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8302e169f0eddcc139c70f139d19d6467353af16f9fce27e8c30158036a1e16b" +dependencies = [ + "js-sys", + "serde", + "wasm-bindgen", +] + [[package]] name = "serde_core" version = "1.0.228" @@ -9622,8 +9633,17 @@ name = "vortex-wasm" version = "0.1.0" dependencies = [ "async-fs", + "hex", + "js-sys", + "phf 0.13.1", + "reqwest", "serde", + "serde-wasm-bindgen", "serde_json", + "tempfile", + "tokio", + "tracing", + "vortex", "vortex-array", "vortex-buffer", "vortex-dtype", @@ -9634,6 +9654,9 @@ dependencies = [ "vortex-metrics", "vortex-scalar", "vortex-session", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", ] [[package]] diff --git a/bench-vortex/src/bin/test_s3_update.rs b/bench-vortex/src/bin/test_s3_update.rs deleted file mode 100644 index e855890aad2..00000000000 --- a/bench-vortex/src/bin/test_s3_update.rs +++ /dev/null @@ -1,277 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -//! Test binary for uploading a Vortex file to S3 and testing the `update_s3_object` function. 
- -#![allow(clippy::expect_used)] - -use std::env; -use std::fs; - -use aws_config::BehaviorVersion; -use aws_sdk_s3::Client; -use aws_sdk_s3::primitives::ByteStream; -use bench_vortex::website::update_s3::update_s3_object; -use vortex::VortexSessionDefault; -use vortex::array::Array; -use vortex::array::builders::builder_with_capacity; -use vortex::array::stream::ArrayStreamExt; -use vortex::dtype::FieldNames; -use vortex::file::OpenOptionsSessionExt; -use vortex::scalar::Scalar; -use vortex::session::VortexSession; - -const BUCKET: &str = "vortex-benchmark-results-database"; -const KEY: &str = "test/random_access.vortex"; - -fn main() { - let runtime = tokio::runtime::Runtime::new().expect("Failed to create tokio runtime"); - runtime.block_on(async_main()); -} - -async fn async_main() { - let session = VortexSession::default(); - - // Load AWS config with SSO profile. - let config = aws_config::defaults(BehaviorVersion::latest()) - .profile_name("PowerUserAccess-375504701696") - .load() - .await; - let client = Client::new(&config); - - // Check for --upload flag. - let args: Vec = env::args().collect(); - if args.iter().any(|a| a == "--upload") { - println!("Uploading random_access.vortex to S3..."); - let local_path = "/Users/connor/spiral/vortex-data/vortex/vortex-wasm/random_access.vortex"; - let file_bytes = fs::read(local_path).expect("Failed to read local file"); - let size = file_bytes.len(); - - client - .put_object() - .bucket(BUCKET) - .key(KEY) - .body(ByteStream::from(file_bytes)) - .send() - .await - .expect("Failed to upload to S3"); - - println!("Uploaded {} bytes to s3://{}/{}", size, BUCKET, KEY); - } - - // Check for --concurrent flag to test atomicity with multiple concurrent updates. 
- let concurrent = args.iter().any(|a| a == "--concurrent"); - let num_concurrent: usize = args - .iter() - .position(|a| a == "--concurrent") - .and_then(|i| args.get(i + 1)) - .and_then(|s| s.parse().ok()) - .unwrap_or(10); - - if concurrent { - println!( - "\nTesting concurrent updates with {} tasks...", - num_concurrent - ); - - // Get initial count. - let initial_count = get_entry_count(&client, &session).await; - println!("Initial entry count: {}", initial_count); - - // Spawn concurrent update tasks. - let mut handles = Vec::new(); - for i in 0..num_concurrent { - let client = client.clone(); - let session = session.clone(); - handles.push(tokio::spawn(async move { - let result = update_s3_object( - &client, - &session, - BUCKET, - KEY, - |existing_array| async move { - let existing_len = existing_array.len(); - - // Create a new entry to append. - let new_entry = create_test_entry(); - - // Build a new array with existing data + new entry. - let dtype = existing_array.dtype().clone(); - let mut builder = builder_with_capacity(&dtype, existing_len + 1); - builder.extend_from_array(&existing_array); - builder.append_scalar(&new_entry)?; - - Ok(builder.finish()) - }, - ) - .await; - - match result { - Ok(()) => { - println!(" Task {} succeeded", i); - true - } - Err(e) => { - println!(" Task {} failed: {}", i, e); - false - } - } - })); - } - - // Wait for all tasks. - let mut successes = 0; - let mut failures = 0; - for handle in handles { - if handle.await.unwrap_or(false) { - successes += 1; - } else { - failures += 1; - } - } - - // Verify final count. 
- let final_count = get_entry_count(&client, &session).await; - println!("\nResults:"); - println!(" Successes: {}", successes); - println!(" Failures: {}", failures); - println!(" Initial count: {}", initial_count); - println!(" Final count: {}", final_count); - println!(" Expected count: {}", initial_count + successes); - - if final_count == initial_count + successes { - println!( - "\n✓ Atomicity verified! All {} successful updates were applied.", - successes - ); - } else { - println!("\n✗ Atomicity FAILED! Count mismatch."); - } - } else { - // Single update test. - println!("\nTesting update_s3_object..."); - - let result = update_s3_object( - &client, - &session, - BUCKET, - KEY, - |existing_array| async move { - let existing_len = existing_array.len(); - println!(" Existing array has {} entries", existing_len); - - // Create a new entry to append. - let new_entry = create_test_entry(); - - // Build a new array with existing data + new entry. - let dtype = existing_array.dtype().clone(); - let mut builder = builder_with_capacity(&dtype, existing_len + 1); - builder.extend_from_array(&existing_array); - builder.append_scalar(&new_entry)?; - - let result = builder.finish(); - println!(" New array has {} entries", result.len()); - - Ok(result) - }, - ) - .await; - - match result { - Ok(()) => { - println!("update_s3_object succeeded!"); - let count = get_entry_count(&client, &session).await; - println!("Verified: updated file has {} entries", count); - } - Err(e) => { - println!("update_s3_object failed: {}", e); - } - } - } - - println!("Done!"); -} - -async fn get_entry_count(client: &Client, session: &VortexSession) -> usize { - let get_result = client - .get_object() - .bucket(BUCKET) - .key(KEY) - .send() - .await - .expect("Failed to download file"); - - let bytes = get_result - .body - .collect() - .await - .expect("Failed to read body") - .into_bytes(); - - let file = session - .open_options() - .open_buffer(bytes) - .expect("Failed to open buffer"); - 
- let array = file - .scan() - .expect("Failed to scan") - .into_array_stream() - .expect("Failed to get stream") - .read_all() - .await - .expect("Failed to read all"); - - array.len() -} - -/// Creates a test entry matching the BenchmarkEntry schema. -fn create_test_entry() -> Scalar { - use std::sync::Arc; - - use vortex::dtype::DType; - use vortex::dtype::Nullability::NonNullable; - use vortex::dtype::PType; - - let u8_dtype = DType::Primitive(PType::U8, NonNullable); - - // Build the dtype to match the schema: - // {commit_id=fixed_size_list(u8)[20], benchmark_group=u32, chart_name=u32, series_name=u32, value=u64} - let dtype = DType::Struct( - vortex::dtype::StructFields::new( - FieldNames::from([ - "commit_id", - "benchmark_group", - "chart_name", - "series_name", - "value", - ]), - vec![ - DType::FixedSizeList(Arc::new(u8_dtype.clone()), 20, NonNullable), - DType::Primitive(PType::U32, NonNullable), - DType::Primitive(PType::U32, NonNullable), - DType::Primitive(PType::U32, NonNullable), - DType::Primitive(PType::U64, NonNullable), - ], - ), - NonNullable, - ); - - // Create a test commit_id (20 bytes of 'x'). 
- let commit_id_bytes: Vec = b"xxxxxxxxxxxxxxxxxxxx" - .iter() - .map(|&b| Scalar::primitive(b, NonNullable)) - .collect(); - let commit_id_scalar = Scalar::fixed_size_list(u8_dtype, commit_id_bytes, NonNullable); - - Scalar::struct_( - dtype, - vec![ - commit_id_scalar, - Scalar::primitive(2u32, NonNullable), // benchmark_group: random-access - Scalar::primitive(2u32, NonNullable), // chart_name: random-access - Scalar::primitive(3u32, NonNullable), // series_name: vortex-nvme - Scalar::primitive(999999u64, NonNullable), // value: test value - ], - ) -} diff --git a/bench-vortex/src/lib.rs b/bench-vortex/src/lib.rs index f22fb3e7de1..2ec25d14030 100644 --- a/bench-vortex/src/lib.rs +++ b/bench-vortex/src/lib.rs @@ -42,7 +42,6 @@ pub mod statpopgen; pub mod tpcds; pub mod tpch; pub mod utils; -pub mod website; pub use datasets::BenchmarkDataset; pub use datasets::file; diff --git a/bench-vortex/src/website/update_s3.rs b/bench-vortex/src/website/update_s3.rs deleted file mode 100644 index 0ffd259ecfc..00000000000 --- a/bench-vortex/src/website/update_s3.rs +++ /dev/null @@ -1,259 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -//! Atomic S3 update operations for Vortex files. -//! -//! This module provides functions to read a Vortex file from S3, apply a transformation, and write -//! the result back atomically using optimistic concurrency control via ETags. 
- -use std::future::Future; -use std::time::Duration; - -use aws_sdk_s3::Client; -use aws_sdk_s3::error::ProvideErrorMetadata; -use aws_sdk_s3::error::SdkError; -use aws_sdk_s3::primitives::ByteStream; -use vortex::array::ArrayRef; -use vortex::array::builders::builder_with_capacity; -use vortex::array::stream::ArrayStreamExt; -use vortex::error::VortexError; -use vortex::error::VortexResult; -use vortex::error::vortex_bail; -use vortex::error::vortex_err; -use vortex::file::OpenOptionsSessionExt; -use vortex::file::WriteOptionsSessionExt; -use vortex::session::VortexSession; - -use super::entry::BenchmarkEntry; - -const INITIAL_DELAY: Duration = Duration::from_millis(100); -const MAX_DELAY: Duration = Duration::from_secs(60); - -/// Internal error type for retry control. -enum UpdateError { - /// The ETag has changed since we read the object. The operation should be retried. - EtagMismatch, - /// A non-retryable error occurred. - Other(VortexError), -} - -impl From for UpdateError { - fn from(e: VortexError) -> Self { - UpdateError::Other(e) - } -} - -/// Updates a Vortex file stored in S3 atomically using optimistic concurrency control. -/// -/// This function reads the existing file from S3, applies a transformation, and writes it back -/// using conditional puts with ETags. If another process modifies the file between read and write, -/// the operation is automatically retried with exponential backoff. -/// -/// # Arguments -/// -/// * `client` - The AWS S3 client to use for operations. -/// * `session` - The Vortex session for reading and writing files. -/// * `bucket` - The S3 bucket name. -/// * `key` - The S3 object key. -/// * `update_fn` - An async function that takes the file's array data and returns the updated -/// array. The returned array must have the same dtype as the input. This function may be called -/// multiple times if retries are needed. -/// -/// # Errors -/// -/// Returns an error if: -/// - The S3 object does not exist. 
-/// - The update function returns an error. -/// - The update function returns an array with a different dtype. -/// - The retry delay reaches the maximum (60 seconds) without success. -/// - An S3 operation fails with a non-retryable error. -#[expect(clippy::use_debug)] -pub async fn update_s3_object( - client: &Client, - session: &VortexSession, - bucket: &str, - key: &str, - mut update_fn: F, -) -> VortexResult<()> -where - F: FnMut(ArrayRef) -> Fut, - Fut: Future>, -{ - let mut delay = INITIAL_DELAY; - let mut attempt = 0; - - loop { - match try_update_s3_object(client, session, bucket, key, &mut update_fn).await { - Ok(()) => return Ok(()), - Err(UpdateError::EtagMismatch) => { - attempt += 1; - tracing::debug!( - "ETag mismatch on attempt {}. Retrying after {:?}...", - attempt, - delay - ); - } - Err(UpdateError::Other(e)) => { - attempt += 1; - eprintln!( - "Error on attempt {}: {}. Retrying after {:?}...", - attempt, e, delay - ); - } - } - - // If we've reached max delay, fail. - if delay >= MAX_DELAY { - vortex_bail!( - "Failed to update S3 object after {} attempts (delay reached {:?})", - attempt, - MAX_DELAY - ); - } - - tokio::time::sleep(delay).await; - - // Exponential backoff: double the delay, capped at MAX_DELAY. - delay = (delay * 2).min(MAX_DELAY); - } -} - -/// Attempts a single update of an S3 object. -async fn try_update_s3_object( - client: &Client, - session: &VortexSession, - bucket: &str, - key: &str, - update_fn: &mut F, -) -> Result<(), UpdateError> -where - F: FnMut(ArrayRef) -> Fut, - Fut: Future>, -{ - // Get current ETag. - let head = client - .head_object() - .bucket(bucket) - .key(key) - .send() - .await - .map_err(|e| vortex_err!("Failed to get object metadata: {}", e))?; - - let etag = head - .e_tag() - .ok_or_else(|| vortex_err!("No ETag returned from head_object"))? - .to_string(); - - // Download with if-match. 
- let get_result = client - .get_object() - .bucket(bucket) - .key(key) - .if_match(&etag) - .send() - .await; - - let get_output = match get_result { - Ok(output) => output, - Err(SdkError::ServiceError(err)) if err.err().code() == Some("PreconditionFailed") => { - return Err(UpdateError::EtagMismatch); - } - Err(e) => { - return Err(UpdateError::Other(vortex_err!( - "Failed to download object: {}", - e - ))); - } - }; - - let existing_bytes = get_output - .body - .collect() - .await - .map_err(|e| vortex_err!("Failed to read object body: {}", e))? - .into_bytes(); - - // Parse as Vortex file and read all data. - let file = session.open_options().open_buffer(existing_bytes)?; - let original_dtype = file.dtype().clone(); - let existing_array = file.scan()?.into_array_stream()?.read_all().await?; - - // Apply the user's update function. - let updated_array = update_fn(existing_array).await?; - - // Validate that the dtype matches. - if updated_array.dtype() != &original_dtype { - return Err(UpdateError::Other(vortex_err!( - "Update function changed dtype from {} to {}. \ - The updated array must have the same dtype as the input file.", - original_dtype, - updated_array.dtype() - ))); - } - - // Serialize updated array to Vortex file bytes. - let mut buffer = Vec::new(); - session - .write_options() - .write(&mut buffer, updated_array.to_array_stream()) - .await?; - - // Upload with if-match. - let put_result = client - .put_object() - .bucket(bucket) - .key(key) - .if_match(&etag) - .body(ByteStream::from(buffer)) - .send() - .await; - - match put_result { - Ok(_) => Ok(()), - Err(SdkError::ServiceError(err)) if err.err().code() == Some("PreconditionFailed") => { - Err(UpdateError::EtagMismatch) - } - Err(e) => Err(UpdateError::Other(vortex_err!( - "Failed to upload object: {}", - e - ))), - } -} - -/// Appends a single [`BenchmarkEntry`] to a Vortex file stored in S3. 
-/// -/// This function uses [`update_s3_object`] with optimistic concurrency control to atomically -/// append the entry to the existing data. If concurrent modifications are detected, the operation -/// is automatically retried. -/// -/// # Arguments -/// -/// * `client` - The AWS S3 client. -/// * `session` - The Vortex session for reading and writing files. -/// * `bucket` - The S3 bucket name. -/// * `key` - The S3 object key. -/// * `entry` - The benchmark entry to append. -pub async fn append_benchmark_entry( - client: &Client, - session: &VortexSession, - bucket: &str, - key: &str, - entry: &BenchmarkEntry, -) -> VortexResult<()> { - let scalar = entry.into_scalar(); - - update_s3_object(client, session, bucket, key, |existing_array| { - let scalar = scalar.clone(); - async move { - let existing_len = existing_array.len(); - let dtype = existing_array.dtype().clone(); - - let mut builder = builder_with_capacity(&dtype, existing_len + 1); - builder.extend_from_array(&existing_array); - builder.append_scalar(&scalar)?; - - Ok(builder.finish()) - } - }) - .await -} diff --git a/vortex-wasm/Cargo.toml b/vortex-wasm/Cargo.toml index 09de0017784..8861d33d4be 100644 --- a/vortex-wasm/Cargo.toml +++ b/vortex-wasm/Cargo.toml @@ -13,10 +13,21 @@ repository.workspace = true rust-version.workspace = true version.workspace = true +[lib] +crate-type = ["cdylib", "rlib"] + [dependencies] async-fs = { workspace = true } +hex = { workspace = true } +phf = { workspace = true } +reqwest = { workspace = true } serde = { workspace = true, features = ["derive"] } +serde-wasm-bindgen = "0.6" serde_json = { workspace = true } +tempfile = { workspace = true } +tokio = { workspace = true, features = ["full"] } +tracing = { workspace = true } +vortex = { workspace = true, features = ["tokio"] } vortex-array = { workspace = true } vortex-buffer = { workspace = true } vortex-dtype = { workspace = true } @@ -27,6 +38,15 @@ vortex-layout = { workspace = true } vortex-metrics = { 
workspace = true } vortex-scalar = { workspace = true } vortex-session = { workspace = true } +wasm-bindgen = "0.2" +wasm-bindgen-futures = { workspace = true } + +[dependencies.web-sys] +features = ["console", "Window", "Response"] +version = "0.3" + +[dependencies.js-sys] +version = "0.3" [[bin]] name = "migrate" @@ -36,5 +56,17 @@ path = "src/bin/migrate.rs" name = "append" path = "src/bin/append.rs" +[[bin]] +name = "test_s3_read" +path = "src/bin/test_s3_read.rs" + +[[bin]] +name = "test_s3_update" +path = "src/bin/test_s3_update.rs" + +[[bin]] +name = "migrate_random_access" +path = "src/bin/migrate_random_access.rs" + [lints] workspace = true diff --git a/bench-vortex/src/bin/migrate_random_access.rs b/vortex-wasm/src/bin/migrate_random_access.rs similarity index 100% rename from bench-vortex/src/bin/migrate_random_access.rs rename to vortex-wasm/src/bin/migrate_random_access.rs diff --git a/bench-vortex/src/bin/test_s3_read.rs b/vortex-wasm/src/bin/test_s3_read.rs similarity index 74% rename from bench-vortex/src/bin/test_s3_read.rs rename to vortex-wasm/src/bin/test_s3_read.rs index d4e79812499..b637cf6e0fd 100644 --- a/bench-vortex/src/bin/test_s3_read.rs +++ b/vortex-wasm/src/bin/test_s3_read.rs @@ -5,14 +5,11 @@ #![allow(clippy::expect_used)] -use aws_config::BehaviorVersion; -use aws_sdk_s3::Client; -use bench_vortex::website::names::NAMES; -use bench_vortex::website::read_s3::read_benchmark_entries; use vortex::VortexSessionDefault; use vortex::session::VortexSession; +use vortex_wasm::website::names::NAMES; +use vortex_wasm::website::read_s3::read_benchmark_entries; -const BUCKET: &str = "vortex-benchmark-results-database"; const KEY: &str = "test/random_access.vortex"; fn main() { @@ -23,19 +20,9 @@ fn main() { async fn async_main() { let session = VortexSession::default(); - // Load AWS config with SSO profile. 
- let config = aws_config::defaults(BehaviorVersion::latest()) - .profile_name("PowerUserAccess-375504701696") - .load() - .await; - let client = Client::new(&config); + println!("Reading benchmark entries from {}...\n", KEY); - println!( - "Reading benchmark entries from s3://{}/{}...\n", - BUCKET, KEY - ); - - let entries = read_benchmark_entries(&client, &session, BUCKET, KEY) + let entries = read_benchmark_entries(&session, KEY) .await .expect("Failed to read benchmark entries"); diff --git a/vortex-wasm/src/bin/test_s3_update.rs b/vortex-wasm/src/bin/test_s3_update.rs new file mode 100644 index 00000000000..80f66825f42 --- /dev/null +++ b/vortex-wasm/src/bin/test_s3_update.rs @@ -0,0 +1,127 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Test binary for testing the `update_s3_object` function using the AWS CLI. + +#![allow(clippy::expect_used, clippy::exit)] + +use std::env; +use std::fs; +use std::process::Command; +use std::sync::Arc; + +use vortex::VortexSessionDefault; +use vortex::array::builders::builder_with_capacity; +use vortex::dtype::DType; +use vortex::dtype::FieldNames; +use vortex::dtype::Nullability::NonNullable; +use vortex::dtype::PType; +use vortex::scalar::Scalar; +use vortex::session::VortexSession; +use vortex_wasm::website::update_s3::update_s3_object; + +const BUCKET: &str = "vortex-benchmark-results-database"; +const KEY: &str = "test/random_access.vortex"; + +fn main() { + let session = VortexSession::default(); + + // Check for --upload flag. 
+ let args: Vec = env::args().collect(); + if args.iter().any(|a| a == "--upload") { + println!("Uploading random_access.vortex to S3..."); + let local_path = "/Users/connor/spiral/vortex-data/vortex/vortex-wasm/random_access.vortex"; + let file_bytes = fs::read(local_path).expect("Failed to read local file"); + let size = file_bytes.len(); + + let status = Command::new("aws") + .args(["s3", "cp", local_path, &format!("s3://{}/{}", BUCKET, KEY)]) + .status() + .expect("Failed to run aws CLI"); + + if !status.success() { + eprintln!("Failed to upload to S3"); + std::process::exit(1); + } + + println!("Uploaded {} bytes to s3://{}/{}", size, BUCKET, KEY); + } + + // Single update test. + println!("\nTesting update_s3_object..."); + + let result = update_s3_object(&session, BUCKET, KEY, |existing_array| { + let existing_len = existing_array.len(); + println!(" Existing array has {} entries", existing_len); + + // Create a new entry to append. + let new_entry = create_test_entry(); + + // Build a new array with existing data + new entry. + let dtype = existing_array.dtype().clone(); + let mut builder = builder_with_capacity(&dtype, existing_len + 1); + builder.extend_from_array(&existing_array); + builder.append_scalar(&new_entry)?; + + let result = builder.finish(); + println!(" New array has {} entries", result.len()); + + Ok(result) + }); + + match result { + Ok(()) => { + println!("update_s3_object succeeded!"); + } + Err(e) => { + println!("update_s3_object failed: {}", e); + } + } + + println!("Done!"); +} + +/// Creates a test entry matching the BenchmarkEntry schema. 
+fn create_test_entry() -> Scalar { + let u8_dtype = DType::Primitive(PType::U8, NonNullable); + + // Build the dtype to match the schema: + // {commit_id=fixed_size_list(u8)[20], benchmark_group=u32, chart_name=u32, series_name=u32, value=u64} + let dtype = DType::Struct( + vortex::dtype::StructFields::new( + FieldNames::from([ + "commit_id", + "benchmark_group", + "chart_name", + "series_name", + "value", + ]), + vec![ + DType::FixedSizeList(Arc::new(u8_dtype.clone()), 20, NonNullable), + DType::Primitive(PType::U32, NonNullable), + DType::Primitive(PType::U32, NonNullable), + DType::Primitive(PType::U32, NonNullable), + DType::Primitive(PType::U64, NonNullable), + ], + ), + NonNullable, + ); + + // Create a test commit_id (20 bytes of 'x'). + let commit_id_bytes: Vec = b"xxxxxxxxxxxxxxxxxxxx" + .iter() + .map(|&b| Scalar::primitive(b, NonNullable)) + .collect(); + let commit_id_scalar = Scalar::fixed_size_list(u8_dtype, commit_id_bytes, NonNullable); + + Scalar::struct_( + dtype, + vec![ + commit_id_scalar, + Scalar::primitive(2u32, NonNullable), // benchmark_group: random-access + Scalar::primitive(2u32, NonNullable), // chart_name: random-access + Scalar::primitive(3u32, NonNullable), // series_name: vortex-nvme + Scalar::primitive(999999u64, NonNullable), // value: test value + ], + ) +} diff --git a/vortex-wasm/src/lib.rs b/vortex-wasm/src/lib.rs index 0d735177e5d..846b54af71e 100644 --- a/vortex-wasm/src/lib.rs +++ b/vortex-wasm/src/lib.rs @@ -1,2 +1,84 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! WASM bindings for the Vortex benchmark website. +//! +//! This module provides a `load_random_access_data()` function that fetches benchmark data from S3, +//! parses it, and returns it in a format ready for JavaScript to render. 
+ +pub mod website; + +use serde::Serialize; +use vortex::VortexSessionDefault; +use vortex::session::VortexSession; +use wasm_bindgen::prelude::*; +use website::names::NAMES; +use website::read_s3::read_benchmark_entries; + +const KEY: &str = "test/random_access.vortex"; + +/// Helper macro for logging to browser console. +macro_rules! log { + ($($t:tt)*) => { + web_sys::console::log_1(&format!($($t)*).into()); + } +} + +/// A single random-access benchmark entry for JavaScript. +#[derive(Serialize)] +pub struct JsEntry { + pub commit_id: String, + pub series_name: &'static str, + pub value_ms: f64, +} + +/// Load random-access benchmark data from S3. +/// +/// This function fetches the Vortex file from S3, parses it, and returns an array of benchmark +/// entries ready for rendering. +/// +/// # Returns +/// +/// A JavaScript array of objects with: +/// - `commit_id`: 40-character hex string (SHA-1 hash) +/// - `series_name`: One of "vortex-nvme", "parquet-nvme", "lance-nvme" +/// - `value_ms`: Value in milliseconds +#[wasm_bindgen] +pub async fn load_random_access_data() -> Result { + log!("Loading random-access benchmark data..."); + + let session = VortexSession::default(); + + let entries = read_benchmark_entries(&session, KEY) + .await + .map_err(|e| JsValue::from_str(&format!("Failed to read benchmark entries: {}", e)))?; + + log!("Loaded {} entries", entries.len()); + + // Convert to JS-friendly format. + let js_entries: Vec = entries + .iter() + .map(|e| JsEntry { + commit_id: e.commit_id.to_string(), + series_name: NAMES.get(&e.series_name.0).copied().unwrap_or("unknown"), + value_ms: e.value as f64 / 1_000_000.0, + }) + .collect(); + + log!("Returning {} JS entries", js_entries.len()); + + serde_wasm_bindgen::to_value(&js_entries) + .map_err(|e| JsValue::from_str(&format!("Failed to serialize: {}", e))) +} + +/// Initialize the WASM module. +#[wasm_bindgen(start)] +pub fn init() { + log!("vortex-wasm initialized"); +} + +/// Get version information. 
+#[wasm_bindgen] +pub fn get_version() -> String { + format!("vortex-wasm v{}", env!("CARGO_PKG_VERSION")) +} diff --git a/bench-vortex/src/website/entry.rs b/vortex-wasm/src/website/entry.rs similarity index 100% rename from bench-vortex/src/website/entry.rs rename to vortex-wasm/src/website/entry.rs diff --git a/bench-vortex/src/website/mod.rs b/vortex-wasm/src/website/mod.rs similarity index 100% rename from bench-vortex/src/website/mod.rs rename to vortex-wasm/src/website/mod.rs diff --git a/bench-vortex/src/website/names.rs b/vortex-wasm/src/website/names.rs similarity index 79% rename from bench-vortex/src/website/names.rs rename to vortex-wasm/src/website/names.rs index 86f34733147..27e911c45f1 100644 --- a/bench-vortex/src/website/names.rs +++ b/vortex-wasm/src/website/names.rs @@ -1,10 +1,13 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +//! Name ID to string mapping for benchmark data. + use phf::Map; use phf::phf_map; // TODO(connor): This should probably be generated smarter. +/// Maps name IDs to their string representations. pub static NAMES: Map = phf_map! { 0 => "null", 1 => "invalid", diff --git a/bench-vortex/src/website/read_s3.rs b/vortex-wasm/src/website/read_s3.rs similarity index 80% rename from bench-vortex/src/website/read_s3.rs rename to vortex-wasm/src/website/read_s3.rs index 6f68afbc437..9cdb1c8d320 100644 --- a/bench-vortex/src/website/read_s3.rs +++ b/vortex-wasm/src/website/read_s3.rs @@ -3,7 +3,6 @@ //! Functions for reading benchmark data from S3. -use aws_sdk_s3::Client; use vortex::array::Array; use vortex::array::ToCanonical; use vortex::array::arrays::FixedSizeListArray; @@ -20,48 +19,51 @@ use super::entry::BenchmarkEntry; use super::entry::CommitId; use super::entry::NameId; +/// Base URL for the S3 bucket containing benchmark data. 
+const S3_BASE_URL: &str = "https://vortex-benchmark-results-database.s3.amazonaws.com"; + /// Reads benchmark entries from an S3 object containing a Vortex file. /// -/// This function downloads the Vortex file from S3, parses the columnar struct array, and converts -/// it to a vector of row-wise [`BenchmarkEntry`] structs. +/// This function downloads the Vortex file from S3 using HTTP (the bucket is public), parses the +/// columnar struct array, and converts it to a vector of row-wise [`BenchmarkEntry`] structs. /// /// # Arguments /// -/// * `client` - The AWS S3 client to use for operations. /// * `session` - The Vortex session for reading files. -/// * `bucket` - The S3 bucket name. -/// * `key` - The S3 object key. +/// * `key` - The S3 object key (e.g., "test/random_access.vortex"). /// /// # Errors /// /// Returns an error if: -/// - The S3 object does not exist or cannot be downloaded. +/// - The HTTP request fails. /// - The file is not a valid Vortex file. /// - The schema does not match the expected [`BenchmarkEntry`] schema. pub async fn read_benchmark_entries( - client: &Client, session: &VortexSession, - bucket: &str, key: &str, ) -> VortexResult> { - // Download the file from S3. - let get_result = client - .get_object() - .bucket(bucket) - .key(key) - .send() + let url = format!("{}/{}", S3_BASE_URL, key); + + let response = reqwest::get(&url) .await - .map_err(|e| vortex_err!("Failed to download S3 object: {}", e))?; + .map_err(|e| vortex_err!("Failed to fetch {}: {}", url, e))?; + + if !response.status().is_success() { + vortex_bail!( + "HTTP error fetching {}: {} {}", + url, + response.status().as_u16(), + response.status().as_str() + ); + } - let bytes = get_result - .body - .collect() + let bytes = response + .bytes() .await - .map_err(|e| vortex_err!("Failed to read S3 object body: {}", e))? - .into_bytes(); + .map_err(|e| vortex_err!("Failed to read response body: {}", e))?; // Parse as Vortex file and read all data. 
- let file = session.open_options().open_buffer(bytes)?; + let file = session.open_options().open_buffer(bytes.to_vec())?; let array = file.scan()?.into_array_stream()?.read_all().await?; // Convert the array to benchmark entries. diff --git a/vortex-wasm/src/website/update_s3.rs b/vortex-wasm/src/website/update_s3.rs new file mode 100644 index 00000000000..6602df23ee7 --- /dev/null +++ b/vortex-wasm/src/website/update_s3.rs @@ -0,0 +1,298 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Atomic S3 update operations for Vortex files using the AWS CLI. +//! +//! This module provides functions to read a Vortex file from S3, apply a transformation, and write +//! the result back atomically using optimistic concurrency control via ETags. + +use std::fs; +use std::io::Write; +use std::process::Command; +use std::time::Duration; + +use tempfile::NamedTempFile; +use vortex::array::ArrayRef; +use vortex::array::builders::builder_with_capacity; +use vortex::array::stream::ArrayStreamExt; +use vortex::error::VortexResult; +use vortex::error::vortex_bail; +use vortex::error::vortex_err; +use vortex::file::OpenOptionsSessionExt; +use vortex::file::WriteOptionsSessionExt; +use vortex::session::VortexSession; + +use super::entry::BenchmarkEntry; + +const MAX_RETRIES: u32 = 5; + +/// Internal error type for retry control. +enum UpdateError { + /// The ETag has changed since we read the object. The operation should be retried. + EtagMismatch, + /// A non-retryable error occurred. + Other(String), +} + +/// Gets the current ETag of an S3 object using the AWS CLI. 
+fn get_etag(bucket: &str, key: &str) -> Result { + let output = Command::new("aws") + .args([ + "s3api", + "head-object", + "--bucket", + bucket, + "--key", + key, + "--query", + "ETag", + "--output", + "text", + ]) + .output() + .map_err(|e| UpdateError::Other(format!("Failed to run aws CLI: {}", e)))?; + + if !output.status.success() { + return Err(UpdateError::Other(format!( + "aws s3api head-object failed: {}", + String::from_utf8_lossy(&output.stderr) + ))); + } + + let etag = String::from_utf8_lossy(&output.stdout).trim().to_string(); + if etag.is_empty() || etag == "null" { + return Err(UpdateError::Other("Failed to retrieve ETag".to_string())); + } + + Ok(etag) +} + +/// Downloads an S3 object to a local file using the AWS CLI with ETag matching. +fn download_object( + bucket: &str, + key: &str, + etag: &str, + dest_path: &str, +) -> Result<(), UpdateError> { + let output = Command::new("aws") + .args([ + "s3api", + "get-object", + "--bucket", + bucket, + "--key", + key, + "--if-match", + etag, + dest_path, + ]) + .output() + .map_err(|e| UpdateError::Other(format!("Failed to run aws CLI: {}", e)))?; + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + if stderr.contains("PreconditionFailed") || stderr.contains("412") { + return Err(UpdateError::EtagMismatch); + } + return Err(UpdateError::Other(format!( + "aws s3api get-object failed: {}", + stderr + ))); + } + + Ok(()) +} + +/// Uploads a local file to S3 using the AWS CLI with ETag matching. 
+fn upload_object(bucket: &str, key: &str, etag: &str, src_path: &str) -> Result<(), UpdateError> { + let output = Command::new("aws") + .args([ + "s3api", + "put-object", + "--bucket", + bucket, + "--key", + key, + "--if-match", + etag, + "--body", + src_path, + ]) + .output() + .map_err(|e| UpdateError::Other(format!("Failed to run aws CLI: {}", e)))?; + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + if stderr.contains("PreconditionFailed") || stderr.contains("412") { + return Err(UpdateError::EtagMismatch); + } + return Err(UpdateError::Other(format!( + "aws s3api put-object failed: {}", + stderr + ))); + } + + Ok(()) +} + +/// Updates a Vortex file stored in S3 atomically using optimistic concurrency control. +/// +/// This function reads the existing file from S3, applies a transformation, and writes it back +/// using conditional puts with ETags. If another process modifies the file between read and write, +/// the operation is automatically retried. +/// +/// # Arguments +/// +/// * `session` - The Vortex session for reading and writing files. +/// * `bucket` - The S3 bucket name. +/// * `key` - The S3 object key. +/// * `update_fn` - A function that takes the file's array data and returns the updated array. +/// The returned array must have the same dtype as the input. This function may be called +/// multiple times if retries are needed. +/// +/// # Errors +/// +/// Returns an error if: +/// - The S3 object does not exist. +/// - The update function returns an error. +/// - The update function returns an array with a different dtype. +/// - The retry limit is reached without success. +/// - An S3 operation fails with a non-retryable error. 
+pub fn update_s3_object( + session: &VortexSession, + bucket: &str, + key: &str, + mut update_fn: F, +) -> VortexResult<()> +where + F: FnMut(ArrayRef) -> VortexResult, +{ + let runtime = tokio::runtime::Runtime::new() + .map_err(|e| vortex_err!("Failed to create tokio runtime: {}", e))?; + + for attempt in 0..MAX_RETRIES { + match try_update_s3_object(session, bucket, key, &mut update_fn, &runtime) { + Ok(()) => return Ok(()), + Err(UpdateError::EtagMismatch) => { + eprintln!("ETag mismatch on attempt {}. Retrying...", attempt + 1); + std::thread::sleep(Duration::from_millis(100 * (1 << attempt))); + } + Err(UpdateError::Other(e)) => { + vortex_bail!("S3 update failed: {}", e); + } + } + } + + vortex_bail!("Failed to update S3 object after {} attempts", MAX_RETRIES) +} + +/// Attempts a single update of an S3 object. +fn try_update_s3_object( + session: &VortexSession, + bucket: &str, + key: &str, + update_fn: &mut F, + runtime: &tokio::runtime::Runtime, +) -> Result<(), UpdateError> +where + F: FnMut(ArrayRef) -> VortexResult, +{ + // Get current ETag. + let etag = get_etag(bucket, key)?; + + // Download to temp file. + let download_file = NamedTempFile::new() + .map_err(|e| UpdateError::Other(format!("Failed to create temp file: {}", e)))?; + let download_path = download_file.path().to_string_lossy().to_string(); + + download_object(bucket, key, &etag, &download_path)?; + + // Read and parse. + let existing_bytes = fs::read(&download_path) + .map_err(|e| UpdateError::Other(format!("Failed to read downloaded file: {}", e)))?; + + let file = session + .open_options() + .open_buffer(existing_bytes) + .map_err(|e| UpdateError::Other(format!("Failed to open Vortex file: {}", e)))?; + + let original_dtype = file.dtype().clone(); + + let existing_array = runtime + .block_on(async { file.scan()?.into_array_stream()?.read_all().await }) + .map_err(|e| UpdateError::Other(format!("Failed to read array: {}", e)))?; + + // Apply the user's update function. 
+ let updated_array = update_fn(existing_array) + .map_err(|e| UpdateError::Other(format!("Update function failed: {}", e)))?; + + // Validate that the dtype matches. + if updated_array.dtype() != &original_dtype { + return Err(UpdateError::Other(format!( + "Update function changed dtype from {} to {}. \ + The updated array must have the same dtype as the input file.", + original_dtype, + updated_array.dtype() + ))); + } + + // Serialize updated array to Vortex file bytes. + let mut buffer = Vec::new(); + runtime + .block_on(async { + session + .write_options() + .write(&mut buffer, updated_array.to_array_stream()) + .await + }) + .map_err(|e| UpdateError::Other(format!("Failed to serialize array: {}", e)))?; + + // Write to temp file for upload. + let mut upload_file = NamedTempFile::new() + .map_err(|e| UpdateError::Other(format!("Failed to create temp file: {}", e)))?; + upload_file + .write_all(&buffer) + .map_err(|e| UpdateError::Other(format!("Failed to write temp file: {}", e)))?; + upload_file + .flush() + .map_err(|e| UpdateError::Other(format!("Failed to flush temp file: {}", e)))?; + + let upload_path = upload_file.path().to_string_lossy().to_string(); + + // Upload with if-match. + upload_object(bucket, key, &etag, &upload_path)?; + + Ok(()) +} + +/// Appends a single [`BenchmarkEntry`] to a Vortex file stored in S3. +/// +/// This function uses [`update_s3_object`] with optimistic concurrency control to atomically +/// append the entry to the existing data. If concurrent modifications are detected, the operation +/// is automatically retried. +/// +/// # Arguments +/// +/// * `session` - The Vortex session for reading and writing files. +/// * `bucket` - The S3 bucket name. +/// * `key` - The S3 object key. +/// * `entry` - The benchmark entry to append. 
+pub fn append_benchmark_entry( + session: &VortexSession, + bucket: &str, + key: &str, + entry: &BenchmarkEntry, +) -> VortexResult<()> { + let scalar = entry.into_scalar(); + + update_s3_object(session, bucket, key, |existing_array| { + let existing_len = existing_array.len(); + let dtype = existing_array.dtype().clone(); + + let mut builder = builder_with_capacity(&dtype, existing_len + 1); + builder.extend_from_array(&existing_array); + builder.append_scalar(&scalar)?; + + Ok(builder.finish()) + }) +} From 8cb8a7702505ff2e763e1712aef411abece3be95 Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Tue, 2 Dec 2025 17:20:27 -0500 Subject: [PATCH 08/30] vortex wasm works Signed-off-by: Connor Tsui --- Cargo.lock | 12 +- vortex-file/Cargo.toml | 6 +- vortex-file/src/lib.rs | 3 + vortex-file/src/open.rs | 8 +- vortex-io/Cargo.toml | 11 +- vortex-io/src/lib.rs | 2 + vortex-io/src/write.rs | 1 + vortex-layout/Cargo.toml | 7 +- vortex-layout/src/segments/cache.rs | 11 ++ vortex-wasm/Cargo.toml | 44 ++++-- vortex-wasm/src/bin/migrate_random_access.rs | 19 +-- vortex-wasm/src/bin/test_s3_update.rs | 34 ++++- vortex-wasm/src/lib.rs | 144 ++++++++++--------- vortex-wasm/src/website/mod.rs | 3 + vortex-wasm/src/website/names.rs | 20 ++- vortex-wasm/src/website/read_s3.rs | 12 +- vortex-wasm/src/website/update_s3.rs | 117 +++++++++------ 17 files changed, 292 insertions(+), 162 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 977f6ef927c..8a6d457289a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1749,6 +1749,16 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "console_error_panic_hook" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a06aeb73f470f66dcdbf7223caeebb85984942f22f1adb2a088cf9668146bbbc" +dependencies = [ + "cfg-if", + "wasm-bindgen", +] + [[package]] name = "const-oid" version = "0.9.6" @@ -9633,8 +9643,8 @@ name = "vortex-wasm" version = "0.1.0" dependencies = [ "async-fs", + 
"console_error_panic_hook", "hex", - "js-sys", "phf 0.13.1", "reqwest", "serde", diff --git a/vortex-file/Cargo.toml b/vortex-file/Cargo.toml index 11c5c9e7b6f..aa28757d0c3 100644 --- a/vortex-file/Cargo.toml +++ b/vortex-file/Cargo.toml @@ -17,7 +17,6 @@ version = { workspace = true } all-features = true [dependencies] -async-fs = { workspace = true } async-trait = { workspace = true } bytes = { workspace = true } flatbuffers = { workspace = true } @@ -57,6 +56,11 @@ vortex-utils = { workspace = true, features = ["dashmap"] } vortex-zigzag = { workspace = true } vortex-zstd = { workspace = true, optional = true } +# async-fs uses std::time which is not available on WASM. It's only used in update.rs which is +# already feature-gated for non-WASM. +[target.'cfg(not(target_arch = "wasm32"))'.dependencies] +async-fs = { workspace = true } + [dev-dependencies] tokio = { workspace = true, features = ["full"] } vortex-array = { workspace = true, features = ["test-harness"] } diff --git a/vortex-file/src/lib.rs b/vortex-file/src/lib.rs index eba5571aef8..a76ed2d69f8 100644 --- a/vortex-file/src/lib.rs +++ b/vortex-file/src/lib.rs @@ -99,6 +99,8 @@ pub mod segments; mod strategy; #[cfg(test)] mod tests; +// The update module uses blocking runtimes that are not available in WASM. 
+#[cfg(not(target_arch = "wasm32"))] mod update; mod writer; @@ -107,6 +109,7 @@ pub use footer::*; pub use forever_constant::*; pub use open::*; pub use strategy::*; +#[cfg(not(target_arch = "wasm32"))] pub use update::*; use vortex_alp::ALPRDVTable; use vortex_alp::ALPVTable; diff --git a/vortex-file/src/open.rs b/vortex-file/src/open.rs index 1ee6d08feb2..48164c40afe 100644 --- a/vortex-file/src/open.rs +++ b/vortex-file/src/open.rs @@ -12,6 +12,7 @@ use vortex_dtype::DType; use vortex_error::VortexError; use vortex_error::VortexExpect; use vortex_error::VortexResult; +#[cfg(not(target_arch = "wasm32"))] use vortex_io::InstrumentedReadAt; use vortex_io::VortexReadAt; use vortex_io::file::IntoReadSource; @@ -159,7 +160,12 @@ impl VortexOpenOptions { /// /// This is a low-level API and we strongly recommend using [`VortexOpenOptions::open`]. pub async fn open_read_at(self, read: R) -> VortexResult { - let read = Arc::new(InstrumentedReadAt::new(Arc::new(read), &self.metrics)); + // On WASM, skip instrumentation because it uses std::time which is not available. 
+ #[cfg(target_arch = "wasm32")] + let read: Arc = Arc::new(read); + #[cfg(not(target_arch = "wasm32"))] + let read: Arc = + Arc::new(InstrumentedReadAt::new(Arc::new(read), &self.metrics)); let footer = if let Some(footer) = self.footer { footer diff --git a/vortex-io/Cargo.toml b/vortex-io/Cargo.toml index eae3e778009..90d329d6530 100644 --- a/vortex-io/Cargo.toml +++ b/vortex-io/Cargo.toml @@ -17,8 +17,6 @@ version = { workspace = true } all-features = true [dependencies] -async-compat = { workspace = true } -async-fs = { workspace = true } async-stream = { workspace = true } async-trait = { workspace = true } bytes = { workspace = true } @@ -26,15 +24,13 @@ cfg-if = { workspace = true } futures = { workspace = true, features = ["std", "executor"] } # Needed to pickup the "wasm_js" feature for wasm targets from the workspace configuration getrandom_v03 = { workspace = true } +handle = "1.0.2" kanal = { workspace = true } log = { workspace = true } object_store = { workspace = true, optional = true, features = ["fs"] } oneshot = { workspace = true } parking_lot = { workspace = true } pin-project-lite = { workspace = true } -# this is the maximum subset of fetaures that is safe for wasm32 targets -handle = "1.0.2" -tokio = { workspace = true, features = ["io-util", "rt", "sync"] } tracing = { workspace = true } vortex-buffer = { workspace = true } vortex-error = { workspace = true } @@ -42,8 +38,11 @@ vortex-metrics = { workspace = true } vortex-session = { workspace = true } [target.'cfg(not(target_arch = "wasm32"))'.dependencies] -# Smol is our default impl, so we don't want it to be optional, but it cannot be part of wasm +# These deps use std::time which is not available in WASM. 
+async-compat = { workspace = true } +async-fs = { workspace = true } smol = { workspace = true } +tokio = { workspace = true, features = ["io-util", "rt", "sync"] } [target.'cfg(target_arch = "wasm32")'.dependencies] wasm-bindgen-futures = { workspace = true } diff --git a/vortex-io/src/lib.rs b/vortex-io/src/lib.rs index 6a08c821c8f..de1f9f4882b 100644 --- a/vortex-io/src/lib.rs +++ b/vortex-io/src/lib.rs @@ -11,6 +11,7 @@ //! flags implements the core traits for several common async runtimes and backing stores. pub use io_buf::*; +#[cfg(not(target_arch = "wasm32"))] pub use limit::*; #[cfg(feature = "object_store")] pub use object_store::*; @@ -20,6 +21,7 @@ pub use write::*; pub mod file; mod io_buf; pub mod kanal_ext; +#[cfg(not(target_arch = "wasm32"))] mod limit; #[cfg(feature = "object_store")] mod object_store; diff --git a/vortex-io/src/write.rs b/vortex-io/src/write.rs index 57048ce66ef..ce61aadcbf4 100644 --- a/vortex-io/src/write.rs +++ b/vortex-io/src/write.rs @@ -95,6 +95,7 @@ impl VortexWrite for &mut W { } } +#[cfg(not(target_arch = "wasm32"))] impl VortexWrite for async_fs::File { async fn write_all(&mut self, buffer: B) -> io::Result { AsyncWriteExt::write_all(self, buffer.as_slice()).await?; diff --git a/vortex-layout/Cargo.toml b/vortex-layout/Cargo.toml index 4e062f66ba3..4b2f106fe75 100644 --- a/vortex-layout/Cargo.toml +++ b/vortex-layout/Cargo.toml @@ -26,7 +26,6 @@ futures = { workspace = true, features = ["alloc", "async-await", "executor"] } itertools = { workspace = true } kanal = { workspace = true } log = { workspace = true } -moka = { workspace = true, features = ["future"] } once_cell = { workspace = true, features = ["parking_lot"] } oneshot = { workspace = true } parking_lot = { workspace = true } @@ -34,7 +33,6 @@ paste = { workspace = true } pco = { workspace = true } pin-project-lite = { workspace = true } prost = { workspace = true } -rustc-hash = { workspace = true } termtree = { workspace = true } tokio = { workspace = true, 
features = ["rt"], optional = true } uuid = { workspace = true } @@ -55,6 +53,11 @@ vortex-session = { workspace = true } vortex-utils = { workspace = true, features = ["dashmap"] } vortex-zstd = { workspace = true, optional = true } +# Moka uses std::time::Instant which is not available on WASM. +[target.'cfg(not(target_arch = "wasm32"))'.dependencies] +moka = { workspace = true, features = ["future"] } +rustc-hash = { workspace = true } + [dev-dependencies] futures = { workspace = true, features = ["executor"] } rstest = { workspace = true } diff --git a/vortex-layout/src/segments/cache.rs b/vortex-layout/src/segments/cache.rs index ce023b50f98..fdea8d3b9a3 100644 --- a/vortex-layout/src/segments/cache.rs +++ b/vortex-layout/src/segments/cache.rs @@ -5,12 +5,17 @@ use std::sync::Arc; use async_trait::async_trait; use futures::FutureExt; +#[cfg(not(target_arch = "wasm32"))] use moka::future::Cache; +#[cfg(not(target_arch = "wasm32"))] use moka::future::CacheBuilder; +#[cfg(not(target_arch = "wasm32"))] use moka::policy::EvictionPolicy; +#[cfg(not(target_arch = "wasm32"))] use rustc_hash::FxBuildHasher; use vortex_buffer::BufferHandle; use vortex_buffer::ByteBuffer; +#[cfg(not(target_arch = "wasm32"))] use vortex_error::VortexExpect; use vortex_error::VortexResult; use vortex_metrics::Counter; @@ -41,8 +46,13 @@ impl SegmentCache for NoOpSegmentCache { } /// A [`SegmentCache`] based around an in-memory Moka cache. +/// +/// This cache is not available on WASM targets because moka uses `std::time::Instant` which is not +/// supported. Use [`NoOpSegmentCache`] for WASM targets instead. 
+#[cfg(not(target_arch = "wasm32"))] pub struct MokaSegmentCache(Cache); +#[cfg(not(target_arch = "wasm32"))] impl MokaSegmentCache { pub fn new(max_capacity_bytes: u64) -> Self { Self( @@ -61,6 +71,7 @@ impl MokaSegmentCache { } } +#[cfg(not(target_arch = "wasm32"))] #[async_trait] impl SegmentCache for MokaSegmentCache { async fn get(&self, id: SegmentId) -> VortexResult> { diff --git a/vortex-wasm/Cargo.toml b/vortex-wasm/Cargo.toml index 8861d33d4be..855fa842854 100644 --- a/vortex-wasm/Cargo.toml +++ b/vortex-wasm/Cargo.toml @@ -16,18 +16,31 @@ version.workspace = true [lib] crate-type = ["cdylib", "rlib"] +[features] +default = [] +# Native feature for binaries - includes tokio runtime and file system dependencies. +native = [ + "dep:async-fs", + "dep:serde_json", + "dep:tempfile", + "dep:tokio", + "dep:tracing", + "vortex/tokio", +] + [dependencies] -async-fs = { workspace = true } +# Core dependencies for both WASM and native. +console_error_panic_hook = "0.1" hex = { workspace = true } phf = { workspace = true } reqwest = { workspace = true } serde = { workspace = true, features = ["derive"] } serde-wasm-bindgen = "0.6" -serde_json = { workspace = true } -tempfile = { workspace = true } -tokio = { workspace = true, features = ["full"] } -tracing = { workspace = true } -vortex = { workspace = true, features = ["tokio"] } +vortex = { workspace = true } +wasm-bindgen = "0.2" +wasm-bindgen-futures = { workspace = true } + +# These are needed by binaries - accessed via vortex re-exports for library code. 
vortex-array = { workspace = true } vortex-buffer = { workspace = true } vortex-dtype = { workspace = true } @@ -38,35 +51,42 @@ vortex-layout = { workspace = true } vortex-metrics = { workspace = true } vortex-scalar = { workspace = true } vortex-session = { workspace = true } -wasm-bindgen = "0.2" -wasm-bindgen-futures = { workspace = true } -[dependencies.web-sys] -features = ["console", "Window", "Response"] -version = "0.3" +# Native-only dependencies (for binaries). +async-fs = { workspace = true, optional = true } +serde_json = { workspace = true, optional = true } +tempfile = { workspace = true, optional = true } +tokio = { workspace = true, features = ["full"], optional = true } +tracing = { workspace = true, optional = true } -[dependencies.js-sys] +[dependencies.web-sys] +features = ["console"] version = "0.3" [[bin]] name = "migrate" path = "src/bin/migrate.rs" +required-features = ["native"] [[bin]] name = "append" path = "src/bin/append.rs" +required-features = ["native"] [[bin]] name = "test_s3_read" path = "src/bin/test_s3_read.rs" +required-features = ["native"] [[bin]] name = "test_s3_update" path = "src/bin/test_s3_update.rs" +required-features = ["native"] [[bin]] name = "migrate_random_access" path = "src/bin/migrate_random_access.rs" +required-features = ["native"] [lints] workspace = true diff --git a/vortex-wasm/src/bin/migrate_random_access.rs b/vortex-wasm/src/bin/migrate_random_access.rs index 52500ae50a5..f1385a125ec 100644 --- a/vortex-wasm/src/bin/migrate_random_access.rs +++ b/vortex-wasm/src/bin/migrate_random_access.rs @@ -24,14 +24,7 @@ use vortex::dtype::FieldNames; use vortex::file::WriteOptionsSessionExt; use vortex::file::WriteStrategyBuilder; use vortex::session::VortexSession; - -/// Name ID constants from `bench-vortex/src/website/names.rs`. 
-mod name_ids { - pub const RANDOM_ACCESS: u32 = 2; - pub const VORTEX_NVME: u32 = 3; - pub const PARQUET_NVME: u32 = 4; - pub const LANCE_NVME: u32 = 5; -} +use vortex_wasm::website::names; /// Represents a benchmark entry from the JSON file. #[derive(Debug, Deserialize)] @@ -47,9 +40,9 @@ struct JsonEntry { /// Maps the JSON `name` field to a series name ID. fn series_name_id(name: &str) -> u32 { match name { - "random-access/vortex-tokio-local-disk" => name_ids::VORTEX_NVME, - "random-access/parquet-tokio-local-disk" => name_ids::PARQUET_NVME, - "random-access/lance-tokio-local-disk" => name_ids::LANCE_NVME, + "random-access/vortex-tokio-local-disk" => names::VORTEX_NVME, + "random-access/parquet-tokio-local-disk" => names::PARQUET_NVME, + "random-access/lance-tokio-local-disk" => names::LANCE_NVME, _ => panic!("Unknown benchmark name: {}", name), } } @@ -93,8 +86,8 @@ async fn async_main() { commit_id_bytes.extend_from_slice(&bytes); // All entries have the same benchmark_group and chart_name. - benchmark_groups.push(name_ids::RANDOM_ACCESS); - chart_names.push(name_ids::RANDOM_ACCESS); + benchmark_groups.push(names::RANDOM_ACCESS); + chart_names.push(names::RANDOM_ACCESS); // Map name to series_name ID. series_names.push(series_name_id(&entry.name)); diff --git a/vortex-wasm/src/bin/test_s3_update.rs b/vortex-wasm/src/bin/test_s3_update.rs index 80f66825f42..c462bf2394e 100644 --- a/vortex-wasm/src/bin/test_s3_update.rs +++ b/vortex-wasm/src/bin/test_s3_update.rs @@ -2,6 +2,10 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors //! Test binary for testing the `update_s3_object` function using the AWS CLI. +//! +//! Usage: +//! cargo run -p vortex-wasm --bin test_s3_update -- --profile +//! 
cargo run -p vortex-wasm --bin test_s3_update -- --upload --profile #![allow(clippy::expect_used, clippy::exit)] @@ -16,6 +20,7 @@ use vortex::dtype::DType; use vortex::dtype::FieldNames; use vortex::dtype::Nullability::NonNullable; use vortex::dtype::PType; +use vortex::io::session::RuntimeSessionExt; use vortex::scalar::Scalar; use vortex::session::VortexSession; use vortex_wasm::website::update_s3::update_s3_object; @@ -24,20 +29,35 @@ const BUCKET: &str = "vortex-benchmark-results-database"; const KEY: &str = "test/random_access.vortex"; fn main() { - let session = VortexSession::default(); + let session = VortexSession::default().with_tokio(); + let args: Vec = env::args().collect(); + + // Parse --profile argument. + let profile = args + .iter() + .position(|a| a == "--profile") + .and_then(|i| args.get(i + 1)) + .map(String::as_str); + + if profile.is_none() { + eprintln!("Warning: No --profile specified. AWS CLI will use default credentials."); + eprintln!("Usage: test_s3_update [--upload] --profile "); + } // Check for --upload flag. - let args: Vec = env::args().collect(); if args.iter().any(|a| a == "--upload") { println!("Uploading random_access.vortex to S3..."); let local_path = "/Users/connor/spiral/vortex-data/vortex/vortex-wasm/random_access.vortex"; let file_bytes = fs::read(local_path).expect("Failed to read local file"); let size = file_bytes.len(); - let status = Command::new("aws") - .args(["s3", "cp", local_path, &format!("s3://{}/{}", BUCKET, KEY)]) - .status() - .expect("Failed to run aws CLI"); + let mut cmd = Command::new("aws"); + cmd.args(["s3", "cp", local_path, &format!("s3://{}/{}", BUCKET, KEY)]); + if let Some(p) = profile { + cmd.args(["--profile", p]); + } + + let status = cmd.status().expect("Failed to run aws CLI"); if !status.success() { eprintln!("Failed to upload to S3"); @@ -50,7 +70,7 @@ fn main() { // Single update test. 
println!("\nTesting update_s3_object..."); - let result = update_s3_object(&session, BUCKET, KEY, |existing_array| { + let result = update_s3_object(&session, BUCKET, KEY, profile, |existing_array| { let existing_len = existing_array.len(); println!(" Existing array has {} entries", existing_len); diff --git a/vortex-wasm/src/lib.rs b/vortex-wasm/src/lib.rs index 846b54af71e..efcb7d6ebd1 100644 --- a/vortex-wasm/src/lib.rs +++ b/vortex-wasm/src/lib.rs @@ -8,77 +8,85 @@ pub mod website; -use serde::Serialize; -use vortex::VortexSessionDefault; -use vortex::session::VortexSession; -use wasm_bindgen::prelude::*; -use website::names::NAMES; -use website::read_s3::read_benchmark_entries; - -const KEY: &str = "test/random_access.vortex"; - -/// Helper macro for logging to browser console. -macro_rules! log { - ($($t:tt)*) => { - web_sys::console::log_1(&format!($($t)*).into()); +#[cfg(target_arch = "wasm32")] +mod wasm_bindings { + use serde::Serialize; + use vortex::VortexSessionDefault; + use vortex::io::runtime::wasm::WasmRuntime; + use vortex::io::session::RuntimeSessionExt; + use vortex::session::VortexSession; + use wasm_bindgen::prelude::*; + + use crate::website::names::NAMES; + use crate::website::read_s3::read_benchmark_entries; + + const KEY: &str = "test/random_access.vortex"; + + /// Helper macro for logging to browser console. + macro_rules! log { + ($($t:tt)*) => { + web_sys::console::log_1(&format!($($t)*).into()); + } } -} -/// A single random-access benchmark entry for JavaScript. -#[derive(Serialize)] -pub struct JsEntry { - pub commit_id: String, - pub series_name: &'static str, - pub value_ms: f64, -} + /// A single random-access benchmark entry for JavaScript. + #[derive(Serialize)] + pub struct JsEntry { + pub commit_id: String, + pub series_name: &'static str, + pub value_ms: f64, + } -/// Load random-access benchmark data from S3. 
-/// -/// This function fetches the Vortex file from S3, parses it, and returns an array of benchmark -/// entries ready for rendering. -/// -/// # Returns -/// -/// A JavaScript array of objects with: -/// - `commit_id`: 40-character hex string (SHA-1 hash) -/// - `series_name`: One of "vortex-nvme", "parquet-nvme", "lance-nvme" -/// - `value_ms`: Value in milliseconds -#[wasm_bindgen] -pub async fn load_random_access_data() -> Result { - log!("Loading random-access benchmark data..."); - - let session = VortexSession::default(); - - let entries = read_benchmark_entries(&session, KEY) - .await - .map_err(|e| JsValue::from_str(&format!("Failed to read benchmark entries: {}", e)))?; - - log!("Loaded {} entries", entries.len()); - - // Convert to JS-friendly format. - let js_entries: Vec = entries - .iter() - .map(|e| JsEntry { - commit_id: e.commit_id.to_string(), - series_name: NAMES.get(&e.series_name.0).copied().unwrap_or("unknown"), - value_ms: e.value as f64 / 1_000_000.0, - }) - .collect(); - - log!("Returning {} JS entries", js_entries.len()); - - serde_wasm_bindgen::to_value(&js_entries) - .map_err(|e| JsValue::from_str(&format!("Failed to serialize: {}", e))) -} + /// Load random-access benchmark data from S3. + /// + /// This function fetches the Vortex file from S3, parses it, and returns an array of benchmark + /// entries ready for rendering. + /// + /// # Returns + /// + /// A JavaScript array of objects with: + /// - `commit_id`: 40-character hex string (SHA-1 hash) + /// - `series_name`: One of "vortex-nvme", "parquet-nvme", "lance-nvme" + /// - `value_ms`: Value in milliseconds + #[wasm_bindgen] + pub async fn load_random_access_data() -> Result { + log!("Loading random-access benchmark data..."); -/// Initialize the WASM module. -#[wasm_bindgen(start)] -pub fn init() { - log!("vortex-wasm initialized"); -} + // Create a session configured with the WASM runtime. 
+ let session = VortexSession::default().with_handle(WasmRuntime::handle()); + + let entries = read_benchmark_entries(&session, KEY) + .await + .map_err(|e| JsValue::from_str(&format!("Failed to read benchmark entries: {}", e)))?; -/// Get version information. -#[wasm_bindgen] -pub fn get_version() -> String { - format!("vortex-wasm v{}", env!("CARGO_PKG_VERSION")) + log!("Loaded {} entries", entries.len()); + + // Convert to JS-friendly format. + let js_entries: Vec = entries + .iter() + .map(|e| JsEntry { + commit_id: e.commit_id.to_string(), + series_name: NAMES.get(&e.series_name.0).copied().unwrap_or("unknown"), + value_ms: e.value as f64 / 1_000_000.0, + }) + .collect(); + + log!("Returning {} JS entries", js_entries.len()); + + serde_wasm_bindgen::to_value(&js_entries) + .map_err(|e| JsValue::from_str(&format!("Failed to serialize: {}", e))) + } + + /// Initialize the WASM module. + #[wasm_bindgen(start)] + pub fn init() { + console_error_panic_hook::set_once(); + log!("vortex-wasm initialized"); + } + + /// Get version information. + #[wasm_bindgen] + pub fn get_version() -> String { + format!("vortex-wasm v{}", env!("CARGO_PKG_VERSION")) + } } diff --git a/vortex-wasm/src/website/mod.rs b/vortex-wasm/src/website/mod.rs index b9fbb134503..49df715cfbc 100644 --- a/vortex-wasm/src/website/mod.rs +++ b/vortex-wasm/src/website/mod.rs @@ -4,4 +4,7 @@ pub mod entry; pub mod names; pub mod read_s3; + +// update_s3 uses tokio and std::process::Command which are not available in WASM. +#[cfg(feature = "native")] pub mod update_s3; diff --git a/vortex-wasm/src/website/names.rs b/vortex-wasm/src/website/names.rs index 27e911c45f1..9226f344985 100644 --- a/vortex-wasm/src/website/names.rs +++ b/vortex-wasm/src/website/names.rs @@ -6,13 +6,21 @@ use phf::Map; use phf::phf_map; +// Name ID constants. 
+pub const NULL: u32 = 0; +pub const INVALID: u32 = 1; +pub const RANDOM_ACCESS: u32 = 2; +pub const VORTEX_NVME: u32 = 3; +pub const PARQUET_NVME: u32 = 4; +pub const LANCE_NVME: u32 = 5; + // TODO(connor): This should probably be generated smarter. /// Maps name IDs to their string representations. pub static NAMES: Map = phf_map! { - 0 => "null", - 1 => "invalid", - 2 => "random-access", - 3 => "vortex-nvme", - 4 => "parquet-nvme", - 5 => "lance-nvme", + 0u32 => "null", + 1u32 => "invalid", + 2u32 => "random-access", + 3u32 => "vortex-nvme", + 4u32 => "parquet-nvme", + 5u32 => "lance-nvme", }; diff --git a/vortex-wasm/src/website/read_s3.rs b/vortex-wasm/src/website/read_s3.rs index 9cdb1c8d320..df51d00ecdf 100644 --- a/vortex-wasm/src/website/read_s3.rs +++ b/vortex-wasm/src/website/read_s3.rs @@ -63,10 +63,18 @@ pub async fn read_benchmark_entries( .map_err(|e| vortex_err!("Failed to read response body: {}", e))?; // Parse as Vortex file and read all data. - let file = session.open_options().open_buffer(bytes.to_vec())?; + // Note: We use `open_read_at` directly instead of `open_buffer` because `open_buffer` uses + // `futures::executor::block_on` which requires `std::time` (not available in WASM). + let buffer: vortex::buffer::ByteBuffer = bytes.to_vec().into(); + let file = session + .open_options() + .with_initial_read_size(0) + .without_segment_cache() + .open_read_at(buffer) + .await?; + let array = file.scan()?.into_array_stream()?.read_all().await?; - // Convert the array to benchmark entries. array_to_benchmark_entries(&array) } diff --git a/vortex-wasm/src/website/update_s3.rs b/vortex-wasm/src/website/update_s3.rs index 6602df23ee7..9ef5c5f2056 100644 --- a/vortex-wasm/src/website/update_s3.rs +++ b/vortex-wasm/src/website/update_s3.rs @@ -24,7 +24,7 @@ use vortex::session::VortexSession; use super::entry::BenchmarkEntry; -const MAX_RETRIES: u32 = 5; +const MAX_RETRIES: u32 = 8; /// Internal error type for retry control. 
enum UpdateError { @@ -34,21 +34,34 @@ enum UpdateError { Other(String), } +/// Builds AWS CLI arguments, optionally including a profile. +fn aws_args(base_args: &[&str], profile: Option<&str>) -> Vec { + let mut args: Vec = base_args.iter().map(|s| s.to_string()).collect(); + if let Some(p) = profile { + args.push("--profile".to_string()); + args.push(p.to_string()); + } + args +} + /// Gets the current ETag of an S3 object using the AWS CLI. -fn get_etag(bucket: &str, key: &str) -> Result { +fn get_etag(bucket: &str, key: &str, profile: Option<&str>) -> Result { + let base_args = [ + "s3api", + "head-object", + "--bucket", + bucket, + "--key", + key, + "--query", + "ETag", + "--output", + "text", + ]; + let args = aws_args(&base_args, profile); + let output = Command::new("aws") - .args([ - "s3api", - "head-object", - "--bucket", - bucket, - "--key", - key, - "--query", - "ETag", - "--output", - "text", - ]) + .args(&args) .output() .map_err(|e| UpdateError::Other(format!("Failed to run aws CLI: {}", e)))?; @@ -73,19 +86,23 @@ fn download_object( key: &str, etag: &str, dest_path: &str, + profile: Option<&str>, ) -> Result<(), UpdateError> { + let base_args = [ + "s3api", + "get-object", + "--bucket", + bucket, + "--key", + key, + "--if-match", + etag, + dest_path, + ]; + let args = aws_args(&base_args, profile); + let output = Command::new("aws") - .args([ - "s3api", - "get-object", - "--bucket", - bucket, - "--key", - key, - "--if-match", - etag, - dest_path, - ]) + .args(&args) .output() .map_err(|e| UpdateError::Other(format!("Failed to run aws CLI: {}", e)))?; @@ -104,20 +121,29 @@ fn download_object( } /// Uploads a local file to S3 using the AWS CLI with ETag matching. 
-fn upload_object(bucket: &str, key: &str, etag: &str, src_path: &str) -> Result<(), UpdateError> { +fn upload_object( + bucket: &str, + key: &str, + etag: &str, + src_path: &str, + profile: Option<&str>, +) -> Result<(), UpdateError> { + let base_args = [ + "s3api", + "put-object", + "--bucket", + bucket, + "--key", + key, + "--if-match", + etag, + "--body", + src_path, + ]; + let args = aws_args(&base_args, profile); + let output = Command::new("aws") - .args([ - "s3api", - "put-object", - "--bucket", - bucket, - "--key", - key, - "--if-match", - etag, - "--body", - src_path, - ]) + .args(&args) .output() .map_err(|e| UpdateError::Other(format!("Failed to run aws CLI: {}", e)))?; @@ -146,6 +172,7 @@ fn upload_object(bucket: &str, key: &str, etag: &str, src_path: &str) -> Result< /// * `session` - The Vortex session for reading and writing files. /// * `bucket` - The S3 bucket name. /// * `key` - The S3 object key. +/// * `profile` - Optional AWS CLI profile name (e.g., from `aws sso login`). /// * `update_fn` - A function that takes the file's array data and returns the updated array. /// The returned array must have the same dtype as the input. This function may be called /// multiple times if retries are needed. @@ -162,6 +189,7 @@ pub fn update_s3_object( session: &VortexSession, bucket: &str, key: &str, + profile: Option<&str>, mut update_fn: F, ) -> VortexResult<()> where @@ -171,7 +199,7 @@ where .map_err(|e| vortex_err!("Failed to create tokio runtime: {}", e))?; for attempt in 0..MAX_RETRIES { - match try_update_s3_object(session, bucket, key, &mut update_fn, &runtime) { + match try_update_s3_object(session, bucket, key, profile, &mut update_fn, &runtime) { Ok(()) => return Ok(()), Err(UpdateError::EtagMismatch) => { eprintln!("ETag mismatch on attempt {}. 
Retrying...", attempt + 1); @@ -191,6 +219,7 @@ fn try_update_s3_object( session: &VortexSession, bucket: &str, key: &str, + profile: Option<&str>, update_fn: &mut F, runtime: &tokio::runtime::Runtime, ) -> Result<(), UpdateError> @@ -198,14 +227,14 @@ where F: FnMut(ArrayRef) -> VortexResult, { // Get current ETag. - let etag = get_etag(bucket, key)?; + let etag = get_etag(bucket, key, profile)?; // Download to temp file. let download_file = NamedTempFile::new() .map_err(|e| UpdateError::Other(format!("Failed to create temp file: {}", e)))?; let download_path = download_file.path().to_string_lossy().to_string(); - download_object(bucket, key, &etag, &download_path)?; + download_object(bucket, key, &etag, &download_path, profile)?; // Read and parse. let existing_bytes = fs::read(&download_path) @@ -260,7 +289,7 @@ where let upload_path = upload_file.path().to_string_lossy().to_string(); // Upload with if-match. - upload_object(bucket, key, &etag, &upload_path)?; + upload_object(bucket, key, &etag, &upload_path, profile)?; Ok(()) } @@ -276,16 +305,18 @@ where /// * `session` - The Vortex session for reading and writing files. /// * `bucket` - The S3 bucket name. /// * `key` - The S3 object key. +/// * `profile` - Optional AWS CLI profile name. /// * `entry` - The benchmark entry to append. 
pub fn append_benchmark_entry( session: &VortexSession, bucket: &str, key: &str, + profile: Option<&str>, entry: &BenchmarkEntry, ) -> VortexResult<()> { let scalar = entry.into_scalar(); - update_s3_object(session, bucket, key, |existing_array| { + update_s3_object(session, bucket, key, profile, |existing_array| { let existing_len = existing_array.len(); let dtype = existing_array.dtype().clone(); From 11f6eeb560c38804a93537b42c31df9a0fd4a5fa Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Wed, 3 Dec 2025 10:38:34 -0500 Subject: [PATCH 09/30] updates Signed-off-by: Connor Tsui --- plan.md | 16 +- vortex-wasm/src/website/entry.rs | 75 +++ vortex-wasm/src/website/read_s3.rs | 110 +---- vortex-wasm/wasm-test.html | 759 +++++++++++++++++++++++++++++ 4 files changed, 874 insertions(+), 86 deletions(-) create mode 100644 vortex-wasm/wasm-test.html diff --git a/plan.md b/plan.md index 5ac44435071..9bd48142b5a 100644 --- a/plan.md +++ b/plan.md @@ -34,7 +34,6 @@ _Approximately in order of priority:_ - Migrate the JavaScript code to use the Rust bindings - Test - ### Ideas ```rust @@ -67,15 +66,26 @@ fn main() { - We can simply read the entire file of all benchmarking data into memory, decompress in memory, add a new entry, compress, and then write back to S3 +### 1 file vs many files + +With 1 file, we have to stuff every different kind of benchmark into the same place, which isn't great +for compression and it means we have to do more work on read time to group data correctly (by benchmark group, chart, then series). + +The seemingly obvious alternative here is to have a different file per "same" data. But what exactly would these be grouped by? We definitely do not want to group by series as that makes it pretty +difficult to add a new series to a chart (maybe it's not terrible with some more engineering). It +also would mean that we would start to approach 1000+ files. + +We could also do a file per chart, as that maps much closer to how we generate these charts. 
One +program is generating all the data for one chart, but that program might also generate data for +multiple charts. This is definitely something we should look into later, but for now having a single +file that has all the data (all with the same schema) is the most flexible. ### Things to update Start with just the random access benchmark - generate a bunch of fake data and upload it to S3 - Add bindings to read and write `BenchmarkEntry` vortex arrays to and from S3 -USE IPC FORMAT INSTEAD - `query_bench` to post directly to S3 - `random_access` and `compress` to also post directly to S3 diff --git a/vortex-wasm/src/website/entry.rs b/vortex-wasm/src/website/entry.rs index 3c0ccb1caa4..6f814ec24b3 100644 --- a/vortex-wasm/src/website/entry.rs +++ b/vortex-wasm/src/website/entry.rs @@ -12,6 +12,13 @@ use vortex::dtype::Nullability::NonNullable; use vortex::dtype::PType; use vortex::dtype::StructFields; use vortex::scalar::Scalar; +use vortex_array::Array; +use vortex_array::ToCanonical; +use vortex_array::arrays::FixedSizeListArray; +use vortex_array::arrays::PrimitiveArray; +use vortex_array::arrays::StructArray; +use vortex_error::VortexResult; +use vortex_error::vortex_bail; /// A benchmark entry, grouped by benchmark group, then chart name, then series name. #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] @@ -98,6 +105,74 @@ impl BenchmarkEntry { ], ) } + + /// Converts a Vortex array (expected to be a struct array) into a vector of [`BenchmarkEntry`]. + /// + /// The array must have the following schema: + /// - `commit_id`: FixedSizeList + /// - `benchmark_group`: u32 + /// - `chart_name`: u32 + /// - `series_name`: u32 + /// - `value`: u64 + pub fn vec_from_array(array: &dyn Array) -> VortexResult> { + // Convert to canonical struct array. + let struct_array: StructArray = array.to_struct(); + + let len = struct_array.len(); + let mut entries = Vec::with_capacity(len); + + // Extract each field. 
+ let commit_id_field = struct_array.field_by_name("commit_id")?; + let benchmark_group_field = struct_array.field_by_name("benchmark_group")?; + let chart_name_field = struct_array.field_by_name("chart_name")?; + let series_name_field = struct_array.field_by_name("series_name")?; + let value_field = struct_array.field_by_name("value")?; + + // Convert commit_id to canonical fixed-size list and get the underlying bytes. + let commit_id_fsl: FixedSizeListArray = commit_id_field.to_fixed_size_list(); + if commit_id_fsl.list_size() != 20 { + vortex_bail!( + "Expected commit_id to have list_size 20, got {}", + commit_id_fsl.list_size() + ); + } + + // Get the elements as a primitive array of u8. + let commit_id_elements: PrimitiveArray = commit_id_fsl.elements().to_primitive(); + let commit_id_bytes: &[u8] = commit_id_elements.as_slice(); + + // Convert primitive fields. + let benchmark_group_prim: PrimitiveArray = benchmark_group_field.to_primitive(); + let benchmark_groups: &[u32] = benchmark_group_prim.as_slice(); + + let chart_name_prim: PrimitiveArray = chart_name_field.to_primitive(); + let chart_names: &[u32] = chart_name_prim.as_slice(); + + let series_name_prim: PrimitiveArray = series_name_field.to_primitive(); + let series_names: &[u32] = series_name_prim.as_slice(); + + let value_prim: PrimitiveArray = value_field.to_primitive(); + let values: &[u64] = value_prim.as_slice(); + + // Build the entries. + for i in 0..len { + // Extract the 20-byte commit_id for this row. + let start = i * 20; + let end = start + 20; + let mut commit_id_arr = [0u8; 20]; + commit_id_arr.copy_from_slice(&commit_id_bytes[start..end]); + + entries.push(BenchmarkEntry { + commit_id: CommitId(commit_id_arr), + benchmark_group: NameId(benchmark_groups[i]), + chart_name: NameId(chart_names[i]), + series_name: NameId(series_names[i]), + value: values[i], + }); + } + + Ok(entries) + } } /// String ID lookup so that we don't have to store the string every time. 
diff --git a/vortex-wasm/src/website/read_s3.rs b/vortex-wasm/src/website/read_s3.rs index df51d00ecdf..1d1abb5ff44 100644 --- a/vortex-wasm/src/website/read_s3.rs +++ b/vortex-wasm/src/website/read_s3.rs @@ -3,29 +3,23 @@ //! Functions for reading benchmark data from S3. -use vortex::array::Array; -use vortex::array::ToCanonical; -use vortex::array::arrays::FixedSizeListArray; -use vortex::array::arrays::PrimitiveArray; -use vortex::array::arrays::StructArray; use vortex::array::stream::ArrayStreamExt; use vortex::error::VortexResult; use vortex::error::vortex_bail; use vortex::error::vortex_err; use vortex::file::OpenOptionsSessionExt; use vortex::session::VortexSession; +use vortex_array::ArrayRef; use super::entry::BenchmarkEntry; -use super::entry::CommitId; -use super::entry::NameId; /// Base URL for the S3 bucket containing benchmark data. const S3_BASE_URL: &str = "https://vortex-benchmark-results-database.s3.amazonaws.com"; -/// Reads benchmark entries from an S3 object containing a Vortex file. +/// Reads a Vortex array from an S3 object. /// -/// This function downloads the Vortex file from S3 using HTTP (the bucket is public), parses the -/// columnar struct array, and converts it to a vector of row-wise [`BenchmarkEntry`] structs. +/// This function downloads the Vortex file from S3 using HTTP (the bucket is public) and +/// returns the parsed array. /// /// # Arguments /// @@ -37,11 +31,7 @@ const S3_BASE_URL: &str = "https://vortex-benchmark-results-database.s3.amazonaw /// Returns an error if: /// - The HTTP request fails. /// - The file is not a valid Vortex file. -/// - The schema does not match the expected [`BenchmarkEntry`] schema. 
-pub async fn read_benchmark_entries( - session: &VortexSession, - key: &str, -) -> VortexResult> { +pub async fn read_s3_array(session: &VortexSession, key: &str) -> VortexResult { let url = format!("{}/{}", S3_BASE_URL, key); let response = reqwest::get(&url) @@ -73,75 +63,29 @@ pub async fn read_benchmark_entries( .open_read_at(buffer) .await?; - let array = file.scan()?.into_array_stream()?.read_all().await?; - - array_to_benchmark_entries(&array) + file.scan()?.into_array_stream()?.read_all().await } -/// Converts a Vortex array (expected to be a struct array) into a vector of [`BenchmarkEntry`]. +/// Reads benchmark entries from an S3 object containing a Vortex file. /// -/// The array must have the following schema: -/// - `commit_id`: FixedSizeList -/// - `benchmark_group`: u32 -/// - `chart_name`: u32 -/// - `series_name`: u32 -/// - `value`: u64 -pub fn array_to_benchmark_entries(array: &dyn Array) -> VortexResult> { - // Convert to canonical struct array. - let struct_array: StructArray = array.to_struct(); - - let len = struct_array.len(); - let mut entries = Vec::with_capacity(len); - - // Extract each field. - let commit_id_field = struct_array.field_by_name("commit_id")?; - let benchmark_group_field = struct_array.field_by_name("benchmark_group")?; - let chart_name_field = struct_array.field_by_name("chart_name")?; - let series_name_field = struct_array.field_by_name("series_name")?; - let value_field = struct_array.field_by_name("value")?; - - // Convert commit_id to canonical fixed-size list and get the underlying bytes. - let commit_id_fsl: FixedSizeListArray = commit_id_field.to_fixed_size_list(); - if commit_id_fsl.list_size() != 20 { - vortex_bail!( - "Expected commit_id to have list_size 20, got {}", - commit_id_fsl.list_size() - ); - } - - // Get the elements as a primitive array of u8. 
- let commit_id_elements: PrimitiveArray = commit_id_fsl.elements().to_primitive(); - let commit_id_bytes: &[u8] = commit_id_elements.as_slice(); - - // Convert primitive fields. - let benchmark_group_prim: PrimitiveArray = benchmark_group_field.to_primitive(); - let benchmark_groups: &[u32] = benchmark_group_prim.as_slice(); - - let chart_name_prim: PrimitiveArray = chart_name_field.to_primitive(); - let chart_names: &[u32] = chart_name_prim.as_slice(); - - let series_name_prim: PrimitiveArray = series_name_field.to_primitive(); - let series_names: &[u32] = series_name_prim.as_slice(); - - let value_prim: PrimitiveArray = value_field.to_primitive(); - let values: &[u64] = value_prim.as_slice(); - - // Build the entries. - for i in 0..len { - // Extract the 20-byte commit_id for this row. - let start = i * 20; - let end = start + 20; - let mut commit_id_arr = [0u8; 20]; - commit_id_arr.copy_from_slice(&commit_id_bytes[start..end]); - - entries.push(BenchmarkEntry { - commit_id: CommitId(commit_id_arr), - benchmark_group: NameId(benchmark_groups[i]), - chart_name: NameId(chart_names[i]), - series_name: NameId(series_names[i]), - value: values[i], - }); - } - - Ok(entries) +/// This function downloads the Vortex file from S3 using HTTP (the bucket is public), parses the +/// columnar struct array, and converts it to a vector of row-wise [`BenchmarkEntry`] structs. +/// +/// # Arguments +/// +/// * `session` - The Vortex session for reading files. +/// * `key` - The S3 object key (e.g., "test/random_access.vortex"). +/// +/// # Errors +/// +/// Returns an error if: +/// - The HTTP request fails. +/// - The file is not a valid Vortex file. +/// - The schema does not match the expected [`BenchmarkEntry`] schema. 
+pub async fn read_benchmark_entries( + session: &VortexSession, + key: &str, +) -> VortexResult> { + let array = read_s3_array(session, key).await?; + BenchmarkEntry::vec_from_array(&array) } diff --git a/vortex-wasm/wasm-test.html b/vortex-wasm/wasm-test.html new file mode 100644 index 00000000000..a36f0b107be --- /dev/null +++ b/vortex-wasm/wasm-test.html @@ -0,0 +1,759 @@ + + + + + + Vortex WASM Random Access Benchmark + + + + +
+

Vortex WASM Random Access Benchmark

+

Loading benchmark data directly from a Vortex file using WebAssembly

+ +
+ + Initializing WASM module... +
+ + + + +
+ + + + From 059c685f2afc392d53fe685af2234b32fe03d2ff Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Wed, 3 Dec 2025 11:11:22 -0500 Subject: [PATCH 10/30] change to string names instead of IDs for simplicity Signed-off-by: Connor Tsui --- Cargo.lock | 1 - vortex-wasm/Cargo.toml | 1 - vortex-wasm/src/bin/migrate_random_access.rs | 54 +++++++++------ vortex-wasm/src/bin/test_s3_read.rs | 11 ++- vortex-wasm/src/lib.rs | 5 +- vortex-wasm/src/website/entry.rs | 71 ++++++++++---------- vortex-wasm/src/website/mod.rs | 1 - vortex-wasm/src/website/names.rs | 26 ------- 8 files changed, 76 insertions(+), 94 deletions(-) delete mode 100644 vortex-wasm/src/website/names.rs diff --git a/Cargo.lock b/Cargo.lock index 8a6d457289a..665d20cc1b1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9645,7 +9645,6 @@ dependencies = [ "async-fs", "console_error_panic_hook", "hex", - "phf 0.13.1", "reqwest", "serde", "serde-wasm-bindgen", diff --git a/vortex-wasm/Cargo.toml b/vortex-wasm/Cargo.toml index 855fa842854..6ddeeadcc28 100644 --- a/vortex-wasm/Cargo.toml +++ b/vortex-wasm/Cargo.toml @@ -32,7 +32,6 @@ native = [ # Core dependencies for both WASM and native. 
console_error_panic_hook = "0.1" hex = { workspace = true } -phf = { workspace = true } reqwest = { workspace = true } serde = { workspace = true, features = ["derive"] } serde-wasm-bindgen = "0.6" diff --git a/vortex-wasm/src/bin/migrate_random_access.rs b/vortex-wasm/src/bin/migrate_random_access.rs index f1385a125ec..6ba435e6f71 100644 --- a/vortex-wasm/src/bin/migrate_random_access.rs +++ b/vortex-wasm/src/bin/migrate_random_access.rs @@ -17,14 +17,16 @@ use vortex::array::IntoArray; use vortex::array::arrays::FixedSizeListArray; use vortex::array::arrays::PrimitiveArray; use vortex::array::arrays::StructArray; +use vortex::array::arrays::VarBinArray; use vortex::array::validity::Validity; use vortex::buffer::Buffer; use vortex::compressor::CompactCompressor; +use vortex::dtype::DType; use vortex::dtype::FieldNames; +use vortex::dtype::Nullability; use vortex::file::WriteOptionsSessionExt; use vortex::file::WriteStrategyBuilder; use vortex::session::VortexSession; -use vortex_wasm::website::names; /// Represents a benchmark entry from the JSON file. #[derive(Debug, Deserialize)] @@ -37,12 +39,12 @@ struct JsonEntry { _extra: serde_json::Value, } -/// Maps the JSON `name` field to a series name ID. -fn series_name_id(name: &str) -> u32 { +/// Maps the JSON `name` field to the series name string. +fn series_name(name: &str) -> &'static str { match name { - "random-access/vortex-tokio-local-disk" => names::VORTEX_NVME, - "random-access/parquet-tokio-local-disk" => names::PARQUET_NVME, - "random-access/lance-tokio-local-disk" => names::LANCE_NVME, + "random-access/vortex-tokio-local-disk" => "vortex-nvme", + "random-access/parquet-tokio-local-disk" => "parquet-nvme", + "random-access/lance-tokio-local-disk" => "lance-nvme", _ => panic!("Unknown benchmark name: {}", name), } } @@ -74,9 +76,9 @@ async fn async_main() { // Extract fields into separate vectors. 
let mut commit_id_bytes: Vec = Vec::with_capacity(num_entries * 20); - let mut benchmark_groups: Vec = Vec::with_capacity(num_entries); - let mut chart_names: Vec = Vec::with_capacity(num_entries); - let mut series_names: Vec = Vec::with_capacity(num_entries); + let mut benchmark_groups: Vec<&str> = Vec::with_capacity(num_entries); + let mut chart_names: Vec<&str> = Vec::with_capacity(num_entries); + let mut series_names: Vec<&str> = Vec::with_capacity(num_entries); let mut values: Vec = Vec::with_capacity(num_entries); for entry in &entries { @@ -86,11 +88,11 @@ async fn async_main() { commit_id_bytes.extend_from_slice(&bytes); // All entries have the same benchmark_group and chart_name. - benchmark_groups.push(names::RANDOM_ACCESS); - chart_names.push(names::RANDOM_ACCESS); + benchmark_groups.push("random-access"); + chart_names.push("random-access"); - // Map name to series_name ID. - series_names.push(series_name_id(&entry.name)); + // Map name to series_name string. + series_names.push(series_name(&entry.name)); values.push(entry.value); } @@ -108,15 +110,23 @@ async fn async_main() { ) .expect("Failed to create commit_id array"); - // benchmark_group: u32 - let benchmark_group_array = - PrimitiveArray::new(Buffer::from(benchmark_groups), Validity::NonNullable); - - // chart_name: u32 - let chart_name_array = PrimitiveArray::new(Buffer::from(chart_names), Validity::NonNullable); - - // series_name: u32 - let series_name_array = PrimitiveArray::new(Buffer::from(series_names), Validity::NonNullable); + // benchmark_group: utf8 + let benchmark_group_array = VarBinArray::from_iter( + benchmark_groups.iter().map(|s| Some(*s)), + DType::Utf8(Nullability::NonNullable), + ); + + // chart_name: utf8 + let chart_name_array = VarBinArray::from_iter( + chart_names.iter().map(|s| Some(*s)), + DType::Utf8(Nullability::NonNullable), + ); + + // series_name: utf8 + let series_name_array = VarBinArray::from_iter( + series_names.iter().map(|s| Some(*s)), + 
DType::Utf8(Nullability::NonNullable), + ); // value: u64 let value_array = PrimitiveArray::new(Buffer::from(values), Validity::NonNullable); diff --git a/vortex-wasm/src/bin/test_s3_read.rs b/vortex-wasm/src/bin/test_s3_read.rs index b637cf6e0fd..64a44e3e592 100644 --- a/vortex-wasm/src/bin/test_s3_read.rs +++ b/vortex-wasm/src/bin/test_s3_read.rs @@ -7,7 +7,6 @@ use vortex::VortexSessionDefault; use vortex::session::VortexSession; -use vortex_wasm::website::names::NAMES; use vortex_wasm::website::read_s3::read_benchmark_entries; const KEY: &str = "test/random_access.vortex"; @@ -41,13 +40,13 @@ async fn async_main() { for (i, entry) in entries.iter().enumerate() { if i < show_first || i >= entries.len() - show_last { - let benchmark_group = NAMES.get(&entry.benchmark_group.0).unwrap_or(&"unknown"); - let chart_name = NAMES.get(&entry.chart_name.0).unwrap_or(&"unknown"); - let series_name = NAMES.get(&entry.series_name.0).unwrap_or(&"unknown"); - println!( "{} {:>15} {:>15} {:>15} {:>12}", - entry.commit_id, benchmark_group, chart_name, series_name, entry.value + entry.commit_id, + entry.benchmark_group, + entry.chart_name, + entry.series_name, + entry.value ); } else if i == show_first { println!( diff --git a/vortex-wasm/src/lib.rs b/vortex-wasm/src/lib.rs index efcb7d6ebd1..b486052839e 100644 --- a/vortex-wasm/src/lib.rs +++ b/vortex-wasm/src/lib.rs @@ -17,7 +17,6 @@ mod wasm_bindings { use vortex::session::VortexSession; use wasm_bindgen::prelude::*; - use crate::website::names::NAMES; use crate::website::read_s3::read_benchmark_entries; const KEY: &str = "test/random_access.vortex"; @@ -33,7 +32,7 @@ mod wasm_bindings { #[derive(Serialize)] pub struct JsEntry { pub commit_id: String, - pub series_name: &'static str, + pub series_name: String, pub value_ms: f64, } @@ -66,7 +65,7 @@ mod wasm_bindings { .iter() .map(|e| JsEntry { commit_id: e.commit_id.to_string(), - series_name: NAMES.get(&e.series_name.0).copied().unwrap_or("unknown"), + series_name: 
e.series_name.clone(), value_ms: e.value as f64 / 1_000_000.0, }) .collect(); diff --git a/vortex-wasm/src/website/entry.rs b/vortex-wasm/src/website/entry.rs index 6f814ec24b3..f48c73a3019 100644 --- a/vortex-wasm/src/website/entry.rs +++ b/vortex-wasm/src/website/entry.rs @@ -24,18 +24,18 @@ use vortex_error::vortex_bail; #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct BenchmarkEntry { pub commit_id: CommitId, - pub benchmark_group: NameId, - pub chart_name: NameId, - pub series_name: NameId, + pub benchmark_group: String, + pub chart_name: String, + pub series_name: String, pub value: u64, } impl BenchmarkEntry { pub fn new( commit_id: CommitId, - benchmark_group: NameId, - chart_name: NameId, - series_name: NameId, + benchmark_group: String, + chart_name: String, + series_name: String, value: u64, ) -> Self { Self { @@ -51,9 +51,9 @@ impl BenchmarkEntry { /// /// The schema is: /// - `commit_id`: `FixedSizeList` (20-byte binary SHA-1) - /// - `benchmark_group`: `u32` - /// - `chart_name`: `u32` - /// - `series_name`: `u32` + /// - `benchmark_group`: `Utf8` + /// - `chart_name`: `Utf8` + /// - `series_name`: `Utf8` /// - `value`: `u64` pub fn dtype() -> DType { DType::Struct( @@ -71,9 +71,9 @@ impl BenchmarkEntry { 20, NonNullable, ), - DType::Primitive(PType::U32, NonNullable), - DType::Primitive(PType::U32, NonNullable), - DType::Primitive(PType::U32, NonNullable), + DType::Utf8(NonNullable), + DType::Utf8(NonNullable), + DType::Utf8(NonNullable), DType::Primitive(PType::U64, NonNullable), ], ), @@ -98,9 +98,9 @@ impl BenchmarkEntry { BenchmarkEntry::dtype(), vec![ commit_id_scalar, - Scalar::primitive(self.benchmark_group.0, NonNullable), - Scalar::primitive(self.chart_name.0, NonNullable), - Scalar::primitive(self.series_name.0, NonNullable), + Scalar::utf8(self.benchmark_group.as_str(), NonNullable), + Scalar::utf8(self.chart_name.as_str(), NonNullable), + Scalar::utf8(self.series_name.as_str(), NonNullable), 
Scalar::primitive(self.value, NonNullable), ], ) @@ -110,9 +110,9 @@ impl BenchmarkEntry { /// /// The array must have the following schema: /// - `commit_id`: FixedSizeList - /// - `benchmark_group`: u32 - /// - `chart_name`: u32 - /// - `series_name`: u32 + /// - `benchmark_group`: Utf8 + /// - `chart_name`: Utf8 + /// - `series_name`: Utf8 /// - `value`: u64 pub fn vec_from_array(array: &dyn Array) -> VortexResult> { // Convert to canonical struct array. @@ -141,16 +141,12 @@ impl BenchmarkEntry { let commit_id_elements: PrimitiveArray = commit_id_fsl.elements().to_primitive(); let commit_id_bytes: &[u8] = commit_id_elements.as_slice(); - // Convert primitive fields. - let benchmark_group_prim: PrimitiveArray = benchmark_group_field.to_primitive(); - let benchmark_groups: &[u32] = benchmark_group_prim.as_slice(); - - let chart_name_prim: PrimitiveArray = chart_name_field.to_primitive(); - let chart_names: &[u32] = chart_name_prim.as_slice(); - - let series_name_prim: PrimitiveArray = series_name_field.to_primitive(); - let series_names: &[u32] = series_name_prim.as_slice(); + // Convert string fields to canonical varbinview arrays. + let benchmark_group_vbv = benchmark_group_field.to_varbinview(); + let chart_name_vbv = chart_name_field.to_varbinview(); + let series_name_vbv = series_name_field.to_varbinview(); + // Convert value field to primitive array. let value_prim: PrimitiveArray = value_field.to_primitive(); let values: &[u64] = value_prim.as_slice(); @@ -162,11 +158,22 @@ impl BenchmarkEntry { let mut commit_id_arr = [0u8; 20]; commit_id_arr.copy_from_slice(&commit_id_bytes[start..end]); + // Read strings using bytes_at() and convert to String. + let benchmark_group = std::str::from_utf8(benchmark_group_vbv.bytes_at(i).as_ref()) + .map_err(|e| vortex_error::vortex_err!("Invalid UTF-8 in benchmark_group: {}", e))? 
+ .to_string(); + let chart_name = std::str::from_utf8(chart_name_vbv.bytes_at(i).as_ref()) + .map_err(|e| vortex_error::vortex_err!("Invalid UTF-8 in chart_name: {}", e))? + .to_string(); + let series_name = std::str::from_utf8(series_name_vbv.bytes_at(i).as_ref()) + .map_err(|e| vortex_error::vortex_err!("Invalid UTF-8 in series_name: {}", e))? + .to_string(); + entries.push(BenchmarkEntry { commit_id: CommitId(commit_id_arr), - benchmark_group: NameId(benchmark_groups[i]), - chart_name: NameId(chart_names[i]), - series_name: NameId(series_names[i]), + benchmark_group, + chart_name, + series_name, value: values[i], }); } @@ -175,10 +182,6 @@ impl BenchmarkEntry { } } -/// String ID lookup so that we don't have to store the string every time. -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] -pub struct NameId(pub u32); - /// The 20-byte binary SHA-1 Git commit ID. #[derive(Clone, PartialEq, Eq)] pub struct CommitId(pub [u8; 20]); diff --git a/vortex-wasm/src/website/mod.rs b/vortex-wasm/src/website/mod.rs index 49df715cfbc..7f55c85544b 100644 --- a/vortex-wasm/src/website/mod.rs +++ b/vortex-wasm/src/website/mod.rs @@ -2,7 +2,6 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors pub mod entry; -pub mod names; pub mod read_s3; // update_s3 uses tokio and std::process::Command which are not available in WASM. diff --git a/vortex-wasm/src/website/names.rs b/vortex-wasm/src/website/names.rs deleted file mode 100644 index 9226f344985..00000000000 --- a/vortex-wasm/src/website/names.rs +++ /dev/null @@ -1,26 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -//! Name ID to string mapping for benchmark data. - -use phf::Map; -use phf::phf_map; - -// Name ID constants. 
-pub const NULL: u32 = 0; -pub const INVALID: u32 = 1; -pub const RANDOM_ACCESS: u32 = 2; -pub const VORTEX_NVME: u32 = 3; -pub const PARQUET_NVME: u32 = 4; -pub const LANCE_NVME: u32 = 5; - -// TODO(connor): This should probably be generated smarter. -/// Maps name IDs to their string representations. -pub static NAMES: Map = phf_map! { - 0u32 => "null", - 1u32 => "invalid", - 2u32 => "random-access", - 3u32 => "vortex-nvme", - 4u32 => "parquet-nvme", - 5u32 => "lance-nvme", -}; From d6d9c8e127ebc03a45ab224ea99224f2e7c92cb3 Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Wed, 3 Dec 2025 10:50:20 -0500 Subject: [PATCH 11/30] clean up demo Signed-off-by: Connor Tsui --- vortex-wasm/wasm-test.css | 542 +++++++++++++++++++++++++ vortex-wasm/wasm-test.html | 789 +++---------------------------------- vortex-wasm/wasm-test.js | 746 +++++++++++++++++++++++++++++++++++ 3 files changed, 1339 insertions(+), 738 deletions(-) create mode 100644 vortex-wasm/wasm-test.css create mode 100644 vortex-wasm/wasm-test.js diff --git a/vortex-wasm/wasm-test.css b/vortex-wasm/wasm-test.css new file mode 100644 index 00000000000..fa4de05f18a --- /dev/null +++ b/vortex-wasm/wasm-test.css @@ -0,0 +1,542 @@ +/* CSS Variables for consistent theming */ +:root { + /* Vortex Brand Colors */ + --vortex-black: #101010; + --vortex-gray: #ECECEC; + --vortex-green: #CEE562; + --vortex-blue: #5971FD; + --vortex-pink: #EEB3E1; + + /* Series Colors */ + --series-vortex: #101010; + --series-parquet: #5DADE2; + --series-lance: #ef7f1d; + + /* Theme Colors */ + --primary-color: var(--vortex-blue); + --primary-hover: #4A5FE5; + --bg-color: #ffffff; + --bg-secondary: #FAFAFA; + --text-color: var(--vortex-black); + --text-secondary: #666666; + --border-color: var(--vortex-gray); + + /* Shadows */ + --shadow-sm: 0 1px 3px rgba(16, 16, 16, 0.08); + --shadow-md: 0 4px 8px rgba(16, 16, 16, 0.08); + --shadow-lg: 0 12px 24px rgba(16, 16, 16, 0.12); + + /* Border Radius */ + --radius-sm: 4px; + --radius-md: 8px; + 
--radius-lg: 12px; +} + +/* Reset and base styles */ +* { + box-sizing: border-box; + margin: 0; + padding: 0; +} + +html { + font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", "SF Pro Display", Roboto, sans-serif; + -webkit-font-smoothing: antialiased; + -moz-osx-font-smoothing: grayscale; + font-size: 16px; + scroll-behavior: smooth; +} + +body { + color: var(--text-color); + background: var(--bg-secondary); + min-height: 100vh; + padding: 2rem; + padding-bottom: 12rem; + line-height: 1.6; + letter-spacing: -0.01em; +} + +/* Container */ +.container { + max-width: 1400px; + margin: 0 auto; +} + +/* Page Header */ +.page-header { + margin-bottom: 2rem; +} + +.page-header h1 { + font-family: "Funnel Display", -apple-system, BlinkMacSystemFont, sans-serif; + font-size: 2rem; + font-weight: 600; + color: var(--text-color); + margin-bottom: 0.5rem; + letter-spacing: -0.02em; +} + +.subtitle { + color: var(--text-secondary); + font-size: 1rem; +} + +/* Status */ +.status { + background: var(--bg-color); + border-radius: var(--radius-md); + padding: 1rem 1.25rem; + margin-bottom: 1.5rem; + box-shadow: var(--shadow-sm); + display: flex; + align-items: center; + gap: 0.75rem; + border: 1px solid var(--border-color); +} + +.status.loading { + border-left: 4px solid var(--vortex-blue); +} + +.status.success { + border-left: 4px solid var(--series-vortex); +} + +.status.error { + border-left: 4px solid #e74c3c; +} + +.spinner { + display: inline-block; + width: 16px; + height: 16px; + border: 2px solid var(--border-color); + border-top-color: var(--vortex-blue); + border-radius: 50%; + animation: spin 1s linear infinite; +} + +@keyframes spin { + to { transform: rotate(360deg); } +} + +/* Benchmarks Container - full width sections */ +.benchmarks-grid { + display: flex; + flex-direction: column; + gap: 24px; +} + +/* Benchmark Set */ +.benchmark-set { + background: var(--bg-color); + border-radius: var(--radius-lg); + border: 1px solid var(--border-color); + 
overflow: visible; + transition: all 0.3s ease; + box-shadow: var(--shadow-sm); +} + +.benchmark-set:hover { + box-shadow: var(--shadow-md); +} + +/* Benchmark Header (clickable) */ +.benchmark-header { + display: flex; + align-items: center; + justify-content: space-between; + padding: 16px 24px; + background: var(--bg-secondary); + border-bottom: 1px solid var(--border-color); + cursor: pointer; + user-select: none; + transition: background-color 0.2s; +} + +.benchmark-header:hover { + background: #F0F0F0; +} + +.title-wrapper { + display: flex; + align-items: center; + gap: 12px; +} + +.collapse-icon { + font-size: 0.875rem; + transition: transform 0.3s ease; + color: var(--text-secondary); +} + +.benchmark-set.collapsed .collapse-icon { + transform: rotate(-90deg); +} + +.benchmark-title { + font-family: "Funnel Display", -apple-system, BlinkMacSystemFont, sans-serif; + font-size: 1.25rem; + font-weight: 600; + color: var(--text-color); + margin: 0; + letter-spacing: -0.02em; +} + +.benchmark-meta { + font-size: 0.75rem; + font-weight: 500; + color: var(--text-secondary); + text-transform: uppercase; + letter-spacing: 0.02em; +} + +/* Summary Section (always visible) */ +.summary-section { + padding: 16px 24px; + background: var(--bg-secondary); + border-bottom: 1px solid var(--border-color); +} + +/* Scores List - 2 column layout */ +.scores-list { + column-count: 2; + column-gap: 20px; +} + +/* Single column for smaller screens */ +@media (max-width: 780px) { + .scores-list { + column-count: 1; + } +} + +/* Score Item - card style */ +.score-item { + display: flex; + align-items: center; + background: var(--bg-color); + padding: 10px 12px; + margin-bottom: 8px; + border-radius: var(--radius-sm); + border: 1px solid var(--border-color); + transition: all 0.2s ease; + font-size: 13px; + break-inside: avoid; +} + +.score-item:hover { + border-color: var(--primary-color); + background: var(--bg-secondary); +} + +.score-rank { + font-weight: 600; + color: 
var(--primary-color); + min-width: 30px; + font-size: 14px; +} + +.score-series { + flex: 1; + font-weight: 500; + color: var(--text-color); + margin: 0 8px; + font-size: 14px; + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; +} + +.score-metrics { + display: flex; + gap: 6px; + align-items: center; +} + +.score-runtime { + font-family: "SF Mono", "Monaco", "Inconsolata", monospace; + font-weight: 600; + color: var(--primary-color); + background: rgba(89, 113, 253, 0.1); + padding: 3px 10px; + border-radius: var(--radius-sm); + font-size: 13px; +} + +.score-ratio { + font-family: "SF Mono", "Monaco", "Inconsolata", monospace; + font-weight: 600; + color: var(--text-secondary); + background: var(--bg-secondary); + padding: 3px 10px; + border-radius: var(--radius-sm); + font-size: 13px; +} + +.scores-explanation { + margin-top: 12px; + font-size: 11px; + color: var(--text-secondary); + font-style: italic; + text-align: center; +} + +/* Benchmark Graphs (collapsible) */ +.benchmark-graphs { + padding: 24px; + background: var(--bg-color); +} + +.benchmark-set.collapsed .benchmark-graphs { + display: none; +} + +/* Chart Container */ +.chart-container { + background: var(--bg-color); + border: 1px solid var(--border-color); + border-radius: var(--radius-md); + padding: 20px; + transition: all 0.3s ease; +} + +.chart-container:hover { + box-shadow: var(--shadow-md); + border-color: var(--primary-color); +} + +.chart-header { + display: flex; + justify-content: space-between; + align-items: center; + margin-bottom: 1rem; +} + +.chart-title { + font-family: "Funnel Display", -apple-system, BlinkMacSystemFont, sans-serif; + font-size: 1rem; + font-weight: 500; + color: var(--text-color); + margin: 0; + letter-spacing: -0.01em; +} + +.chart-controls { + display: flex; + align-items: center; + gap: 12px; +} + +.control-info-compact { + color: var(--text-secondary); + font-size: 0.75rem; +} + +.zoom-controls { + display: flex; + gap: 4px; +} + +.zoom-btn { 
+ width: 28px; + height: 28px; + border: 1px solid var(--border-color); + border-radius: var(--radius-sm); + background: var(--bg-color); + color: var(--text-color); + font-size: 16px; + font-weight: 600; + cursor: pointer; + display: flex; + align-items: center; + justify-content: center; + transition: all 0.2s; +} + +.zoom-btn:hover { + background: var(--bg-secondary); + border-color: var(--primary-color); +} + +.chart-wrapper { + position: relative; + height: 400px; +} + +.x-axis-label { + text-align: center; + color: var(--text-secondary); + font-size: 0.625rem; + text-transform: uppercase; + letter-spacing: 0.05em; + margin-top: 0.25rem; + margin-bottom: 0.25rem; +} + +/* Custom Tooltip */ +.chartjs-tooltip { + position: absolute; + pointer-events: none; + opacity: 0; + transition: opacity 0.2s ease; + background: rgba(16, 16, 16, 0.9); + color: white; + border-radius: var(--radius-md); + padding: 12px; + font-size: 12px; + z-index: 1000; + box-shadow: var(--shadow-lg); +} + +.chartjs-tooltip.active { + opacity: 1; +} + +.chartjs-tooltip::before { + content: ''; + position: absolute; + top: -8px; + left: 50%; + transform: translateX(-50%); + border-left: 8px solid transparent; + border-right: 8px solid transparent; + border-bottom: 8px solid rgba(16, 16, 16, 0.9); +} + +.chartjs-tooltip-body { + display: flex; + flex-direction: column; + gap: 6px; +} + +.chartjs-tooltip-item { + display: flex; + align-items: center; + gap: 8px; +} + +.chartjs-tooltip-color { + width: 12px; + height: 12px; + border-radius: 2px; +} + +.chartjs-tooltip-footer { + margin-top: 10px; + padding-top: 10px; + border-top: 1px solid rgba(255, 255, 255, 0.2); + font-size: 11px; + opacity: 0.9; +} + +/* Timeline Scrollbar */ +.timeline-scrollbar-container { + overflow-x: auto; + overflow-y: hidden; + height: 16px; + background: transparent; + border-radius: var(--radius-md); + cursor: pointer; + margin-top: 0.5rem; +} + +.timeline-scrollbar-container::-webkit-scrollbar { + height: 16px; +} 
+ +.timeline-scrollbar-container::-webkit-scrollbar-track { + background: rgba(0, 0, 0, 0.05); + border-radius: var(--radius-md); +} + +.timeline-scrollbar-container::-webkit-scrollbar-thumb { + background: rgba(89, 113, 253, 0.3); + border-radius: var(--radius-md); + border: 3px solid transparent; + background-clip: padding-box; +} + +.timeline-scrollbar-container::-webkit-scrollbar-thumb:hover { + background: rgba(89, 113, 253, 0.5); + background-clip: padding-box; +} + +#timeline-scrollbar-content { + height: 1px; + pointer-events: none; +} + +/* Utility Classes */ +.hidden { + display: none !important; +} + +/* Responsive */ +@media (max-width: 768px) { + body { + padding: 1rem; + } + + .page-header h1 { + font-size: 1.5rem; + } + + .benchmark-header { + padding: 12px 16px; + } + + .benchmark-title { + font-size: 1.125rem; + } + + .summary-section { + padding: 12px 16px; + } + + .score-item { + padding: 8px 10px; + margin-bottom: 6px; + font-size: 12px; + } + + .score-rank { + font-size: 13px; + min-width: 28px; + } + + .score-series { + font-size: 13px; + margin: 0 6px; + } + + .score-runtime, + .score-ratio { + font-size: 12px; + padding: 2px 8px; + } + + .scores-explanation { + font-size: 10px; + margin-top: 8px; + } + + .benchmark-graphs { + padding: 16px; + } + + .chart-wrapper { + height: 300px; + } +} + +@media (min-width: 1600px) { + .container { + max-width: 1600px; + } + + body { + padding: 3rem 4rem; + } +} diff --git a/vortex-wasm/wasm-test.html b/vortex-wasm/wasm-test.html index a36f0b107be..e2da7e6df32 100644 --- a/vortex-wasm/wasm-test.html +++ b/vortex-wasm/wasm-test.html @@ -3,757 +3,70 @@ - Vortex WASM Random Access Benchmark + Vortex WASM Benchmarks + + + + -
-

Vortex WASM Random Access Benchmark

-

Loading benchmark data directly from a Vortex file using WebAssembly

+
Initializing WASM module...
- - -