From fbb0b39a29a4b3486adee2aacb38bdc33ac0efd8 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 4 May 2026 22:14:28 +0000 Subject: [PATCH] Benchmarks Website v3 Rewrites the benchmarks website. Replaces the static `data.json.gz` model with a single Rust server binary that owns a DuckDB database and accepts `POST /api/ingest` from CI. Design: - Single binary: axum + maud (SSR HTML) + DuckDB + Chart.js. All static assets `include_bytes!`'d. - 5 fact tables (compression time, query measurement, vector search, RAG, random access). Backup is a file copy. - Ingest: versioned JSON envelopes, bearer-token gated. - Migrator ports v2 history forward via a classifier that routes each record to a fact table or skips it with a typed reason. - Charts/groups slug-addressed, URL round-trip with no DB lookup. - Routes: `/`, `/chart/{slug}`, `/group/{slug}`, `GET /api/chart/{slug}`. - Deploy: one binary, one DuckDB file, one `INGEST_BEARER_TOKEN`. Signed-off-by: Claude --- .github/workflows/ci.yml | 3 +- .github/workflows/publish-bench-server.yml | 46 + .gitignore | 3 + Cargo.lock | 487 +++-- Cargo.toml | 3 + REUSE.toml | 2 +- _typos.toml | 2 +- benchmarks-website/docker-compose.yml | 14 + benchmarks-website/ec2-init.txt | 55 +- benchmarks-website/migrate/Cargo.toml | 41 + benchmarks-website/migrate/build.rs | 8 + benchmarks-website/migrate/src/classifier.rs | 853 ++++++++ benchmarks-website/migrate/src/commits.rs | 95 + benchmarks-website/migrate/src/lib.rs | 27 + benchmarks-website/migrate/src/main.rs | 114 + .../migrate/src/migrate/accum.rs | 357 ++++ benchmarks-website/migrate/src/migrate/mod.rs | 637 ++++++ benchmarks-website/migrate/src/source.rs | 140 ++ benchmarks-website/migrate/src/v2.rs | 165 ++ benchmarks-website/migrate/src/verify.rs | 352 +++ .../migrate/tests/classifier.rs | 531 +++++ .../migrate/tests/end_to_end.rs | 450 ++++ benchmarks-website/planning/00-overview.md | 104 + benchmarks-website/planning/01-schema.md | 228 ++ benchmarks-website/planning/02-contracts.md | 227 ++ benchmarks-website/planning/AGENTS.md | 172 ++ benchmarks-website/planning/README.md | 188 ++ .../planning/benchmark-mapping.md | 147 ++ .../planning/components/emitter.md | 86 + .../planning/components/server.md | 70 + .../planning/components/web-ui.md | 62 + benchmarks-website/planning/decisions.md | 95 + benchmarks-website/planning/deferred.md | 118 ++ benchmarks-website/server/Cargo.toml | 48 + benchmarks-website/server/Dockerfile | 46 + benchmarks-website/server/build.rs | 8 + .../server/fixtures/envelope.json | 71 + benchmarks-website/server/scripts/backup.sh | 46 + benchmarks-website/server/src/api/charts.rs | 614 ++++++ .../server/src/api/descriptions.rs | 214 ++ benchmarks-website/server/src/api/dto.rs | 357 ++++ benchmarks-website/server/src/api/filter.rs | 50 + benchmarks-website/server/src/api/groups.rs | 353 +++ benchmarks-website/server/src/api/mod.rs | 142 ++ benchmarks-website/server/src/api/summary.rs | 377 ++++ benchmarks-website/server/src/api/window.rs | 200 ++ benchmarks-website/server/src/app.rs | 76 + benchmarks-website/server/src/auth.rs | 43 + benchmarks-website/server/src/db.rs | 161 ++ benchmarks-website/server/src/error.rs | 117 + benchmarks-website/server/src/html/chart.rs | 89 + benchmarks-website/server/src/html/filter.rs | 96 + benchmarks-website/server/src/html/landing.rs | 154 ++ benchmarks-website/server/src/html/mod.rs | 409 ++++ benchmarks-website/server/src/html/render.rs | 236 +++ .../server/src/html/static_assets.rs | 67 + benchmarks-website/server/src/html/summary.rs | 150 ++ 
benchmarks-website/server/src/html/toolbar.rs | 58 + benchmarks-website/server/src/ingest.rs | 393 ++++ benchmarks-website/server/src/lib.rs | 19 + benchmarks-website/server/src/main.rs | 42 + benchmarks-website/server/src/records.rs | 173 ++ benchmarks-website/server/src/schema.rs | 93 + benchmarks-website/server/src/slug.rs | 237 +++ .../static/CHARTJS_PLUGIN_ZOOM_LICENSE.md | 9 + .../server/static/CHART_JS_LICENSE.md | 9 + .../server/static/chart-init.js | 1885 +++++++++++++++++ benchmarks-website/server/static/chart.umd.js | 14 + .../static/chartjs-plugin-zoom.umd.min.js | 7 + benchmarks-website/server/static/style.css | 1072 ++++++++++ benchmarks-website/server/tests/chart_api.rs | 295 +++ benchmarks-website/server/tests/common/mod.rs | 477 +++++ benchmarks-website/server/tests/group_api.rs | 148 ++ benchmarks-website/server/tests/ingest.rs | 314 +++ benchmarks-website/server/tests/landing.rs | 504 +++++ benchmarks-website/server/tests/permalinks.rs | 120 ++ .../tests/snapshots/chart_page_query.snap | 5 + .../tests/snapshots/group_page_query.snap | 5 + .../server/tests/snapshots/landing_page.snap | 5 + .../snapshots/landing_page_filter_bar.snap | 5 + .../server/tests/static_assets.rs | 158 ++ benchmarks-website/server/tests/web_ui.rs | 502 +++++ benchmarks-website/src/styles/index.css | 6 +- ...ement_clickbench_no_memory@clickbench.snap | 1 - vortex-bench/src/v3.rs | 191 +- 85 files changed, 16483 insertions(+), 270 deletions(-) create mode 100644 .github/workflows/publish-bench-server.yml create mode 100644 benchmarks-website/migrate/Cargo.toml create mode 100644 benchmarks-website/migrate/build.rs create mode 100644 benchmarks-website/migrate/src/classifier.rs create mode 100644 benchmarks-website/migrate/src/commits.rs create mode 100644 benchmarks-website/migrate/src/lib.rs create mode 100644 benchmarks-website/migrate/src/main.rs create mode 100644 benchmarks-website/migrate/src/migrate/accum.rs create mode 100644 benchmarks-website/migrate/src/migrate/mod.rs create mode 100644 benchmarks-website/migrate/src/source.rs create mode 100644 benchmarks-website/migrate/src/v2.rs create mode 100644 benchmarks-website/migrate/src/verify.rs create mode 100644 benchmarks-website/migrate/tests/classifier.rs create mode 100644 benchmarks-website/migrate/tests/end_to_end.rs create mode 100644 benchmarks-website/planning/00-overview.md create mode 100644 benchmarks-website/planning/01-schema.md create mode 100644 benchmarks-website/planning/02-contracts.md create mode 100644 benchmarks-website/planning/AGENTS.md create mode 100644 benchmarks-website/planning/README.md create mode 100644 benchmarks-website/planning/benchmark-mapping.md create mode 100644 benchmarks-website/planning/components/emitter.md create mode 100644 benchmarks-website/planning/components/server.md create mode 100644 benchmarks-website/planning/components/web-ui.md create mode 100644 benchmarks-website/planning/decisions.md create mode 100644 benchmarks-website/planning/deferred.md create mode 100644 benchmarks-website/server/Cargo.toml create mode 100644 benchmarks-website/server/Dockerfile create mode 100644 benchmarks-website/server/build.rs create mode 100644 benchmarks-website/server/fixtures/envelope.json create mode 100755 benchmarks-website/server/scripts/backup.sh create mode 100644 benchmarks-website/server/src/api/charts.rs create mode 100644 benchmarks-website/server/src/api/descriptions.rs create mode 100644 benchmarks-website/server/src/api/dto.rs create mode 100644 benchmarks-website/server/src/api/filter.rs 
create mode 100644 benchmarks-website/server/src/api/groups.rs create mode 100644 benchmarks-website/server/src/api/mod.rs create mode 100644 benchmarks-website/server/src/api/summary.rs create mode 100644 benchmarks-website/server/src/api/window.rs create mode 100644 benchmarks-website/server/src/app.rs create mode 100644 benchmarks-website/server/src/auth.rs create mode 100644 benchmarks-website/server/src/db.rs create mode 100644 benchmarks-website/server/src/error.rs create mode 100644 benchmarks-website/server/src/html/chart.rs create mode 100644 benchmarks-website/server/src/html/filter.rs create mode 100644 benchmarks-website/server/src/html/landing.rs create mode 100644 benchmarks-website/server/src/html/mod.rs create mode 100644 benchmarks-website/server/src/html/render.rs create mode 100644 benchmarks-website/server/src/html/static_assets.rs create mode 100644 benchmarks-website/server/src/html/summary.rs create mode 100644 benchmarks-website/server/src/html/toolbar.rs create mode 100644 benchmarks-website/server/src/ingest.rs create mode 100644 benchmarks-website/server/src/lib.rs create mode 100644 benchmarks-website/server/src/main.rs create mode 100644 benchmarks-website/server/src/records.rs create mode 100644 benchmarks-website/server/src/schema.rs create mode 100644 benchmarks-website/server/src/slug.rs create mode 100644 benchmarks-website/server/static/CHARTJS_PLUGIN_ZOOM_LICENSE.md create mode 100644 benchmarks-website/server/static/CHART_JS_LICENSE.md create mode 100644 benchmarks-website/server/static/chart-init.js create mode 100644 benchmarks-website/server/static/chart.umd.js create mode 100644 benchmarks-website/server/static/chartjs-plugin-zoom.umd.min.js create mode 100644 benchmarks-website/server/static/style.css create mode 100644 benchmarks-website/server/tests/chart_api.rs create mode 100644 benchmarks-website/server/tests/common/mod.rs create mode 100644 benchmarks-website/server/tests/group_api.rs create mode 100644 benchmarks-website/server/tests/ingest.rs create mode 100644 benchmarks-website/server/tests/landing.rs create mode 100644 benchmarks-website/server/tests/permalinks.rs create mode 100644 benchmarks-website/server/tests/snapshots/chart_page_query.snap create mode 100644 benchmarks-website/server/tests/snapshots/group_page_query.snap create mode 100644 benchmarks-website/server/tests/snapshots/landing_page.snap create mode 100644 benchmarks-website/server/tests/snapshots/landing_page_filter_bar.snap create mode 100644 benchmarks-website/server/tests/static_assets.rs create mode 100644 benchmarks-website/server/tests/web_ui.rs diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7ce22431213..7da0f8a015a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -358,7 +358,8 @@ jobs: if: matrix.os == 'windows-x64' run: | cargo nextest run --cargo-profile ci --locked --workspace --all-features --no-fail-fast ` - --exclude vortex-bench --exclude vortex-python --exclude vortex-duckdb ` + --exclude vortex-bench --exclude vortex-bench-server ` + --exclude vortex-python --exclude vortex-duckdb ` --exclude vortex-fuzz --exclude vortex-cuda --exclude vortex-nvcomp ` --exclude vortex-cub --exclude vortex-test-e2e-cuda --exclude duckdb-bench ` --exclude lance-bench --exclude datafusion-bench --exclude random-access-bench ` diff --git a/.github/workflows/publish-bench-server.yml b/.github/workflows/publish-bench-server.yml new file mode 100644 index 00000000000..22aad1135b8 --- /dev/null +++ 
b/.github/workflows/publish-bench-server.yml @@ -0,0 +1,46 @@ +name: Publish Bench Server + +on: + push: + branches: [develop] + paths: + - "benchmarks-website/server/**" + - "vortex-bench/**" + - "Cargo.lock" + - ".github/workflows/publish-bench-server.yml" + workflow_dispatch: { } + +jobs: + publish: + runs-on: ubuntu-latest + timeout-minutes: 30 + permissions: + contents: read + packages: write + id-token: write + steps: + - uses: actions/checkout@v6 + + - name: Log in to GHCR + uses: docker/login-action@v4 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Set up QEMU + uses: docker/setup-qemu-action@v4 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v4 + + - name: Build and push + uses: docker/build-push-action@v7 + with: + context: . + file: ./benchmarks-website/server/Dockerfile + platforms: linux/arm64 + push: true + tags: | + ghcr.io/${{ github.repository }}/vortex-bench-server:latest + ghcr.io/${{ github.repository }}/vortex-bench-server:${{ github.sha }} diff --git a/.gitignore b/.gitignore index 7fa79fb2162..bcc8ef746ee 100644 --- a/.gitignore +++ b/.gitignore @@ -242,3 +242,6 @@ trace*.pb # pytest-benchmark output vortex-python/.benchmarks/ +# For local benchmarks website server and things like the WAL +**.duckdb* +.bench-env diff --git a/Cargo.lock b/Cargo.lock index c624e3ae1e9..2c71ecfc28c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -197,6 +197,9 @@ name = "arbitrary" version = "1.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c3d036a3c4ab069c7b410a2ce876bd74808d2d0888a82667669f8e783a898bf1" +dependencies = [ + "derive_arbitrary", +] [[package]] name = "arc-swap" @@ -687,9 +690,9 @@ dependencies = [ [[package]] name = "async-compression" -version = "0.4.41" +version = "0.4.42" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0f9ee0f6e02ffd7ad5816e9464499fba7b3effd01123b515c41d1697c43dad1" +checksum = "e79b3f8a79cccc2898f31920fc69f304859b3bd567490f75ebf51ae1c792a9ac" dependencies = [ "compression-codecs", "compression-core", @@ -900,6 +903,58 @@ dependencies = [ "fs_extra", ] +[[package]] +name = "axum" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "31b698c5f9a010f6573133b09e0de5408834d0c82f8d7475a89fc1867a71cd90" +dependencies = [ + "axum-core", + "bytes", + "form_urlencoded", + "futures-util", + "http", + "http-body", + "http-body-util", + "hyper", + "hyper-util", + "itoa", + "matchit", + "memchr", + "mime", + "percent-encoding", + "pin-project-lite", + "serde_core", + "serde_json", + "serde_path_to_error", + "serde_urlencoded", + "sync_wrapper", + "tokio", + "tower", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "axum-core" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08c78f31d7b1291f7ee735c1c6780ccde7785daae9a9206026862dab7d8792d1" +dependencies = [ + "bytes", + "futures-core", + "http", + "http-body", + "http-body-util", + "mime", + "pin-project-lite", + "sync_wrapper", + "tower-layer", + "tower-service", + "tracing", +] + [[package]] name = "base16ct" version = "1.0.0" @@ -1025,9 +1080,9 @@ dependencies = [ [[package]] name = "blake3" -version = "1.8.4" +version = "1.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d2d5991425dfd0785aed03aedcf0b321d61975c9b5b3689c774a2610ae0b51e" +checksum = "0aa83c34e62843d924f905e0f5c866eb1dd6545fc4d719e803d9ba6030371fce" 
dependencies = [ "arrayref", "arrayvec", @@ -1314,12 +1369,6 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4f4c707c6a209cbe82d10abd08e1ea8995e9ea937d2550646e02798948992be0" -[[package]] -name = "cesu8" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d43a04d8753f35258c91f8ec639f792891f748a1edbd759cf1dcea3382ad83c" - [[package]] name = "cexpr" version = "0.6.0" @@ -1606,10 +1655,11 @@ dependencies = [ [[package]] name = "comfy-table" -version = "7.2.2" +version = "7.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "958c5d6ecf1f214b4c2bbbbf6ab9523a864bd136dcf71a7e8904799acfe1ad47" +checksum = "4a65ebfec4fb190b6f90e944a817d60499ee0744e582530e2c9900a22e591d9a" dependencies = [ + "crossterm 0.28.1", "unicode-segmentation", "unicode-width 0.2.2", ] @@ -1652,10 +1702,11 @@ dependencies = [ [[package]] name = "compression-codecs" -version = "0.4.37" +version = "0.4.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb7b51a7d9c967fc26773061ba86150f19c50c0d65c887cb1fbe295fd16619b7" +checksum = "ce2548391e9c1929c21bf6aa2680af86fe4c1b33e6cea9ac1cfeec0bd11218cf" dependencies = [ + "brotli", "bzip2", "compression-core", "flate2", @@ -1667,9 +1718,9 @@ dependencies = [ [[package]] name = "compression-core" -version = "0.4.31" +version = "0.4.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75984efb6ed102a0d42db99afb6c1948f0380d1d91808d5529916e6c08b49d8d" +checksum = "cc14f565cf027a105f7a44ccf9e5b424348421a1d8952a8fc9d499d313107789" [[package]] name = "concurrent-queue" @@ -1910,6 +1961,19 @@ version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" +[[package]] +name = "crossterm" +version = "0.28.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "829d955a0bb380ef178a640b91779e3987da38c9aea133b20614cfed8cdea9c6" +dependencies = [ + "bitflags", + "crossterm_winapi", + "parking_lot", + "rustix 0.38.44", + "winapi", +] + [[package]] name = "crossterm" version = "0.29.0" @@ -3550,6 +3614,17 @@ dependencies = [ "serde_core", ] +[[package]] +name = "derive_arbitrary" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e567bd82dcff979e4b03460c307b3cdc9e96fde3d73bed1496d2bc75d9dd62a" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "derive_more" version = "2.1.1" @@ -3660,6 +3735,25 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab23e69df104e2fd85ee63a533a22d2132ef5975dc6b36f9f3e5a7305e4a8ed7" +[[package]] +name = "duckdb" +version = "1.10502.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fdc796383b176dd5a45353fbb5e64583c0ee4da12cb62c9e510b785324b2488" +dependencies = [ + "arrow 58.1.0", + "cast", + "comfy-table", + "fallible-iterator", + "fallible-streaming-iterator", + "hashlink", + "libduckdb-sys", + "num", + "num-integer", + "rust_decimal", + "strum 0.27.2", +] + [[package]] name = "duckdb-bench" version = "0.1.0" @@ -3861,6 +3955,18 @@ dependencies = [ "ext-trait", ] +[[package]] +name = "fallible-iterator" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649" + +[[package]] +name = 
"fallible-streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" + [[package]] name = "fast-float2" version = "0.2.3" @@ -4356,6 +4462,15 @@ dependencies = [ "foldhash 0.2.0", ] +[[package]] +name = "hashlink" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7382cf6263419f2d8df38c55d7da83da5c18aef87fc7a7fc1fb1e344edfe14c1" +dependencies = [ + "hashbrown 0.15.5", +] + [[package]] name = "heck" version = "0.5.0" @@ -4428,6 +4543,12 @@ version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" +[[package]] +name = "httpdate" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" + [[package]] name = "humansize" version = "2.1.3" @@ -4445,9 +4566,9 @@ checksum = "135b12329e5e3ce057a9f972339ea52bc954fe1e9358ef27f95e89716fbc5424" [[package]] name = "hybrid-array" -version = "0.4.10" +version = "0.4.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3944cf8cf766b40e2a1a333ee5e9b563f854d5fa49d6a8ca2764e97c6eddb214" +checksum = "08d46837a0ed51fe95bd3b05de33cd64a1ee88fc797477ca48446872504507c5" dependencies = [ "typenum", ] @@ -4466,6 +4587,7 @@ dependencies = [ "http", "http-body", "httparse", + "httpdate", "itoa", "pin-project-lite", "smallvec", @@ -4487,6 +4609,7 @@ dependencies = [ "tokio", "tokio-rustls", "tower-service", + "webpki-roots", ] [[package]] @@ -4654,9 +4777,9 @@ dependencies = [ [[package]] name = "idna_adapter" -version = "1.2.1" +version = "1.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3acae9609540aa318d1bc588455225fb2085b9ed0c4f6bd0d9d5bcd86f1a0344" +checksum = "cb68373c0d6620ef8105e855e7745e18b0d00d3bdb07fb532e434244cdb9a714" dependencies = [ "icu_normalizer", "icu_properties", @@ -4891,22 +5014,6 @@ dependencies = [ "jiff-tzdb", ] -[[package]] -name = "jni" -version = "0.21.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a87aa2bb7d2af34197c04845522473242e1aa17c12f4935d5856491a7fb8c97" -dependencies = [ - "cesu8", - "cfg-if", - "combine", - "jni-sys 0.3.1", - "log", - "thiserror 1.0.69", - "walkdir", - "windows-sys 0.45.0", -] - [[package]] name = "jni" version = "0.22.4" @@ -4917,7 +5024,7 @@ dependencies = [ "combine", "java-locator", "jni-macros", - "jni-sys 0.4.1", + "jni-sys", "libloading 0.8.9", "log", "simd_cesu8", @@ -4939,15 +5046,6 @@ dependencies = [ "syn 2.0.117", ] -[[package]] -name = "jni-sys" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41a652e1f9b6e0275df1f15b32661cf0d4b78d4d87ddec5e0c3c20f097433258" -dependencies = [ - "jni-sys 0.4.1", -] - [[package]] name = "jni-sys" version = "0.4.1" @@ -5634,9 +5732,26 @@ checksum = "b3a6a8c165077efc8f3a971534c50ea6a1a18b329ef4a66e897a7e3a1494565f" [[package]] name = "libc" -version = "0.2.185" +version = "0.2.186" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52ff2c0fe9bc6cb6b14a0592c2ff4fa9ceb83eea9db979b0487cd054946a2b8f" +checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66" + +[[package]] +name = "libduckdb-sys" +version = "1.10502.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"8d7401630ae2abcff642f7156294289e50f2d222e061c026ad797b01bf20c215" +dependencies = [ + "cc", + "flate2", + "pkg-config", + "reqwest 0.12.28", + "serde", + "serde_json", + "tar", + "vcpkg", + "zip 6.0.0", +] [[package]] name = "libfuzzer-sys" @@ -5898,6 +6013,12 @@ dependencies = [ "regex-automata", ] +[[package]] +name = "matchit" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47e1ffaa40ddd1f3ed91f717a33c8c0ee23fff369e3aa8772b9605cc1d22f4c3" + [[package]] name = "matrixmultiply" version = "0.3.10" @@ -5911,6 +6032,30 @@ dependencies = [ "thread-tree", ] +[[package]] +name = "maud" +version = "0.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8156733e27020ea5c684db5beac5d1d611e1272ab17901a49466294b84fc217e" +dependencies = [ + "axum-core", + "http", + "itoa", + "maud_macros", +] + +[[package]] +name = "maud_macros" +version = "0.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7261b00f3952f617899bc012e3dbd56e4f0110a038175929fa5d18e5a19913ca" +dependencies = [ + "proc-macro2", + "proc-macro2-diagnostics", + "quote", + "syn 2.0.117", +] + [[package]] name = "md-5" version = "0.10.6" @@ -6221,6 +6366,20 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "num" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" +dependencies = [ + "num-bigint", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", +] + [[package]] name = "num-bigint" version = "0.4.6" @@ -6256,6 +6415,28 @@ dependencies = [ "num-traits", ] +[[package]] +name = "num-iter" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" +dependencies = [ + "num-bigint", + "num-integer", + "num-traits", +] + [[package]] name = "num-traits" version = "0.2.19" @@ -7064,6 +7245,18 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "proc-macro2-diagnostics" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af066a9c399a26e020ada66a034357a868728e72cd426f3adcd35f80d88d88c8" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", + "version_check", +] + [[package]] name = "prost" version = "0.12.6" @@ -7594,7 +7787,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "577c9b9f652b4c121fb25c6a391dd06406d3b092ba68827e6d2f09550edc54b3" dependencies = [ "cfg-if", - "crossterm", + "crossterm 0.29.0", "instability", "ratatui-core", ] @@ -7833,13 +8026,14 @@ dependencies = [ "wasm-bindgen-futures", "wasm-streams 0.4.2", "web-sys", + "webpki-roots", ] [[package]] name = "reqwest" -version = "0.13.2" +version = "0.13.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab3f43e3283ab1488b624b44b0e988d0acea0b3214e694730a055cb6b2efa801" +checksum = "62e0021ea2c22aed41653bc7e1419abb2c97e038ff2c33d0e1309e49a97deec0" dependencies = [ "base64", "bytes", @@ -7863,6 +8057,8 @@ dependencies = [ "rustls", "rustls-pki-types", "rustls-platform-verifier", + "serde", + "serde_json", "sync_wrapper", "tokio", 
"tokio-rustls", @@ -8056,9 +8252,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.38" +version = "0.23.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69f9466fb2c14ea04357e91413efb882e2a6d4a406e625449bc0a5d360d53a21" +checksum = "7c2c118cb077cca2822033836dfb1b975355dfb784b5e8da48f7b6c5db74e60e" dependencies = [ "aws-lc-rs", "once_cell", @@ -8083,9 +8279,9 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.14.0" +version = "1.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be040f8b0a225e40375822a563fa9524378b9d63112f53e19ffff34df5d33fdd" +checksum = "30a7197ae7eb376e574fe940d068c30fe0462554a3ddbe4eca7838e049c937a9" dependencies = [ "web-time", "zeroize", @@ -8093,13 +8289,13 @@ dependencies = [ [[package]] name = "rustls-platform-verifier" -version = "0.6.2" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d99feebc72bae7ab76ba994bb5e121b8d83d910ca40b36e0921f53becc41784" +checksum = "26d1e2536ce4f35f4846aa13bff16bd0ff40157cdb14cc056c7b14ba41233ba0" dependencies = [ "core-foundation 0.10.1", "core-foundation-sys", - "jni 0.21.1", + "jni", "log", "once_cell", "rustls", @@ -8338,6 +8534,17 @@ dependencies = [ "zmij", ] +[[package]] +name = "serde_path_to_error" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10a9ff822e371bb5403e391ecd83e182e0e77ba7f6fe0160b795797109d1b457" +dependencies = [ + "itoa", + "serde", + "serde_core", +] + [[package]] name = "serde_repr" version = "0.1.20" @@ -9246,7 +9453,7 @@ dependencies = [ "chrono", "num_cpus", "ping", - "reqwest 0.13.2", + "reqwest 0.13.3", "sysinfo", "test-with-derive", "uzers", @@ -9267,7 +9474,7 @@ dependencies = [ "proc-macro2", "quote", "regex", - "reqwest 0.13.2", + "reqwest 0.13.3", "syn 2.0.117", "sysinfo", "uzers", @@ -9608,6 +9815,7 @@ dependencies = [ "tokio", "tower-layer", "tower-service", + "tracing", ] [[package]] @@ -9631,6 +9839,7 @@ dependencies = [ "tower", "tower-layer", "tower-service", + "tracing", ] [[package]] @@ -9966,6 +10175,12 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + [[package]] name = "vector-search-bench" version = "0.1.0" @@ -10165,7 +10380,7 @@ dependencies = [ "parquet 58.1.0", "rand 0.10.1", "regex", - "reqwest 0.13.2", + "reqwest 0.13.3", "serde", "serde_json", "sysinfo", @@ -10185,6 +10400,54 @@ dependencies = [ "vortex-tensor", ] +[[package]] +name = "vortex-bench-migrate" +version = "0.1.0-alpha.0" +dependencies = [ + "anyhow", + "arrow-array 58.1.0", + "arrow-buffer 58.1.0", + "arrow-schema 58.1.0", + "clap", + "duckdb", + "flate2", + "reqwest 0.13.3", + "rstest", + "serde", + "serde_json", + "tempfile", + "tokio", + "tracing", + "tracing-subscriber", + "vortex-bench-server", + "vortex-utils", +] + +[[package]] +name = "vortex-bench-server" +version = "0.1.0-alpha.0" +dependencies = [ + "anyhow", + "axum", + "base64", + "duckdb", + "flate2", + "insta", + "maud", + "reqwest 0.13.3", + "serde", + "serde_json", + "subtle", + "tempfile", + "thiserror 2.0.18", + "tokio", + "tower", + "tower-http", + "tracing", + "tracing-subscriber", + "twox-hash", +] + [[package]] name = "vortex-btrblocks" 
version = "0.1.0" @@ -10261,7 +10524,7 @@ dependencies = [ "clap", "futures", "parquet 58.1.0", - "reqwest 0.13.2", + "reqwest 0.13.3", "serde", "serde_json", "sha2 0.11.0", @@ -10443,7 +10706,7 @@ dependencies = [ "object_store 0.13.2", "parking_lot", "paste", - "reqwest 0.13.2", + "reqwest 0.13.3", "rstest", "tempfile", "tracing", @@ -10453,7 +10716,7 @@ dependencies = [ "vortex-runend", "vortex-sequence", "vortex-utils", - "zip", + "zip 8.6.0", ] [[package]] @@ -10665,7 +10928,7 @@ dependencies = [ "arrow-array 58.1.0", "arrow-schema 58.1.0", "futures", - "jni 0.22.4", + "jni", "object_store 0.13.2", "parking_lot", "thiserror 2.0.18", @@ -10746,7 +11009,7 @@ dependencies = [ "bindgen", "libloading 0.8.9", "liblzma", - "reqwest 0.13.2", + "reqwest 0.13.3", "tar", "vortex-cuda-macros", ] @@ -10958,7 +11221,7 @@ dependencies = [ "arrow-schema 58.1.0", "clap", "console_error_panic_hook", - "crossterm", + "crossterm 0.29.0", "datafusion 53.1.0", "env_logger", "flatbuffers", @@ -11091,6 +11354,7 @@ dependencies = [ "cfg-if", "once_cell", "rustversion", + "serde", "wasm-bindgen-macro", "wasm-bindgen-shared", ] @@ -11226,6 +11490,15 @@ dependencies = [ "rustls-pki-types", ] +[[package]] +name = "webpki-roots" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52f5ee44c96cf55f1b349600768e3ece3a8f26010c05265ab73f945bb1a2eb9d" +dependencies = [ + "rustls-pki-types", +] + [[package]] name = "which" version = "8.0.2" @@ -11378,15 +11651,6 @@ dependencies = [ "windows-link", ] -[[package]] -name = "windows-sys" -version = "0.45.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" -dependencies = [ - "windows-targets 0.42.2", -] - [[package]] name = "windows-sys" version = "0.52.0" @@ -11423,21 +11687,6 @@ dependencies = [ "windows-link", ] -[[package]] -name = "windows-targets" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" -dependencies = [ - "windows_aarch64_gnullvm 0.42.2", - "windows_aarch64_msvc 0.42.2", - "windows_i686_gnu 0.42.2", - "windows_i686_msvc 0.42.2", - "windows_x86_64_gnu 0.42.2", - "windows_x86_64_gnullvm 0.42.2", - "windows_x86_64_msvc 0.42.2", -] - [[package]] name = "windows-targets" version = "0.52.6" @@ -11480,12 +11729,6 @@ dependencies = [ "windows-link", ] -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" - [[package]] name = "windows_aarch64_gnullvm" version = "0.52.6" @@ -11498,12 +11741,6 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" -[[package]] -name = "windows_aarch64_msvc" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" - [[package]] name = "windows_aarch64_msvc" version = "0.52.6" @@ -11516,12 +11753,6 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" -[[package]] -name = "windows_i686_gnu" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" - [[package]] name = "windows_i686_gnu" version = "0.52.6" @@ -11546,12 +11777,6 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" -[[package]] -name = "windows_i686_msvc" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" - [[package]] name = "windows_i686_msvc" version = "0.52.6" @@ -11564,12 +11789,6 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" -[[package]] -name = "windows_x86_64_gnu" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" - [[package]] name = "windows_x86_64_gnu" version = "0.52.6" @@ -11582,12 +11801,6 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" - [[package]] name = "windows_x86_64_gnullvm" version = "0.52.6" @@ -11600,12 +11813,6 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" -[[package]] -name = "windows_x86_64_msvc" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" - [[package]] name = "windows_x86_64_msvc" version = "0.52.6" @@ -11903,6 +12110,20 @@ dependencies = [ "num-traits", ] +[[package]] +name = "zip" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb2a05c7c36fde6c09b08576c9f7fb4cda705990f73b58fe011abf7dfb24168b" +dependencies = [ + "arbitrary", + "crc32fast", + "flate2", + "indexmap", + "memchr", + "zopfli", +] + [[package]] name = "zip" version = "8.6.0" diff --git a/Cargo.toml b/Cargo.toml index 35179561e14..129ef446e4c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -60,6 +60,9 @@ members = [ "benchmarks/duckdb-bench", "benchmarks/random-access-bench", "benchmarks/vector-search-bench", + # Benchmarks website v3 (alpha) - leaf binary, not part of vortex-* API + "benchmarks-website/server", + "benchmarks-website/migrate", ] exclude = ["java/testfiles", "wasm-test"] resolver = "2" diff --git a/REUSE.toml b/REUSE.toml index 161f6e3086a..8e406c95c90 100644 --- a/REUSE.toml +++ b/REUSE.toml @@ -36,7 +36,7 @@ SPDX-FileCopyrightText = "Copyright the Vortex contributors" SPDX-License-Identifier = "CC-BY-4.0" [[annotations]] -path = ["**/.gitignore", ".gitmodules", ".python-version", "**/*.lock", "**/*.lockfile", "**/*.toml", "**/*.json", ".idea/**", ".github/**", "codecov.yml", "java/gradle/wrapper/gradle-wrapper.properties"] +path = ["**/.gitignore", ".gitmodules", ".python-version", "**/*.lock", "**/*.lockfile", "**/*.toml", "**/*.json", ".idea/**", ".github/**", "codecov.yml", "java/gradle/wrapper/gradle-wrapper.properties", "**.duckdb*"] precedence = "override" SPDX-FileCopyrightText = "Copyright the Vortex contributors" 
SPDX-License-Identifier = "Apache-2.0" diff --git a/_typos.toml b/_typos.toml index 62c3b0d6358..e2a5ff330b8 100644 --- a/_typos.toml +++ b/_typos.toml @@ -8,7 +8,7 @@ extend-ignore-re = [ ] [files] -extend-exclude = ["/vortex-bench/**", "/docs/references.bib", "benchmarks/**", "vortex-sqllogictest/slt/**", "encodings/fsst/src/dfa/tests.rs", "encodings/fsst/src/dfa/flat_contains.rs"] +extend-exclude = ["/vortex-bench/**", "/docs/references.bib", "benchmarks/**", "vortex-sqllogictest/slt/**", "encodings/fsst/src/dfa/tests.rs", "encodings/fsst/src/dfa/flat_contains.rs", "benchmarks-website/server/static/**", "benchmarks-website/server/tests/snapshots/**"] [type.py] extend-ignore-identifiers-re = [ diff --git a/benchmarks-website/docker-compose.yml b/benchmarks-website/docker-compose.yml index 4c2e9682329..b97482a230a 100644 --- a/benchmarks-website/docker-compose.yml +++ b/benchmarks-website/docker-compose.yml @@ -5,6 +5,20 @@ services: - "80:3000" restart: unless-stopped + vortex-bench-server: + image: ghcr.io/vortex-data/vortex/vortex-bench-server:latest + ports: + - "3001:3000" + environment: + VORTEX_BENCH_DB: "/app/data/bench.duckdb" + VORTEX_BENCH_BIND: "0.0.0.0:3000" + VORTEX_BENCH_LOG: "info,vortex_bench_server=debug" + env_file: + - /etc/vortex-bench/secrets.env + volumes: + - /opt/benchmarks-website/data:/app/data + restart: unless-stopped + watchtower: image: containrrr/watchtower volumes: diff --git a/benchmarks-website/ec2-init.txt b/benchmarks-website/ec2-init.txt index 1c2459b3bee..4e1377cc014 100644 --- a/benchmarks-website/ec2-init.txt +++ b/benchmarks-website/ec2-init.txt @@ -14,4 +14,57 @@ sudo mkdir -p /opt/benchmarks-website sudo cp docker-compose.yml /opt/benchmarks-website/ cd /opt/benchmarks-website - docker compose up -d \ No newline at end of file + docker compose up -d + + ==================================================================== + v3 (vortex-bench-server) — additive setup, runs alongside v2 + ==================================================================== + + v2 stays on port 80 until DNS is flipped. v3 runs on port 3001 from + the same docker-compose.yml on this host. + + 4. Create the bearer-token env file (root:root, mode 600) + sudo mkdir -p /etc/vortex-bench + sudo install -m 600 -o root -g root /dev/null /etc/vortex-bench/secrets.env + # Edit and set INGEST_BEARER_TOKEN=: + sudo vi /etc/vortex-bench/secrets.env + # File contents: + # INGEST_BEARER_TOKEN= + + 5. Create the EBS-backed DuckDB data directory + # Assumes an EBS volume is already mounted at /opt/benchmarks-website/data. + sudo mkdir -p /opt/benchmarks-website/data + sudo chown root:root /opt/benchmarks-website/data + sudo chmod 755 /opt/benchmarks-website/data + + 6. Pull and start v3 (watchtower already polls ghcr.io for refreshes) + cd /opt/benchmarks-website + docker compose pull vortex-bench-server + docker compose up -d vortex-bench-server + # Smoke-check on the host: + curl -sf http://127.0.0.1:3001/health || echo "v3 not responding" + + 7. Install the daily DuckDB backup cron + # Copy the backup script from the repo checkout to a stable location. + sudo install -m 755 -o root -g root \ + benchmarks-website/server/scripts/backup.sh \ + /usr/local/bin/vortex-bench-backup.sh + # Cron entry: 06:00 UTC daily, after the nightly bench finishes. 
+ sudo tee /etc/cron.d/vortex-bench-backup >/dev/null <<'CRON' + 0 6 * * * root /usr/local/bin/vortex-bench-backup.sh >> /var/log/vortex-bench-backup.log 2>&1 + CRON + sudo chmod 644 /etc/cron.d/vortex-bench-backup + # The instance IAM role already permits writes to + # s3://vortex-ci-benchmark-results/ (same role v2's cat-s3.sh uses). + + 8. Bearer-token rotation procedure + # When rotating INGEST_BEARER_TOKEN: + # a. Generate a new token (e.g. `openssl rand -hex 32`). + # b. Update the GitHub Actions Environment secret INGEST_BEARER_TOKEN + # so CI dual-writes use the new value. + # c. On this EC2 host, edit the env file and restart only the v3 + # container so v2 traffic on port 80 is unaffected: + # sudo vi /etc/vortex-bench/secrets.env + # cd /opt/benchmarks-website + # docker compose up -d --force-recreate vortex-bench-server + # d. Verify with `curl` against /health and a token-gated endpoint. \ No newline at end of file diff --git a/benchmarks-website/migrate/Cargo.toml b/benchmarks-website/migrate/Cargo.toml new file mode 100644 index 00000000000..45a752df397 --- /dev/null +++ b/benchmarks-website/migrate/Cargo.toml @@ -0,0 +1,41 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright the Vortex contributors + +[package] +name = "vortex-bench-migrate" +version = "0.1.0-alpha.0" +edition = "2024" +rust-version = "1.91.0" +license = "Apache-2.0" +description = "One-shot historical migrator from the v2 benchmarks S3 dataset to a v3 DuckDB file" +publish = false + +[[bin]] +name = "vortex-bench-migrate" +path = "src/main.rs" + +# Throwaway binary, not part of the vortex-* public API surface. +# Errors use anyhow, and the crate is intentionally outside the +# workspace public-api lockfile set. + +[dependencies] +anyhow = { workspace = true } +arrow-array = { workspace = true } +arrow-buffer = { workspace = true } +arrow-schema = { workspace = true } +clap = { workspace = true, features = ["derive"] } +# track vortex-duckdb's bundled engine version (build.rs) +duckdb = { version = "1.10502", features = ["bundled", "appender-arrow"] } +flate2 = "1.1" +reqwest = { workspace = true, features = ["json"] } +serde = { workspace = true, features = ["derive"] } +serde_json = { workspace = true } +tokio = { workspace = true, features = ["rt-multi-thread", "macros"] } +tracing = { workspace = true, features = ["std"] } +tracing-subscriber = { workspace = true, features = ["env-filter", "fmt"] } +vortex-bench-server = { path = "../server" } +vortex-utils = { workspace = true } + +[dev-dependencies] +rstest = { workspace = true } +tempfile = { workspace = true } diff --git a/benchmarks-website/migrate/build.rs b/benchmarks-website/migrate/build.rs new file mode 100644 index 00000000000..37bb34d013a --- /dev/null +++ b/benchmarks-website/migrate/build.rs @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +fn main() { + if std::env::var("CARGO_CFG_TARGET_OS").as_deref() == Ok("windows") { + println!("cargo:rustc-link-lib=dylib=rstrtmgr"); + } +} diff --git a/benchmarks-website/migrate/src/classifier.rs b/benchmarks-website/migrate/src/classifier.rs new file mode 100644 index 00000000000..8e1c1e2a110 --- /dev/null +++ b/benchmarks-website/migrate/src/classifier.rs @@ -0,0 +1,853 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Bug-for-bug port of v2's `getGroup`, `formatQuery`, and +//! 
`normalizeChartName` from `benchmarks-website/server.js`, plus the +//! mapping from v2 group + name pattern to a v3 fact-table bin. +//! +//! The v2 classifier was the source of truth for what historical +//! records mean. It groups records by name prefix into one of: +//! "Random Access", "Compression", "Compression Size", or one of the +//! SQL query suites (with optional fan-out by storage and scale +//! factor for TPC-H/TPC-DS). This module reproduces that logic and +//! then hops to a v3 fact-table bin, since v3 stores dim values as +//! columns instead of name fragments. +//! +//! Engine and format strings stored in v3 columns are pulled from the +//! raw, pre-rename v2 record name. v2's `ENGINE_RENAMES` was a v2 +//! read-time UI concern (e.g. `vortex-file-compressed` rendered as +//! `vortex` and `parquet-tokio-local-disk` rendered as `parquet-nvme`). +//! v3 stores canonical `Format::name()` strings to match what the v3 +//! live emitter writes, so historical and live records share series. + +use crate::v2::V2Record; +use crate::v2::dataset_scale_factor; + +/// Static port of v2's `QUERY_SUITES`. +pub const QUERY_SUITES: &[QuerySuite] = &[ + QuerySuite { + prefix: "clickbench", + display_name: "Clickbench", + query_prefix: "CLICKBENCH", + dataset_key: None, + fan_out: false, + skip: false, + }, + QuerySuite { + prefix: "statpopgen", + display_name: "Statistical and Population Genetics", + query_prefix: "STATPOPGEN", + dataset_key: None, + fan_out: false, + skip: false, + }, + QuerySuite { + prefix: "polarsignals", + display_name: "PolarSignals Profiling", + query_prefix: "POLARSIGNALS", + dataset_key: None, + fan_out: false, + skip: false, + }, + QuerySuite { + prefix: "gharchive", + display_name: "GhArchive", + query_prefix: "GHARCHIVE", + dataset_key: None, + fan_out: false, + skip: false, + }, + QuerySuite { + prefix: "tpch", + display_name: "TPC-H", + query_prefix: "TPC-H", + dataset_key: Some("tpch"), + fan_out: true, + skip: false, + }, + QuerySuite { + prefix: "tpcds", + display_name: "TPC-DS", + query_prefix: "TPC-DS", + dataset_key: Some("tpcds"), + fan_out: true, + skip: false, + }, + QuerySuite { + prefix: "fineweb", + display_name: "Fineweb", + query_prefix: "FINEWEB", + dataset_key: None, + fan_out: false, + skip: false, + }, +]; + +/// Static port of v2's `ENGINE_RENAMES`. Applied to the "series" half +/// of a benchmark name (the part after the first `/`) before splitting +/// on `:` into engine/format. Order doesn't matter — keys are unique. +const ENGINE_RENAMES: &[(&str, &str)] = &[ + ("datafusion:vortex-file-compressed", "datafusion:vortex"), + ("datafusion:parquet", "datafusion:parquet"), + ("datafusion:arrow", "datafusion:in-memory-arrow"), + ("datafusion:lance", "datafusion:lance"), + ("datafusion:vortex-compact", "datafusion:vortex-compact"), + ("duckdb:vortex-file-compressed", "duckdb:vortex"), + ("duckdb:parquet", "duckdb:parquet"), + ("duckdb:duckdb", "duckdb:duckdb"), + ("duckdb:vortex-compact", "duckdb:vortex-compact"), + ("vortex-tokio-local-disk", "vortex-nvme"), + ("vortex-compact-tokio-local-disk", "vortex-compact-nvme"), + ("lance-tokio-local-disk", "lance-nvme"), + ("parquet-tokio-local-disk", "parquet-nvme"), + ("lance", "lance"), +]; + +/// One entry of `QUERY_SUITES`. +#[derive(Debug, Clone, Copy)] +pub struct QuerySuite { + pub prefix: &'static str, + pub display_name: &'static str, + pub query_prefix: &'static str, + pub dataset_key: Option<&'static str>, + pub fan_out: bool, + pub skip: bool, +} + +/// Group a v2 record falls into. 
Mirrors `getGroup` in `server.js`, +/// including the fan-out group naming for TPC-H/TPC-DS. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum V2Group { + RandomAccess, + Compression, + CompressionSize, + Query { + suite_index: usize, + /// `Some` for fan-out suites only. + storage: Option<String>, + /// `Some` for fan-out suites only. + scale_factor: Option<String>, + }, +} + +impl V2Group { + /// Display name as v2 served it from `/api/metadata`. + pub fn display_name(&self) -> String { + match self { + V2Group::RandomAccess => "Random Access".into(), + V2Group::Compression => "Compression".into(), + V2Group::CompressionSize => "Compression Size".into(), + V2Group::Query { + suite_index, + storage, + scale_factor, + } => { + let suite = &QUERY_SUITES[*suite_index]; + if let (Some(storage), Some(sf)) = (storage, scale_factor) { + format!("{} ({}) (SF={})", suite.display_name, storage, sf) + } else { + suite.display_name.to_string() + } + } + } + } +} + +/// Apply v2's `ENGINE_RENAMES`. Reproduces the JS `rename`: +/// `RENAMES[s.toLowerCase()] || RENAMES[s] || s`. +pub fn rename_engine(s: &str) -> String { + let lower = s.to_lowercase(); + for (k, v) in ENGINE_RENAMES { + if *k == lower { + return (*v).to_string(); + } + } + for (k, v) in ENGINE_RENAMES { + if *k == s { + return (*v).to_string(); + } + } + s.to_string() +} + +/// Faithful port of v2's `formatQuery`: maps `clickbench_q07` → +/// `"CLICKBENCH Q7"`. Returns the original (uppercased, +/// `-` and `_` replaced with spaces) when no suite matches. +pub fn format_query(q: &str) -> String { + let lower = q.to_lowercase(); + for suite in QUERY_SUITES { + if suite.skip { + continue; + } + let prefix = suite.prefix; + if let Some(rest) = lower.strip_prefix(prefix) + && let Some(idx) = parse_query_index(rest) + { + return format!("{} Q{}", suite.query_prefix, idx); + } + } + let mut out = q.to_uppercase(); + out = out.replace(['_', '-'], " "); + out +} + +/// Parse the `_q07` / ` q7` / `q42` tail used by `format_query`. +/// Returns the integer query index if the tail matches the v2 regex +/// `^[_ ]?q(\d+)`. +fn parse_query_index(rest: &str) -> Option<u32> { + let after_sep = rest + .strip_prefix('_') + .or_else(|| rest.strip_prefix(' ')) + .unwrap_or(rest); + let after_q = after_sep + .strip_prefix('q') + .or_else(|| after_sep.strip_prefix('Q'))?; + let digits: String = after_q.chars().take_while(|c| c.is_ascii_digit()).collect(); + if digits.is_empty() { + return None; + } + digits.parse().ok() +} + +/// Faithful port of v2's `normalizeChartName`. +pub fn normalize_chart_name(group: &V2Group, chart_name: &str) -> String { + if matches!(group, V2Group::CompressionSize) && chart_name == "VORTEX FILE COMPRESSED SIZE" { + return "VORTEX SIZE".into(); + } + chart_name.to_string() +} + +/// Port of v2's `getGroup`. Returns `None` for skipped suites +/// (e.g. `fineweb`) or names that match nothing.
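+/// +/// Illustrative (hypothetical) record names and the group each maps to, +/// assuming the prefixes above: `random-access/taxi/random/parquet-tokio-local-disk` +/// maps to `RandomAccess`, `compress time/taxi` to `Compression`, +/// `vortex size/taxi` to `CompressionSize`, and `tpch_q01/duckdb:parquet` +/// to `Query` fanned out by storage and scale factor.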
+pub fn get_group(record: &V2Record) -> Option<V2Group> { + let lower = record.name.to_lowercase(); + + if lower.starts_with("random-access/") || lower.starts_with("random access/") { + return Some(V2Group::RandomAccess); + } + + if lower.starts_with("vortex size/") + || lower.starts_with("vortex-file-compressed size/") + || lower.starts_with("parquet size/") + || lower.starts_with("parquet-zstd size/") + || lower.starts_with("lance size/") + || lower.contains(":raw size/") + || lower.contains(":parquet-zstd size/") + || lower.contains(":lance size/") + { + return Some(V2Group::CompressionSize); + } + + if lower.starts_with("compress time/") + || lower.starts_with("decompress time/") + || lower.starts_with("parquet_rs-zstd compress") + || lower.starts_with("parquet_rs-zstd decompress") + || lower.starts_with("lance compress") + || lower.starts_with("lance decompress") + || lower.starts_with("vortex:lance ratio") + || lower.starts_with("vortex:parquet-zstd ratio") + // Typo'd v2 emitter wrote `parquet-zst` (no `d`) for some + // ratio records; match both spellings so they classify as + // derived ratios instead of falling through to Unknown. + || lower.starts_with("vortex:parquet-zst ratio") + || lower.starts_with("vortex:raw ratio") + { + return Some(V2Group::Compression); + } + + for (i, suite) in QUERY_SUITES.iter().enumerate() { + let prefix_q = format!("{}_q", suite.prefix); + let prefix_slash = format!("{}/", suite.prefix); + if !lower.starts_with(&prefix_q) && !lower.starts_with(&prefix_slash) { + continue; + } + if suite.skip { + return None; + } + if !suite.fan_out { + return Some(V2Group::Query { + suite_index: i, + storage: None, + scale_factor: None, + }); + } + let storage = match record.storage.as_deref().map(str::to_uppercase).as_deref() { + Some("S3") => "S3", + _ => "NVMe", + }; + let dataset_key = suite.dataset_key.unwrap_or(suite.prefix); + let raw_sf = record + .dataset + .as_ref() + .and_then(|d| dataset_scale_factor(d, dataset_key)); + let sf = raw_sf + .as_deref() + .and_then(|s| s.parse::<f64>().ok()) + .map(|f| f.round() as i64) + .unwrap_or(1); + return Some(V2Group::Query { + suite_index: i, + storage: Some(storage.into()), + scale_factor: Some(sf.to_string()), + }); + } + + None +} + +/// Group + chart + series breakdown for a v2 record, using the same +/// rules `server.js` applies in `refresh()`. Equivalent to v2's +/// `(group, chartName, seriesName)` triple after rename / skip rules. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct V2Classification { + pub group: V2Group, + pub chart: String, + pub series: String, +} + +/// Apply the same chart / series naming v2's `refresh()` does, plus +/// the throughput / `PARQUET-UNC` skip rules.
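+/// +/// For example (hypothetical record name): `clickbench_q07/duckdb:vortex-file-compressed` +/// classifies to chart `CLICKBENCH Q7` and series `duckdb:vortex` after +/// `ENGINE_RENAMES`; any name containing ` throughput` returns `None`.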
+pub fn classify_v2(record: &V2Record) -> Option<V2Classification> { + if record.name.contains(" throughput") { + return None; + } + let group = get_group(record)?; + let parts: Vec<&str> = record.name.split('/').collect(); + let (chart, series) = match (&group, parts.len()) { + (V2Group::RandomAccess, 4) => { + let chart = format!("{}/{}", parts[1], parts[2]) + .to_uppercase() + .replace(['_', '-'], " "); + let series = rename_engine(if parts[3].is_empty() { + "default" + } else { + parts[3] + }); + (chart, series) + } + (V2Group::RandomAccess, 2) => ( + "RANDOM ACCESS".to_string(), + rename_engine(if parts[1].is_empty() { + "default" + } else { + parts[1] + }), + ), + (V2Group::RandomAccess, _) => return None, + _ => { + let series_raw = if parts.len() >= 2 && !parts[1].is_empty() { + parts[1] + } else { + "default" + }; + let series = rename_engine(series_raw); + let chart = format_query(parts[0]); + (chart, series) + } + }; + let chart = normalize_chart_name(&group, &chart); + if chart.contains("PARQUET-UNC") { + return None; + } + Some(V2Classification { + group, + chart, + series, + }) +} + +/// Mapping target: which v3 fact table a v2 record lands in, plus the +/// dim values that table needs. +#[derive(Debug, Clone, PartialEq)] +pub enum V3Bin { + Query { + dataset: String, + dataset_variant: Option<String>, + scale_factor: Option<String>, + query_idx: i32, + storage: String, + engine: String, + format: String, + }, + CompressionTime { + dataset: String, + dataset_variant: Option<String>, + format: String, + op: String, + }, + CompressionSize { + dataset: String, + dataset_variant: Option<String>, + format: String, + }, + RandomAccess { + dataset: String, + format: String, + }, +} + +/// Top-level entry point. Combines `classify_v2` with the v3 fact-table +/// mapping. Returns `None` for records that: +/// +/// - Don't match any v2 group (uncategorized prefix). +/// - Are explicitly skipped by v2 (throughput, PARQUET-UNC, fineweb). +/// - Are computed-at-read-time ratios that v3 derives from +/// `compression_sizes` (`vortex:parquet-zstd ratio …`, +/// `vortex:lance ratio …`, `vortex:raw ratio …`, +/// `vortex:* size/…`). +pub fn classify(record: &V2Record) -> Option<V3Bin> { + let cls = classify_v2(record)?; + match &cls.group { + V2Group::RandomAccess => bin_random_access(record), + V2Group::Compression => bin_compression_time(&cls, record), + V2Group::CompressionSize => bin_compression_size(&cls, record), + V2Group::Query { .. } => bin_query(&cls, record), + } +} + +/// Reason the classifier dropped a record. Intentional skips (v2 +/// patterns v3 deliberately doesn't store) are NOT errors; they don't +/// count against the uncategorized gate. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Skip { + /// `vortex:* ratio …` and `vortex:* size` — derived in v3 from + /// `compression_sizes` joined to itself. + DerivedRatio, + /// `throughput` records — v2 derived these from latencies. + Throughput, + /// A v2 query suite marked `skip: true` in QUERY_SUITES. + SkippedSuite, + /// random-access record with an unsupported part count. + UnsupportedShape, + /// Record had no `value` field. + NoValue, + /// Dim outside the v3 emitter's allowlist (e.g. `parquet-zstd`, + /// historical-only suites no longer in CI). + Deprecated, + /// v2 memory measurements (`*_memory/*` records). Carry top-level + /// `peak_physical_memory` / `peak_virtual_memory` / + /// `physical_memory_delta` / `virtual_memory_delta` fields that + /// `V2Record` doesn't deserialize.
Not migrated for alpha; merging + /// into the corresponding QueryMeasurement row is future work. + HistoricalMemory, +} + +/// Engines the v3 emitter produces today. Mirrors +/// `vortex-bench/src/lib.rs::Engine`. Anything else is historical and gets +/// bucketed as `Skip::Deprecated`. +const V3_ENGINES: &[&str] = &["datafusion", "duckdb", "vortex", "arrow"]; + +/// Formats the v3 emitter produces today (`Format::name()` values from +/// `vortex-bench/src/lib.rs`). +const V3_FORMATS: &[&str] = &[ + "vortex-file-compressed", + "vortex-compact", + "parquet", + "lance", + "csv", + "arrow", + "duckdb", +]; + +/// Query suites the v3 CI runs today. Suites outside this list still +/// classify (so historical analyses stay coherent) but get bucketed +/// as `Skip::Deprecated` so they don't render as orphan charts in v3. +/// +/// `fineweb` is included because `.github/workflows/sql-benchmarks.yml` +/// still has `fineweb` and `fineweb-s3` matrix entries. `gharchive` +/// stays excluded — it's defined in `vortex-bench` but no current +/// workflow runs it. +const V3_QUERY_SUITES: &[&str] = &[ + "clickbench", + "tpch", + "tpcds", + "statpopgen", + "polarsignals", + "fineweb", +]; + +/// Returns true if every dim that v3 stores as a column is on the +/// emitter's current allowlist. Dim values outside the allowlist mean +/// historical-only formats / engines that the v3 UI has nothing to +/// render against. +fn is_v3_dim(bin: &V3Bin) -> bool { + match bin { + V3Bin::Query { engine, format, .. } => { + V3_ENGINES.contains(&engine.as_str()) && V3_FORMATS.contains(&format.as_str()) + } + V3Bin::CompressionTime { format, .. } + | V3Bin::CompressionSize { format, .. } + | V3Bin::RandomAccess { format, .. } => V3_FORMATS.contains(&format.as_str()), + } +} + +/// Outcome of running the classifier on a v2 record. Distinguishes +/// "we know we don't want this" (`Skip`) from "we don't recognize this" +/// (`Unknown`); the migrator's 5% gate fires only on the latter. +#[derive(Debug, Clone)] +pub enum Outcome { + Bin(V3Bin), + Skip(Skip), + Unknown, +} + +/// Like [`classify`], but reports *why* a record was dropped. Intended +/// for the migrator so the 5% uncategorized gate doesn't trip on +/// records v2 deliberately doesn't render (ratios, throughput, +/// skipped suites). +pub fn classify_outcome(record: &V2Record) -> Outcome { + if record.name.contains(" throughput") { + return Outcome::Skip(Skip::Throughput); + } + // v2 memory records: e.g. "clickbench_q07_memory/datafusion:parquet". + // Match the `_memory/` infix BEFORE the engine/format split, so they + // route to a known Skip variant instead of slipping through to + // Outcome::Unknown and tripping the 5% gate. + let lower = record.name.to_lowercase(); + if let Some((head, _)) = lower.split_once('/') + && head.ends_with("_memory") + { + return Outcome::Skip(Skip::HistoricalMemory); + } + let Some(group) = get_group(record) else { + return Outcome::Unknown; + }; + if let V2Group::Query { suite_index, .. } = &group + && QUERY_SUITES[*suite_index].skip + { + return Outcome::Skip(Skip::SkippedSuite); + } + let Some(cls) = classify_v2(record) else { + // get_group succeeded but classify_v2 didn't — shape mismatch. 
+ return Outcome::Skip(Skip::UnsupportedShape); + }; + let derived = match &cls.group { + V2Group::Compression => { + let lc = cls.chart.to_lowercase(); + lc.contains("ratio") || lc.contains(':') + } + V2Group::CompressionSize => cls.chart.to_lowercase().contains(':'), + _ => false, + }; + if derived { + return Outcome::Skip(Skip::DerivedRatio); + } + let bin = match &cls.group { + V2Group::RandomAccess => match bin_random_access(record) { + Some(b) => Some(b), + // `bin_random_access` only returns None for malformed + // shapes (empty dataset/pattern segment, empty/`default` + // format). Route them to Skip so the `Outcome::Unknown` + // arm below — and the 5% uncategorized gate in + // `migrate::run` — don't trip on them. + None => return Outcome::Skip(Skip::UnsupportedShape), + }, + V2Group::Compression => bin_compression_time(&cls, record), + V2Group::CompressionSize => bin_compression_size(&cls, record), + V2Group::Query { .. } => bin_query(&cls, record), + }; + let Some(bin) = bin else { + return Outcome::Unknown; + }; + if !is_v3_dim(&bin) { + return Outcome::Skip(Skip::Deprecated); + } + if let V2Group::Query { suite_index, .. } = &group + && !V3_QUERY_SUITES.contains(&QUERY_SUITES[*suite_index].prefix) + { + return Outcome::Skip(Skip::Deprecated); + } + Outcome::Bin(bin) +} + +fn bin_random_access(record: &V2Record) -> Option { + // Pull dataset and format from the raw, pre-rename v2 name so v3 + // stores meaningful values. Two raw shapes are supported: + // + // - 4-part `random-access///-tokio-local-disk` + // - 2-part legacy `random-access/-tokio-local-disk` + // + // The 2-part shape is what `random-access-bench`'s `measurement_name` + // emits when called without an `AccessPattern`, and per its source + // comment that path is only taken for the legacy taxi run + // (`if dataset.name() == "taxi"` in `benchmarks/random-access-bench/ + // src/main.rs`). The live v3 emitter `random_access_record` writes + // `dataset="taxi"` for those same measurements, so the historical + // 2-part records are taxi too — assigning `dataset="taxi"` here + // recovers the time series instead of letting it disappear under + // v2's "RANDOM ACCESS" placeholder. Deriving from the raw name + // (rather than `cls.chart`) keeps this independent of v2's + // `normalizeChartName`. + // + // After stripping the `-tokio-local-disk` suffix, map the v2 + // random-access ext label (`vortex`, from `Format::ext()`) to the + // canonical name (`vortex-file-compressed`, from `Format::name()`). + // `parquet` and `lance` match between ext and name. The `vortex` + // ext is shared by both `OnDiskVortex` (name + // `vortex-file-compressed`) and `VortexCompact` (name + // `vortex-compact`), but v2's random-access bench only emitted + // `OnDiskVortex`, so mapping to `vortex-file-compressed` is + // correct for all historical data. + // + // Records whose `` segment ends in `-footer` (the bench's + // reopen-mode variant, e.g. `parquet-tokio-local-disk-footer`) + // intentionally do not strip clean to a v3-allowlisted format; the + // outer `is_v3_dim` filter then routes them to `Skip::Deprecated`. + // The live v3 emitter doesn't distinguish reopen vs cached either + // (`random_access_record` uses `format.name()` for both), so + // dropping `-footer` here keeps migration consistent with what + // v3 ingests live. 
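+    //
+    // Illustrative mappings under the rules above (the names are
+    // examples, not an exhaustive list):
+    //
+    //   "random-access/taxi/take/vortex-tokio-local-disk" -> ("taxi/take", "vortex-file-compressed")
+    //   "random-access/parquet-tokio-local-disk"          -> ("taxi", "parquet")
+    //   "random-access/taxi/take/parquet-tokio-local-disk-footer"
+    //       -> format stays "parquet-tokio-local-disk-footer"; the caller later drops it as Deprecated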
+ let parts: Vec<&str> = record.name.split('/').collect(); + let (dataset, raw_format) = match parts.as_slice() { + [_, ds, pat, format] => { + if ds.is_empty() || pat.is_empty() { + return None; + } + (format!("{ds}/{pat}").to_lowercase(), *format) + } + [_, format] => ("taxi".to_string(), *format), + _ => return None, + }; + if raw_format.is_empty() || raw_format == "default" { + return None; + } + let stripped = raw_format + .strip_suffix("-tokio-local-disk") + .unwrap_or(raw_format); + let format = match stripped { + "vortex" => "vortex-file-compressed".to_string(), + other => other.to_lowercase(), + }; + Some(V3Bin::RandomAccess { dataset, format }) +} + +fn bin_compression_time(cls: &V2Classification, _record: &V2Record) -> Option { + // v2 compression chart names look like (after format_query): + // "COMPRESS TIME" [vortex/encode] + // "DECOMPRESS TIME" [vortex/decode] + // "PARQUET RS ZSTD COMPRESS TIME" [parquet/encode] + // "PARQUET RS ZSTD DECOMPRESS TIME" [parquet/decode] + // "LANCE COMPRESS TIME" [lance/encode] + // "LANCE DECOMPRESS TIME" [lance/decode] + // "VORTEX:LANCE RATIO COMPRESS TIME" [drop] + // "VORTEX:PARQUET-ZSTD RATIO COMPRESS TIME" [drop] + // "VORTEX:RAW RATIO COMPRESS TIME" [drop] + let lc = cls.chart.to_lowercase(); + if lc.contains("ratio") || lc.contains(':') { + // Ratios are computed at read time from compression_sizes. + return None; + } + let (format, op) = if lc.starts_with("compress time") { + ("vortex-file-compressed", "encode") + } else if lc.starts_with("decompress time") { + ("vortex-file-compressed", "decode") + } else if lc.starts_with("parquet rs zstd compress time") { + ("parquet", "encode") + } else if lc.starts_with("parquet rs zstd decompress time") { + ("parquet", "decode") + } else if lc.starts_with("lance compress time") { + ("lance", "encode") + } else if lc.starts_with("lance decompress time") { + ("lance", "decode") + } else { + return None; + }; + let dataset = cls.series.to_lowercase(); + if dataset.is_empty() || dataset == "default" { + return None; + } + Some(V3Bin::CompressionTime { + dataset, + dataset_variant: None, + format: format.to_string(), + op: op.to_string(), + }) +} + +fn bin_compression_size(cls: &V2Classification, record: &V2Record) -> Option { + let lc = cls.chart.to_lowercase(); + // Ratios like "VORTEX:PARQUET ZSTD SIZE" / "VORTEX:LANCE SIZE" / + // "VORTEX:RAW SIZE" are derived from compression_sizes at read + // time, not stored. + if lc.contains(':') { + return None; + } + // `parquet-zstd size` shares a leading "parquet" with `parquet size`, + // so check the more specific prefix first. `format_query` upper-cases + // and replaces `-`/`_` with spaces, so the chart we match against is + // `"PARQUET ZSTD SIZE"` (no hyphen) — same convention as the existing + // `"parquet rs zstd compress time"` branches above. + let format = if lc.starts_with("vortex size") { + "vortex-file-compressed" + } else if lc.starts_with("parquet zstd size") { + "parquet-zstd" + } else if lc.starts_with("parquet size") { + "parquet" + } else if lc.starts_with("lance size") { + "lance" + } else { + return None; + }; + let dataset = cls.series.to_lowercase(); + if dataset.is_empty() || dataset == "default" { + return None; + } + // Mirror the file-sizes ingest path's dataset_variant derivation + // (see `migrate::migrate_file_sizes`): pull the SF out of the v2 + // record's `dataset` object when present and run it through + // `canonical_scale_factor` so `"1"`, `"1.0"`, `"10"` and `"10.0"` + // collapse to one canonical form. 
Without this both code paths + // produce the same `mid` only by accident, so SF=10 file-sizes + // rows wouldn't merge with the matching data.json.gz + // "vortex size/tpch" rows when one side wrote `"10"` and the + // other wrote `"10.0"`. + let dataset_variant = crate::v2::canonical_scale_factor( + record + .dataset + .as_ref() + .and_then(|d| crate::v2::dataset_scale_factor(d, dataset.as_str())) + .as_deref(), + ); + Some(V3Bin::CompressionSize { + dataset, + dataset_variant, + format: format.to_string(), + }) +} + +fn bin_query(cls: &V2Classification, record: &V2Record) -> Option { + let V2Group::Query { + suite_index, + storage, + scale_factor, + } = &cls.group + else { + return None; + }; + let suite = &QUERY_SUITES[*suite_index]; + + // Pull the query index from the *raw* name's first part instead of + // the formatted chart, so we don't have to round-trip "Q07". + let raw_first = record.name.split('/').next().unwrap_or(""); + let query_idx = parse_query_index_from_first(raw_first)?; + + // Pull engine:format from the raw, pre-rename second segment so v3 + // stores canonical `Format::name()` strings (e.g. + // `vortex-file-compressed`) that match what the v3 live emitter + // writes. `cls.series` has been through v2's `ENGINE_RENAMES` for + // UI display and is not appropriate for v3 columns. + // + // Older v2 records emitted display-case engines (e.g. `DataFusion`, + // `DuckDB`); newer ones emit lowercase. Lowercase here so dedup + // collapses both spellings into a single canonical row. + let raw_series = record.name.split('/').nth(1)?; + let (engine, format) = split_engine_format(raw_series)?; + let engine = engine.to_lowercase(); + let format = format.to_lowercase(); + + let storage_v3 = match storage.as_deref() { + Some("S3") => "s3".to_string(), + Some("NVMe") => "nvme".to_string(), + _ => "nvme".to_string(), + }; + + // ClickBench's "flavor" lives in dataset_variant per benchmark-mapping.md + // - we don't have it from a v2 name string, so we leave it None. + Some(V3Bin::Query { + dataset: suite.prefix.to_string(), + dataset_variant: None, + scale_factor: scale_factor.clone(), + query_idx, + storage: storage_v3, + engine, + format, + }) +} + +/// Pull the integer query index out of the leading name part, which is +/// always `_q` or ` q` for SQL query records. +fn parse_query_index_from_first(first: &str) -> Option { + let lower = first.to_lowercase(); + for suite in QUERY_SUITES { + if let Some(rest) = lower.strip_prefix(suite.prefix) + && let Some(idx) = parse_query_index(rest) + { + return Some(idx as i32); + } + } + None +} + +/// Split a renamed series like `datafusion:parquet` into +/// `(engine, format)`. Returns `None` for series with no `:` since +/// v3 requires both columns. 
+fn split_engine_format(series: &str) -> Option<(String, String)> { + let mut split = series.splitn(2, ':'); + let engine = split.next()?.trim().to_string(); + let format = split.next()?.trim().to_string(); + if engine.is_empty() || format.is_empty() { + return None; + } + Some((engine, format)) +} + +#[cfg(test)] +mod tests { + use anyhow::Context as _; + + use super::*; + + fn record(name: &str) -> V2Record { + V2Record { + name: name.to_string(), + commit_id: Some("deadbeef".into()), + unit: None, + value: None, + storage: None, + dataset: None, + all_runtimes: None, + env_triple: None, + } + } + + #[test] + fn format_query_round_trips() { + assert_eq!(format_query("clickbench_q07"), "CLICKBENCH Q7"); + assert_eq!(format_query("tpch_q01"), "TPC-H Q1"); + assert_eq!(format_query("tpcds_q42"), "TPC-DS Q42"); + assert_eq!(format_query("statpopgen_q3"), "STATPOPGEN Q3"); + assert_eq!(format_query("foo bar"), "FOO BAR"); + } + + #[test] + fn rename_engine_canonicalizes_disk_names() { + assert_eq!(rename_engine("vortex-tokio-local-disk"), "vortex-nvme"); + assert_eq!( + rename_engine("datafusion:vortex-file-compressed"), + "datafusion:vortex" + ); + assert_eq!(rename_engine("unknown-engine"), "unknown-engine"); + } + + #[test] + fn parse_query_index_handles_separators() { + assert_eq!(parse_query_index("_q07"), Some(7)); + assert_eq!(parse_query_index(" q7"), Some(7)); + assert_eq!(parse_query_index("q42"), Some(42)); + assert_eq!(parse_query_index("xq7"), None); + } + + #[test] + fn random_access_bins_dataset_pattern() -> anyhow::Result<()> { + let bin = classify(&record("random-access/taxi/take/parquet")) + .context("classify returned None for a known-good 4-part random-access name")?; + assert_eq!( + bin, + V3Bin::RandomAccess { + dataset: "taxi/take".into(), + format: "parquet".into(), + } + ); + Ok(()) + } +} diff --git a/benchmarks-website/migrate/src/commits.rs b/benchmarks-website/migrate/src/commits.rs new file mode 100644 index 00000000000..87c53caa41b --- /dev/null +++ b/benchmarks-website/migrate/src/commits.rs @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Commit upserts. Adapts a [`crate::v2::V2Commit`] into the v3 +//! `commits` row shape (a [`vortex_bench_server::records::CommitInfo`]). + +use anyhow::Context as _; +use anyhow::Result; +use duckdb::Transaction; +use duckdb::params; + +use crate::v2::V2Commit; + +/// Insert a v3 `commits` row for one v2 commit. `tree_sha` and `url` +/// remain required and use a warning-bearing empty-string fallback; +/// the human-input fields (message, author/committer name and email) +/// are nullable in the v3 schema, so empty / missing values map to +/// SQL `NULL` instead of an empty string the UI would render as a +/// blank cell. 
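+///
+/// Illustrative handling (a sketch of the rules described above; left
+/// side is the `V2Commit` field, right side the `commits` column):
+///
+/// ```text
+/// timestamp / tree_id / url   -> required: "" + warning when missing
+/// message                     -> NULL when missing or blank
+/// author.*, committer.*       -> NULL when missing or blank
+/// ```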
+pub fn upsert_commit(tx: &Transaction<'_>, commit: &V2Commit) -> Result<UpsertOutcome> {
+    let mut warnings = Vec::new();
+    let timestamp = require_field(&commit.timestamp, "timestamp", &commit.id, &mut warnings);
+    let message = optional_field(&commit.message);
+    let author_name = optional_field(&commit.author.as_ref().and_then(|p| p.name.clone()));
+    let author_email = optional_field(&commit.author.as_ref().and_then(|p| p.email.clone()));
+    let committer_name = optional_field(&commit.committer.as_ref().and_then(|p| p.name.clone()));
+    let committer_email = optional_field(&commit.committer.as_ref().and_then(|p| p.email.clone()));
+    let tree_sha = require_field(&commit.tree_id, "tree_id", &commit.id, &mut warnings);
+    let url = require_field(&commit.url, "url", &commit.id, &mut warnings);
+
+    tx.execute(
+        r#"
+        INSERT INTO commits (
+            commit_sha, timestamp, message, author_name, author_email,
+            committer_name, committer_email, tree_sha, url
+        ) VALUES (?, CAST(? AS TIMESTAMPTZ), ?, ?, ?, ?, ?, ?, ?)
+        ON CONFLICT (commit_sha) DO UPDATE SET
+            timestamp = excluded.timestamp,
+            message = excluded.message,
+            author_name = excluded.author_name,
+            author_email = excluded.author_email,
+            committer_name = excluded.committer_name,
+            committer_email = excluded.committer_email,
+            tree_sha = excluded.tree_sha,
+            url = excluded.url
+        "#,
+        params![
+            commit.id,
+            timestamp,
+            message,
+            author_name,
+            author_email,
+            committer_name,
+            committer_email,
+            tree_sha,
+            url,
+        ],
+    )
+    .with_context(|| format!("upserting commit {}", commit.id))?;
+    Ok(UpsertOutcome { warnings })
+}
+
+fn require_field(
+    field: &Option<String>,
+    name: &str,
+    sha: &str,
+    warnings: &mut Vec<String>,
+) -> String {
+    match field {
+        Some(s) => s.clone(),
+        None => {
+            warnings.push(format!("commit {sha} missing {name}"));
+            String::new()
+        }
+    }
+}
+
+/// Coerce a v2-supplied `Option<String>` into a SQL-bindable
+/// `Option<String>`, treating an empty / whitespace-only value as
+/// missing. v2 sometimes wrote `""` for blank author / committer /
+/// message fields; storing those as actual `NULL` lets the UI
+/// distinguish "missing metadata" from "deliberately blank".
+fn optional_field(field: &Option<String>) -> Option<String> {
+    field
+        .as_deref()
+        .map(str::trim)
+        .filter(|s| !s.is_empty())
+        .map(str::to_string)
+}
+
+/// Per-call warning bag returned to the caller for logging.
+#[derive(Debug, Default)]
+pub struct UpsertOutcome {
+    pub warnings: Vec<String>,
+}
diff --git a/benchmarks-website/migrate/src/lib.rs b/benchmarks-website/migrate/src/lib.rs
new file mode 100644
index 00000000000..b5aa72bc97d
--- /dev/null
+++ b/benchmarks-website/migrate/src/lib.rs
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+//! One-shot historical migrator from v2's S3-hosted benchmark dataset
+//! to a v3 DuckDB file.
+//!
+//! The v2 dataset is JSONL of bare benchmark records keyed by name string.
+//! v3 uses five typed fact tables with explicit dim columns. This crate
+//! ports v2's `getGroup` classifier (in `benchmarks-website/server.js`)
+//! bug-for-bug so that historical rows survive the migration with the
+//! same group / chart / series structure as the live v2 server.
+//!
+//! The migrator is throwaway: once v3 cuts over, both the binary and
+//! the classifier go away.
+
+/// Routing v2 records into v3 fact tables, ported from v2's `getGroup`.
+pub mod classifier;
+/// V2 commit -> v3 `commits` row upserts.
+pub mod commits;
+/// End-to-end migration of v2 dumps into a v3 DuckDB.
+pub mod migrate; +/// Streaming readers for the v2 S3 bucket and local dumps. +pub mod source; +/// Wire shapes of the v2 benchmark dataset. +pub mod v2; +/// Structural diff between a migrated v3 DuckDB and v2's `/api/metadata`. +pub mod verify; diff --git a/benchmarks-website/migrate/src/main.rs b/benchmarks-website/migrate/src/main.rs new file mode 100644 index 00000000000..366834ed441 --- /dev/null +++ b/benchmarks-website/migrate/src/main.rs @@ -0,0 +1,114 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! `vortex-bench-migrate` CLI: a one-shot historical migrator from +//! v2's S3 dataset into a v3 DuckDB file, plus a structural diff +//! against the live v2 `/api/metadata` endpoint for spotting +//! classifier regressions. + +use std::path::PathBuf; +use std::process::ExitCode; + +use anyhow::Context as _; +use anyhow::Result; +use clap::Parser; +use clap::Subcommand; +use clap::ValueEnum; +use tracing_subscriber::EnvFilter; +use vortex_bench_migrate::migrate; +use vortex_bench_migrate::source::Source; +use vortex_bench_migrate::verify; + +/// One-shot historical migrator from v2's S3 dataset to v3 DuckDB. +#[derive(Debug, Parser)] +#[command(name = "vortex-bench-migrate", version, about)] +struct Cli { + #[command(subcommand)] + command: Command, +} + +#[derive(Debug, Subcommand)] +enum Command { + /// Read v2's data.json.gz / commits.json / file-sizes-*.json.gz + /// and write a fully populated v3 DuckDB at `--output`. + Run { + /// Path to write the v3 DuckDB to. Created if absent. + #[arg(long)] + output: PathBuf, + /// Where to fetch v2 dumps from. + #[arg(long, value_enum, default_value_t = SourceKind::PublicS3)] + source: SourceKind, + /// For `--source=local`, the directory containing + /// `data.json.gz`, `commits.json`, and `file-sizes-*.json.gz`. + #[arg(long, required_if_eq("source", "local"))] + source_dir: Option, + }, + /// Diff a migrated DuckDB against the live v2 `/api/metadata` + /// endpoint. Exits 0 if every v2 group is present in v3, 1 + /// otherwise so this can gate a CI step. + Verify { + /// HTTPS root of a running v2 server (e.g. `https://bench.vortex.dev`). + #[arg(long)] + against: String, + /// Path to the migrated v3 DuckDB. + #[arg(long)] + duckdb: PathBuf, + }, +} + +#[derive(Debug, Clone, Copy, ValueEnum)] +enum SourceKind { + PublicS3, + Local, +} + +fn main() -> ExitCode { + if let Err(err) = run() { + eprintln!("error: {err:#}"); + return ExitCode::from(2); + } + ExitCode::SUCCESS +} + +fn run() -> Result<()> { + tracing_subscriber::fmt() + .with_env_filter( + EnvFilter::try_from_env("VORTEX_BENCH_LOG").unwrap_or_else(|_| EnvFilter::new("info")), + ) + .init(); + + let cli = Cli::parse(); + match cli.command { + Command::Run { + output, + source, + source_dir, + } => { + let source = match source { + SourceKind::PublicS3 => Source::PublicS3, + SourceKind::Local => { + Source::Local(source_dir.context("--source=local requires --source-dir")?) 
+ } + }; + let summary = migrate::run(&source, &output)?; + print!("{summary}"); + if summary.uncategorized_fraction() > 0.05 { + anyhow::bail!( + "uncategorized records ({:.2}%) exceed the 5% gate; \ + stop and report unmatched prefixes (see summary above) \ + before proceeding", + 100.0 * summary.uncategorized_fraction() + ); + } + Ok(()) + } + Command::Verify { against, duckdb } => { + let report = verify::run(&against, &duckdb)?; + print!("{report}"); + if !report.v2_groups_covered() { + std::process::exit(1); + } + Ok(()) + } + } +} diff --git a/benchmarks-website/migrate/src/migrate/accum.rs b/benchmarks-website/migrate/src/migrate/accum.rs new file mode 100644 index 00000000000..69f16fe4b93 --- /dev/null +++ b/benchmarks-website/migrate/src/migrate/accum.rs @@ -0,0 +1,357 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Per-fact-table row accumulators + their `RecordBatch` builders. +//! +//! Each `*Accum` collects classified records during the streaming JSONL +//! pass and then materialises one Arrow `RecordBatch` per fact table at +//! flush time. Three of the four use parallel column vectors with a +//! `seen` map keyed by `measurement_id`; `CompressionSizeAccum` is a +//! `HashMap` because it has two collision semantics +//! (replace from `data.json.gz`, sum from `file-sizes-*.json.gz`). + +use std::sync::Arc; + +use anyhow::Result; +use arrow_array::ArrayRef; +use arrow_array::Int32Array; +use arrow_array::Int64Array; +use arrow_array::ListArray; +use arrow_array::RecordBatch; +use arrow_array::StringArray; +use arrow_buffer::OffsetBuffer; +use arrow_schema::DataType; +use arrow_schema::Field; +use arrow_schema::Schema; +use vortex_bench_server::records::CompressionSize; +use vortex_bench_server::records::CompressionTime; +use vortex_bench_server::records::QueryMeasurement; +use vortex_bench_server::records::RandomAccessTime; +use vortex_utils::aliases::hash_map::HashMap; + +use super::MigrationSummary; + +/// `query_measurements` accumulator. Parallel column vectors plus a +/// `measurement_id`-keyed seen map; first-write wins on collision. +#[derive(Default)] +pub(super) struct QueryAccum { + pub(super) measurement_id: Vec, + pub(super) commit_sha: Vec, + pub(super) dataset: Vec, + pub(super) dataset_variant: Vec>, + pub(super) scale_factor: Vec>, + pub(super) query_idx: Vec, + pub(super) storage: Vec, + pub(super) engine: Vec, + pub(super) format: Vec, + pub(super) value_ns: Vec, + pub(super) all_runtimes_ns: Vec>, + pub(super) peak_physical: Vec>, + pub(super) peak_virtual: Vec>, + pub(super) physical_delta: Vec>, + pub(super) virtual_delta: Vec>, + pub(super) env_triple: Vec>, + /// `mid` -> index in the parallel column vecs. Lets us look up the + /// kept row's `value_ns` on collision so we can flag conflicts. 
+ pub(super) seen: HashMap, +} + +impl QueryAccum { + pub(super) fn push(&mut self, mid: i64, r: QueryMeasurement, summary: &mut MigrationSummary) { + if let Some(&idx) = self.seen.get(&mid) { + summary.deduped += 1; + if self.value_ns[idx] != r.value_ns { + summary.deduped_with_conflict += 1; + } + return; + } + let idx = self.measurement_id.len(); + self.seen.insert(mid, idx); + self.measurement_id.push(mid); + self.commit_sha.push(r.commit_sha); + self.dataset.push(r.dataset); + self.dataset_variant.push(r.dataset_variant); + self.scale_factor.push(r.scale_factor); + self.query_idx.push(r.query_idx); + self.storage.push(r.storage); + self.engine.push(r.engine); + self.format.push(r.format); + self.value_ns.push(r.value_ns); + self.all_runtimes_ns.push(r.all_runtimes_ns); + self.peak_physical.push(r.peak_physical); + self.peak_virtual.push(r.peak_virtual); + self.physical_delta.push(r.physical_delta); + self.virtual_delta.push(r.virtual_delta); + self.env_triple.push(r.env_triple); + } +} + +/// `compression_times` accumulator. Same shape as [`QueryAccum`] minus the +/// query-only columns. +#[derive(Default)] +pub(super) struct CompressionTimeAccum { + pub(super) measurement_id: Vec, + pub(super) commit_sha: Vec, + pub(super) dataset: Vec, + pub(super) dataset_variant: Vec>, + pub(super) format: Vec, + pub(super) op: Vec, + pub(super) value_ns: Vec, + pub(super) all_runtimes_ns: Vec>, + pub(super) env_triple: Vec>, + pub(super) seen: HashMap, +} + +impl CompressionTimeAccum { + pub(super) fn push(&mut self, mid: i64, r: CompressionTime, summary: &mut MigrationSummary) { + if let Some(&idx) = self.seen.get(&mid) { + summary.deduped += 1; + if self.value_ns[idx] != r.value_ns { + summary.deduped_with_conflict += 1; + } + return; + } + let idx = self.measurement_id.len(); + self.seen.insert(mid, idx); + self.measurement_id.push(mid); + self.commit_sha.push(r.commit_sha); + self.dataset.push(r.dataset); + self.dataset_variant.push(r.dataset_variant); + self.format.push(r.format); + self.op.push(r.op); + self.value_ns.push(r.value_ns); + self.all_runtimes_ns.push(r.all_runtimes_ns); + self.env_triple.push(r.env_triple); + } +} + +/// `random_access_times` accumulator. Smallest of the three parallel-vec +/// accumulators. +#[derive(Default)] +pub(super) struct RandomAccessAccum { + pub(super) measurement_id: Vec, + pub(super) commit_sha: Vec, + pub(super) dataset: Vec, + pub(super) format: Vec, + pub(super) value_ns: Vec, + pub(super) all_runtimes_ns: Vec>, + pub(super) env_triple: Vec>, + pub(super) seen: HashMap, +} + +impl RandomAccessAccum { + pub(super) fn push(&mut self, mid: i64, r: RandomAccessTime, summary: &mut MigrationSummary) { + if let Some(&idx) = self.seen.get(&mid) { + summary.deduped += 1; + if self.value_ns[idx] != r.value_ns { + summary.deduped_with_conflict += 1; + } + return; + } + let idx = self.measurement_id.len(); + self.seen.insert(mid, idx); + self.measurement_id.push(mid); + self.commit_sha.push(r.commit_sha); + self.dataset.push(r.dataset); + self.format.push(r.format); + self.value_ns.push(r.value_ns); + self.all_runtimes_ns.push(r.all_runtimes_ns); + self.env_triple.push(r.env_triple); + } +} + +/// `compression_sizes` is fed by both `data.json.gz` (replace-on-collision) +/// and `file-sizes-*.json.gz` (sum-on-collision). Stored as a map; converted +/// to a `RecordBatch` at flush time. 
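+///
+/// Collision semantics, sketched with illustrative byte counts:
+///
+/// ```text
+/// push_replace(mid, 100) then push_replace(mid, 120)  => value_bytes = 120 (+ conflict flag)
+/// push_sum(mid, 100)     then push_sum(mid, 120)      => value_bytes = 220
+/// ```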
+#[derive(Default)] +pub(super) struct CompressionSizeAccum { + pub(super) rows: HashMap, +} + +impl CompressionSizeAccum { + /// data.json.gz path: latest write wins, mirroring the prior + /// `ON CONFLICT DO UPDATE SET value_bytes = excluded.value_bytes`. + /// Bumps `deduped_with_conflict` when an existing row's + /// `value_bytes` differs from the incoming row's, so silent + /// value-corruption is observable. + pub(super) fn push_replace( + &mut self, + mid: i64, + r: CompressionSize, + summary: &mut MigrationSummary, + ) { + if let Some(existing) = self.rows.get(&mid) + && existing.value_bytes != r.value_bytes + { + summary.deduped_with_conflict += 1; + } + self.rows.insert(mid, r); + } + + /// file-sizes-*.json.gz path: per-file rows aggregate into one + /// `(commit, dataset, dataset_variant, format)` row by summing, + /// mirroring the prior `value_bytes = compression_sizes.value_bytes + /// + excluded.value_bytes`. + pub(super) fn push_sum(&mut self, mid: i64, r: CompressionSize) { + let add = r.value_bytes; + self.rows + .entry(mid) + .and_modify(|x| x.value_bytes += add) + .or_insert(r); + } +} + +pub(super) fn build_query_batch(a: QueryAccum) -> Result { + let schema = Arc::new(Schema::new(vec![ + Field::new("measurement_id", DataType::Int64, false), + Field::new("commit_sha", DataType::Utf8, false), + Field::new("dataset", DataType::Utf8, false), + Field::new("dataset_variant", DataType::Utf8, true), + Field::new("scale_factor", DataType::Utf8, true), + Field::new("query_idx", DataType::Int32, false), + Field::new("storage", DataType::Utf8, false), + Field::new("engine", DataType::Utf8, false), + Field::new("format", DataType::Utf8, false), + Field::new("value_ns", DataType::Int64, false), + Field::new( + "all_runtimes_ns", + DataType::List(Arc::new(Field::new("item", DataType::Int64, false))), + false, + ), + Field::new("peak_physical", DataType::Int64, true), + Field::new("peak_virtual", DataType::Int64, true), + Field::new("physical_delta", DataType::Int64, true), + Field::new("virtual_delta", DataType::Int64, true), + Field::new("env_triple", DataType::Utf8, true), + ])); + let cols: Vec = vec![ + Arc::new(Int64Array::from(a.measurement_id)), + Arc::new(StringArray::from(a.commit_sha)), + Arc::new(StringArray::from(a.dataset)), + Arc::new(StringArray::from(a.dataset_variant)), + Arc::new(StringArray::from(a.scale_factor)), + Arc::new(Int32Array::from(a.query_idx)), + Arc::new(StringArray::from(a.storage)), + Arc::new(StringArray::from(a.engine)), + Arc::new(StringArray::from(a.format)), + Arc::new(Int64Array::from(a.value_ns)), + Arc::new(build_list_int64(a.all_runtimes_ns)), + Arc::new(Int64Array::from(a.peak_physical)), + Arc::new(Int64Array::from(a.peak_virtual)), + Arc::new(Int64Array::from(a.physical_delta)), + Arc::new(Int64Array::from(a.virtual_delta)), + Arc::new(StringArray::from(a.env_triple)), + ]; + Ok(RecordBatch::try_new(schema, cols)?) 
+} + +pub(super) fn build_compression_time_batch(a: CompressionTimeAccum) -> Result { + let schema = Arc::new(Schema::new(vec![ + Field::new("measurement_id", DataType::Int64, false), + Field::new("commit_sha", DataType::Utf8, false), + Field::new("dataset", DataType::Utf8, false), + Field::new("dataset_variant", DataType::Utf8, true), + Field::new("format", DataType::Utf8, false), + Field::new("op", DataType::Utf8, false), + Field::new("value_ns", DataType::Int64, false), + Field::new( + "all_runtimes_ns", + DataType::List(Arc::new(Field::new("item", DataType::Int64, false))), + false, + ), + Field::new("env_triple", DataType::Utf8, true), + ])); + let cols: Vec = vec![ + Arc::new(Int64Array::from(a.measurement_id)), + Arc::new(StringArray::from(a.commit_sha)), + Arc::new(StringArray::from(a.dataset)), + Arc::new(StringArray::from(a.dataset_variant)), + Arc::new(StringArray::from(a.format)), + Arc::new(StringArray::from(a.op)), + Arc::new(Int64Array::from(a.value_ns)), + Arc::new(build_list_int64(a.all_runtimes_ns)), + Arc::new(StringArray::from(a.env_triple)), + ]; + Ok(RecordBatch::try_new(schema, cols)?) +} + +pub(super) fn build_random_access_batch(a: RandomAccessAccum) -> Result { + let schema = Arc::new(Schema::new(vec![ + Field::new("measurement_id", DataType::Int64, false), + Field::new("commit_sha", DataType::Utf8, false), + Field::new("dataset", DataType::Utf8, false), + Field::new("format", DataType::Utf8, false), + Field::new("value_ns", DataType::Int64, false), + Field::new( + "all_runtimes_ns", + DataType::List(Arc::new(Field::new("item", DataType::Int64, false))), + false, + ), + Field::new("env_triple", DataType::Utf8, true), + ])); + let cols: Vec = vec![ + Arc::new(Int64Array::from(a.measurement_id)), + Arc::new(StringArray::from(a.commit_sha)), + Arc::new(StringArray::from(a.dataset)), + Arc::new(StringArray::from(a.format)), + Arc::new(Int64Array::from(a.value_ns)), + Arc::new(build_list_int64(a.all_runtimes_ns)), + Arc::new(StringArray::from(a.env_triple)), + ]; + Ok(RecordBatch::try_new(schema, cols)?) +} + +pub(super) fn build_compression_size_batch(a: CompressionSizeAccum) -> Result { + let n = a.rows.len(); + let mut measurement_id = Vec::with_capacity(n); + let mut commit_sha = Vec::with_capacity(n); + let mut dataset = Vec::with_capacity(n); + let mut dataset_variant = Vec::with_capacity(n); + let mut format = Vec::with_capacity(n); + let mut value_bytes = Vec::with_capacity(n); + for (mid, cs) in a.rows { + measurement_id.push(mid); + commit_sha.push(cs.commit_sha); + dataset.push(cs.dataset); + dataset_variant.push(cs.dataset_variant); + format.push(cs.format); + value_bytes.push(cs.value_bytes); + } + let schema = Arc::new(Schema::new(vec![ + Field::new("measurement_id", DataType::Int64, false), + Field::new("commit_sha", DataType::Utf8, false), + Field::new("dataset", DataType::Utf8, false), + Field::new("dataset_variant", DataType::Utf8, true), + Field::new("format", DataType::Utf8, false), + Field::new("value_bytes", DataType::Int64, false), + ])); + let cols: Vec = vec![ + Arc::new(Int64Array::from(measurement_id)), + Arc::new(StringArray::from(commit_sha)), + Arc::new(StringArray::from(dataset)), + Arc::new(StringArray::from(dataset_variant)), + Arc::new(StringArray::from(format)), + Arc::new(Int64Array::from(value_bytes)), + ]; + Ok(RecordBatch::try_new(schema, cols)?) +} + +/// Build a non-nullable `List` Arrow array from one inner Vec +/// per row. The outer list is non-null; inner i64 values are non-null. 
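+///
+/// Offset bookkeeping, sketched with illustrative rows:
+///
+/// ```text
+/// input rows : [[1, 2], [3], []]
+/// flat values: [1, 2, 3]
+/// offsets    : [0, 2, 3, 3]
+/// ```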
+fn build_list_int64(values: Vec>) -> ListArray { + let mut offsets: Vec = Vec::with_capacity(values.len() + 1); + offsets.push(0); + let mut flat: Vec = Vec::new(); + for inner in values { + flat.extend_from_slice(&inner); + offsets.push(flat.len() as i32); + } + let values_arr = Int64Array::from(flat); + let field = Arc::new(Field::new("item", DataType::Int64, false)); + ListArray::new( + field, + OffsetBuffer::new(offsets.into()), + Arc::new(values_arr), + None, + ) +} diff --git a/benchmarks-website/migrate/src/migrate/mod.rs b/benchmarks-website/migrate/src/migrate/mod.rs new file mode 100644 index 00000000000..e4156c884f3 --- /dev/null +++ b/benchmarks-website/migrate/src/migrate/mod.rs @@ -0,0 +1,637 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! End-to-end migration of one v2 dataset into a v3 DuckDB file. +//! +//! Streams `data.json.gz` line-by-line, runs each record through the +//! [`crate::classifier`], and writes one row per record into the appropriate +//! v3 fact table. Every row's `measurement_id` is computed via the server's +//! `measurement_id_*` functions so the result is byte-compatible with what +//! fresh `/api/ingest` would have produced. +//! +//! Bulk-load shape: rows are accumulated in memory as parallel column +//! vectors, deduplicated by `measurement_id`, then flushed to DuckDB +//! via `Appender::append_record_batch` as one Arrow `RecordBatch` per +//! fact table. + +mod accum; + +use std::collections::BTreeMap; +use std::io::BufRead; +use std::path::Path; +use std::time::Duration; +use std::time::Instant; + +use anyhow::Context as _; +use anyhow::Result; +use arrow_array::RecordBatch; +use duckdb::Connection; +use tracing::info; +use tracing::warn; +use vortex_bench_server::db::measurement_id_compression_size; +use vortex_bench_server::db::measurement_id_compression_time; +use vortex_bench_server::db::measurement_id_query; +use vortex_bench_server::db::measurement_id_random_access; +use vortex_bench_server::records::CompressionSize; +use vortex_bench_server::records::CompressionTime; +use vortex_bench_server::records::QueryMeasurement; +use vortex_bench_server::records::RandomAccessTime; +use vortex_bench_server::schema::SCHEMA_DDL; + +use self::accum::CompressionSizeAccum; +use self::accum::CompressionTimeAccum; +use self::accum::QueryAccum; +use self::accum::RandomAccessAccum; +use self::accum::build_compression_size_batch; +use self::accum::build_compression_time_batch; +use self::accum::build_query_batch; +use self::accum::build_random_access_batch; +use crate::classifier; +use crate::classifier::V3Bin; +use crate::commits::upsert_commit; +use crate::source::KNOWN_FILE_SIZES_SUITES; +use crate::source::Source; +use crate::v2::V2Commit; +use crate::v2::V2FileSize; +use crate::v2::V2Record; +use crate::v2::canonical_scale_factor; +use crate::v2::index_commits; +use crate::v2::runtime_as_i64; +use crate::v2::value_as_f64; + +/// Per-table insert counts, plus skip / missing counts. +#[derive(Debug, Default, Clone)] +pub struct MigrationSummary { + /// Lines read from `data.json.gz`. + pub records_read: u64, + /// Rows successfully inserted into `query_measurements`. + pub query_inserted: u64, + /// Rows successfully inserted into `compression_times`. + pub compression_time_inserted: u64, + /// Rows successfully inserted into `compression_sizes`. + pub compression_size_inserted: u64, + /// Rows successfully inserted into `random_access_times`. 
+ pub random_access_inserted: u64, + /// `file-sizes-*.json.gz` lines folded into `compression_sizes`. + pub file_size_inserted: u64, + /// Records the classifier returned `Unknown` for. + pub uncategorized: u64, + /// Top-level prefix histogram of uncategorised records, for triage. + pub uncategorized_prefixes: BTreeMap, + /// Records whose `commit_id` doesn't match any commit in `commits.jsonl`. + pub missing_commit: u64, + /// Warnings emitted while upserting commits (e.g. missing tree SHA). + pub commit_warnings: u64, + /// Records dropped because their `value` was missing or non-numeric. + pub skipped_no_value: u64, + /// Records the classifier returned `Skip(reason)` for. + pub skipped_intentional: u64, + /// Commits upserted into the `commits` dim table. + pub commits_inserted: u64, + /// Records dropped by dedup because their `measurement_id` collided + /// with a previously kept row. + pub deduped: u64, + /// Number of records dropped by dedup whose `value_ns` (or + /// `value_bytes` for compression_sizes' replace path) differed + /// from the kept row's. Non-zero is a smell worth investigating. + pub deduped_with_conflict: u64, +} + +impl MigrationSummary { + /// Total `data.json.gz` records that landed in some v3 fact table. + pub fn total_inserted(&self) -> u64 { + self.query_inserted + + self.compression_time_inserted + + self.compression_size_inserted + + self.random_access_inserted + } + + /// Fraction of records that were uncategorized. The orchestrator + /// stops if this exceeds the documented 5% threshold. + pub fn uncategorized_fraction(&self) -> f64 { + if self.records_read == 0 { + return 0.0; + } + self.uncategorized as f64 / self.records_read as f64 + } +} + +/// Open or create a DuckDB at `path` and apply the v3 schema. The +/// migrator is a one-shot fresh load; the bulk-append flush is pure +/// insert (no `ON CONFLICT`), so any stale rows in `path` would clash +/// with the next run on the same primary keys. Delete both the +/// database file and its WAL companion up front so every run starts +/// from a known-empty state. +pub fn open_target_db(path: &Path) -> Result { + remove_if_exists(path)?; + let wal = wal_path(path); + remove_if_exists(&wal)?; + let conn = + Connection::open(path).with_context(|| format!("opening DuckDB at {}", path.display()))?; + conn.execute_batch(SCHEMA_DDL) + .context("applying v3 schema DDL")?; + Ok(conn) +} + +fn remove_if_exists(path: &Path) -> Result<()> { + match std::fs::remove_file(path) { + Ok(()) => { + info!(path = %path.display(), "removed pre-existing target file"); + Ok(()) + } + Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(()), + Err(e) => Err(e).with_context(|| format!("removing {}", path.display())), + } +} + +/// DuckDB writes its write-ahead log next to the database file with a +/// `.wal` suffix appended (e.g. `v3.duckdb` -> `v3.duckdb.wal`). +fn wal_path(path: &Path) -> std::path::PathBuf { + let mut name = path.as_os_str().to_owned(); + name.push(".wal"); + std::path::PathBuf::from(name) +} + +/// Run the whole migration: commits, data.json.gz, and every +/// file-sizes-*.json.gz under the source. 
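+///
+/// Call-shape sketch (mirrors what `main.rs` does; the paths are
+/// examples):
+///
+/// ```ignore
+/// let source = Source::Local(PathBuf::from("/tmp/v2-dumps"));
+/// let summary = migrate::run(&source, Path::new("v3.duckdb"))?;
+/// print!("{summary}");
+/// assert!(summary.uncategorized_fraction() <= 0.05);
+/// ```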
+pub fn run(source: &Source, target: &Path) -> Result { + let mut conn = open_target_db(target)?; + let mut summary = MigrationSummary::default(); + + info!(source = %source.describe(), "Reading commits.json"); + let commits = read_commits(source)?; + info!(commits = commits.len(), "Loaded commits"); + summary.commits_inserted = upsert_all_commits(&mut conn, &commits, &mut summary)?; + + let mut q = QueryAccum::default(); + let mut ct = CompressionTimeAccum::default(); + let mut cs = CompressionSizeAccum::default(); + let mut ra = RandomAccessAccum::default(); + + info!("Migrating data.json.gz"); + migrate_data_jsonl( + source, + &commits, + &mut summary, + &mut q, + &mut ct, + &mut cs, + &mut ra, + )?; + info!(records = summary.records_read, "data.json.gz done"); + + for name in source.list_file_sizes()? { + info!(name = %name, "Migrating file-sizes"); + if let Err(e) = migrate_file_sizes(source, &name, &commits, &mut summary, &mut cs) { + warn!("file-sizes file {name} failed: {e:#}"); + } + } + + info!("Flushing accumulators to DuckDB"); + flush_all(&conn, q, ct, ra, cs, &mut summary)?; + + Ok(summary) +} + +/// Flush each accumulator's batch and bump the matching per-fact +/// summary counter only AFTER the flush succeeds. This way a flush +/// failure leaves the counter at zero (or its previous value) rather +/// than reporting rows that never landed in DuckDB. +fn flush_all( + conn: &Connection, + q: QueryAccum, + ct: CompressionTimeAccum, + ra: RandomAccessAccum, + cs: CompressionSizeAccum, + summary: &mut MigrationSummary, +) -> Result<()> { + let batch = build_query_batch(q)?; + let n = batch.num_rows() as u64; + flush(conn, "query_measurements", batch)?; + summary.query_inserted = n; + + let batch = build_compression_time_batch(ct)?; + let n = batch.num_rows() as u64; + flush(conn, "compression_times", batch)?; + summary.compression_time_inserted = n; + + let batch = build_random_access_batch(ra)?; + let n = batch.num_rows() as u64; + flush(conn, "random_access_times", batch)?; + summary.random_access_inserted = n; + + let batch = build_compression_size_batch(cs)?; + let n = batch.num_rows() as u64; + flush(conn, "compression_sizes", batch)?; + summary.compression_size_inserted = n; + + Ok(()) +} + +fn read_commits(source: &Source) -> Result> { + let reader = source.open_commits_jsonl()?; + let mut commits: Vec = Vec::new(); + for line in reader.lines() { + let line = line?; + let trimmed = line.trim(); + if trimmed.is_empty() { + continue; + } + match serde_json::from_str::(trimmed) { + Ok(c) => commits.push(c), + Err(e) => warn!("skipping malformed commits.json line: {e}"), + } + } + Ok(index_commits(commits)) +} + +fn upsert_all_commits( + conn: &mut Connection, + commits: &BTreeMap, + summary: &mut MigrationSummary, +) -> Result { + let tx = conn.transaction().context("begin commits transaction")?; + let mut count = 0u64; + for commit in commits.values() { + let outcome = upsert_commit(&tx, commit)?; + for w in outcome.warnings { + warn!("{w}"); + summary.commit_warnings += 1; + } + count += 1; + } + tx.commit().context("commit commits transaction")?; + Ok(count) +} + +/// Stream `data.json.gz` and push classified records into the +/// per-table accumulators. Dedup happens inside each accumulator's +/// `push` method by `measurement_id`. 
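+///
+/// A representative `data.json.gz` line (shape only; every value here
+/// is illustrative):
+///
+/// ```text
+/// {"name":"clickbench_q07/datafusion:parquet","commit_id":"<sha>",
+///  "value":123456789,"all_runtimes":[123456789,130000000]}
+/// ```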
+fn migrate_data_jsonl( + source: &Source, + commits: &BTreeMap, + summary: &mut MigrationSummary, + q: &mut QueryAccum, + ct: &mut CompressionTimeAccum, + cs: &mut CompressionSizeAccum, + ra: &mut RandomAccessAccum, +) -> Result<()> { + let reader = source.open_data_jsonl()?; + let started = Instant::now(); + let mut last_log = Instant::now(); + for line in reader.lines() { + let line = line?; + let trimmed = line.trim(); + if trimmed.is_empty() { + continue; + } + summary.records_read += 1; + let record: V2Record = match serde_json::from_str(trimmed) { + Ok(r) => r, + Err(e) => { + warn!("skipping malformed data.json line: {e}"); + continue; + } + }; + apply_v2_record(&record, commits, summary, q, ct, cs, ra); + if last_log.elapsed() >= Duration::from_secs(5) { + let elapsed = started.elapsed().as_secs_f64(); + let rate = summary.records_read as f64 / elapsed.max(0.001); + info!( + records = summary.records_read, + rate = format!("{rate:.0}/s"), + query = q.measurement_id.len(), + compression_time = ct.measurement_id.len(), + compression_size = cs.rows.len(), + random_access = ra.measurement_id.len(), + "migration progress", + ); + last_log = Instant::now(); + } + } + Ok(()) +} + +fn apply_v2_record( + record: &V2Record, + commits: &BTreeMap, + summary: &mut MigrationSummary, + q: &mut QueryAccum, + ct: &mut CompressionTimeAccum, + cs: &mut CompressionSizeAccum, + ra: &mut RandomAccessAccum, +) { + let Some(sha) = record.commit_id.clone() else { + summary.missing_commit += 1; + return; + }; + if !commits.contains_key(&sha) { + summary.missing_commit += 1; + return; + } + + let bin = match classifier::classify_outcome(record) { + classifier::Outcome::Bin(b) => b, + classifier::Outcome::Skip(_) => { + summary.skipped_intentional += 1; + return; + } + classifier::Outcome::Unknown => { + summary.uncategorized += 1; + let prefix = record.name.split('/').next().unwrap_or("").to_string(); + *summary.uncategorized_prefixes.entry(prefix).or_insert(0) += 1; + return; + } + }; + + let env_triple = record.env_triple.as_ref().and_then(|t| t.to_triple()); + let runtimes = record + .all_runtimes + .as_ref() + .map(|v| v.iter().filter_map(runtime_as_i64).collect::>()) + .unwrap_or_default(); + let value_f64 = match record.value.as_ref().and_then(value_as_f64) { + Some(v) => v, + None => { + summary.skipped_no_value += 1; + return; + } + }; + + match bin { + V3Bin::Query { + dataset, + dataset_variant, + scale_factor, + query_idx, + storage, + engine, + format, + } => { + let qm = QueryMeasurement { + commit_sha: sha, + dataset, + dataset_variant, + scale_factor, + query_idx, + storage, + engine, + format, + value_ns: value_f64 as i64, + all_runtimes_ns: runtimes, + peak_physical: None, + peak_virtual: None, + physical_delta: None, + virtual_delta: None, + env_triple, + }; + let mid = measurement_id_query(&qm); + q.push(mid, qm, summary); + } + V3Bin::CompressionTime { + dataset, + dataset_variant, + format, + op, + } => { + let ctr = CompressionTime { + commit_sha: sha, + dataset, + dataset_variant, + format, + op, + value_ns: value_f64 as i64, + all_runtimes_ns: runtimes, + env_triple, + }; + let mid = measurement_id_compression_time(&ctr); + ct.push(mid, ctr, summary); + } + V3Bin::CompressionSize { + dataset, + dataset_variant, + format, + } => { + let csr = CompressionSize { + commit_sha: sha, + dataset, + dataset_variant, + format, + value_bytes: value_f64 as i64, + }; + let mid = measurement_id_compression_size(&csr); + cs.push_replace(mid, csr, summary); + } + V3Bin::RandomAccess { dataset, 
format } => { + let rar = RandomAccessTime { + commit_sha: sha, + dataset, + format, + value_ns: value_f64 as i64, + all_runtimes_ns: runtimes, + env_triple, + }; + let mid = measurement_id_random_access(&rar); + ra.push(mid, rar, summary); + } + } +} + +fn migrate_file_sizes( + source: &Source, + name: &str, + commits: &BTreeMap, + summary: &mut MigrationSummary, + cs: &mut CompressionSizeAccum, +) -> Result<()> { + let reader = source.open_file_sizes(name)?; + // Prefix unknown-id fallbacks with `unknown:` so they're clearly + // labeled in the UI rather than masquerading as a dataset name. + let dataset_fallback = { + let stripped = name + .strip_prefix("file-sizes-") + .and_then(|s| s.strip_suffix(".json.gz")) + .unwrap_or(name); + if KNOWN_FILE_SIZES_SUITES.contains(&stripped) { + stripped.to_string() + } else { + format!("unknown:{stripped}") + } + }; + let started = Instant::now(); + let mut last_log = Instant::now(); + for line in reader.lines() { + let line = line?; + let trimmed = line.trim(); + if trimmed.is_empty() { + continue; + } + let sz: V2FileSize = match serde_json::from_str(trimmed) { + Ok(r) => r, + Err(e) => { + warn!("skipping malformed {name} line: {e}"); + continue; + } + }; + if !commits.contains_key(&sz.commit_id) { + summary.missing_commit += 1; + continue; + } + let dataset = if sz.benchmark.is_empty() { + dataset_fallback.clone() + } else { + sz.benchmark.clone() + }; + // Run SF through canonical_scale_factor so `"1"`, `"1.0"`, `"10"` + // and `"10.0"` collapse to one form, matching what + // `bin_compression_size` writes for the data.json.gz path. + let dataset_variant = canonical_scale_factor(sz.scale_factor.as_deref()); + let csr = CompressionSize { + commit_sha: sz.commit_id.clone(), + dataset, + dataset_variant, + format: sz.format.clone(), + value_bytes: sz.size_bytes, + }; + let mid = measurement_id_compression_size(&csr); + cs.push_sum(mid, csr); + summary.file_size_inserted += 1; + if last_log.elapsed() >= Duration::from_secs(5) { + let elapsed = started.elapsed().as_secs_f64(); + let rate = summary.file_size_inserted as f64 / elapsed.max(0.001); + info!( + name = %name, + file_sizes = summary.file_size_inserted, + rate = format!("{rate:.0}/s"), + "file-sizes progress", + ); + last_log = Instant::now(); + } + } + Ok(()) +} + +/// Append an Arrow `RecordBatch` to a DuckDB table via `Appender`. +fn flush(conn: &Connection, table: &str, batch: RecordBatch) -> Result<()> { + let mut app = conn + .appender(table) + .with_context(|| format!("opening appender for {table}"))?; + app.append_record_batch(batch) + .with_context(|| format!("appending record batch to {table}"))?; + drop(app); + Ok(()) +} + +/// Print the summary in a human-readable form. Returned by the CLI. 
+impl std::fmt::Display for MigrationSummary { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + writeln!(f, "Records read: {}", self.records_read)?; + writeln!(f, "Commits upserted: {}", self.commits_inserted)?; + writeln!(f, "Commit warnings: {}", self.commit_warnings)?; + writeln!(f, "Inserted (query): {}", self.query_inserted)?; + writeln!( + f, + "Inserted (compress t): {}", + self.compression_time_inserted + )?; + writeln!( + f, + "Inserted (compress s): {}", + self.compression_size_inserted + )?; + writeln!(f, "Inserted (random acc): {}", self.random_access_inserted)?; + writeln!(f, "Inserted (file sizes): {}", self.file_size_inserted)?; + writeln!(f, "Missing commit: {}", self.missing_commit)?; + writeln!(f, "Skipped (no value): {}", self.skipped_no_value)?; + writeln!(f, "Skipped (intentional): {}", self.skipped_intentional)?; + writeln!(f, "Deduplicated: {}", self.deduped)?; + writeln!(f, "Dedup w/ value diff: {}", self.deduped_with_conflict)?; + writeln!( + f, + "Uncategorized: {} ({:.2}%)", + self.uncategorized, + 100.0 * self.uncategorized_fraction() + )?; + if !self.uncategorized_prefixes.is_empty() { + let mut top: Vec<_> = self.uncategorized_prefixes.iter().collect(); + top.sort_by(|a, b| b.1.cmp(a.1)); + writeln!(f, "Top uncategorized prefixes:")?; + for (prefix, n) in top.iter().take(20) { + writeln!(f, " {prefix:>32} : {n}")?; + } + } + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use vortex_bench_server::records::QueryMeasurement; + + use super::*; + + fn open_db_without(table: &str) -> Result<(tempfile::TempDir, Connection)> { + let dir = tempfile::TempDir::new()?; + let path = dir.path().join("v3.duckdb"); + let conn = open_target_db(&path)?; + conn.execute_batch(&format!("DROP TABLE {table}"))?; + Ok((dir, conn)) + } + + fn one_query_row() -> QueryMeasurement { + QueryMeasurement { + commit_sha: "deadbeef".into(), + dataset: "clickbench".into(), + dataset_variant: None, + scale_factor: None, + query_idx: 7, + storage: "nvme".into(), + engine: "datafusion".into(), + format: "parquet".into(), + value_ns: 100, + all_runtimes_ns: vec![100], + peak_physical: None, + peak_virtual: None, + physical_delta: None, + virtual_delta: None, + env_triple: None, + } + } + + #[test] + fn flush_all_does_not_overcount_on_failure() -> Result<()> { + // Drop `compression_times` before flushing so the second + // flush in `flush_all` fails. The first (queries) succeeded, + // so its counter must be set; the failed table's counter and + // every later table's counter must stay at zero. 
+ let (_dir, conn) = open_db_without("compression_times")?; + + let mut summary = MigrationSummary::default(); + let mut q = QueryAccum::default(); + let qm = one_query_row(); + let mid = vortex_bench_server::db::measurement_id_query(&qm); + q.push(mid, qm, &mut summary); + + let ct = CompressionTimeAccum::default(); + let ra = RandomAccessAccum::default(); + let cs = CompressionSizeAccum::default(); + + let result = flush_all(&conn, q, ct, ra, cs, &mut summary); + assert!(result.is_err(), "expected flush to fail on missing table"); + + assert_eq!( + summary.query_inserted, 1, + "query flushed before the failure must be counted" + ); + assert_eq!( + summary.compression_time_inserted, 0, + "failed flush must not bump the counter" + ); + assert_eq!(summary.random_access_inserted, 0, "later flushes never ran"); + assert_eq!( + summary.compression_size_inserted, 0, + "later flushes never ran" + ); + Ok(()) + } +} diff --git a/benchmarks-website/migrate/src/source.rs b/benchmarks-website/migrate/src/source.rs new file mode 100644 index 00000000000..acd53c7626e --- /dev/null +++ b/benchmarks-website/migrate/src/source.rs @@ -0,0 +1,140 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Streaming readers for v2's public S3 bucket. +//! +//! The bucket is `--no-sign-request`, so we fetch the underlying +//! HTTPS URL directly and stream-decompress with `flate2`. The +//! downloads are wrapped in [`reqwest::blocking`] to keep the read +//! path synchronous; the binary's hot path is single-threaded +//! per-source already (DuckDB is a single-writer). +//! +//! For tests and offline runs, [`Source::Local`](crate::source::Source::Local) accepts a local +//! directory of dumps; the migrator's `--source` flag picks the +//! variant. + +use std::fs::File; +use std::io::BufRead; +use std::io::BufReader; +use std::io::Read; +use std::path::Path; +use std::path::PathBuf; + +use anyhow::Context as _; +use anyhow::Result; +use flate2::read::GzDecoder; +use tracing::info; + +/// Public S3 bucket the live v2 server reads from. +pub const PUBLIC_BUCKET_BASE: &str = "https://vortex-ci-benchmark-results.s3.amazonaws.com"; + +/// Where to read the v2 dataset from. Either the public S3 bucket +/// (the live deployment) or a local directory of dumps. +#[derive(Debug, Clone)] +pub enum Source { + /// HTTPS GETs against `s3.amazonaws.com`. + PublicS3, + /// A directory containing `data.json.gz`, `commits.json`, and + /// `file-sizes-*.json.gz` files. + Local(PathBuf), +} + +impl Source { + /// Short human-readable description for log messages. + pub fn describe(&self) -> String { + match self { + Source::PublicS3 => "public S3 bucket".to_string(), + Source::Local(p) => format!("local dir {}", p.display()), + } + } + + /// Open `data.json.gz` for streaming, decompressing on the fly. + pub fn open_data_jsonl(&self) -> Result> { + let stream = self.open_raw("data.json.gz")?; + Ok(Box::new(BufReader::new(GzDecoder::new(stream)))) + } + + /// Open `commits.json` (uncompressed). + pub fn open_commits_jsonl(&self) -> Result> { + let stream = self.open_raw("commits.json")?; + Ok(Box::new(BufReader::new(stream))) + } + + /// Enumerate `file-sizes-*.json.gz` files. For local sources this + /// is a directory glob; for the public bucket we hit the documented + /// suite ids. + pub fn list_file_sizes(&self) -> Result> { + match self { + Source::Local(dir) => { + let mut out = Vec::new(); + for entry in std::fs::read_dir(dir)? 
{ + let entry = entry?; + let name = entry.file_name(); + let s = name.to_string_lossy(); + if s.starts_with("file-sizes-") && s.ends_with(".json.gz") { + out.push(s.into_owned()); + } + } + out.sort(); + Ok(out) + } + Source::PublicS3 => { + // The S3 bucket's ListObjects is denied for unsigned + // requests, so we hit the documented per-suite keys + // emitted by `.github/workflows/sql-benchmarks.yml`. + Ok(KNOWN_FILE_SIZES_SUITES + .iter() + .map(|id| format!("file-sizes-{id}.json.gz")) + .collect()) + } + } + } + + /// Open one `file-sizes-*.json.gz` for streaming. + pub fn open_file_sizes(&self, name: &str) -> Result> { + let stream = self.open_raw(name)?; + Ok(Box::new(BufReader::new(GzDecoder::new(stream)))) + } + + fn open_raw(&self, name: &str) -> Result> { + match self { + Source::Local(dir) => open_local(&dir.join(name)), + Source::PublicS3 => open_s3(name), + } + } +} + +fn open_local(path: &Path) -> Result> { + let f = File::open(path).with_context(|| format!("opening {}", path.display()))?; + Ok(Box::new(f)) +} + +fn open_s3(name: &str) -> Result> { + let url = format!("{PUBLIC_BUCKET_BASE}/{name}"); + info!(url = %url, "GET"); + let resp = reqwest::blocking::get(&url).with_context(|| format!("GET {url}"))?; + if !resp.status().is_success() { + anyhow::bail!("GET {url} returned {}", resp.status()); + } + Ok(Box::new(resp)) +} + +/// Suite IDs we know publish a `file-sizes-{id}.json.gz` to S3. +/// +/// Source of truth: the `matrix.id` values in +/// `.github/workflows/sql-benchmarks.yml`'s `benchmark_matrix` default. +/// The post-bench `file-sizes` step uploads `file-sizes-${{ matrix.id +/// }}.json.gz`, so this list must match those IDs verbatim. Adding a +/// new matrix entry to that workflow means adding the same ID here. +pub(crate) const KNOWN_FILE_SIZES_SUITES: &[&str] = &[ + "clickbench-nvme", + "tpch-nvme", + "tpch-s3", + "tpch-nvme-10", + "tpch-s3-10", + "tpcds-nvme", + "statpopgen", + "fineweb", + "fineweb-s3", + "polarsignals", +]; diff --git a/benchmarks-website/migrate/src/v2.rs b/benchmarks-website/migrate/src/v2.rs new file mode 100644 index 00000000000..dd8190346bb --- /dev/null +++ b/benchmarks-website/migrate/src/v2.rs @@ -0,0 +1,165 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Wire shapes of the v2 benchmark dataset on S3. +//! +//! These types capture only the fields the migrator reads. v2 records +//! are serialized by `vortex-bench` (see `vortex-bench/src/measurements.rs`) +//! and by older non-Rust scripts; the union of fields is loose, so we +//! deserialize permissively (`serde(default)`, untyped `serde_json::Value` +//! for the polymorphic `dataset` field). + +use std::collections::BTreeMap; + +use serde::Deserialize; + +/// One JSONL line of `data.json.gz`. +/// +/// The shape is the union of every emitter's output. Most fields are +/// optional because different benches emit different subsets. +#[derive(Debug, Clone, Deserialize)] +pub struct V2Record { + pub name: String, + #[serde(default)] + pub commit_id: Option, + #[serde(default)] + pub unit: Option, + #[serde(default)] + pub value: Option, + #[serde(default)] + pub storage: Option, + #[serde(default)] + pub dataset: Option, + #[serde(default)] + pub all_runtimes: Option>, + #[serde(default)] + pub env_triple: Option, +} + +/// `dataset` in v2 records is sometimes a string, sometimes an object +/// keyed by suite name (`{ "tpch": { "scale_factor": "10" } }`). 
+/// This helper looks up the scale factor for a given suite without +/// assuming a particular shape. +pub fn dataset_scale_factor(dataset: &serde_json::Value, key: &str) -> Option { + let obj = dataset.as_object()?; + let entry = obj.get(key)?; + let sf = entry.get("scale_factor")?; + match sf { + serde_json::Value::String(s) => Some(s.clone()), + serde_json::Value::Number(n) => Some(n.to_string()), + _ => None, + } +} + +/// Canonicalize a v2 scale-factor string for use in `dataset_variant`. +/// +/// v2 emitters wrote scale factors as either `"1"`, `"1.0"`, `"10"`, or +/// `"10.0"` for the same logical SF, so the data.json.gz path +/// (`bin_compression_size`) and the file-sizes-*.json.gz path +/// (`migrate_file_sizes`) would otherwise produce different +/// `dataset_variant` strings and never collapse onto the same +/// `measurement_id`. Parse to f64 and format with no trailing zeros so +/// every shape collapses to one canonical form (`"1"`, `"10"`, `"0.1"`). +/// SF=1 is the implicit default and folds to `None`. +pub fn canonical_scale_factor(raw: Option<&str>) -> Option { + let s = raw?.trim(); + if s.is_empty() { + return None; + } + let value: f64 = s.parse().ok()?; + if value == 1.0 { + return None; + } + Some(format!("{value}")) +} + +/// Best-effort numeric coercion for the polymorphic `value` field. +pub fn value_as_f64(value: &serde_json::Value) -> Option { + match value { + serde_json::Value::Number(n) => n.as_f64(), + serde_json::Value::String(s) => s.parse().ok(), + _ => None, + } +} + +/// Best-effort coercion of a runtime entry to nanoseconds. +pub fn runtime_as_i64(value: &serde_json::Value) -> Option { + match value { + serde_json::Value::Number(n) => { + if let Some(i) = n.as_i64() { + Some(i) + } else { + n.as_f64().map(|f| f as i64) + } + } + serde_json::Value::String(s) => s.parse().ok(), + _ => None, + } +} + +/// Triple block as emitted by `vortex-bench`'s `--gh-json` path. v2 +/// stored it as an object; we serialize it back out as `arch-os-env`. +#[derive(Debug, Clone, Deserialize)] +pub struct V2EnvTriple { + #[serde(default)] + pub architecture: Option, + #[serde(default)] + pub operating_system: Option, + #[serde(default)] + pub environment: Option, +} + +impl V2EnvTriple { + /// Format as the `arch-os-env` triple used by v3's `env_triple` column. + pub fn to_triple(&self) -> Option { + let arch = self.architecture.as_deref()?; + let os = self.operating_system.as_deref()?; + let env = self.environment.as_deref()?; + Some(format!("{arch}-{os}-{env}")) + } +} + +/// One JSONL line of `commits.json`. +#[derive(Debug, Clone, Deserialize)] +pub struct V2Commit { + pub id: String, + #[serde(default)] + pub timestamp: Option, + #[serde(default)] + pub message: Option, + #[serde(default)] + pub author: Option, + #[serde(default)] + pub committer: Option, + #[serde(default)] + pub tree_id: Option, + #[serde(default)] + pub url: Option, +} + +/// Author or committer block on a v2 commit record. +#[derive(Debug, Clone, Deserialize)] +pub struct V2Person { + #[serde(default)] + pub name: Option, + #[serde(default)] + pub email: Option, +} + +/// One JSONL line of `file-sizes-*.json.gz` produced by +/// `scripts/capture-file-sizes.py`. +#[derive(Debug, Clone, Deserialize)] +pub struct V2FileSize { + pub commit_id: String, + pub benchmark: String, + #[serde(default)] + pub scale_factor: Option, + pub format: String, + pub file: String, + pub size_bytes: i64, +} + +/// Build a sha-keyed map of commits. 
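+/// Duplicate `id`s keep the last record seen (later map inserts overwrite earlier ones).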
+pub fn index_commits(commits: Vec) -> BTreeMap { + commits.into_iter().map(|c| (c.id.clone(), c)).collect() +} diff --git a/benchmarks-website/migrate/src/verify.rs b/benchmarks-website/migrate/src/verify.rs new file mode 100644 index 00000000000..743dff1e528 --- /dev/null +++ b/benchmarks-website/migrate/src/verify.rs @@ -0,0 +1,352 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Structural diff between a migrated v3 DuckDB and the live v2 +//! `/api/metadata` endpoint. +//! +//! Compares group / chart structure only; values aren't compared +//! because v2 converts ns → ms and bytes → MiB on read while v3 +//! stores raw and the chart query divides. Group/chart structural +//! equivalence is enough to spot classifier regressions before +//! cutover. + +use std::collections::BTreeMap; +use std::collections::BTreeSet; +use std::path::Path; + +use anyhow::Context as _; +use anyhow::Result; +use duckdb::Connection; +use serde::Deserialize; + +use crate::classifier::QUERY_SUITES; + +/// Result of one `verify` run. +#[derive(Debug, Default)] +pub struct VerifyReport { + pub matched_groups: Vec, + pub only_in_v3: Vec, + pub only_in_v2: Vec, + pub chart_diffs: Vec, +} + +/// One group's chart-count divergence between v2 and v3, captured when the +/// group is structurally present on both sides but the counts differ. +#[derive(Debug, Clone)] +pub struct ChartDiff { + pub group: String, + pub v2_count: usize, + pub v3_count: usize, +} + +impl VerifyReport { + /// True if every v2 group is represented in v3. The CLI's exit + /// code reflects this. + pub fn v2_groups_covered(&self) -> bool { + self.only_in_v2.is_empty() + } +} + +impl std::fmt::Display for VerifyReport { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + writeln!(f, "Groups in both v2 and v3:")?; + for g in &self.matched_groups { + writeln!(f, " + {g}")?; + } + if !self.only_in_v2.is_empty() { + writeln!(f, "Groups only in v2 (regression candidates):")?; + for g in &self.only_in_v2 { + writeln!(f, " - {g}")?; + } + } + if !self.only_in_v3.is_empty() { + writeln!(f, "Groups only in v3:")?; + for g in &self.only_in_v3 { + writeln!(f, " + {g}")?; + } + } + if !self.chart_diffs.is_empty() { + writeln!(f, "Chart count diffs:")?; + for d in &self.chart_diffs { + writeln!( + f, + " {} : v2={} v3={} (delta={})", + d.group, + d.v2_count, + d.v3_count, + d.v3_count as i64 - d.v2_count as i64, + )?; + } + } + Ok(()) + } +} + +/// v2's `/api/metadata` reply — only the fields we need. +#[derive(Debug, Deserialize)] +struct V2Metadata { + groups: BTreeMap, +} + +#[derive(Debug, Deserialize)] +struct V2GroupMeta { + #[serde(default)] + charts: Vec, +} + +#[derive(Debug, Deserialize)] +struct V2ChartMeta { + #[serde(default)] + name: String, +} + +/// Open the migrated DuckDB at `duckdb_path`, fetch `/api/metadata`, +/// and produce a structural diff. +pub fn run(v2_server: &str, duckdb_path: &Path) -> Result { + let v3 = collect_v3_groups(duckdb_path)?; + let v2 = fetch_v2_metadata(v2_server)?; + Ok(diff(&v2, &v3)) +} + +fn collect_v3_groups(duckdb_path: &Path) -> Result>> { + let conn = Connection::open(duckdb_path) + .with_context(|| format!("opening DuckDB at {}", duckdb_path.display()))?; + let mut groups: BTreeMap> = BTreeMap::new(); + + // query_measurements: chart per (dataset, query_idx); group per + // (dataset, dataset_variant, scale_factor, storage). 
We want v2 + // group display names so the verifier can compare apples to + // apples, so we re-format them here using the same suite table. + let mut stmt = conn.prepare( + r#" + SELECT dataset, dataset_variant, scale_factor, storage, query_idx + FROM query_measurements + GROUP BY dataset, dataset_variant, scale_factor, storage, query_idx + "#, + )?; + let rows = stmt.query_map([], |row| { + Ok(( + row.get::<_, String>(0)?, + row.get::<_, Option>(1)?, + row.get::<_, Option>(2)?, + row.get::<_, String>(3)?, + row.get::<_, i32>(4)?, + )) + })?; + for row in rows { + let (dataset, _variant, sf, storage, query_idx) = row?; + let group_name = display_query_group(&dataset, sf.as_deref(), &storage); + let chart_name = chart_name_query(&dataset, query_idx); + groups + .entry(group_name) + .or_default() + .insert(normalize_chart(&chart_name)); + } + + // compression_times: group "Compression", charts per dataset. + let mut stmt = conn.prepare( + r#" + SELECT dataset, format, op + FROM compression_times + GROUP BY dataset, format, op + "#, + )?; + let rows = stmt.query_map([], |row| { + Ok(( + row.get::<_, String>(0)?, + row.get::<_, String>(1)?, + row.get::<_, String>(2)?, + )) + })?; + for row in rows { + let (dataset, format, op) = row?; + let chart = chart_name_compression_time(&format, &op, &dataset); + groups + .entry("Compression".to_string()) + .or_default() + .insert(normalize_chart(&chart)); + } + + let mut stmt = conn.prepare( + r#" + SELECT dataset, format + FROM compression_sizes + GROUP BY dataset, format + "#, + )?; + let rows = stmt.query_map([], |row| { + Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?)) + })?; + for row in rows { + let (_dataset, format) = row?; + let chart = chart_name_compression_size(&format); + groups + .entry("Compression Size".to_string()) + .or_default() + .insert(normalize_chart(&chart)); + } + + let mut stmt = conn.prepare( + r#" + SELECT DISTINCT dataset + FROM random_access_times + "#, + )?; + let rows = stmt.query_map([], |row| row.get::<_, String>(0))?; + for row in rows { + let dataset = row?; + groups + .entry("Random Access".to_string()) + .or_default() + .insert(normalize_chart(&dataset)); + } + + Ok(groups) +} + +fn fetch_v2_metadata(server: &str) -> Result>> { + let url = format!("{}/api/metadata", server.trim_end_matches('/')); + let body = reqwest::blocking::get(&url) + .with_context(|| format!("GET {url}"))? + .error_for_status() + .with_context(|| format!("non-2xx from {url}"))? 
+ .json::() + .with_context(|| format!("parsing {url} as v2 /api/metadata"))?; + let mut out: BTreeMap> = BTreeMap::new(); + for (name, group) in body.groups { + let charts = group + .charts + .into_iter() + .map(|c| normalize_chart(&c.name)) + .collect(); + out.insert(name, charts); + } + Ok(out) +} + +fn diff( + v2: &BTreeMap>, + v3: &BTreeMap>, +) -> VerifyReport { + let mut report = VerifyReport::default(); + let v2_keys: BTreeSet<&String> = v2.keys().collect(); + let v3_keys: BTreeSet<&String> = v3.keys().collect(); + for g in v2_keys.intersection(&v3_keys) { + report.matched_groups.push((**g).clone()); + let v2_charts = &v2[*g]; + let v3_charts = &v3[*g]; + if v2_charts.len() != v3_charts.len() { + report.chart_diffs.push(ChartDiff { + group: (**g).clone(), + v2_count: v2_charts.len(), + v3_count: v3_charts.len(), + }); + } + } + for g in v3_keys.difference(&v2_keys) { + report.only_in_v3.push((**g).clone()); + } + for g in v2_keys.difference(&v3_keys) { + report.only_in_v2.push((**g).clone()); + } + report.matched_groups.sort(); + report.only_in_v3.sort(); + report.only_in_v2.sort(); + report +} + +fn display_query_group(dataset: &str, scale_factor: Option<&str>, storage: &str) -> String { + let suite = QUERY_SUITES + .iter() + .find(|s| s.prefix.eq_ignore_ascii_case(dataset)) + .copied(); + match suite { + Some(suite) if suite.fan_out => { + let storage_disp = match storage { + "s3" | "S3" => "S3", + _ => "NVMe", + }; + let sf = scale_factor.unwrap_or("1"); + format!("{} ({}) (SF={})", suite.display_name, storage_disp, sf) + } + Some(suite) => suite.display_name.to_string(), + None => format!("{dataset} ({storage})"), + } +} + +fn chart_name_query(dataset: &str, query_idx: i32) -> String { + let suite = QUERY_SUITES + .iter() + .find(|s| s.prefix.eq_ignore_ascii_case(dataset)) + .copied(); + match suite { + Some(suite) => format!("{} Q{}", suite.query_prefix, query_idx), + None => format!("{} Q{}", dataset.to_uppercase(), query_idx), + } +} + +fn chart_name_compression_time(format: &str, op: &str, _dataset: &str) -> String { + // Re-derive the v2 chart name (the metric, not the dataset) so we + // can compare. v2's chart axis is the metric; series is the + // dataset. v3 inverts that. For structural comparison, we project + // back to v2's per-chart key. + match (format, op) { + ("vortex-file-compressed", "encode") => "COMPRESS TIME".into(), + ("vortex-file-compressed", "decode") => "DECOMPRESS TIME".into(), + ("parquet", "encode") => "PARQUET RS ZSTD COMPRESS TIME".into(), + ("parquet", "decode") => "PARQUET RS ZSTD DECOMPRESS TIME".into(), + ("lance", "encode") => "LANCE COMPRESS TIME".into(), + ("lance", "decode") => "LANCE DECOMPRESS TIME".into(), + _ => format!("{} {} TIME", format.to_uppercase(), op.to_uppercase()), + } +} + +fn chart_name_compression_size(format: &str) -> String { + match format { + "vortex-file-compressed" => "VORTEX SIZE".into(), + "parquet" => "PARQUET SIZE".into(), + "lance" => "LANCE SIZE".into(), + _ => format!("{} SIZE", format.to_uppercase()), + } +} + +/// Strip casing and `_-` differences between v2 and v3 chart names. +/// v2 displays uppercase; v3 stores raw values. Comparing in this +/// canonical form is enough for structural verification. 
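+/// Example: `tpc-h q1`, `tpc_h q1`, and `TPC H Q1` all normalize to `TPC H Q1`.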
+fn normalize_chart(s: &str) -> String { + s.trim() + .to_uppercase() + .replace(['_', '-'], " ") + .split_whitespace() + .collect::>() + .join(" ") +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn normalize_chart_canonicalizes() { + assert_eq!(normalize_chart("taxi/take"), "TAXI/TAKE"); + assert_eq!(normalize_chart("TAXI/TAKE"), "TAXI/TAKE"); + assert_eq!(normalize_chart("tpc-h q1"), "TPC H Q1"); + assert_eq!(normalize_chart("tpc h q1"), "TPC H Q1"); + } + + #[test] + fn display_query_group_handles_fan_out() { + assert_eq!( + display_query_group("tpch", Some("10"), "s3"), + "TPC-H (S3) (SF=10)" + ); + assert_eq!( + display_query_group("tpch", Some("100"), "nvme"), + "TPC-H (NVMe) (SF=100)" + ); + assert_eq!( + display_query_group("clickbench", None, "nvme"), + "Clickbench" + ); + } +} diff --git a/benchmarks-website/migrate/tests/classifier.rs b/benchmarks-website/migrate/tests/classifier.rs new file mode 100644 index 00000000000..71e97cb6c9a --- /dev/null +++ b/benchmarks-website/migrate/tests/classifier.rs @@ -0,0 +1,531 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Classifier behavior pinned by representative v2 names from each +//! group in `benchmarks-website/server.js`'s `getGroup`. + +use rstest::rstest; +use serde_json::json; +use vortex_bench_migrate::classifier::Outcome; +use vortex_bench_migrate::classifier::Skip; +use vortex_bench_migrate::classifier::V3Bin; +use vortex_bench_migrate::classifier::classify; +use vortex_bench_migrate::classifier::classify_outcome; +use vortex_bench_migrate::classifier::format_query; +use vortex_bench_migrate::classifier::rename_engine; +use vortex_bench_migrate::v2::V2Record; + +fn record(name: &str) -> V2Record { + V2Record { + name: name.to_string(), + commit_id: Some("deadbeef".into()), + unit: Some("ns".into()), + value: Some(json!(123)), + storage: None, + dataset: None, + all_runtimes: None, + env_triple: None, + } +} + +fn record_with_storage_and_sf(name: &str, storage: &str, suite: &str, sf: &str) -> V2Record { + let mut r = record(name); + r.storage = Some(storage.into()); + r.dataset = Some(json!({ suite: { "scale_factor": sf } })); + r +} + +#[rstest] +#[case::clickbench( + "clickbench_q07/datafusion:parquet", + V3Bin::Query { + dataset: "clickbench".into(), + dataset_variant: None, + scale_factor: None, + query_idx: 7, + storage: "nvme".into(), + engine: "datafusion".into(), + format: "parquet".into(), + }, +)] +#[case::clickbench_vortex_renamed( + "clickbench_q12/datafusion:vortex-file-compressed", + V3Bin::Query { + dataset: "clickbench".into(), + dataset_variant: None, + scale_factor: None, + query_idx: 12, + storage: "nvme".into(), + engine: "datafusion".into(), + format: "vortex-file-compressed".into(), + }, +)] +#[case::statpopgen( + "statpopgen_q3/datafusion:parquet", + V3Bin::Query { + dataset: "statpopgen".into(), + dataset_variant: None, + scale_factor: None, + query_idx: 3, + storage: "nvme".into(), + engine: "datafusion".into(), + format: "parquet".into(), + }, +)] +#[case::polarsignals( + "polarsignals_q1/duckdb:parquet", + V3Bin::Query { + dataset: "polarsignals".into(), + dataset_variant: None, + scale_factor: None, + query_idx: 1, + storage: "nvme".into(), + engine: "duckdb".into(), + format: "parquet".into(), + }, +)] +fn non_fan_out_query_records(#[case] name: &str, #[case] expected: V3Bin) { + let r = record(name); + assert_eq!(classify(&r), Some(expected)); +} + +#[rstest] +#[case::tpch_s3_sf100( + "tpch_q01/datafusion:parquet", + 
"S3", + "tpch", + "100", + V3Bin::Query { + dataset: "tpch".into(), + dataset_variant: None, + scale_factor: Some("100".into()), + query_idx: 1, + storage: "s3".into(), + engine: "datafusion".into(), + format: "parquet".into(), + }, +)] +#[case::tpch_nvme_sf1( + "tpch_q22/duckdb:vortex-file-compressed", + "NVMe", + "tpch", + "1", + V3Bin::Query { + dataset: "tpch".into(), + dataset_variant: None, + scale_factor: Some("1".into()), + query_idx: 22, + storage: "nvme".into(), + engine: "duckdb".into(), + format: "vortex-file-compressed".into(), + }, +)] +#[case::tpcds_nvme_sf10( + "tpcds_q05/datafusion:vortex-file-compressed", + "NVMe", + "tpcds", + "10", + V3Bin::Query { + dataset: "tpcds".into(), + dataset_variant: None, + scale_factor: Some("10".into()), + query_idx: 5, + storage: "nvme".into(), + engine: "datafusion".into(), + format: "vortex-file-compressed".into(), + }, +)] +fn fan_out_query_records( + #[case] name: &str, + #[case] storage: &str, + #[case] suite: &str, + #[case] sf: &str, + #[case] expected: V3Bin, +) { + let r = record_with_storage_and_sf(name, storage, suite, sf); + assert_eq!(classify(&r), Some(expected)); +} + +#[rstest] +#[case::random_access_4_part( + "random-access/taxi/take/parquet-tokio-local-disk", + V3Bin::RandomAccess { + dataset: "taxi/take".into(), + format: "parquet".into(), + }, +)] +#[case::random_access_4_part_vortex( + "random-access/chimp/take/vortex-tokio-local-disk", + V3Bin::RandomAccess { + dataset: "chimp/take".into(), + format: "vortex-file-compressed".into(), + }, +)] +#[case::random_access_4_part_lance( + "random-access/taxi/take/lance-tokio-local-disk", + V3Bin::RandomAccess { + dataset: "taxi/take".into(), + format: "lance".into(), + }, +)] +fn random_access_records(#[case] name: &str, #[case] expected: V3Bin) { + let r = record(name); + assert_eq!(classify(&r), Some(expected)); +} + +#[rstest] +#[case::compress_time_vortex( + "compress time/clickbench", + V3Bin::CompressionTime { + dataset: "clickbench".into(), + dataset_variant: None, + format: "vortex-file-compressed".into(), + op: "encode".into(), + }, +)] +#[case::decompress_time_vortex( + "decompress time/tpch_lineitem", + V3Bin::CompressionTime { + dataset: "tpch_lineitem".into(), + dataset_variant: None, + format: "vortex-file-compressed".into(), + op: "decode".into(), + }, +)] +#[case::parquet_compress( + "parquet_rs-zstd compress time/clickbench", + V3Bin::CompressionTime { + dataset: "clickbench".into(), + dataset_variant: None, + format: "parquet".into(), + op: "encode".into(), + }, +)] +#[case::lance_decompress( + "lance decompress time/clickbench", + V3Bin::CompressionTime { + dataset: "clickbench".into(), + dataset_variant: None, + format: "lance".into(), + op: "decode".into(), + }, +)] +fn compression_time_records(#[case] name: &str, #[case] expected: V3Bin) { + let r = record(name); + assert_eq!(classify(&r), Some(expected)); +} + +#[rstest] +#[case::vortex_size( + "vortex size/clickbench", + V3Bin::CompressionSize { + dataset: "clickbench".into(), + dataset_variant: None, + format: "vortex-file-compressed".into(), + }, +)] +#[case::vortex_file_compressed_size_normalizes( + "vortex-file-compressed size/clickbench", + V3Bin::CompressionSize { + dataset: "clickbench".into(), + dataset_variant: None, + format: "vortex-file-compressed".into(), + }, +)] +#[case::parquet_size( + "parquet size/clickbench", + V3Bin::CompressionSize { + dataset: "clickbench".into(), + dataset_variant: None, + format: "parquet".into(), + }, +)] +#[case::lance_size( + "lance size/tpch_lineitem", + 
V3Bin::CompressionSize { + dataset: "tpch_lineitem".into(), + dataset_variant: None, + format: "lance".into(), + }, +)] +fn compression_size_records(#[case] name: &str, #[case] expected: V3Bin) { + let r = record(name); + assert_eq!(classify(&r), Some(expected)); +} + +#[rstest] +#[case::ratio_vortex_parquet("vortex:parquet-zstd ratio compress time/clickbench")] +#[case::ratio_vortex_lance("vortex:lance ratio decompress time/clickbench")] +#[case::ratio_size_vortex_parquet("vortex:parquet-zstd size/clickbench")] +#[case::ratio_size_vortex_raw("vortex:raw size/clickbench")] +#[case::throughput("compress throughput/clickbench")] +#[case::nonsense_prefix("not-a-known-bench/series")] +#[case::random_access_3_part("random-access/taxi/parquet-tokio-local-disk")] +fn unmapped_records_yield_none(#[case] name: &str) { + let r = record(name); + assert_eq!( + classify(&r), + None, + "expected {name:?} to classify as None (drop)", + ); +} + +#[rstest] +#[case::parquet_2_part( + "random-access/parquet-tokio-local-disk", + V3Bin::RandomAccess { + dataset: "taxi".into(), + format: "parquet".into(), + }, +)] +#[case::vortex_2_part( + "random-access/vortex-tokio-local-disk", + V3Bin::RandomAccess { + dataset: "taxi".into(), + format: "vortex-file-compressed".into(), + }, +)] +#[case::lance_2_part( + "random-access/lance-tokio-local-disk", + V3Bin::RandomAccess { + dataset: "taxi".into(), + format: "lance".into(), + }, +)] +fn random_access_2_part_legacy_recovered_as_taxi(#[case] name: &str, #[case] expected: V3Bin) { + // The 2-part shape `random-access/-tokio-local-disk` is + // emitted by `random-access-bench`'s legacy taxi run (no + // `AccessPattern`, see `measurement_name` in + // `benchmarks/random-access-bench/src/main.rs`). The live v3 + // emitter writes `dataset="taxi"` for those measurements, so the + // historical 2-part records on S3 must land in the same v3 + // chart instead of being dropped as `UnsupportedShape`. + let r = record(name); + assert_eq!( + classify(&r), + Some(expected), + "2-part legacy random-access must recover as dataset=taxi" + ); +} + +#[rstest] +#[case::parquet_footer("random-access/parquet-tokio-local-disk-footer")] +#[case::vortex_footer("random-access/vortex-tokio-local-disk-footer")] +#[case::lance_footer("random-access/lance-tokio-local-disk-footer")] +fn random_access_2_part_footer_is_deprecated(#[case] name: &str) { + // The reopen-mode `-footer` variant is a different access pattern + // (file is reopened per take). The live v3 emitter passes the + // bare `format.name()` for both reopen and cached, so it can't + // distinguish them on the wire. Keep migration consistent with + // that by routing `-footer` 2-part records to Skip::Deprecated + // (they don't strip clean to a v3-allowlisted format). + let r = record(name); + assert!( + matches!(classify_outcome(&r), Outcome::Skip(Skip::Deprecated)), + "2-part `-footer` random-access must be Skip::Deprecated" + ); +} + +#[rstest] +#[case::parquet_footer("random-access/taxi/correlated/parquet-tokio-local-disk-footer")] +#[case::vortex_footer("random-access/feature-vectors/uniform/vortex-tokio-local-disk-footer")] +#[case::lance_footer("random-access/nested-structs/correlated/lance-tokio-local-disk-footer")] +fn random_access_4_part_footer_is_deprecated(#[case] name: &str) { + // Same reasoning as 2-part `-footer`: the format string ends in + // `-tokio-local-disk-footer`, the strip_suffix doesn't match, and + // the unstripped value fails the V3_FORMATS allowlist. 
+ let r = record(name); + assert!( + matches!(classify_outcome(&r), Outcome::Skip(Skip::Deprecated)), + "4-part `-footer` random-access must be Skip::Deprecated" + ); +} + +#[test] +fn parquet_zstd_size_is_deprecated() { + // `parquet-zstd` is not on the v3 emitter's format allowlist, so + // historical `parquet-zstd size/...` records bucket under + // Skip::Deprecated and don't render as orphan charts in v3. + let r = record("parquet-zstd size/clickbench"); + assert!(matches!( + classify_outcome(&r), + Outcome::Skip(Skip::Deprecated) + )); +} + +#[test] +fn vortex_parquet_zstd_ratio_is_intentional_skip() { + let r = record("vortex:parquet-zstd ratio compress time/clickbench"); + assert!(matches!( + classify_outcome(&r), + Outcome::Skip(Skip::DerivedRatio) + )); +} + +#[test] +fn vortex_parquet_zst_typo_ratio_is_intentional_skip() { + // `parquet-zst` (no trailing `d`) was emitted by some v2 runs. + // Both spellings should classify as derived ratios. + for name in [ + "vortex:parquet-zst ratio compress time/clickbench", + "vortex:parquet-zst ratio decompress time/clickbench", + ] { + let r = record(name); + assert!( + matches!(classify_outcome(&r), Outcome::Skip(Skip::DerivedRatio)), + "{name:?} should be DerivedRatio", + ); + } +} + +#[test] +fn throughput_is_intentional_skip() { + let r = record("compress throughput/clickbench"); + assert!(matches!( + classify_outcome(&r), + Outcome::Skip(Skip::Throughput) + )); +} + +#[test] +fn unknown_prefix_is_unknown() { + let r = record("not-a-known-bench/series"); + assert!(matches!(classify_outcome(&r), Outcome::Unknown)); +} + +#[test] +fn gharchive_q00_is_deprecated() { + // gharchive isn't on the v3 query-suite allowlist, so historical + // gharchive query records bucket as Skip::Deprecated. + let r = record("gharchive_q00/datafusion:parquet"); + assert!(matches!( + classify_outcome(&r), + Outcome::Skip(Skip::Deprecated) + )); +} + +#[test] +fn fineweb_q00_classifies() { + // fineweb is on V3_QUERY_SUITES (still emitted by v3 CI per + // .github/workflows/sql-benchmarks.yml's `fineweb` matrix entry), + // so historical fineweb records ingest like any other suite. + let r = record("fineweb_q00/datafusion:parquet"); + assert!(matches!( + classify_outcome(&r), + Outcome::Bin(V3Bin::Query { .. }) + )); +} + +#[test] +fn memory_record_is_historical_memory_skip() { + // v2 emitted `_q_memory/:` records that + // carry top-level memory fields V2Record doesn't deserialize. + // Skip them with a known variant so they don't trip the 5% gate. + let r = record("clickbench_q07_memory/datafusion:parquet"); + assert!(matches!( + classify_outcome(&r), + Outcome::Skip(Skip::HistoricalMemory) + )); +} + +#[test] +fn tpch_compression_size_carries_scale_factor() { + // The data.json.gz "vortex size/tpch" path needs to derive + // dataset_variant from the v2 record's `dataset` object, the same + // way the file-sizes path does. Otherwise SF=10 rows from the two + // sources never collide on `mid` and produce duplicate rows. 
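+    // (`mid` is shorthand for the per-table `measurement_id` hash both paths compute.)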
+ let mut r = record("vortex size/tpch"); + r.dataset = Some(serde_json::json!({ "tpch": { "scale_factor": "10" } })); + let outcome = classify_outcome(&r); + let Outcome::Bin(V3Bin::CompressionSize { + dataset, + dataset_variant, + format, + }) = outcome + else { + panic!("expected Bin(CompressionSize), got {outcome:?}"); + }; + assert_eq!(dataset, "tpch"); + assert_eq!(dataset_variant, Some("10".into())); + assert_eq!(format, "vortex-file-compressed"); +} + +#[test] +fn tpch_compression_size_drops_default_scale_factor() { + // SF "1.0" matches the file-sizes path's filter and collapses to + // dataset_variant: None. + let mut r = record("vortex size/tpch"); + r.dataset = Some(serde_json::json!({ "tpch": { "scale_factor": "1.0" } })); + let outcome = classify_outcome(&r); + let Outcome::Bin(V3Bin::CompressionSize { + dataset_variant, .. + }) = outcome + else { + panic!("expected Bin(CompressionSize), got {outcome:?}"); + }; + assert_eq!(dataset_variant, None); +} + +#[rstest] +// SF=1 is the implicit default; both spellings must drop to None so +// `bin_compression_size` and `migrate_file_sizes` agree. +#[case::int_one("1", None)] +#[case::float_one("1.0", None)] +// SF=10 must produce the same canonical string regardless of spelling. +#[case::int_ten("10", Some("10".into()))] +#[case::float_ten("10.0", Some("10".into()))] +#[case::float_fractional("0.1", Some("0.1".into()))] +#[case::whitespace(" 10 ", Some("10".into()))] +#[case::empty("", None)] +fn compression_size_scale_factor_canonicalizes( + #[case] raw_sf: &str, + #[case] expected: Option, +) { + let mut r = record("vortex size/tpch"); + r.dataset = Some(serde_json::json!({ "tpch": { "scale_factor": raw_sf } })); + let outcome = classify_outcome(&r); + let Outcome::Bin(V3Bin::CompressionSize { + dataset_variant, .. + }) = outcome + else { + panic!("expected Bin(CompressionSize) for sf={raw_sf:?}, got {outcome:?}"); + }; + assert_eq!(dataset_variant, expected, "sf={raw_sf:?}"); +} + +#[test] +fn engine_casing_lowercased() { + // Older v2 records emitted display-case engines like `DataFusion` + // and `DuckDB`. The classifier lowercases at push time so dedup + // collapses display-case rows into the canonical lowercase ones. + let r = record("clickbench_q07/DataFusion:parquet"); + let outcome = classify_outcome(&r); + let Outcome::Bin(V3Bin::Query { engine, format, .. }) = outcome else { + panic!("expected Bin(Query), got {outcome:?}"); + }; + assert_eq!(engine, "datafusion"); + assert_eq!(format, "parquet"); +} + +#[test] +fn rename_engine_pins_canonical_outputs() { + assert_eq!(rename_engine("vortex-tokio-local-disk"), "vortex-nvme"); + assert_eq!( + rename_engine("datafusion:vortex-file-compressed"), + "datafusion:vortex" + ); + assert_eq!(rename_engine("LANCE"), "lance"); +} + +#[test] +fn format_query_pins_v2_display() { + assert_eq!(format_query("clickbench_q00"), "CLICKBENCH Q0"); + assert_eq!(format_query("tpch_q22"), "TPC-H Q22"); + assert_eq!(format_query("tpcds_q42"), "TPC-DS Q42"); + assert_eq!(format_query("polarsignals_q1"), "POLARSIGNALS Q1"); + // Names that don't match a suite fall back to upper + " " replace. 
+ assert_eq!( + format_query("vortex-file-compressed size"), + "VORTEX FILE COMPRESSED SIZE" + ); +} diff --git a/benchmarks-website/migrate/tests/end_to_end.rs b/benchmarks-website/migrate/tests/end_to_end.rs new file mode 100644 index 00000000000..25bf5c0ad55 --- /dev/null +++ b/benchmarks-website/migrate/tests/end_to_end.rs @@ -0,0 +1,450 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Inline JSONL fixtures driven through the full migration into a +//! tempdir DuckDB. No live S3. + +use std::fs::File; +use std::io::Write; +use std::path::Path; + +use duckdb::Connection; +use flate2::Compression; +use flate2::write::GzEncoder; +use tempfile::TempDir; +use vortex_bench_migrate::migrate; +use vortex_bench_migrate::source::Source; + +const COMMITS_JSONL: &str = r#"{"id":"deadbeef","timestamp":"2026-04-25T00:00:00Z","message":"fixture commit","author":{"name":"A","email":"a@example.com"},"committer":{"name":"C","email":"c@example.com"},"tree_id":"abcd0001","url":"https://example.com/commit/deadbeef"} +"#; + +const DATA_JSONL: &str = r#"{"name":"clickbench_q07/datafusion:parquet","commit_id":"deadbeef","unit":"ns","value":42000,"all_runtimes":[41000,42000,43000]} +{"name":"compress time/clickbench","commit_id":"deadbeef","unit":"ns","value":99} +{"name":"vortex size/clickbench","commit_id":"deadbeef","unit":"bytes","value":1024} +{"name":"random-access/taxi/take/parquet-tokio-local-disk","commit_id":"deadbeef","unit":"ns","value":777,"all_runtimes":[700,777,800]} +"#; + +/// Build a local-source fixture directory. Caller supplies the contents +/// of `commits.json`, `data.json.gz`, and any number of +/// `file-sizes-*.json.gz` files (name → contents). +fn build_fixture(commits: &str, data: &str, file_sizes: &[(&str, &str)]) -> TempDir { + let dir = TempDir::new().expect("tempdir"); + write_text(&dir.path().join("commits.json"), commits); + write_gz(&dir.path().join("data.json.gz"), data); + for (name, body) in file_sizes { + write_gz(&dir.path().join(name), body); + } + dir +} + +fn write_text(path: &Path, body: &str) { + let mut f = File::create(path).unwrap(); + f.write_all(body.as_bytes()).unwrap(); +} + +fn write_gz(path: &Path, body: &str) { + let f = File::create(path).unwrap(); + let mut gz = GzEncoder::new(f, Compression::default()); + gz.write_all(body.as_bytes()).unwrap(); + gz.finish().unwrap(); +} + +#[test] +fn migrate_inline_fixture_populates_each_table() { + let src_dir = build_fixture(COMMITS_JSONL, DATA_JSONL, &[]); + let target_dir = TempDir::new().unwrap(); + let target = target_dir.path().join("v3.duckdb"); + + let summary = migrate::run(&Source::Local(src_dir.path().into()), &target).unwrap(); + + assert_eq!(summary.records_read, 4, "summary={summary}"); + assert_eq!(summary.uncategorized, 0, "summary={summary}"); + assert_eq!(summary.commits_inserted, 1); + assert_eq!(summary.query_inserted, 1); + assert_eq!(summary.compression_time_inserted, 1); + assert_eq!(summary.compression_size_inserted, 1); + assert_eq!(summary.random_access_inserted, 1); + + let conn = Connection::open(&target).unwrap(); + let count = |table: &str| -> i64 { + conn.query_row(&format!("SELECT COUNT(*) FROM {table}"), [], |r| r.get(0)) + .unwrap() + }; + assert_eq!(count("commits"), 1); + assert_eq!(count("query_measurements"), 1); + assert_eq!(count("compression_times"), 1); + assert_eq!(count("compression_sizes"), 1); + assert_eq!(count("random_access_times"), 1); + + // Spot-check the v3 column values for each kind. 
+ let (engine, format, query_idx, value_ns): (String, String, i32, i64) = conn + .query_row( + "SELECT engine, format, query_idx, value_ns FROM query_measurements", + [], + |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?, r.get(3)?)), + ) + .unwrap(); + assert_eq!(engine, "datafusion"); + assert_eq!(format, "parquet"); + assert_eq!(query_idx, 7); + assert_eq!(value_ns, 42000); + + let (dataset, format, op): (String, String, String) = conn + .query_row( + "SELECT dataset, format, op FROM compression_times", + [], + |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?)), + ) + .unwrap(); + assert_eq!(dataset, "clickbench"); + assert_eq!(format, "vortex-file-compressed"); + assert_eq!(op, "encode"); + + let (dataset, format, value_bytes): (String, String, i64) = conn + .query_row( + "SELECT dataset, format, value_bytes FROM compression_sizes", + [], + |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?)), + ) + .unwrap(); + assert_eq!(dataset, "clickbench"); + assert_eq!(format, "vortex-file-compressed"); + assert_eq!(value_bytes, 1024); + + let (dataset, format): (String, String) = conn + .query_row("SELECT dataset, format FROM random_access_times", [], |r| { + Ok((r.get(0)?, r.get(1)?)) + }) + .unwrap(); + assert_eq!(dataset, "taxi/take"); + assert_eq!(format, "parquet"); +} + +#[test] +fn dedup_collision_keeps_one_row() { + // Two data.json.gz lines whose query-measurement dim columns are + // identical (same commit / dataset / engine / format / query_idx, + // and `storage` collapses to "nvme" since `storage` is unset). + // Different `value`s. The accumulator's HashSet + // should drop the second one and bump `summary.deduped`. + const DATA: &str = r#"{"name":"clickbench_q07/datafusion:parquet","commit_id":"deadbeef","unit":"ns","value":111} +{"name":"clickbench_q07/datafusion:parquet","commit_id":"deadbeef","unit":"ns","value":222} +"#; + + let src_dir = build_fixture(COMMITS_JSONL, DATA, &[]); + let target_dir = TempDir::new().unwrap(); + let target = target_dir.path().join("v3.duckdb"); + + let summary = migrate::run(&Source::Local(src_dir.path().into()), &target).unwrap(); + + assert_eq!(summary.records_read, 2, "summary={summary}"); + assert_eq!(summary.query_inserted, 1, "summary={summary}"); + assert_eq!(summary.deduped, 1, "summary={summary}"); + + let conn = Connection::open(&target).unwrap(); + let n: i64 = conn + .query_row("SELECT COUNT(*) FROM query_measurements", [], |r| r.get(0)) + .unwrap(); + assert_eq!(n, 1); +} + +#[test] +fn dedup_with_conflicting_value_ns_is_counted() { + // Same dim columns, different `value`s. Dedup keeps the first + // and bumps `deduped_with_conflict` because the dropped row's + // value_ns differed from the kept row's. This is the signal we + // care about when watching for silent value-corruption across + // duplicated v2 emissions. + const DATA: &str = r#"{"name":"clickbench_q07/datafusion:parquet","commit_id":"deadbeef","unit":"ns","value":111} +{"name":"clickbench_q07/datafusion:parquet","commit_id":"deadbeef","unit":"ns","value":222} +"#; + + let src_dir = build_fixture(COMMITS_JSONL, DATA, &[]); + let target_dir = TempDir::new().unwrap(); + let target = target_dir.path().join("v3.duckdb"); + + let summary = migrate::run(&Source::Local(src_dir.path().into()), &target).unwrap(); + + assert_eq!(summary.deduped, 1, "summary={summary}"); + assert_eq!(summary.deduped_with_conflict, 1, "summary={summary}"); +} + +#[test] +fn dedup_with_matching_value_ns_does_not_count_conflict() { + // Same dim columns AND identical `value`s. 
Dedup still drops the + // duplicate, but `deduped_with_conflict` stays 0. + const DATA: &str = r#"{"name":"clickbench_q07/datafusion:parquet","commit_id":"deadbeef","unit":"ns","value":111} +{"name":"clickbench_q07/datafusion:parquet","commit_id":"deadbeef","unit":"ns","value":111} +"#; + + let src_dir = build_fixture(COMMITS_JSONL, DATA, &[]); + let target_dir = TempDir::new().unwrap(); + let target = target_dir.path().join("v3.duckdb"); + + let summary = migrate::run(&Source::Local(src_dir.path().into()), &target).unwrap(); + + assert_eq!(summary.deduped, 1, "summary={summary}"); + assert_eq!(summary.deduped_with_conflict, 0, "summary={summary}"); +} + +#[test] +fn compression_size_data_and_file_sizes_merge() { + // A `vortex size/tpch` record from data.json.gz and a + // file-sizes-tpch-nvme.json.gz row covering the same (commit, + // dataset, format, SF) tuple should produce the *same* + // measurement_id so the in-memory accumulator merges them into + // one row instead of two. + // + // Both sources use scale_factor "1.0", which both code paths + // filter out → dataset_variant: None on both sides → matching mid. + const DATA: &str = r#"{"name":"vortex size/tpch","commit_id":"deadbeef","unit":"bytes","value":200,"dataset":{"tpch":{"scale_factor":"1.0"}}} +"#; + const FILE_SIZES: &str = r#"{"commit_id":"deadbeef","benchmark":"tpch","scale_factor":"1.0","format":"vortex-file-compressed","file":"part-0.vortex","size_bytes":100} +"#; + + let src_dir = build_fixture( + COMMITS_JSONL, + DATA, + &[("file-sizes-tpch-nvme.json.gz", FILE_SIZES)], + ); + let target_dir = TempDir::new().unwrap(); + let target = target_dir.path().join("v3.duckdb"); + + let summary = migrate::run(&Source::Local(src_dir.path().into()), &target).unwrap(); + + assert_eq!(summary.compression_size_inserted, 1, "summary={summary}"); + + let conn = Connection::open(&target).unwrap(); + let (n, value_bytes): (i64, i64) = conn + .query_row( + "SELECT COUNT(*), SUM(value_bytes) FROM compression_sizes", + [], + |r| Ok((r.get(0)?, r.get(1)?)), + ) + .unwrap(); + assert_eq!(n, 1); + // data.json.gz seeds value_bytes=200, file-sizes adds 100. + assert_eq!(value_bytes, 300); +} + +#[test] +fn empty_author_email_stored_as_null() { + // v2 sometimes wrote `""` for blank author/email/message. The + // migrator normalizes those to None so DuckDB stores SQL NULL, + // letting the UI distinguish "missing metadata" from "empty + // string". Here author.email is "" — verify the column is NULL, + // not the empty string. + const COMMITS: &str = r#"{"id":"deadbeef","timestamp":"2026-04-25T00:00:00Z","message":"fixture","author":{"name":"A","email":""},"committer":{"name":"C","email":"c@example.com"},"tree_id":"abcd0001","url":"https://example.com/commit/deadbeef"} +"#; + + let src_dir = build_fixture(COMMITS, "", &[]); + let target_dir = TempDir::new().unwrap(); + let target = target_dir.path().join("v3.duckdb"); + + migrate::run(&Source::Local(src_dir.path().into()), &target).unwrap(); + + let conn = Connection::open(&target).unwrap(); + let is_null: bool = conn + .query_row( + "SELECT author_email IS NULL FROM commits WHERE commit_sha = 'deadbeef'", + [], + |r| r.get(0), + ) + .unwrap(); + assert!(is_null, "empty author.email must store as SQL NULL"); + + // Non-empty fields still round-trip as strings. 
+ let committer_email: String = conn + .query_row( + "SELECT committer_email FROM commits WHERE commit_sha = 'deadbeef'", + [], + |r| r.get(0), + ) + .unwrap(); + assert_eq!(committer_email, "c@example.com"); +} + +#[test] +fn open_target_db_removes_orphan_wal() { + // A `.wal` left from a previous crash with no main file present + // must still be removed so the next run starts from a known-empty + // state. Otherwise DuckDB can replay stale WAL into the fresh DB + // and corrupt subsequent inserts. + let target_dir = TempDir::new().unwrap(); + let target = target_dir.path().join("v3.duckdb"); + let wal = target_dir.path().join("v3.duckdb.wal"); + std::fs::write(&wal, b"orphan-wal-bytes").unwrap(); + assert!(wal.exists(), "precondition: orphan wal staged"); + assert!(!target.exists(), "precondition: no main db file"); + + let _conn = migrate::open_target_db(&target).unwrap(); + + // The migrator opens the DB after sweeping the WAL; DuckDB may + // recreate its own wal under load, but our pre-existing orphan + // bytes must not survive the sweep. We assert by content: either + // the path is missing, or its contents differ from the orphan we + // staged. + if wal.exists() { + let now = std::fs::read(&wal).unwrap(); + assert_ne!( + now, b"orphan-wal-bytes", + "orphan wal bytes must not survive open_target_db" + ); + } +} + +#[test] +fn file_sizes_unknown_id_falls_back_to_unknown_prefix() { + // A file-sizes-*.json.gz whose id isn't in + // `KNOWN_FILE_SIZES_SUITES`, with an empty `benchmark` field, used + // to surface as a bare id like `mystery-suite` and render as a + // dataset name. The migrator now prefixes those with `unknown:` + // so the UI can flag them. + const FILE_SIZES: &str = r#"{"commit_id":"deadbeef","benchmark":"","scale_factor":"","format":"vortex-file-compressed","file":"part-0.vortex","size_bytes":1000} +"#; + + let src_dir = build_fixture( + COMMITS_JSONL, + "", + &[("file-sizes-mystery-suite.json.gz", FILE_SIZES)], + ); + let target_dir = TempDir::new().unwrap(); + let target = target_dir.path().join("v3.duckdb"); + + migrate::run(&Source::Local(src_dir.path().into()), &target).unwrap(); + + let conn = Connection::open(&target).unwrap(); + let dataset: String = conn + .query_row("SELECT dataset FROM compression_sizes", [], |r| r.get(0)) + .unwrap(); + assert_eq!(dataset, "unknown:mystery-suite"); +} + +#[test] +fn file_sizes_known_id_uses_id_directly() { + // For a KNOWN_FILE_SIZES_SUITES id, the fallback path keeps the + // raw id (no `unknown:` prefix). `clickbench-nvme` is on the list. + const FILE_SIZES: &str = r#"{"commit_id":"deadbeef","benchmark":"","scale_factor":"","format":"vortex-file-compressed","file":"part-0.vortex","size_bytes":1000} +"#; + + let src_dir = build_fixture( + COMMITS_JSONL, + "", + &[("file-sizes-clickbench-nvme.json.gz", FILE_SIZES)], + ); + let target_dir = TempDir::new().unwrap(); + let target = target_dir.path().join("v3.duckdb"); + + migrate::run(&Source::Local(src_dir.path().into()), &target).unwrap(); + + let conn = Connection::open(&target).unwrap(); + let dataset: String = conn + .query_row("SELECT dataset FROM compression_sizes", [], |r| r.get(0)) + .unwrap(); + assert_eq!(dataset, "clickbench-nvme"); +} + +#[test] +fn compression_size_data_and_file_sizes_merge_with_canonical_sf() { + // Same logical SF written as `"10"` on the data.json.gz side and + // `"10.0"` on the file-sizes side. Both paths must canonicalize + // to `"10"` so the rows share a `measurement_id` and merge into + // one compression_sizes row. 
+ const DATA: &str = r#"{"name":"vortex size/tpch","commit_id":"deadbeef","unit":"bytes","value":200,"dataset":{"tpch":{"scale_factor":"10"}}} +"#; + const FILE_SIZES: &str = r#"{"commit_id":"deadbeef","benchmark":"tpch","scale_factor":"10.0","format":"vortex-file-compressed","file":"part-0.vortex","size_bytes":100} +"#; + + let src_dir = build_fixture( + COMMITS_JSONL, + DATA, + &[("file-sizes-tpch-nvme-10.json.gz", FILE_SIZES)], + ); + let target_dir = TempDir::new().unwrap(); + let target = target_dir.path().join("v3.duckdb"); + + let summary = migrate::run(&Source::Local(src_dir.path().into()), &target).unwrap(); + + assert_eq!(summary.compression_size_inserted, 1, "summary={summary}"); + let conn = Connection::open(&target).unwrap(); + let (n, value_bytes, dataset_variant): (i64, i64, String) = conn + .query_row( + "SELECT COUNT(*), SUM(value_bytes), MAX(dataset_variant) FROM compression_sizes", + [], + |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?)), + ) + .unwrap(); + assert_eq!(n, 1); + // data.json.gz seeds 200, file-sizes adds 100. + assert_eq!(value_bytes, 300); + assert_eq!(dataset_variant, "10"); +} + +#[test] +fn summary_counts_match_actual_rows_on_success() { + // Sister test to migrate::tests::flush_all_does_not_overcount_on_failure. + // On a fully successful run, the post-flush summary counters must + // equal `SELECT COUNT(*)` from each fact table. This is the + // invariant the flush-after-count refactor preserves. + let src_dir = build_fixture(COMMITS_JSONL, DATA_JSONL, &[]); + let target_dir = TempDir::new().unwrap(); + let target = target_dir.path().join("v3.duckdb"); + + let summary = migrate::run(&Source::Local(src_dir.path().into()), &target).unwrap(); + + let conn = Connection::open(&target).unwrap(); + let actual = |table: &str| -> u64 { + let n: i64 = conn + .query_row(&format!("SELECT COUNT(*) FROM {table}"), [], |r| r.get(0)) + .unwrap(); + n as u64 + }; + assert_eq!(summary.query_inserted, actual("query_measurements")); + assert_eq!( + summary.compression_time_inserted, + actual("compression_times") + ); + assert_eq!( + summary.compression_size_inserted, + actual("compression_sizes") + ); + assert_eq!( + summary.random_access_inserted, + actual("random_access_times") + ); +} + +#[test] +fn file_sizes_sum_into_one_row() { + // Two file-sizes rows sharing (commit, benchmark, format, + // scale_factor) and value_bytes 100 + 200 must collapse to a + // single compression_sizes row with 300. 
+ const FILE_SIZES: &str = r#"{"commit_id":"deadbeef","benchmark":"clickbench","scale_factor":"1.0","format":"vortex-file-compressed","file":"part-0.vortex","size_bytes":100} +{"commit_id":"deadbeef","benchmark":"clickbench","scale_factor":"1.0","format":"vortex-file-compressed","file":"part-1.vortex","size_bytes":200} +"#; + + let src_dir = build_fixture( + COMMITS_JSONL, + "", + &[("file-sizes-clickbench.json.gz", FILE_SIZES)], + ); + let target_dir = TempDir::new().unwrap(); + let target = target_dir.path().join("v3.duckdb"); + + let summary = migrate::run(&Source::Local(src_dir.path().into()), &target).unwrap(); + + assert_eq!(summary.file_size_inserted, 2, "summary={summary}"); + assert_eq!(summary.compression_size_inserted, 1, "summary={summary}"); + + let conn = Connection::open(&target).unwrap(); + let n: i64 = conn + .query_row("SELECT COUNT(*) FROM compression_sizes", [], |r| r.get(0)) + .unwrap(); + assert_eq!(n, 1); + let value_bytes: i64 = conn + .query_row("SELECT value_bytes FROM compression_sizes", [], |r| { + r.get(0) + }) + .unwrap(); + assert_eq!(value_bytes, 300); +} diff --git a/benchmarks-website/planning/00-overview.md b/benchmarks-website/planning/00-overview.md new file mode 100644 index 00000000000..c6c3e05c57f --- /dev/null +++ b/benchmarks-website/planning/00-overview.md @@ -0,0 +1,104 @@ + + +# 00 - Overview + +## What we're building + +A replacement for the current `bench.vortex.dev` site. The new +stack is a **single Rust binary** (axum + maud + duckdb-rs) that +owns a **DuckDB database** on local disk and serves the website +plus an `/api/ingest` route. CI eventually POSTs new benchmark +results there. There is no separate ingester service, no S3 +coordination layer for writes, no client-side WASM. + +The server crate is `vortex-bench-server` at +`benchmarks-website/server/`. + +## Phasing + +We build this in two phases. **Plan only the first.** + +### Alpha (this plan) + +The smallest end-to-end loop that proves the design: + +1. **Schema** locked enough to ingest one benchmark result. +2. **Server**: open DuckDB, accept a bearer-token-authenticated POST, + serve a couple of read routes. +3. **Emitter**: `vortex-bench --gh-json-v3` + a tiny POST script. +4. **Web UI**: one landing page + one chart page rendered against a + fixture DB. + +That's it. No production deploy, no historical data import, no CI +workflow integration, no admin tooling, no schema migration +framework, no auth beyond the shared bearer token. All of those +live in [`deferred.md`](./deferred.md). + +The alpha runs on a developer machine. v2 keeps running in +production unchanged. There is no cutover in alpha. + +### Phase 2 and beyond + +Once the alpha loop is green, we layer in production deploy, +historical migration, CI dual-write, and the rest of the v2-parity +work. Stubs are in [`deferred.md`](./deferred.md). + +## Architecture (alpha) + +One process, one DB file. The server is the API and the website. +The emitter writes JSONL of bare records; a small POST script +wraps and uploads them. CI isn't wired up yet; ingest happens +manually during alpha. + +## Components + +Three components for alpha. Each is one workstream, one branch, one +PR. 
+ +| Component | Plan | Owns | +|---|---|---| +| Server | [components/server.md](./components/server.md) | DuckDB open + schema, bearer-auth ingest, read routes, HTML routes mounted from web-ui | +| Emitter | [components/emitter.md](./components/emitter.md) | `vortex-bench --gh-json-v3` + the post-ingest script | +| Web UI | [components/web-ui.md](./components/web-ui.md) | Landing page + chart page, against a fixture DuckDB | + +### Dependencies + +The schema feeds all three components. The contracts feed the +server and the emitter. With both stable, **all three components +can be worked on in parallel**. + +## Goals + +In priority order: + +1. **End-to-end alpha loop works.** Emit → POST → store → render. +2. **Schema is the right shape.** Five fact tables (one per + measurement family) plus a `commits` dim. See + [`01-schema.md`](./01-schema.md). +3. **Each component is small enough that one agent can finish it + in one PR.** No mega-PRs. + +Cutover, parity, and "faster than v2" are explicit non-goals at +alpha; they come back in phase 2. + +## Shared docs + +- [`00-overview.md`](./00-overview.md) (this file) +- [`01-schema.md`](./01-schema.md) - the five fact tables + `commits` +- [`02-contracts.md`](./02-contracts.md) - wire shapes + HTTP error + matrix + auth header +- [`benchmark-mapping.md`](./benchmark-mapping.md) - existing + benchmarks → fact tables +- [`decisions.md`](./decisions.md) - resolved decisions +- [`deferred.md`](./deferred.md) - phase-2 stubs + +## Status of v2 during alpha + +v2 stays in production untouched. Do not edit +`benchmarks-website/server.js`, `benchmarks-website/src/`, or any +other v2 files at `benchmarks-website/` top level. v3 lives in the +sibling subdirectory at `benchmarks-website/server/` +(`vortex-bench-server` crate). diff --git a/benchmarks-website/planning/01-schema.md b/benchmarks-website/planning/01-schema.md new file mode 100644 index 00000000000..dfc6b05ba27 --- /dev/null +++ b/benchmarks-website/planning/01-schema.md @@ -0,0 +1,228 @@ + + +# 01 - DuckDB schema (alpha) + +The persistent data model. **One `commits` dim table plus five fact +tables, one per measurement family.** No lookup tables, no views, no +migration framework; those are deferred (see +[`deferred.md`](./deferred.md)). + +## Design principles + +1. **One fact table per (dim shape, value shape).** A row in any + fact table has every value column populated; NULLs only appear + in genuinely optional dimensions. +2. **No discriminator columns spanning families.** No `metric_kind` + enum forcing five shapes into one row. +3. **No JSON escape hatch.** New benchmark parameters become real + columns. Adding a nullable column is cheap; the readability win + is worth it. +4. **Hashed primary key per table.** Each fact table has a + `measurement_id` that is a deterministic 64-bit hash of + `commit_sha` plus that table's dimensional tuple. Including + `commit_sha` makes every (commit, dim) pair a distinct row - + that's what the chart pages render as a time series. + Server-internal; not on the wire. +5. **`commits` is the only dim table.** Engine, format, dataset, + etc. stay as inline strings; DuckDB's dictionary encoding makes + a lookup table pointless. +6. **Ratios are not stored.** Computed at query time from + `compression_sizes`. 
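+
+A read-time sketch of the last point, written in the style of the
+server's duckdb-rs queries (illustrative only: the column names are
+the ones defined below, and the exact SQL is the server's call):
+
+```rust
+// Hypothetical chart query: Vortex-vs-Parquet size ratio per commit,
+// derived on read instead of being stored as its own fact row.
+let mut stmt = conn.prepare(
+    r#"
+    SELECT v.commit_sha,
+           v.dataset,
+           v.value_bytes::DOUBLE / p.value_bytes AS ratio
+    FROM compression_sizes v
+    JOIN compression_sizes p
+      ON  p.commit_sha = v.commit_sha
+      AND p.dataset    = v.dataset
+      AND COALESCE(p.dataset_variant, '') = COALESCE(v.dataset_variant, '')
+    WHERE v.format = 'vortex-file-compressed'
+      AND p.format = 'parquet'
+    "#,
+)?;
+```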
+ +## Why five fact tables, not one + +The five families have genuinely different shapes: + +| Table | Shape sketch | +|---|---| +| `query_measurements` | dataset + query_idx + engine + format + storage → timing **and** memory | +| `compression_times` | dataset + format + op∈{encode,decode} → timing | +| `compression_sizes` | dataset + format → bytes | +| `random_access_times` | dataset + format → timing (different dataset namespace) | +| `vector_search_runs` | dataset + layout + flavor + threshold → timing + counters | + +Forcing them into one table either bloats every row with columns +that are NULL for ~99% of rows (`layout`, `flavor`, `threshold`, +`matches`, `rows_scanned`, `bytes_scanned`) or splits scan results +across multiple rows that have to be re-joined to render one chart. + +## Group / chart / series fit + +The render-time view used by `/api/groups` and `/api/chart/:slug` +is mechanically derivable per table: + +| Table | Group key | Chart key | Series key | +|---|---|---|---| +| `query_measurements` | `(dataset, dataset_variant, scale_factor, storage)` | `(dataset, query_idx)` | `(engine, format)` | +| `compression_times` | constant `"Compression"` | `(dataset, dataset_variant)` | `(format, op)` | +| `compression_sizes` | constant `"Compression Size"` | `(dataset, dataset_variant)` | `format` | +| `random_access_times` | constant `"Random Access"` | `dataset` | `format` | +| `vector_search_runs` | `(dataset, layout)` | `(dataset, layout, threshold)` | `flavor` | + +The classifier logic in v2's `v2-classifier.js` mostly disappears - +each table already knows what suite it represents. + +## Tables + +DDL is the server's call. Below is the column contract: name, type +family, and whether it's NOT NULL. The server agent picks exact +DuckDB types, indexes, and constraint syntax. + +### `commits` (dim) + +| Column | Type | Required? | Notes | +|---|---|---|---| +| `commit_sha` | string | yes (PK) | 40-hex lowercase | +| `timestamp` | timestamptz | yes | | +| `message` | string | optional | first line only | +| `author_name` | string | optional | | +| `author_email` | string | optional | | +| `committer_name` | string | optional | | +| `committer_email` | string | optional | | +| `tree_sha` | string | yes | | +| `url` | string | yes | | + +Populated from the envelope on every `/api/ingest` call. + +### `query_measurements` + +SQL query suites: TPC-H, TPC-DS, ClickBench, StatPopGen, +PolarSignals, Fineweb, GhArchive, Public-BI. Memory columns are +populated when the run was instrumented for memory; NULL otherwise. +Timing and memory share the row because they're produced together +for the same query execution. + +| Column | Type | Required? | Notes | +|---|---|---|---| +| `measurement_id` | int64 | yes (PK) | hash of dim tuple | +| `commit_sha` | string | yes | FK to `commits` | +| `dataset` | string | yes | `tpch`, `tpcds`, `clickbench`, ... | +| `dataset_variant` | string | optional | ClickBench flavor, Public-BI name | +| `scale_factor` | string | optional | TPC SF; n_rows for StatPopGen / PolarSignals | +| `query_idx` | int32 | yes | 1-based | +| `storage` | string | yes | `nvme` or `s3` | +| `engine` | string | yes | `datafusion`, `duckdb`, `vortex`, `arrow` | +| `format` | string | yes | `vortex-file-compressed`, `parquet`, `lance`, ... 
| +| `value_ns` | int64 | yes | median timing, ns | +| `all_runtimes_ns` | list<int64> | yes | per-iteration timings | +| `peak_physical` | int64 | optional | bytes | +| `peak_virtual` | int64 | optional | bytes | +| `physical_delta` | int64 | optional | bytes | +| `virtual_delta` | int64 | optional | bytes | +| `env_triple` | string | optional | e.g. `x86_64-linux-gnu` | + +### `compression_times` + +Encode/decode timings from `compress-bench`. + +| Column | Type | Required? | Notes | +|---|---|---|---| +| `measurement_id` | int64 | yes (PK) | | +| `commit_sha` | string | yes | FK | +| `dataset` | string | yes | | +| `dataset_variant` | string | optional | | +| `format` | string | yes | | +| `op` | string | yes | `encode` or `decode` | +| `value_ns` | int64 | yes | | +| `all_runtimes_ns` | list<int64> | yes | | +| `env_triple` | string | optional | | + +### `compression_sizes` + +On-disk sizes from `compress-bench`. One-shot, no per-iteration data. +Compression ratios in v2 (`vortex:parquet-zstd ratio/...`) are a +SELECT over this table joined to itself; they're not stored. + +| Column | Type | Required? | Notes | +|---|---|---|---| +| `measurement_id` | int64 | yes (PK) | | +| `commit_sha` | string | yes | FK | +| `dataset` | string | yes | | +| `dataset_variant` | string | optional | | +| `format` | string | yes | | +| `value_bytes` | int64 | yes | | + +### `random_access_times` + +Take-time timings from `random-access-bench`. Different dataset +namespace from `compression_times` - kept in its own table so +dataset filters never have to disambiguate which suite a row +belongs to. + +| Column | Type | Required? | Notes | +|---|---|---|---| +| `measurement_id` | int64 | yes (PK) | | +| `commit_sha` | string | yes | FK | +| `dataset` | string | yes | | +| `format` | string | yes | | +| `value_ns` | int64 | yes | | +| `all_runtimes_ns` | list<int64> | yes | | +| `env_triple` | string | optional | | + +### `vector_search_runs` + +Cosine-similarity scans from `vector-search-bench`. The only family +that emits a timing **plus side counters** for the same scan; +keeping them in one row avoids a 1:N split that has to be re-joined +on read. + +| Column | Type | Required? | Notes | +|---|---|---|---| +| `measurement_id` | int64 | yes (PK) | | +| `commit_sha` | string | yes | FK | +| `dataset` | string | yes | e.g. `cohere-large-10m` | +| `layout` | string | yes | `TrainLayout`, e.g. `partitioned` | +| `flavor` | string | yes | `VectorFlavor`, e.g. `vortex-turboquant` | +| `threshold` | double | yes | cosine threshold | +| `value_ns` | int64 | yes | per-scan wall time | +| `all_runtimes_ns` | list<int64> | yes | | +| `matches` | int64 | yes | | +| `rows_scanned` | int64 | yes | | +| `bytes_scanned` | int64 | yes | | +| `iterations` | int32 | yes | not part of the dim hash | +| `env_triple` | string | optional | | + +## `measurement_id` hash + +Per-table xxhash64 over `commit_sha` plus that table's dimensional +tuple. Including `commit_sha` makes every (commit, dim) pair a +distinct row, which is what the chart pages render as a time +series. The hash is **server-internal** - the wire never carries +it. The server's INSERT path computes it before each +`INSERT ... ON CONFLICT DO UPDATE`, which gives idempotent upsert +on re-emission of the same (commit, dim) pair. Encoding details +(input order, NULL handling, byte layout) are the server's call, +since the value never crosses a process boundary. 
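+
+One possible shape, purely for illustration — the hash function,
+input order, and NULL encoding below are assumptions, not the
+server's actual implementation:
+
+```rust
+use std::hash::Hasher;
+
+use twox_hash::XxHash64; // assumption: any stable 64-bit hash will do
+
+/// Sketch: one id per (commit_sha, dimensional tuple).
+fn measurement_id(commit_sha: &str, dims: &[Option<&str>]) -> i64 {
+    let mut h = XxHash64::with_seed(0);
+    h.write(commit_sha.as_bytes());
+    for dim in dims {
+        match dim {
+            // Tag present vs NULL so `Some("")` and `None` hash differently.
+            Some(v) => {
+                h.write_u8(1);
+                h.write(v.as_bytes());
+            }
+            None => h.write_u8(0),
+        }
+    }
+    h.finish() as i64
+}
+```
+
+The only property the schema relies on is determinism: the same
+(commit, dim) pair always reproduces the same id, which is what makes
+the `ON CONFLICT` upsert idempotent.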
+ +When the historical migrator lands (deferred), it reuses the +server's hash function via a shared crate. + +## Storage values + +`storage` is `'nvme'` or `'s3'`. Legacy `gcs` is dropped. Only +`query_measurements` carries `storage` - the other families don't +fan out by storage backend. + +## Schema changes during alpha + +There is no migration framework. If you change the schema: + +1. Update this doc. +2. Update the server's DDL. +3. Delete any local `bench.duckdb` and re-run. + +A real forward-only migration framework lands post-alpha. See +[`deferred.md`](./deferred.md). + +## What's intentionally NOT here (deferred) + +- `schema_meta` and migration framework. +- `known_engines` / `known_formats` / `known_datasets` lookup + tables and seed SQL. +- Views (`v_compression_ratios`, `v_latest_per_group`, etc.). +- Pre-downsampled aliases. +- A `microbench_runs` table - reserved as the next family to add + when microbench results start landing. diff --git a/benchmarks-website/planning/02-contracts.md b/benchmarks-website/planning/02-contracts.md new file mode 100644 index 00000000000..8f32072123d --- /dev/null +++ b/benchmarks-website/planning/02-contracts.md @@ -0,0 +1,227 @@ + + +# 02 - Wire contracts (alpha) + +The cross-component glue between the emitter, the POST script, and +the server. Wire-format only - implementations are local to each +component. + +If two components disagree about a shape, **this file is right** +and both update. + +## Records are discriminated by `kind` + +Each record on the wire carries a `kind` field that picks one of +the [five fact tables](./01-schema.md#tables). The emitter never +decides "what column" - it decides "what kind", and the rest of the +row is that kind's flat field set. + +| `kind` | Destination table | +|---|---| +| `query_measurement` | `query_measurements` | +| `compression_time` | `compression_times` | +| `compression_size` | `compression_sizes` | +| `random_access_time` | `random_access_times` | +| `vector_search_run` | `vector_search_runs` | + +**Unknown `kind` values cause a 400.** Unknown fields within a known +`kind` also cause a 400. Version skew should fail loudly. + +## Per-kind record shapes + +All shared metadata first; per-kind fields after. + +### `query_measurement` + +| Field | Type | Required? | Notes | +|---|---|---|---| +| `kind` | `"query_measurement"` | yes | discriminator | +| `commit_sha` | string | yes | 40-hex lowercase | +| `dataset` | string | yes | `tpch`, `tpcds`, `clickbench`, ... | +| `dataset_variant` | string | optional | ClickBench flavor, Public-BI name | +| `scale_factor` | string | optional | TPC SF; n_rows for StatPopGen / PolarSignals | +| `query_idx` | integer | yes | 1-based | +| `storage` | enum string | yes | `nvme` or `s3` | +| `engine` | string | yes | `datafusion`, `duckdb`, `vortex`, `arrow` | +| `format` | string | yes | `vortex-file-compressed`, `parquet`, `lance`, ... | +| `value_ns` | integer | yes | median timing, ns | +| `all_runtimes_ns` | array<integer> | yes | per-iteration timings (may be empty) | +| `peak_physical` | integer | optional | bytes | +| `peak_virtual` | integer | optional | bytes | +| `physical_delta` | integer | optional | bytes | +| `virtual_delta` | integer | optional | bytes | +| `env_triple` | string | optional | e.g. `x86_64-linux-gnu` | + +The four memory fields are populated together (all four or none). + +### `compression_time` + +| Field | Type | Required? 
| Notes | +|---|---|---|---| +| `kind` | `"compression_time"` | yes | | +| `commit_sha` | string | yes | | +| `dataset` | string | yes | | +| `dataset_variant` | string | optional | | +| `format` | string | yes | | +| `op` | enum string | yes | `encode` or `decode` | +| `value_ns` | integer | yes | | +| `all_runtimes_ns` | array<integer> | yes | | +| `env_triple` | string | optional | | + +### `compression_size` + +| Field | Type | Required? | Notes | +|---|---|---|---| +| `kind` | `"compression_size"` | yes | | +| `commit_sha` | string | yes | | +| `dataset` | string | yes | | +| `dataset_variant` | string | optional | | +| `format` | string | yes | | +| `value_bytes` | integer | yes | | + +### `random_access_time` + +| Field | Type | Required? | Notes | +|---|---|---|---| +| `kind` | `"random_access_time"` | yes | | +| `commit_sha` | string | yes | | +| `dataset` | string | yes | random-access dataset name (e.g. `chimp`, `taxi`) | +| `format` | string | yes | | +| `value_ns` | integer | yes | | +| `all_runtimes_ns` | array<integer> | yes | | +| `env_triple` | string | optional | | + +### `vector_search_run` + +| Field | Type | Required? | Notes | +|---|---|---|---| +| `kind` | `"vector_search_run"` | yes | | +| `commit_sha` | string | yes | | +| `dataset` | string | yes | e.g. `cohere-large-10m` | +| `layout` | string | yes | `TrainLayout`, e.g. `partitioned` | +| `flavor` | string | yes | `VectorFlavor`, e.g. `vortex-turboquant` | +| `threshold` | number | yes | cosine threshold | +| `value_ns` | integer | yes | per-scan wall time (median of iterations) | +| `all_runtimes_ns` | array<integer> | yes | | +| `matches` | integer | yes | | +| `rows_scanned` | integer | yes | | +| `bytes_scanned` | integer | yes | | +| `iterations` | integer | yes | | +| `env_triple` | string | optional | | + +## Ingest envelope + +`/api/ingest` accepts one envelope per POST. The envelope wraps a +heterogeneous batch of records (any mix of `kind`s). Required +top-level fields: + +- `run_meta`: object with `benchmark_id` (string), `schema_version` + (integer; `1` at alpha), `started_at` (RFC 3339 timestamp). +- `commit`: object with the columns of the [`commits` + table](./01-schema.md#commits-dim), keyed by their column names + with `commit_sha` renamed to `sha`. The server upserts this row + before applying records. +- `records`: array of per-`kind` records as defined above. + +`vortex-bench --gh-json-v3 ` writes JSONL of bare records +only. The envelope (`run_meta` + `commit`) is added by the +post-ingest script before POSTing - this keeps the Rust emitter +dependency-light. + +The post-ingest script is responsible for filling the `commit` +fields. CI has the SHA from `${{ github.sha }}`; the rest comes +from `git show` or equivalent. See +[`components/emitter.md`](./components/emitter.md). + +## HTTP matrix for `POST /api/ingest` + +| Condition | Status | +|---|---| +| Happy path | 200 with `{ "inserted": N, "updated": M }` | +| Malformed JSON | 400 | +| Unknown `kind`, unknown field, or per-record validation failure | 400 with the offending record index | +| Missing/invalid bearer token | 401 | +| Schema version newer than server expects | 409 | +| Other server error | 500 | + +All-or-nothing per POST: a single failed record fails the whole +batch. The reported `inserted` and `updated` counts are aggregated +across all five tables. + +## Authentication header + +```text +Authorization: Bearer +``` + +Compared with constant-time equality on the server. Token comes from +the `INGEST_BEARER_TOKEN` env var. 
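+
+Putting the pieces together, here is a minimal sketch of the post-ingest
+step, written in Rust for concreteness (assuming `reqwest`, `serde_json`,
+`tokio`, and `anyhow`). The real CI script can be any language that can
+wrap the JSONL and POST it; the host, commit values, and single record
+below are placeholders, not real data:
+
+```rust
+use serde_json::json;
+
+#[tokio::main]
+async fn main() -> anyhow::Result<()> {
+    let token = std::env::var("INGEST_BEARER_TOKEN")?;
+
+    // One envelope per POST: run_meta + commit + a heterogeneous record batch.
+    // Optional commit fields (message, author, committer) are omitted here.
+    let envelope = json!({
+        "run_meta": {
+            "benchmark_id": "compress-bench",
+            "schema_version": 1,
+            "started_at": "2026-04-25T00:00:00Z"
+        },
+        "commit": {
+            "sha": "0123456789abcdef0123456789abcdef01234567",
+            "timestamp": "2026-04-25T00:00:00Z",
+            "tree_sha": "fedcba9876543210fedcba9876543210fedcba98",
+            "url": "https://example.com"
+        },
+        "records": [{
+            "kind": "compression_size",
+            "commit_sha": "0123456789abcdef0123456789abcdef01234567",
+            "dataset": "tpch-lineitem",
+            "format": "parquet",
+            "value_bytes": 123456789
+        }]
+    });
+
+    let resp = reqwest::Client::new()
+        // Placeholder host: whatever machine the v3 server is listening on.
+        .post("https://bench.vortex.dev/api/ingest")
+        .bearer_auth(token)
+        .json(&envelope)
+        .send()
+        .await?;
+
+    // Expect 200 with { "inserted": N, "updated": M }; any bad record fails
+    // the whole batch with a 400 carrying the offending record index.
+    println!("{}: {}", resp.status(), resp.text().await?);
+    Ok(())
+}
+```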
+ +## Slug grammar (server ↔ web-ui) + +The web-ui receives slugs from `/api/groups` and feeds them back +into `/api/chart/:slug`. Slugs are **opaque strings** as far as the +web-ui is concerned: it never parses or constructs them itself, +only echoes what the API returned. The server is free to choose any +slug format, change it without breaking the web-ui, or make it +debuggable (e.g. `qm-tpch-q01-nvme-sf1`) - the only contract is +"`/api/chart/:slug` accepts any slug `/api/groups` returned." + +## Read API + +Four JSON routes today. Field shapes are not binding; refine during +implementation. + +### `GET /api/groups` + +A flat list of distinct group keys derivable from the data, with +just enough metadata to link to a chart. The server walks each fact +table to produce the group keys defined in +[`01-schema.md`](./01-schema.md#group--chart--series-fit). Every +chart entry includes a `slug` that round-trips through +`/api/chart/:slug`, and every group has its own `slug` that +round-trips through `/api/group/:slug`. + +### `GET /api/chart/:slug` + +Returns the data for one chart: a `display_name`, a `unit_kind`, an +ordered `commits` list (sha + timestamp + first-line message + url), +and a `series` map keyed by series name where each value is an +array aligned to `commits` (with `null` for missing data points). +Accepts `?n=&y=&mode=&hidden=` to scope the commit window and +configure the rendered view. + +`unit_kind` is a small structured taxonomy that tells the client +*what* the values are. Wire values stay in the kind's base unit; the +client picks a display unit (e.g. `ms` for `time_ns` values around +1e6) so the rendered axis stays readable. Worked example: +`12,000,000,000` ns on the wire → `12 s` on the y-axis. + +| `unit_kind` | Base unit on the wire | Client display picker | +|---------------------|-------------------------|-------------------------------| +| `time_ns` | nanoseconds | `ns | µs | ms | s` by magnitude | +| `bytes` | bytes | `B | KiB | MiB | GiB | TiB` (binary) | +| `ratio` | dimensionless ratio | identity (no suffix) | +| `count` | dimensionless count | identity (no suffix) | +| `throughput_mb_s` | megabytes per second | identity, `MB/s` suffix | + +Adding a variant is a wire-compat change: bump the emitter, the +migrator, and the client unit picker in `chart-init.js` together. + +### `GET /api/group/:slug` + +Returns every chart in a group as a single batch payload, in render +order. Used by the `/group/{slug}` HTML page and (today) by the +landing page hydration path. Same query parameters as +`/api/chart/:slug`. + +### `GET /health` + +Returns `{ status, db_path, schema_version, latest_commit_timestamp, +row_counts }`. Cheap; suitable for load-balancer health checks. + +Per-commit page, range queries, and the rest of the read API are +deferred. See [`deferred.md`](./deferred.md). diff --git a/benchmarks-website/planning/AGENTS.md b/benchmarks-website/planning/AGENTS.md new file mode 100644 index 00000000000..719c47b4c22 --- /dev/null +++ b/benchmarks-website/planning/AGENTS.md @@ -0,0 +1,172 @@ + + +# AGENTS.md - benchmarks-website v3 + +Brief for coding agents working on the v3 rewrite of `bench.vortex.dev`. Keep this file short. +Detail belongs in component plans. + +## Status + +Alpha is shipped. The v3 server, migrator, and inline-charts UI are all merged to +`ct/benchmarks-v3`. The current focus is **production readiness**: secrets, CI ingestion wiring, +smoke-testing on a real host, the DNS flip, and v2 cleanup. 
See [`README.md`](./README.md) for the +live punch list. + +The v2 site (top-level files in `benchmarks-website/`: `server.js`, `src/`, `package.json`, +`index.html`, `Dockerfile`, `docker-compose.yml`, `ec2-init.txt`, etc.) is still in production on +`bench.vortex.dev` and **stays running unchanged** until the DNS flip. The v3 server lives alongside +it as `vortex-bench-server` at `benchmarks-website/server/`. + +## Architecture in 10 bullets + +- Single Rust binary: `axum` (HTTP) + `maud` (SSR HTML) + embedded `duckdb-rs`. All static assets + (`chart.umd.js`, `chart-init.js`, `style.css`) are `include_bytes!`'d into the binary. No CDN. + A `tower-http` `CompressionLayer` wraps every response (gzip/brotli). +- One DuckDB file on local disk holds five fact tables (compression time, query measurement, vector + search, RAG, random access) plus a `commits` dim table. Schema in + [`01-schema.md`](./01-schema.md). +- One ingest endpoint: `POST /api/ingest`, gated by a static bearer token from the + `INGEST_BEARER_TOKEN` env var. Wire shapes in [`02-contracts.md`](./02-contracts.md). +- Three HTML routes — `/`, `/chart/{slug}`, `/group/{slug}` — and four JSON routes — + `GET /api/groups`, `GET /api/chart/{slug}`, `GET /api/group/{slug}`, `GET /health` — all served + from the same binary. +- `ChartKey` and `GroupKey` enums round-trip through URLs as `.` + slugs. No DB lookup required to decode a URL. +- Charts render inline on the landing page. Each `` is paired with a + `"}"#; + let out = escape_json_for_script(input); + assert!(!out.contains(" String { + format!("{path}?v={STATIC_ASSET_VERSION}") +} + +pub(crate) async fn serve_chart_js() -> impl IntoResponse { + static_response(CHART_JS, "application/javascript; charset=utf-8") +} + +pub(crate) async fn serve_chart_zoom_js() -> impl IntoResponse { + static_response(CHART_ZOOM_JS, "application/javascript; charset=utf-8") +} + +pub(crate) async fn serve_chart_init_js() -> impl IntoResponse { + static_response(CHART_INIT_JS, "application/javascript; charset=utf-8") +} + +pub(crate) async fn serve_style_css() -> impl IntoResponse { + static_response(STYLE_CSS, "text/css; charset=utf-8") +} + +pub(crate) async fn serve_vortex_black_png() -> impl IntoResponse { + static_response(VORTEX_BLACK_PNG, "image/png") +} + +pub(crate) async fn serve_vortex_white_png() -> impl IntoResponse { + static_response(VORTEX_WHITE_PNG, "image/png") +} + +fn static_response(bytes: &'static [u8], content_type: &'static str) -> Response { + ( + [ + (header::CONTENT_TYPE, content_type), + ( + header::CACHE_CONTROL, + "no-cache, max-age=0, must-revalidate", + ), + ], + bytes, + ) + .into_response() +} diff --git a/benchmarks-website/server/src/html/summary.rs b/benchmarks-website/server/src/html/summary.rs new file mode 100644 index 00000000000..331cda64064 --- /dev/null +++ b/benchmarks-website/server/src/html/summary.rs @@ -0,0 +1,150 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Group summary card rendering. +//! +//! Each [`Summary`] variant renders into a small `.benchmark-scores-summary` +//! card that lives above the chart grid. Every variant is rendered the same +//! shape — a list of `.score-item` rows — only the rank label, value, and +//! footer change. + +use maud::Markup; +use maud::html; + +use crate::api::Summary; + +/// Render the summary card for a group, or empty markup if `summary` is +/// `None` or every variant's content list is empty. 
+pub(super) fn summary_markup(summary: Option<&Summary>) -> Markup { + let Some(summary) = summary else { + return html! {}; + }; + match summary { + Summary::RandomAccess { + title, + rankings, + explanation, + } if !rankings.is_empty() => html! { + section.benchmark-scores-summary aria-label=(title) { + h3.scores-title { (title) } + div.scores-list { + @for (idx, item) in rankings.iter().enumerate() { + div.score-item { + span.score-rank { "#" (idx + 1) } + span.score-series title=(item.name) { (item.name) } + span.score-metrics { + span.score-value { (format_time_ns(item.time)) } + span.score-runtime { (format!("{:.2}x", item.ratio)) } + } + } + } + } + div.scores-explanation { (explanation) } + } + }, + Summary::Compression { + title, + compress_ratio, + decompress_ratio, + dataset_count: _, + explanation, + } if compress_ratio.is_some() || decompress_ratio.is_some() => html! { + section.benchmark-scores-summary aria-label=(title) { + h3.scores-title { (title) } + div.scores-list { + @if let Some(v) = compress_ratio { + div.score-item { + span.score-rank { "⚡" } + span.score-series { "Write Speed (Compression)" } + span.score-metrics { + span.score-value { (format!("{v:.2}x")) } + } + } + } + @if let Some(v) = decompress_ratio { + div.score-item { + span.score-rank { "📤" } + span.score-series { "Scan Speed (Decompression)" } + span.score-metrics { + span.score-value { (format!("{v:.2}x")) } + } + } + } + } + div.scores-explanation { (explanation) } + } + }, + Summary::CompressionSize { + title, + min_ratio, + mean_ratio, + max_ratio, + dataset_count: _, + explanation, + } => html! { + section.benchmark-scores-summary aria-label=(title) { + h3.scores-title { (title) } + div.scores-list { + div.score-item { + span.score-rank { "⬇️" } + span.score-series { "Min Size Ratio" } + span.score-metrics { + span.score-value { (format!("{min_ratio:.2}x")) } + } + } + div.score-item { + span.score-rank { "📊" } + span.score-series { "Mean Size Ratio" } + span.score-metrics { + span.score-value { (format!("{mean_ratio:.2}x")) } + } + } + div.score-item { + span.score-rank { "⬆️" } + span.score-series { "Max Size Ratio" } + span.score-metrics { + span.score-value { (format!("{max_ratio:.2}x")) } + } + } + } + div.scores-explanation { (explanation) } + } + }, + Summary::QueryBenchmark { + title, + rankings, + explanation, + } if !rankings.is_empty() => html! { + section.benchmark-scores-summary aria-label=(title) { + h3.scores-title { (title) } + div.scores-list { + @for (idx, item) in rankings.iter().enumerate() { + div.score-item { + span.score-rank { "#" (idx + 1) } + span.score-series title=(item.name) { (item.name) } + span.score-metrics { + span.score-value { (format!("{:.2}x", item.score)) } + span.score-runtime { (format_time_ns(item.total_runtime)) } + } + } + } + } + div.scores-explanation { (explanation) } + } + }, + _ => html! 
{}, + } +} + +fn format_time_ns(ns: f64) -> String { + let abs = ns.abs(); + if abs >= 1_000_000_000.0 { + format!("{:.2} s", ns / 1_000_000_000.0) + } else if abs >= 1_000_000.0 { + format!("{:.2} ms", ns / 1_000_000.0) + } else if abs >= 1_000.0 { + format!("{:.2} us", ns / 1_000.0) + } else { + format!("{ns:.0} ns") + } +} diff --git a/benchmarks-website/server/src/html/toolbar.rs b/benchmarks-website/server/src/html/toolbar.rs new file mode 100644 index 00000000000..1075632c89b --- /dev/null +++ b/benchmarks-website/server/src/html/toolbar.rs @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Per-chart toolbar markup: the scope slider + Y-axis switch above each +//! chart, and the range scrollbar strip below it. + +use maud::Markup; +use maud::html; + +/// Render the per-chart toolbar. `idx` namespaces input ids so multiple +/// charts on the same page don't collide on ``. +/// +/// All buttons are `
...
`. + // We need to find the matching `` for the outer wrapper. The + // simplest robust approach is to scan and balance. + let bytes = tail.as_bytes(); + let mut depth = 0usize; + let mut i = 0usize; + while i < bytes.len() { + if bytes[i] == b'<' { + if tail[i..].starts_with("") { + depth -= 1; + if depth == 0 { + return tail[..i + "".len()].to_string(); + } + i += "".len(); + continue; + } + } + i += 1; + } + tail.to_string() +} + +/// Pull the `
` containing chips for one +/// dimension (`"engine"` or `"format"`). +pub fn filter_section(body: &str, dim: &str) -> String { + let bar = filter_bar_section(body); + let needle = format!(r#"data-filter="{dim}""#); + let Some(_) = bar.find(&needle) else { + return String::new(); + }; + // Walk back to the enclosing `
`. + let row_open = r#"
"#; + let row_close = "
"; + bar.split(row_open) + .find(|chunk| chunk.contains(&needle)) + .and_then(|chunk| chunk.split(row_close).next()) + .map(str::to_string) + .unwrap_or_default() +} + +/// Pull a single chip's opening tag for assertions. +pub fn extract_chip(section: &str, value: &str) -> String { + let needle = format!(r#"data-value="{value}""#); + let Some(idx) = section.find(&needle) else { + return String::new(); + }; + let head = §ion[..idx]; + let chip_start = head.rfind("').map(|p| p + 1).unwrap_or(tail.len()); + tail[..chip_end].to_string() +} diff --git a/benchmarks-website/server/tests/group_api.rs b/benchmarks-website/server/tests/group_api.rs new file mode 100644 index 00000000000..e2814d3932f --- /dev/null +++ b/benchmarks-website/server/tests/group_api.rs @@ -0,0 +1,148 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Integration tests for `/group/{slug}` and `/api/group/{slug}` plus the +//! v2-compatible group summary contract on `/api/groups`. + +mod common; + +use anyhow::Context as _; +use anyhow::Result; +use serde_json::Value; + +use self::common::Server; +use self::common::assert_close; +use self::common::group_by_name; +use self::common::insta_settings; +use self::common::pick_group_slug; +use self::common::seed; + +#[tokio::test] +async fn group_page_snapshot() -> Result<()> { + let server = Server::start().await?; + seed(&server).await?; + + let client = reqwest::Client::new(); + let slug = pick_group_slug(&server, |s| s.starts_with("TPC-H")).await?; + + let resp = client + .get(server.url(&format!("/group/{slug}"))) + .send() + .await?; + assert_eq!(resp.status(), 200); + let body = resp.text().await?; + assert!( + body.contains(r#"id="chart-data-0""#), + "group page must embed at least one chart payload inline" + ); + assert!( + body.contains(r#"class="toolbar toolbar--card""#), + "per-chart toolbar must be rendered on group page" + ); + insta_settings().bind(|| { + insta::assert_snapshot!("group_page_query", body); + }); + Ok(()) +} + +#[tokio::test] +async fn group_api_returns_charts() -> Result<()> { + let server = Server::start().await?; + seed(&server).await?; + + let slug = pick_group_slug(&server, |s| s.starts_with("TPC-H")).await?; + + let client = reqwest::Client::new(); + let resp = client + .get(server.url(&format!("/api/group/{slug}"))) + .send() + .await?; + assert_eq!(resp.status(), 200); + let body: Value = resp.json().await?; + let charts = body["charts"].as_array().context("charts is array")?; + assert!(!charts.is_empty(), "group must have at least one chart"); + let first = &charts[0]; + assert!(first["slug"].as_str().is_some(), "chart slug present"); + assert!(first["name"].as_str().is_some(), "chart name present"); + assert!( + first["commits"].as_array().is_some(), + "embedded chart commits" + ); + assert!( + first["series"].as_object().is_some(), + "embedded chart series" + ); + assert_eq!( + body["summary"]["type"].as_str(), + Some("queryBenchmark"), + "group API should include the server-computed summary" + ); + Ok(()) +} + +#[tokio::test] +async fn group_summaries_match_v2_contract() -> Result<()> { + let server = Server::start().await?; + seed(&server).await?; + + let client = reqwest::Client::new(); + let groups: Value = client + .get(server.url("/api/groups")) + .send() + .await? 
+ .json() + .await?; + + let random_access = &group_by_name(&groups, "Random Access")?["summary"]; + assert_eq!(random_access["type"].as_str(), Some("randomAccess")); + let rankings = random_access["rankings"] + .as_array() + .context("random access rankings")?; + assert_eq!(rankings[0]["name"].as_str(), Some("vortex-file-compressed")); + assert_eq!(rankings[1]["name"].as_str(), Some("parquet")); + assert_close(rankings[1]["ratio"].as_f64().context("random ratio")?, 2.0); + + let compression = &group_by_name(&groups, "Compression")?["summary"]; + assert_eq!(compression["type"].as_str(), Some("compression")); + assert_close( + compression["compressRatio"] + .as_f64() + .context("compressRatio")?, + 2.0, + ); + assert_close( + compression["decompressRatio"] + .as_f64() + .context("decompressRatio")?, + 2.0, + ); + assert_eq!(compression["datasetCount"].as_u64(), Some(1)); + + let compression_size = &group_by_name(&groups, "Compression Size")?["summary"]; + assert_eq!(compression_size["type"].as_str(), Some("compressionSize")); + assert_close( + compression_size["meanRatio"] + .as_f64() + .context("meanRatio")?, + 0.5, + ); + assert_eq!(compression_size["datasetCount"].as_u64(), Some(1)); + + let query = &group_by_name(&groups, "TPC-H (NVMe) (SF=1)")?["summary"]; + assert_eq!(query["type"].as_str(), Some("queryBenchmark")); + let rankings = query["rankings"].as_array().context("query rankings")?; + assert_eq!( + rankings[0]["name"].as_str(), + Some("datafusion:vortex-file-compressed"), + "query summary should include v2's missing-series penalty" + ); + assert_eq!(rankings[1]["name"].as_str(), Some("duckdb:parquet")); + let first_score = rankings[0]["score"].as_f64().context("first score")?; + let second_score = rankings[1]["score"].as_f64().context("second score")?; + assert!( + first_score < second_score, + "lower query score should rank first" + ); + + Ok(()) +} diff --git a/benchmarks-website/server/tests/ingest.rs b/benchmarks-website/server/tests/ingest.rs new file mode 100644 index 00000000000..a2cf46f8632 --- /dev/null +++ b/benchmarks-website/server/tests/ingest.rs @@ -0,0 +1,314 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Integration tests covering the acceptance criteria from +//! `benchmarks-website/planning/components/server.md`. 
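+//!
+//! The shared fixture (`fixtures/envelope.json`) carries one record of each
+//! `kind`: the happy-path test below expects `inserted == 5`, and the health
+//! check sees exactly one row in every fact table after a single ingest.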
+ +use std::net::SocketAddr; + +use anyhow::Context as _; +use anyhow::Result; +use serde_json::Value; +use serde_json::json; +use tempfile::TempDir; +use tokio::net::TcpListener; +use tokio::task::JoinHandle; +use vortex_bench_server::app::AppState; +use vortex_bench_server::app::router; + +const TOKEN: &str = "test-bearer-token"; + +struct Server { + addr: SocketAddr, + _tmp: TempDir, + handle: JoinHandle<()>, +} + +impl Server { + async fn start() -> Result { + let tmp = TempDir::new()?; + let db_path = tmp.path().join("bench.duckdb"); + let state = AppState::open(&db_path, TOKEN.to_string())?; + let app = router(state); + + let listener = TcpListener::bind("127.0.0.1:0").await?; + let addr = listener.local_addr()?; + let handle = tokio::spawn(async move { + axum::serve(listener, app).await.unwrap(); + }); + Ok(Self { + addr, + _tmp: tmp, + handle, + }) + } + + fn url(&self, path: &str) -> String { + format!("http://{}{}", self.addr, path) + } +} + +impl Drop for Server { + fn drop(&mut self) { + self.handle.abort(); + } +} + +fn fixture_envelope() -> Value { + let raw = include_str!("../fixtures/envelope.json"); + serde_json::from_str(raw).expect("fixture envelope is valid JSON") +} + +#[tokio::test] +async fn happy_path_then_idempotent_reingest() -> Result<()> { + let server = Server::start().await?; + let client = reqwest::Client::new(); + let envelope = fixture_envelope(); + + let resp = client + .post(server.url("/api/ingest")) + .bearer_auth(TOKEN) + .json(&envelope) + .send() + .await?; + assert_eq!(resp.status(), 200, "first ingest should be 200"); + let body: Value = resp.json().await?; + assert_eq!(body["inserted"].as_u64(), Some(5)); + assert_eq!(body["updated"].as_u64(), Some(0)); + + let resp = client + .post(server.url("/api/ingest")) + .bearer_auth(TOKEN) + .json(&envelope) + .send() + .await?; + assert_eq!(resp.status(), 200, "second ingest should be 200"); + let body: Value = resp.json().await?; + assert_eq!(body["inserted"].as_u64(), Some(0), "no new rows on re-emit"); + assert!( + body["updated"].as_u64().context("updated is u64")? 
> 0, + "re-emit must report at least one updated row" + ); + Ok(()) +} + +#[tokio::test] +async fn missing_bearer_is_unauthorized() -> Result<()> { + let server = Server::start().await?; + let client = reqwest::Client::new(); + let envelope = fixture_envelope(); + + let resp = client + .post(server.url("/api/ingest")) + .json(&envelope) + .send() + .await?; + assert_eq!(resp.status(), 401); + Ok(()) +} + +#[tokio::test] +async fn wrong_bearer_is_unauthorized() -> Result<()> { + let server = Server::start().await?; + let client = reqwest::Client::new(); + let envelope = fixture_envelope(); + + let resp = client + .post(server.url("/api/ingest")) + .bearer_auth("not-the-real-token") + .json(&envelope) + .send() + .await?; + assert_eq!(resp.status(), 401); + Ok(()) +} + +#[tokio::test] +async fn unknown_kind_is_400() -> Result<()> { + let server = Server::start().await?; + let client = reqwest::Client::new(); + + let envelope = json!({ + "run_meta": { + "benchmark_id": "fixture", + "schema_version": 1, + "started_at": "2026-04-25T00:00:00Z" + }, + "commit": { + "sha": "0123456789abcdef0123456789abcdef01234567", + "timestamp": "2026-04-25T00:00:00Z", + "message": "x", "author_name": "x", "author_email": "x@x", + "committer_name": "x", "committer_email": "x@x", + "tree_sha": "fedcba9876543210fedcba9876543210fedcba98", + "url": "https://example.com" + }, + "records": [ + { "kind": "made_up_kind", "commit_sha": "0123456789abcdef0123456789abcdef01234567" } + ] + }); + let resp = client + .post(server.url("/api/ingest")) + .bearer_auth(TOKEN) + .json(&envelope) + .send() + .await?; + assert_eq!(resp.status(), 400); + Ok(()) +} + +#[tokio::test] +async fn unknown_field_is_400() -> Result<()> { + let server = Server::start().await?; + let client = reqwest::Client::new(); + + let mut envelope = fixture_envelope(); + envelope["records"][0]["surprise_field"] = json!("oops"); + let resp = client + .post(server.url("/api/ingest")) + .bearer_auth(TOKEN) + .json(&envelope) + .send() + .await?; + assert_eq!(resp.status(), 400); + Ok(()) +} + +#[tokio::test] +async fn schema_version_too_new_is_409() -> Result<()> { + let server = Server::start().await?; + let client = reqwest::Client::new(); + + let mut envelope = fixture_envelope(); + envelope["run_meta"]["schema_version"] = json!(99); + let resp = client + .post(server.url("/api/ingest")) + .bearer_auth(TOKEN) + .json(&envelope) + .send() + .await?; + assert_eq!(resp.status(), 409); + Ok(()) +} + +#[tokio::test] +async fn invalid_storage_is_400_record_error() -> Result<()> { + let server = Server::start().await?; + let client = reqwest::Client::new(); + + let mut envelope = fixture_envelope(); + envelope["records"][0]["storage"] = json!("gcs"); + let resp = client + .post(server.url("/api/ingest")) + .bearer_auth(TOKEN) + .json(&envelope) + .send() + .await?; + assert_eq!(resp.status(), 400); + let body: Value = resp.json().await?; + assert_eq!(body["record_index"], json!(0)); + Ok(()) +} + +#[tokio::test] +async fn health_reports_after_ingest() -> Result<()> { + let server = Server::start().await?; + let client = reqwest::Client::new(); + + // Pre-ingest: counts are zero. + let resp = client.get(server.url("/health")).send().await?; + assert_eq!(resp.status(), 200); + let body: Value = resp.json().await?; + assert_eq!(body["status"], "ok"); + assert_eq!(body["schema_version"], 1); + assert_eq!(body["row_counts"]["commits"], 0); + assert!(body["latest_commit_timestamp"].is_null()); + + // Ingest, then re-check. 
+ client + .post(server.url("/api/ingest")) + .bearer_auth(TOKEN) + .json(&fixture_envelope()) + .send() + .await?; + + let resp = client.get(server.url("/health")).send().await?; + let body: Value = resp.json().await?; + assert_eq!(body["row_counts"]["commits"], 1); + assert_eq!(body["row_counts"]["query_measurements"], 1); + assert_eq!(body["row_counts"]["compression_times"], 1); + assert_eq!(body["row_counts"]["compression_sizes"], 1); + assert_eq!(body["row_counts"]["random_access_times"], 1); + assert_eq!(body["row_counts"]["vector_search_runs"], 1); + assert!(!body["latest_commit_timestamp"].is_null()); + Ok(()) +} + +#[tokio::test] +async fn read_routes_serve_after_ingest() -> Result<()> { + let server = Server::start().await?; + let client = reqwest::Client::new(); + + client + .post(server.url("/api/ingest")) + .bearer_auth(TOKEN) + .json(&fixture_envelope()) + .send() + .await?; + + let resp = client.get(server.url("/api/groups")).send().await?; + assert_eq!(resp.status(), 200); + let body: Value = resp.json().await?; + let groups = body["groups"].as_array().context("groups is array")?; + assert!( + !groups.is_empty(), + "groups should not be empty after ingest" + ); + + // Pick the first chart slug and round-trip it. + let first_chart = groups + .iter() + .find_map(|g| g["charts"].as_array().and_then(|c| c.first())) + .context("at least one chart")?; + let slug = first_chart["slug"] + .as_str() + .context("slug is a string")? + .to_string(); + + let resp = client + .get(server.url(&format!("/api/chart/{slug}"))) + .send() + .await?; + assert_eq!(resp.status(), 200, "chart {slug} should resolve"); + let body: Value = resp.json().await?; + assert!(body["display_name"].is_string()); + assert!(body["unit_kind"].is_string()); + assert!(body["commits"].is_array()); + assert_eq!( + body["commits"] + .as_array() + .context("commits is array")? + .len(), + 1 + ); + assert!(body["series"].is_object()); + Ok(()) +} + +#[tokio::test] +async fn unknown_slug_is_404() -> Result<()> { + let server = Server::start().await?; + let client = reqwest::Client::new(); + + let resp = client + .get(server.url("/api/chart/qm.aGVsbG8")) + .send() + .await?; + // Either 400 (couldn't decode JSON) or 404 (decoded but no rows). Both are + // acceptable per the contract; we just need it to not be a 500. + assert!( + resp.status() == 400 || resp.status() == 404, + "got {}", + resp.status() + ); + Ok(()) +} diff --git a/benchmarks-website/server/tests/landing.rs b/benchmarks-website/server/tests/landing.rs new file mode 100644 index 00000000000..2f327a0b4e6 --- /dev/null +++ b/benchmarks-website/server/tests/landing.rs @@ -0,0 +1,504 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Integration tests for the landing page (`GET /`). 
+ +mod common; + +use anyhow::Context as _; +use anyhow::Result; +use serde_json::Value; + +use self::common::Server; +use self::common::extract_chart_data; +use self::common::extract_chip; +use self::common::filter_bar_section; +use self::common::filter_section; +use self::common::insta_settings; +use self::common::pick_chart_slug; +use self::common::pick_group_slug; +use self::common::seed; +use self::common::seed_long_history; + +#[tokio::test] +async fn landing_page_snapshot() -> Result<()> { + let server = Server::start().await?; + seed(&server).await?; + + let client = reqwest::Client::new(); + let resp = client.get(server.url("/")).send().await?; + assert_eq!(resp.status(), 200); + let content_type = resp + .headers() + .get(reqwest::header::CONTENT_TYPE) + .and_then(|v| v.to_str().ok()) + .unwrap_or(""); + assert!( + content_type.starts_with("text/html"), + "expected text/html, got {content_type:?}" + ); + let body = resp.text().await?; + + // Inline canvas + chart-data-0 from the first group (every group is + // collapsed by default, but the first group's payload is inlined for + // fast on-toggle hydration). + assert!( + body.contains("" + ); + assert!( + body.contains(r#"id="chart-data-0""#), + "the first group must inline its chart payload for fast on-toggle hydration" + ); + assert!( + body.contains(r#"data-chart-slug="#), + "every chart card carries data-chart-slug for the lazy-fetch path" + ); + assert!( + !body.contains(r#"id="group-search""#), + "landing page should not render the old group search bar" + ); + assert!( + body.contains(r#"class="sticky-header""#), + "landing page should render the v2-style top navbar" + ); + assert!( + body.contains(r#"data-action="expand-all""#) + && body.contains(r#"data-action="collapse-all""#), + "navbar should expose expand/collapse controls" + ); + assert!( + body.contains(r#"data-role="theme-toggle""#), + "navbar should expose a theme toggle" + ); + assert!( + body.contains(r#"class="btn-icon""#) + || body.contains(r#"class="btn-icon theme-icon theme-icon-light""#), + "navbar controls should render icons" + ); + assert!( + body.contains(r#"Vortex_Black_NoBG.png"#) && body.contains(r#"Vortex_White_NoBG.png"#), + "navbar should render the Vortex logo assets" + ); + assert!( + body.contains("⚡") && body.contains("📤") && body.contains("⬇️") && body.contains("📊"), + "summaries should render the v2 summary icons" + ); + + insta_settings().bind(|| { + insta::assert_snapshot!("landing_page", body); + }); + Ok(()) +} + +/// All group disclosures render closed by default — the user picks which +/// to expand. The first group's chart payloads are still inlined in the +/// HTML (so opening it skips the JS fetch), but the disclosure itself +/// stays collapsed until clicked. +#[tokio::test] +async fn details_all_groups_closed_by_default() -> Result<()> { + let server = Server::start().await?; + seed(&server).await?; + + let client = reqwest::Client::new(); + let body = client.get(server.url("/")).send().await?.text().await?; + + let opens: Vec<_> = body + .match_indices(r#"
').map(|p| i + p).unwrap_or(i); + body[i..=tag_end].contains(" open") + }) + .collect(); + assert!(!opens.is_empty(), "landing page must render
"); + for (i, is_open) in opens.iter().enumerate() { + assert!(!is_open, "group #{i} must be closed by default"); + } + // The first group's chart payload should still be inlined — fast + // hydration on toggle without a network round-trip. + assert!( + body.contains(r#"id="chart-data-0""#), + "first group's chart payload should be inlined for fast on-toggle hydration", + ); + Ok(()) +} + +#[tokio::test] +async fn collapsed_groups_still_show_summaries() -> Result<()> { + let server = Server::start().await?; + seed(&server).await?; + + let client = reqwest::Client::new(); + let body = client.get(server.url("/")).send().await?.text().await?; + + let mut found_visible_summary = false; + for (group_start, _) in body.match_indices(r#"
') + .map(|p| details_start + p) + .context("details tag closes")?; + let is_open = body[details_start..=details_tag_end].contains(" open"); + if is_open { + continue; + } + + let summary_end = body[details_start..] + .find("
") + .map(|p| details_start + p) + .context("disclosure closes")?; + let chart_grid_start = body[summary_end..] + .find(r#"
"#) + .map(|p| summary_end + p) + .context("details contains chart grid")?; + let visible_region = &body[summary_end..chart_grid_start]; + if visible_region.contains(r#"class="benchmark-scores-summary""#) { + found_visible_summary = true; + break; + } + } + + assert!( + found_visible_summary, + "at least one closed group should render its score summary before the hidden chart grid" + ); + Ok(()) +} + +/// Every `.chart-card` carries a compact `.toolbar.toolbar--card` so the user +/// has per-chart controls. There is no page-level toolbar, no preset scope +/// button row, and no abs/rel mode toggle. +#[tokio::test] +async fn chart_card_carries_per_chart_toolbar() -> Result<()> { + let server = Server::start().await?; + seed(&server).await?; + + let client = reqwest::Client::new(); + let body = client.get(server.url("/")).send().await?.text().await?; + + let card_count = body.matches(r#"
0, "landing page must render chart cards"); + assert_eq!( + toolbar_count, card_count, + "every chart-card must contain a toolbar--card ({card_count} cards / {toolbar_count} toolbars)" + ); + assert_eq!( + strip_count, card_count, + "every chart-card must carry a range-strip below the canvas \ + ({card_count} cards / {strip_count} strips)" + ); + assert!( + body.contains(r#"data-role="range-window""#) + && body.contains(r#"data-role="range-handle-left""#) + && body.contains(r#"data-role="range-handle-right""#), + "range-strip must include a draggable window and two resize handles" + ); + assert!( + !body.contains(r#"data-mode="#), + "abs/rel mode buttons should not render" + ); + assert!( + !body.contains(r#"data-scope="#), + "preset scope buttons should not render; use the slider instead" + ); + assert!( + body.contains(r#"data-role="scope-slider""#), + "scope slider should remain available" + ); + assert!( + !body.contains(r#"scope-slider-label"#), + "scope value labels should not add repeated numbers to every card" + ); + + // Same invariant on /chart/{slug}. + let slug = pick_chart_slug(&server, |s| s.starts_with("TPC-H")).await?; + let body = client + .get(server.url(&format!("/chart/{slug}"))) + .send() + .await? + .text() + .await?; + assert!( + body.contains(r#"class="toolbar toolbar--card""#), + "chart page must carry a per-chart toolbar" + ); + assert!(!body.contains(r#"data-mode="#)); + assert!(!body.contains(r#"data-scope="#)); + assert!(body.contains(r#"data-role="scope-slider""#)); + assert!(!body.contains(r#"scope-slider-label"#)); + + // Same invariant on /group/{slug}. + let group_slug = pick_group_slug(&server, |s| s.starts_with("TPC-H")).await?; + let body = client + .get(server.url(&format!("/group/{group_slug}"))) + .send() + .await? + .text() + .await?; + assert!( + body.contains(r#"class="toolbar toolbar--card""#), + "group page must carry per-chart toolbars" + ); + assert!(!body.contains(r#"data-mode="#)); + assert!(!body.contains(r#"data-scope="#)); + assert!(body.contains(r#"data-role="scope-slider""#)); + assert!(!body.contains(r#"scope-slider-label"#)); + Ok(()) +} + +/// Landing-page `
` summaries appear in the canonical v2 order: the +/// fixture seeds Random Access, Compression, Compression Size, TPC-H, and a +/// vector-search group. The first three are in `api::GROUP_ORDER` in the +/// expected positions; TPC-H follows; the unknown vector-search group sorts +/// last (alphabetical fallback after the listed names). +#[tokio::test] +async fn landing_groups_render_in_v2_order() -> Result<()> { + let server = Server::start().await?; + seed(&server).await?; + + let client = reqwest::Client::new(); + let body = client.get(server.url("/")).send().await?.text().await?; + + // Extract group names in render order from the `data-group-name=` attrs. + let mut names = Vec::new(); + for window in body.split("data-group-name=\"").skip(1) { + if let Some(end) = window.find('"') { + names.push(window[..end].to_string()); + } + } + let expected = [ + "Random Access", + "Compression", + "Compression Size", + "TPC-H (NVMe) (SF=1)", + "cohere-large-10m / partitioned", + ]; + assert_eq!(names, expected, "v2 ordering"); + Ok(()) +} + +#[tokio::test] +async fn empty_landing_page_renders() -> Result<()> { + let server = Server::start().await?; + let client = reqwest::Client::new(); + + let resp = client.get(server.url("/")).send().await?; + assert_eq!(resp.status(), 200); + let body = resp.text().await?; + assert!(body.contains("No data ingested yet")); + Ok(()) +} + +/// Landing page renders the global filter dropdown inside the sticky +/// header, with chip rows for engine and format sourced from the seeded +/// data — no hard-coding. +#[tokio::test] +async fn landing_page_renders_global_filter_bar() -> Result<()> { + let server = Server::start().await?; + seed(&server).await?; + + let client = reqwest::Client::new(); + let body = client.get(server.url("/")).send().await?.text().await?; + + // The dropdown lives inside the sticky header so it stays on-screen + // while the user scrolls. + let header_chunk = body + .split(r#"class="sticky-header""#) + .nth(1) + .and_then(|s| s.split("").next()) + .context("sticky header chunk")?; + assert!( + header_chunk.contains(r#"data-role="global-filter-bar""#), + "filter dropdown must live inside the sticky header" + ); + assert!(header_chunk.contains(r#"data-role="filter-trigger""#)); + assert!(header_chunk.contains(r#"data-role="filter-panel""#)); + assert!(header_chunk.contains(r#"data-filter="engine""#)); + assert!(header_chunk.contains(r#"data-filter="format""#)); + // Engines + formats from the seed fixture must appear as chips. + assert!(body.contains(r#"data-value="datafusion""#)); + assert!(body.contains(r#"data-value="duckdb""#)); + assert!(body.contains(r#"data-value="vortex-file-compressed""#)); + assert!(body.contains(r#"data-value="parquet""#)); + // Both rows have an "all" reset chip. + assert!(body.matches(r#"data-value="*""#).count() >= 2); + // The "all" chip is now a one-shot reset and is never rendered active — + // active chips reflect the visible engine/format set. + assert!( + !body.contains(r#"class="filter-chip filter-chip--all filter-chip--active""#), + "the 'all' chip should never start active" + ); + // No filter applied by default → every specific chip is active. + let engine_section = filter_section(&body, "engine"); + for engine in ["datafusion", "duckdb"] { + assert!( + extract_chip(&engine_section, engine).contains("filter-chip--active"), + "engine chip {engine} should be active when no filter is applied" + ); + } + // No badge on the trigger when nothing is hidden. 
+ assert!( + !body.contains(r#"data-role="filter-badge""#), + "filter badge should be absent when no chips are off" + ); + // Embedded filter state JSON for the client to pick up. + assert!(body.contains(r#"id="bench-filter-state""#)); + + insta_settings().bind(|| { + insta::assert_snapshot!("landing_page_filter_bar", filter_bar_section(&body)); + }); + Ok(()) +} + +/// Landing page honours `?engine=`/`?format=` and reflects them as the +/// active chip set + initial filter-state JSON, so a refresh preserves view. +#[tokio::test] +async fn landing_page_honours_filter_query_params() -> Result<()> { + let server = Server::start().await?; + seed(&server).await?; + + let client = reqwest::Client::new(); + let body = client + .get(server.url("/?engine=duckdb&format=vortex-file-compressed")) + .send() + .await? + .text() + .await?; + + assert!( + body.contains(r#"{"engines":["duckdb"],"formats":["vortex-file-compressed"]}"#), + "filter state JSON should reflect query params" + ); + let engine_section = filter_section(&body, "engine"); + assert!( + engine_section.contains(r#"data-value="duckdb""#) + && extract_chip(&engine_section, "duckdb").contains("filter-chip--active"), + "duckdb chip should be active" + ); + assert!( + !extract_chip(&engine_section, "datafusion").contains("filter-chip--active"), + "datafusion chip should NOT be active when engine=duckdb" + ); + assert!( + !extract_chip(&engine_section, "*").contains("filter-chip--active"), + "the 'all' chip is a reset, never active" + ); + // Trigger should show a badge counting the off chips (1 engine + 1 format). + assert!( + body.contains(r#"data-role="filter-badge""#), + "trigger should render a badge when chips are filtered off" + ); + Ok(()) +} + +/// The landing page caps the first group's inline JSON at +/// `LANDING_INLINE_N` (= 100) commits regardless of `?n=`. Power users get +/// the unbounded view via the `/api/chart/{slug}?n=all` refetch +/// `chart-init.js` triggers when they zoom past the inlined window. Sending +/// the full history inline would balloon the cold landing HTML — for the +/// inlined chart with one big history every kilobyte is paid by every +/// cold landing-page hit. +#[tokio::test] +async fn landing_first_group_caps_inline_commits() -> Result<()> { + // 250 commits is comfortably above the 100-commit landing inline cap so + // the cap actually kicks in. `seed_long_history` only seeds the + // Random-Access group; with the canonical group ordering Random Access + // sorts first on the landing page, so its chart-data-0 carries the + // inlined payload. + let server = Server::start().await?; + seed_long_history(&server, 250).await?; + + let client = reqwest::Client::new(); + let body = client.get(server.url("/")).send().await?.text().await?; + + let payload = + extract_chart_data(&body, 0).context("the first group must inline its chart payload")?; + let commits = payload["commits"] + .as_array() + .context("inline commits array")?; + assert!( + commits.len() <= 100, + "landing chart-data-0 must cap inline commits at LANDING_INLINE_N=100, \ + got {}", + commits.len(), + ); + // Sanity check: the cap actually fired on this fixture (≥ 100 commits + // seeded). Without this we'd silently regress to "always small fixture". 
+ assert_eq!( + commits.len(), + 100, + "with 250 seeded commits the inline payload should be exactly the \ + 100-commit cap; got {}", + commits.len(), + ); + + // ?n=all on the URL still parses without panicking and still applies the + // inline cap — the page-level `?n` is intentionally ignored for the + // inline payloads, so a power user with `?n=all` in the bookmark gets + // the same compact landing HTML and relies on chart-init.js to refetch + // a wider window. + let body_all = client + .get(server.url("/?n=all")) + .send() + .await? + .text() + .await?; + let payload_all = extract_chart_data(&body_all, 0).context("inline payload present")?; + assert_eq!( + payload_all["commits"] + .as_array() + .context("inline commits array")? + .len(), + 100, + "?n=all on the landing page must NOT bypass the inline cap" + ); + Ok(()) +} + +/// Sanity smoke test: round-trip every chart slug `/api/groups` returns +/// through `/chart/{slug}` to make sure each slug shape's HTML route is +/// wired up. +#[tokio::test] +async fn chart_page_round_trips_every_slug() -> Result<()> { + let server = Server::start().await?; + seed(&server).await?; + + let client = reqwest::Client::new(); + let groups: Value = client + .get(server.url("/api/groups")) + .send() + .await? + .json() + .await?; + let slugs: Vec = groups["groups"] + .as_array() + .context("groups is array")? + .iter() + .flat_map(|g| g["charts"].as_array().cloned().unwrap_or_default()) + .filter_map(|c| c["slug"].as_str().map(str::to_string)) + .collect(); + anyhow::ensure!(!slugs.is_empty(), "expected at least one chart slug"); + + for slug in &slugs { + let resp = client + .get(server.url(&format!("/chart/{slug}"))) + .send() + .await?; + assert_eq!( + resp.status(), + 200, + "chart page for slug {slug} should be 200" + ); + let body = resp.text().await?; + assert!( + body.contains(r#"id="chart-data-0""#), + "missing inline chart data for slug {slug}" + ); + } + Ok(()) +} diff --git a/benchmarks-website/server/tests/permalinks.rs b/benchmarks-website/server/tests/permalinks.rs new file mode 100644 index 00000000000..ec3c3ce9c09 --- /dev/null +++ b/benchmarks-website/server/tests/permalinks.rs @@ -0,0 +1,120 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Integration tests for `/chart/{slug}` and `/group/{slug}` permalink +//! behaviour: full-history payloads, embedded filter state, 404s on +//! unknown slugs. + +mod common; + +use anyhow::Context as _; +use anyhow::Result; + +use self::common::Server; +use self::common::extract_chart_data; +use self::common::pick_chart_slug; +use self::common::pick_group_slug; +use self::common::seed; +use self::common::seed_long_history; + +/// `/chart/{slug}` and `/group/{slug}` permalinks default to the unbounded +/// commit window, and the inlined JSON payload contains the full raw +/// history (no server-side downsampling). Visual downsampling now lives in +/// `chart-init.js` and runs on the *visible* commit range only. +#[tokio::test] +async fn permalink_pages_inline_full_raw_history() -> Result<()> { + let server = Server::start().await?; + seed_long_history(&server, 200).await?; + + let chart_slug = pick_chart_slug(&server, |s| s == "Random Access").await?; + let group_slug = pick_group_slug(&server, |s| s == "Random Access").await?; + let client = reqwest::Client::new(); + + let chart_body = client + .get(server.url(&format!("/chart/{chart_slug}"))) + .send() + .await? 
+ .text() + .await?; + let chart_payload = + extract_chart_data(&chart_body, 0).context("chart inline payload present")?; + assert_eq!( + chart_payload["commits"] + .as_array() + .context("commits is array")? + .len(), + 200, + "/chart permalink should inline the full raw history", + ); + + let group_body = client + .get(server.url(&format!("/group/{group_slug}"))) + .send() + .await? + .text() + .await?; + let group_payload = + extract_chart_data(&group_body, 0).context("group inline payload present")?; + assert_eq!( + group_payload["commits"] + .as_array() + .context("commits is array")? + .len(), + 200, + "/group permalink should inline the full raw history", + ); + + Ok(()) +} + +/// Permalink pages render the same filter dropdown in the navbar (so the +/// user can adjust visibility from any page) and embed the filter-state +/// JSON so chart-init.js applies the filter on hydration. +#[tokio::test] +async fn permalink_pages_embed_filter_state() -> Result<()> { + let server = Server::start().await?; + seed(&server).await?; + + let client = reqwest::Client::new(); + let chart_slug = pick_chart_slug(&server, |s| s.starts_with("TPC-H")).await?; + let group_slug = pick_group_slug(&server, |s| s.starts_with("TPC-H")).await?; + + let chart_body = client + .get(server.url(&format!("/chart/{chart_slug}?engine=duckdb&format=parquet"))) + .send() + .await? + .text() + .await?; + assert!( + chart_body.contains(r#"id="bench-filter-state""#), + "chart permalink must embed filter state" + ); + assert!( + chart_body.contains(r#"{"engines":["duckdb"],"formats":["parquet"]}"#), + "chart permalink must echo the query-param filter state" + ); + + let group_body = client + .get(server.url(&format!("/group/{group_slug}?engine=duckdb"))) + .send() + .await? + .text() + .await?; + assert!( + group_body.contains(r#"{"engines":["duckdb"],"formats":[]}"#), + "group permalink must echo the query-param filter state" + ); + Ok(()) +} + +#[tokio::test] +async fn unknown_slug_renders_404() -> Result<()> { + let server = Server::start().await?; + let client = reqwest::Client::new(); + + let resp = client.get(server.url("/chart/qm.aGVsbG8")).send().await?; + assert_eq!(resp.status(), 404); + let body = resp.text().await?; + assert!(body.contains("chart not found")); + Ok(()) +} diff --git a/benchmarks-website/server/tests/snapshots/chart_page_query.snap b/benchmarks-website/server/tests/snapshots/chart_page_query.snap new file mode 100644 index 00000000000..54ab6efd209 --- /dev/null +++ b/benchmarks-website/server/tests/snapshots/chart_page_query.snap @@ -0,0 +1,5 @@ +--- +source: benchmarks-website/server/tests/chart_api.rs +expression: body +--- +tpch sf=1 Q1 [nvme] — bench.vortex.dev

unit: ns · 2 series · 3 commits

Show
Y
diff --git a/benchmarks-website/server/tests/snapshots/group_page_query.snap b/benchmarks-website/server/tests/snapshots/group_page_query.snap new file mode 100644 index 00000000000..07aeacfea1f --- /dev/null +++ b/benchmarks-website/server/tests/snapshots/group_page_query.snap @@ -0,0 +1,5 @@ +--- +source: benchmarks-website/server/tests/group_api.rs +expression: body +--- +TPC-H (NVMe) (SF=1) — bench.vortex.dev

2 charts

Performance Summary

#1datafusion:vortex-file-compressed1.11x1.80 ms
#2duckdb:parquet1.60x900.00 us
Geomean of query time ratio to fastest (lower is better)

Q1

Show
Y

Q2

Show
Y
diff --git a/benchmarks-website/server/tests/snapshots/landing_page.snap b/benchmarks-website/server/tests/snapshots/landing_page.snap new file mode 100644 index 00000000000..12aafb55b63 --- /dev/null +++ b/benchmarks-website/server/tests/snapshots/landing_page.snap @@ -0,0 +1,5 @@ +--- +source: benchmarks-website/server/tests/landing.rs +expression: body +--- +bench.vortex.dev
Random Access1 chart

Random Access Performance

#1vortex-file-compressed100.50 us1.00x
#2parquet201.00 us2.00x
Random access time | Ratio to fastest (lower is better)

taxi

Show
Y
Compression1 chart

Compression Throughput vs Parquet

Write Speed (Compression)2.00x
📤Scan Speed (Decompression)2.00x
Inverse geomean of Vortex/Parquet ratios (higher is better)

tpch-lineitem

Show
Y
Compression Size1 chart

Compression Size Summary

⬇️Min Size Ratio0.50x
📊Mean Size Ratio0.50x
⬆️Max Size Ratio0.50x
Geomean of Vortex/Parquet size ratios (lower is better)

tpch-lineitem

Show
Y
TPC-H (NVMe) (SF=1)2 charts

Performance Summary

#1datafusion:vortex-file-compressed1.11x1.80 ms
#2duckdb:parquet1.60x900.00 us
Geomean of query time ratio to fastest (lower is better)

Q1

Show
Y

Q2

Show
Y
cohere-large-10m / partitioned1 chart

threshold=0.75

Show
Y
diff --git a/benchmarks-website/server/tests/snapshots/landing_page_filter_bar.snap b/benchmarks-website/server/tests/snapshots/landing_page_filter_bar.snap new file mode 100644 index 00000000000..1a995f8a01f --- /dev/null +++ b/benchmarks-website/server/tests/snapshots/landing_page_filter_bar.snap @@ -0,0 +1,5 @@ +--- +source: benchmarks-website/server/tests/web_ui.rs +expression: filter_bar_section(&body) +--- +
diff --git a/benchmarks-website/server/tests/static_assets.rs b/benchmarks-website/server/tests/static_assets.rs new file mode 100644 index 00000000000..0205dc685d3 --- /dev/null +++ b/benchmarks-website/server/tests/static_assets.rs @@ -0,0 +1,158 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Integration tests for the bundled `/static/...` asset routes plus the +//! response compression layer. + +mod common; + +use std::io::Read as _; + +use anyhow::Result; +use flate2::read::GzDecoder; + +use self::common::Server; +use self::common::seed; + +#[tokio::test] +async fn static_assets_are_served() -> Result<()> { + let server = Server::start().await?; + let client = reqwest::Client::new(); + + for (path, ct_prefix) in [ + ("/static/chart.umd.js", "application/javascript"), + ( + "/static/chartjs-plugin-zoom.umd.min.js", + "application/javascript", + ), + ("/static/chart-init.js", "application/javascript"), + ("/static/style.css", "text/css"), + ("/Vortex_Black_NoBG.png", "image/png"), + ("/Vortex_White_NoBG.png", "image/png"), + ] { + let resp = client.get(server.url(path)).send().await?; + assert_eq!(resp.status(), 200, "GET {path} should be 200"); + let ct = resp + .headers() + .get(reqwest::header::CONTENT_TYPE) + .and_then(|v| v.to_str().ok()) + .unwrap_or_default() + .to_string(); + assert!( + ct.starts_with(ct_prefix), + "GET {path}: content-type {ct:?} should start with {ct_prefix:?}" + ); + let cache_control = resp + .headers() + .get(reqwest::header::CACHE_CONTROL) + .and_then(|v| v.to_str().ok()) + .unwrap_or_default() + .to_string(); + assert!( + cache_control.contains("no-cache"), + "GET {path}: static assets should revalidate so UI CSS/JS changes are not stale" + ); + let bytes = resp.bytes().await?; + assert!(!bytes.is_empty(), "GET {path}: body must not be empty"); + } + Ok(()) +} + +/// Every response — landing HTML, chart JSON, bundled JS — flows through +/// `tower-http`'s `CompressionLayer` so a client advertising +/// `Accept-Encoding: gzip` gets a gzipped (or brotli) body. The +/// reqwest dev-dependency is built without `gzip`/`brotli` features, so the +/// transport hands us the compressed bytes verbatim and we can both inspect +/// the `content-encoding` response header and decompress the body manually +/// to confirm it matches the uncompressed snapshot. +#[tokio::test] +async fn responses_are_compressed_when_client_accepts_gzip() -> Result<()> { + let server = Server::start().await?; + seed(&server).await?; + + let client = reqwest::Client::new(); + + // 1. Landing HTML. + let plain_body = client.get(server.url("/")).send().await?.text().await?; + let resp = client + .get(server.url("/")) + .header(reqwest::header::ACCEPT_ENCODING, "gzip") + .send() + .await?; + assert_eq!(resp.status(), 200); + let encoding = resp + .headers() + .get(reqwest::header::CONTENT_ENCODING) + .and_then(|v| v.to_str().ok()) + .unwrap_or_default() + .to_string(); + assert_eq!( + encoding, "gzip", + "GET / with Accept-Encoding: gzip should respond with gzip" + ); + let compressed = resp.bytes().await?; + assert!( + compressed.len() < plain_body.len(), + "compressed body ({} B) should be smaller than plain body ({} B)", + compressed.len(), + plain_body.len(), + ); + let mut decoded = String::new(); + GzDecoder::new(&compressed[..]).read_to_string(&mut decoded)?; + assert_eq!( + decoded, plain_body, + "gzipped landing body should decompress to the uncompressed body" + ); + + // 2. 
Bundled JS — the heaviest static asset; gzip is the whole point. + let plain_js = client + .get(server.url("/static/chart.umd.js")) + .send() + .await? + .bytes() + .await?; + let js_resp = client + .get(server.url("/static/chart.umd.js")) + .header(reqwest::header::ACCEPT_ENCODING, "gzip") + .send() + .await?; + assert_eq!(js_resp.status(), 200); + let js_encoding = js_resp + .headers() + .get(reqwest::header::CONTENT_ENCODING) + .and_then(|v| v.to_str().ok()) + .unwrap_or_default() + .to_string(); + assert_eq!( + js_encoding, "gzip", + "/static/chart.umd.js must compress so the cold load isn't dominated by ~200KB of JS" + ); + let compressed_js = js_resp.bytes().await?; + let mut decoded_js = Vec::new(); + GzDecoder::new(&compressed_js[..]).read_to_end(&mut decoded_js)?; + assert_eq!( + decoded_js, + plain_js.as_ref(), + "decompressed chart.umd.js should match the unencoded body byte-for-byte" + ); + + // 3. Brotli is also offered when the client prefers it. + let br_resp = client + .get(server.url("/")) + .header(reqwest::header::ACCEPT_ENCODING, "br") + .send() + .await?; + assert_eq!(br_resp.status(), 200); + let br_encoding = br_resp + .headers() + .get(reqwest::header::CONTENT_ENCODING) + .and_then(|v| v.to_str().ok()) + .unwrap_or_default() + .to_string(); + assert_eq!( + br_encoding, "br", + "GET / with Accept-Encoding: br should respond with brotli" + ); + + Ok(()) +} diff --git a/benchmarks-website/server/tests/web_ui.rs b/benchmarks-website/server/tests/web_ui.rs new file mode 100644 index 00000000000..38e9a0bf053 --- /dev/null +++ b/benchmarks-website/server/tests/web_ui.rs @@ -0,0 +1,502 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Integration tests for the v3 web UI features that span the API and +//! HTML routes: +//! +//! - **Per-group hover descriptions** (Task A): editorial blurbs port from +//! v2's `BENCHMARK_DESCRIPTIONS` + `getBenchmarkDescription`. Asserted on +//! the landing page and on the `/group/{slug}` permalink. +//! - **Partial-coverage commits** (Task B): a chart's x-axis includes +//! commits that have NO row in the fact table for this chart, so +//! missing measurements render as visible gaps rather than silently +//! bridged lines. + +mod common; + +use anyhow::Context as _; +use anyhow::Result; +use serde_json::Value; +use serde_json::json; + +use self::common::Server; +use self::common::TOKEN; +use self::common::pick_chart_slug; +use self::common::pick_group_slug; +use self::common::seed; + +// ============================================================================= +// Task A — per-group hover descriptions +// ============================================================================= + +/// The landing page renders a small ⓘ icon next to every group title that +/// has a canonical description, with the description surfaced via the +/// `data-tooltip` attribute (CSS-only hover/focus tooltip). The description +/// also appears on `/api/groups`. +#[tokio::test] +async fn landing_page_renders_group_descriptions() -> Result<()> { + let server = Server::start().await?; + seed(&server).await?; + + let client = reqwest::Client::new(); + let body = client.get(server.url("/")).send().await?.text().await?; + + // Random Access — verbatim v2 description. 
+ assert!( + body.contains(r#"data-tooltip="Tests performance of selecting arbitrary row indices from a file on NVMe storage""#), + "Random Access description must appear as a hover tooltip on the landing page" + ); + // Compression — verbatim v2 description (the longer wording, not the + // shorter `getBenchmarkDescription` fallback). + assert!( + body.contains(r#"data-tooltip="Measures encoding and decoding throughput (MB/s) for Vortex files and Parquet files (with zstd page compression)""#), + "Compression description must appear as a hover tooltip on the landing page" + ); + // Compression Size — verbatim v2 description. + assert!( + body.contains(r#"data-tooltip="Compares compressed file sizes and compression ratios across different encoding strategies""#), + "Compression Size description must appear as a hover tooltip on the landing page" + ); + // TPC-H NVMe SF=1 — derived description with scale-bytes annotation. + assert!( + body.contains( + r#"data-tooltip="TPC-H benchmark queries on local NVMe storage at SF=1 (~1GB of data)""# + ), + "TPC-H description with scale-bytes annotation must appear on the landing page" + ); + + // The icon itself is keyboard-focusable + role-annotated for a11y. + assert!( + body.contains(r#" Result<()> { + let server = Server::start().await?; + seed(&server).await?; + + let client = reqwest::Client::new(); + let slug = pick_group_slug(&server, |s| s == "Random Access").await?; + let body = client + .get(server.url(&format!("/group/{slug}"))) + .send() + .await? + .text() + .await?; + + assert!( + body.contains(r#"data-tooltip="Tests performance of selecting arbitrary row indices from a file on NVMe storage""#), + "group permalink page must render the same description tooltip as the landing page" + ); + assert!( + body.contains(r#" Result<()> { + let server = Server::start().await?; + seed(&server).await?; + + let client = reqwest::Client::new(); + let body = client.get(server.url("/")).send().await?.text().await?; + + // Locate the vector-search section and assert no info-icon inside its + // disclosure summary. + let needle = r#"data-group-name="cohere-large-10m / partitioned""#; + let start = body.find(needle).context("vector-search section present")?; + // The `` tag is the disclosure header; we want the slice + // between this section's start and the end of its ``. + let summary_end = body[start..] + .find("") + .map(|p| start + p) + .context("section contains ")?; + let summary = &body[start..summary_end]; + assert!( + !summary.contains("group-info-icon"), + "vector-search group should not render an info-icon (no canonical description), got: {summary}" + ); + Ok(()) +} + +/// `/api/groups` carries the description on every group entry as a `description` +/// field, so external API consumers can render their own UI without having to +/// hard-code v2's description list. +#[tokio::test] +async fn groups_api_carries_description_field() -> Result<()> { + let server = Server::start().await?; + seed(&server).await?; + + let client = reqwest::Client::new(); + let groups: Value = client + .get(server.url("/api/groups")) + .send() + .await? 
+ .json() + .await?; + let arr = groups["groups"].as_array().context("groups[] array")?; + let by_name = |n: &str| { + arr.iter() + .find(|g| g["name"].as_str() == Some(n)) + .with_context(|| format!("group {n:?} present")) + }; + assert_eq!( + by_name("Random Access")?["description"].as_str(), + Some("Tests performance of selecting arbitrary row indices from a file on NVMe storage"), + ); + assert_eq!( + by_name("TPC-H (NVMe) (SF=1)")?["description"].as_str(), + Some("TPC-H benchmark queries on local NVMe storage at SF=1 (~1GB of data)"), + ); + // Vector-search group has no canonical description; the `description` + // key should be absent (skip_serializing_if). + let vsg = by_name("cohere-large-10m / partitioned")?; + assert!( + vsg.get("description").is_none(), + "vector-search group should not carry a description field, got: {vsg}" + ); + Ok(()) +} + +// ============================================================================= +// Task B — partial-coverage commits +// ============================================================================= + +/// Build an envelope that records a `random_access_time` measurement only +/// for the listed `(format, value_ns)` pairs. The fixture commits' SHAs are +/// deterministic so tests can assert on them. +fn ra_envelope(sha: &str, ts: &str, msg: &str, rows: &[(&str, i64)]) -> Value { + json!({ + "run_meta": { + "benchmark_id": "partial-coverage-fixture", + "schema_version": 1, + "started_at": ts + }, + "commit": { + "sha": sha, + "timestamp": ts, + "message": msg, + "author_name": "Test Author", + "author_email": "author@example.com", + "committer_name": "Test Committer", + "committer_email": "committer@example.com", + "tree_sha": "fedcba9876543210fedcba9876543210fedcba98", + "url": format!("https://github.com/vortex-data/vortex/commit/{sha}") + }, + "records": rows.iter().map(|(format, value_ns)| json!({ + "kind": "random_access_time", + "commit_sha": sha, + "dataset": "taxi", + "format": format, + "value_ns": value_ns, + "all_runtimes_ns": [value_ns] + })).collect::<Vec<_>>() + }) +} + +/// Regression test for "charts have invisible gaps where commits should be." +/// +/// Seed three commits A, B, C in chronological order: +/// * A — series X and Y both have data +/// * B — only series Y has data (X crashed; this is the partial-coverage case) +/// * C — series X and Y both have data +/// +/// The chart's `commits[]` must include all three commits (B included), +/// and series X's value at B must be `null`. Before the fix the chart +/// silently dropped B because `SeriesAccumulator::ensure_commit` only +/// registered commits that had at least one row in the fact table.
+#[tokio::test] +async fn chart_includes_commits_with_partial_series_coverage() -> Result<()> { + let server = Server::start().await?; + let client = reqwest::Client::new(); + + let envelopes = [ + ra_envelope( + "aaaa111111111111111111111111111111111111", + "2026-04-23T12:00:00Z", + "A: both series", + &[("vortex-file-compressed", 500), ("parquet", 1_000)], + ), + ra_envelope( + "bbbb222222222222222222222222222222222222", + "2026-04-24T12:00:00Z", + "B: only parquet (vortex crashed)", + &[("parquet", 1_100)], + ), + ra_envelope( + "cccc333333333333333333333333333333333333", + "2026-04-25T12:00:00Z", + "C: both series", + &[("vortex-file-compressed", 600), ("parquet", 1_200)], + ), + ]; + for env in &envelopes { + let resp = client + .post(server.url("/api/ingest")) + .bearer_auth(TOKEN) + .json(env) + .send() + .await?; + anyhow::ensure!( + resp.status().is_success(), + "seed ingest failed: {}", + resp.status() + ); + } + + let slug = pick_chart_slug(&server, |s| s == "Random Access").await?; + let chart: Value = client + .get(server.url(&format!("/api/chart/{slug}"))) + .send() + .await? + .json() + .await?; + + let commits = chart["commits"].as_array().context("commits[] array")?; + let shas: Vec<&str> = commits.iter().filter_map(|c| c["sha"].as_str()).collect(); + assert_eq!( + shas, + vec![ + "aaaa111111111111111111111111111111111111", + "bbbb222222222222222222222222222222222222", + "cccc333333333333333333333333333333333333", + ], + "all three commits must appear in commits[], including the partial-coverage commit B" + ); + + // Series X (vortex-file-compressed) has data at A and C, NULL at B. + let vortex = chart["series"]["vortex-file-compressed"] + .as_array() + .context("vortex-file-compressed series array")?; + assert_eq!(vortex.len(), 3, "series array aligns with commits[]"); + assert_eq!(vortex[0].as_f64(), Some(500.0)); + assert!( + vortex[1].is_null(), + "vortex-file-compressed must be null at the partial-coverage commit, got {:?}", + vortex[1], + ); + assert_eq!(vortex[2].as_f64(), Some(600.0)); + + // Series Y (parquet) has data at all three commits. + let parquet = chart["series"]["parquet"] + .as_array() + .context("parquet series array")?; + assert_eq!(parquet[0].as_f64(), Some(1_000.0)); + assert_eq!(parquet[1].as_f64(), Some(1_100.0)); + assert_eq!(parquet[2].as_f64(), Some(1_200.0)); + + Ok(()) +} + +/// A commit with NO row in the chart's fact table (every benchmark crashed +/// for that commit) still appears on the chart's x-axis as long as it falls +/// within the chart's window — i.e. ≥ the earliest commit that has data. +/// +/// Seed two commits with random-access data and one commit that only has a +/// `compression_size` row. The compression-size-only commit is in the +/// `commits` dim but has nothing in `random_access_times`, so the random- +/// access chart should still place it on the x-axis with NULL for every +/// series. +#[tokio::test] +async fn chart_includes_commits_with_zero_rows_in_fact_table() -> Result<()> { + let server = Server::start().await?; + let client = reqwest::Client::new(); + + // Commit A: random-access only. + let env_a = ra_envelope( + "aaaa111111111111111111111111111111111111", + "2026-04-23T12:00:00Z", + "A", + &[("parquet", 1_000)], + ); + // Commit B (chronologically between A and C): a compression_size row, + // nothing in random_access_times. 
+ let env_b = json!({ + "run_meta": { + "benchmark_id": "partial-coverage-fixture", + "schema_version": 1, + "started_at": "2026-04-24T12:00:00Z" + }, + "commit": { + "sha": "bbbb222222222222222222222222222222222222", + "timestamp": "2026-04-24T12:00:00Z", + "message": "B: random-access did not run (only compression_size emitted)", + "author_name": "Test Author", + "author_email": "author@example.com", + "committer_name": "Test Committer", + "committer_email": "committer@example.com", + "tree_sha": "fedcba9876543210fedcba9876543210fedcba98", + "url": "https://github.com/vortex-data/vortex/commit/bbbb222222222222222222222222222222222222" + }, + "records": [ + { + "kind": "compression_size", + "commit_sha": "bbbb222222222222222222222222222222222222", + "dataset": "tpch-lineitem", + "format": "parquet", + "value_bytes": 4_000, + }, + ], + }); + // Commit C: random-access again. + let env_c = ra_envelope( + "cccc333333333333333333333333333333333333", + "2026-04-25T12:00:00Z", + "C", + &[("parquet", 1_200)], + ); + + for env in [&env_a, &env_b, &env_c] { + let resp = client + .post(server.url("/api/ingest")) + .bearer_auth(TOKEN) + .json(env) + .send() + .await?; + anyhow::ensure!( + resp.status().is_success(), + "seed ingest failed: {}", + resp.status() + ); + } + + let slug = pick_chart_slug(&server, |s| s == "Random Access").await?; + let chart: Value = client + .get(server.url(&format!("/api/chart/{slug}"))) + .send() + .await? + .json() + .await?; + + let shas: Vec<&str> = chart["commits"] + .as_array() + .context("commits[] array")? + .iter() + .filter_map(|c| c["sha"].as_str()) + .collect(); + assert_eq!( + shas, + vec![ + "aaaa111111111111111111111111111111111111", + "bbbb222222222222222222222222222222222222", + "cccc333333333333333333333333333333333333", + ], + "the commit with zero rows in the fact table must still appear in commits[]" + ); + + // The parquet series has data only at A and C. + let parquet = chart["series"]["parquet"] + .as_array() + .context("parquet series array")?; + assert_eq!(parquet.len(), 3); + assert_eq!(parquet[0].as_f64(), Some(1_000.0)); + assert!( + parquet[1].is_null(), + "parquet must be null at the zero-rows commit" + ); + assert_eq!(parquet[2].as_f64(), Some(1_200.0)); + + Ok(()) +} + +/// Commits older than the earliest fact-table row for this chart are NOT +/// included on the x-axis. Without this lower bound a chart's first commit +/// could be from before the benchmark even existed — the spec calls this +/// out explicitly as "Beware: don't accidentally include EVERY commit ever." +#[tokio::test] +async fn chart_excludes_commits_before_first_fact_row() -> Result<()> { + let server = Server::start().await?; + let client = reqwest::Client::new(); + + // Commit A: a `compression_time` row (random-access does not exist for A). 
+ let env_a = json!({ + "run_meta": { + "benchmark_id": "partial-coverage-fixture", + "schema_version": 1, + "started_at": "2026-04-22T12:00:00Z" + }, + "commit": { + "sha": "aaaa111111111111111111111111111111111111", + "timestamp": "2026-04-22T12:00:00Z", + "message": "A: pre-history of the random-access bench", + "author_name": "Test Author", + "author_email": "author@example.com", + "committer_name": "Test Committer", + "committer_email": "committer@example.com", + "tree_sha": "fedcba9876543210fedcba9876543210fedcba98", + "url": "https://github.com/vortex-data/vortex/commit/aaaa111111111111111111111111111111111111" + }, + "records": [ + { + "kind": "compression_time", + "commit_sha": "aaaa111111111111111111111111111111111111", + "dataset": "tpch-lineitem", + "format": "parquet", + "op": "encode", + "value_ns": 9_000, + "all_runtimes_ns": [9_000], + }, + ], + }); + // Commit B: first random-access row appears. + let env_b = ra_envelope( + "bbbb222222222222222222222222222222222222", + "2026-04-23T12:00:00Z", + "B: random-access bench begins", + &[("parquet", 1_000)], + ); + + for env in [&env_a, &env_b] { + let resp = client + .post(server.url("/api/ingest")) + .bearer_auth(TOKEN) + .json(env) + .send() + .await?; + anyhow::ensure!( + resp.status().is_success(), + "seed ingest failed: {}", + resp.status() + ); + } + + let slug = pick_chart_slug(&server, |s| s == "Random Access").await?; + let chart: Value = client + .get(server.url(&format!("/api/chart/{slug}"))) + .send() + .await? + .json() + .await?; + + let shas: Vec<&str> = chart["commits"] + .as_array() + .context("commits[] array")? + .iter() + .filter_map(|c| c["sha"].as_str()) + .collect(); + assert_eq!( + shas, + vec!["bbbb222222222222222222222222222222222222"], + "commit A predates the first random-access row, so it must not be on the x-axis" + ); + Ok(()) +} diff --git a/benchmarks-website/src/styles/index.css b/benchmarks-website/src/styles/index.css index b32bf21377c..910670ec28f 100644 --- a/benchmarks-website/src/styles/index.css +++ b/benchmarks-website/src/styles/index.css @@ -151,7 +151,7 @@ code, pre { } .site-logo { - height: 48px; + height: 24px; width: auto; display: block; } @@ -161,7 +161,7 @@ code, pre { font-size: 1.5rem; font-weight: 600; margin: 0; - margin-left: calc(var(--sidebar-width) - 180px); + margin-left: calc(var(--sidebar-width) - 156px); color: var(--text-color); display: none; white-space: nowrap; @@ -1173,7 +1173,7 @@ code, pre { } .site-logo { - height: 32px; + height: 16px; } .site-title { diff --git a/vortex-bench/src/snapshots/vortex_bench__v3__tests__snapshot_query_measurement_clickbench_no_memory@clickbench.snap b/vortex-bench/src/snapshots/vortex_bench__v3__tests__snapshot_query_measurement_clickbench_no_memory@clickbench.snap index f3273b62a1b..8bb99a9a663 100644 --- a/vortex-bench/src/snapshots/vortex_bench__v3__tests__snapshot_query_measurement_clickbench_no_memory@clickbench.snap +++ b/vortex-bench/src/snapshots/vortex_bench__v3__tests__snapshot_query_measurement_clickbench_no_memory@clickbench.snap @@ -6,7 +6,6 @@ expression: render(&record) "kind": "query_measurement", "commit_sha": "", "dataset": "clickbench", - "dataset_variant": "partitioned", "query_idx": 1, "storage": "s3", "engine": "duckdb", diff --git a/vortex-bench/src/v3.rs b/vortex-bench/src/v3.rs index fdc71547d10..8b4064c0c2f 100644 --- a/vortex-bench/src/v3.rs +++ b/vortex-bench/src/v3.rs @@ -20,7 +20,6 @@ use target_lexicon::Triple; use crate::BenchmarkDataset; use crate::Engine; use crate::Format; -use 
crate::clickbench::Flavor; use crate::compress::CompressOp; use crate::measurements::CompressionTimingMeasurement; use crate::measurements::MemoryMeasurement; @@ -72,7 +71,9 @@ pub struct QueryMeasurementRecord { /// ClickBench flavor (`partitioned`/`single`) or Public-BI sub-dataset name. #[serde(skip_serializing_if = "Option::is_none")] pub dataset_variant: Option<String>, - /// TPC scale factor or `n_rows` for StatPopGen / PolarSignals. + /// TPC scale factor (TPC-H / TPC-DS only). Other suites leave this `None` + /// so live records merge with the migrated v2 history, which never carried + /// a per-suite scale factor. #[serde(skip_serializing_if = "Option::is_none")] pub scale_factor: Option<String>, /// 1-based query index within the suite. @@ -201,30 +202,26 @@ pub struct VectorSearchRunRecord { /// `benchmarks-website/planning/benchmark-mapping.md`. pub fn benchmark_dataset_dims(d: &BenchmarkDataset) -> (String, Option<String>, Option<String>) { match d { - BenchmarkDataset::TpcH { scale_factor } => ( - "tpch".to_string(), - None, - Some(canonical_tpc_scale_factor(scale_factor)), - ), - BenchmarkDataset::TpcDS { scale_factor } => ( - "tpcds".to_string(), - None, - Some(canonical_tpc_scale_factor(scale_factor)), - ), - BenchmarkDataset::ClickBench { flavor } => { - let variant = match flavor { - Flavor::Partitioned => "partitioned", - Flavor::Single => "single", - }; - ("clickbench".to_string(), Some(variant.to_string()), None) - } - BenchmarkDataset::PublicBi { name } => ("public-bi".to_string(), Some(name.clone()), None), - BenchmarkDataset::StatPopGen { n_rows } => { - ("statpopgen".to_string(), None, Some(n_rows.to_string())) + BenchmarkDataset::TpcH { scale_factor } => { + ("tpch".to_string(), None, Some(scale_factor.clone())) } - BenchmarkDataset::PolarSignals { n_rows } => { - ("polarsignals".to_string(), None, Some(n_rows.to_string())) + BenchmarkDataset::TpcDS { scale_factor } => { + ("tpcds".to_string(), None, Some(scale_factor.clone())) } + // ClickBench: the migrate path leaves `dataset_variant` NULL because + // v2 record names did not encode flavor, so the live emitter does the + // same to keep historical and live records in one `clickbench` group. + // Flavor is fixed per CI matrix entry and recoverable from there. + BenchmarkDataset::ClickBench { .. } => ("clickbench".to_string(), None, None), + BenchmarkDataset::PublicBi { name } => ("public-bi".to_string(), Some(name.clone()), None), + // StatPopGen / PolarSignals: the migrate path (v2 → v3 backfill) does + // not carry a per-record scale factor for these suites, so writing one + // here would split each into two groups (sf=NULL historical vs. sf=N + // live). Drop it to keep live ingests merging into the migrated + // group. The dataset-level `n_rows` is recoverable from the bench + // matrix if ever needed. + BenchmarkDataset::StatPopGen { .. } => ("statpopgen".to_string(), None, None), + BenchmarkDataset::PolarSignals { ..
} => ("polarsignals".to_string(), None, None), BenchmarkDataset::Fineweb => ("fineweb".to_string(), None, None), BenchmarkDataset::GhArchive => ("gharchive".to_string(), None, None), } @@ -241,7 +238,6 @@ pub fn query_measurement_record( let (dataset, dataset_variant, scale_factor) = benchmark_dataset_dims(&qm.benchmark_dataset); let value_ns = duration_as_ns(qm.median_run()); let all_runtimes_ns = qm.runs.iter().copied().map(duration_as_ns).collect(); - let query_idx = v3_query_idx(qm); let (peak_physical, peak_virtual, physical_delta, virtual_delta) = match memory { Some(m) => ( Some(m.peak_physical_memory), @@ -256,7 +252,7 @@ dataset, dataset_variant, scale_factor, - query_idx, + query_idx: u32::try_from(qm.query_idx).unwrap_or(u32::MAX), storage: qm.storage.clone(), engine: engine_label(qm.target.engine).to_string(), format: qm.target.format.name().to_string(), @@ -388,34 +384,6 @@ fn duration_as_ns(d: std::time::Duration) -> u64 { u64::try_from(d.as_nanos()).unwrap_or(u64::MAX) } -fn canonical_tpc_scale_factor(scale_factor: &str) -> String { - let trimmed = scale_factor.trim(); - match trimmed.parse::<f64>() { - Ok(value) if value.is_finite() => format!("{value}"), - _ => scale_factor.to_string(), - } -} - -fn v3_query_idx(qm: &QueryMeasurement) -> u32 { - let query_idx = if query_source_is_zero_based(&qm.benchmark_dataset) { - qm.query_idx.saturating_add(1) - } else { - qm.query_idx - }; - u32::try_from(query_idx).unwrap_or(u32::MAX) -} - -fn query_source_is_zero_based(dataset: &BenchmarkDataset) -> bool { - matches!( - dataset, - BenchmarkDataset::ClickBench { .. } - | BenchmarkDataset::StatPopGen { .. } - | BenchmarkDataset::PolarSignals { .. } - | BenchmarkDataset::Fineweb - | BenchmarkDataset::GhArchive - ) -} - fn engine_label(engine: Engine) -> &'static str { match engine { Engine::Vortex => "vortex", @@ -494,7 +462,7 @@ mod tests { #[test] fn snapshot_query_measurement_clickbench_no_memory() -> anyhow::Result<()> { let qm = QueryMeasurement { - query_idx: 0, + query_idx: 1, target: Target::new(Engine::DuckDB, Format::Parquet), benchmark_dataset: BenchmarkDataset::ClickBench { flavor: Flavor::Partitioned, @@ -511,80 +479,6 @@ Ok(()) } - #[test] - fn tpc_scale_factors_are_canonicalized_for_query_dims() { - assert_eq!( - benchmark_dataset_dims(&BenchmarkDataset::TpcH { - scale_factor: "1.0".to_string() - }), - ("tpch".to_string(), None, Some("1".to_string())) - ); - assert_eq!( - benchmark_dataset_dims(&BenchmarkDataset::TpcDS { - scale_factor: "10.0".to_string() - }), - ("tpcds".to_string(), None, Some("10".to_string())) - ); - } - - #[test] - fn zero_based_query_sources_emit_one_based_query_idx() { - let datasets = [ - BenchmarkDataset::ClickBench { - flavor: Flavor::Partitioned, - }, - BenchmarkDataset::StatPopGen { n_rows: 100_000 }, - BenchmarkDataset::PolarSignals { n_rows: 1_000_000 }, - BenchmarkDataset::Fineweb, - BenchmarkDataset::GhArchive, - ]; - - for benchmark_dataset in datasets { - let qm = QueryMeasurement { - query_idx: 0, - target: Target::new(Engine::DataFusion, Format::Parquet), - benchmark_dataset, - benchmark_runner: "ci-runner".to_string(), - storage: "nvme".to_string(), - runs: vec![Duration::from_nanos(1)], - }; - let V3Record::QueryMeasurement(record) = query_measurement_record(&qm, None) else { - panic!("expected query measurement record"); - }; - assert_eq!(record.query_idx, 1); - } - } - - #[test] - fn one_based_query_sources_keep_query_idx() { - let datasets = [ - BenchmarkDataset::TpcH { - scale_factor:
"1".to_string(), - }, - BenchmarkDataset::TpcDS { - scale_factor: "1".to_string(), - }, - BenchmarkDataset::PublicBi { - name: "cms-provider".to_string(), - }, - ]; - - for benchmark_dataset in datasets { - let qm = QueryMeasurement { - query_idx: 1, - target: Target::new(Engine::DataFusion, Format::Parquet), - benchmark_dataset, - benchmark_runner: "ci-runner".to_string(), - storage: "nvme".to_string(), - runs: vec![Duration::from_nanos(1)], - }; - let V3Record::QueryMeasurement(record) = query_measurement_record(&qm, None) else { - panic!("expected query measurement record"); - }; - assert_eq!(record.query_idx, 1); - } - } - #[test] fn snapshot_compression_time_encode() -> anyhow::Result<()> { let timing = CompressionTimingMeasurement { @@ -665,6 +559,45 @@ mod tests { Ok(()) } + #[test] + fn live_dims_match_migrate_for_non_fan_out_suites() { + // The v2 → v3 migrate classifier leaves both `dataset_variant` and + // `scale_factor` NULL for the non-fan-out SQL suites (clickbench, + // polarsignals, statpopgen, fineweb, gharchive). The live emitter + // must do the same so live ingests merge with migrated history into + // a single group instead of forking off a sibling group keyed on a + // dim the historical rows do not carry. + for (case, expected) in [ + ( + BenchmarkDataset::ClickBench { + flavor: Flavor::Partitioned, + }, + "clickbench", + ), + ( + BenchmarkDataset::ClickBench { + flavor: Flavor::Single, + }, + "clickbench", + ), + ( + BenchmarkDataset::PolarSignals { n_rows: 1_000_000 }, + "polarsignals", + ), + ( + BenchmarkDataset::StatPopGen { n_rows: 100_000 }, + "statpopgen", + ), + (BenchmarkDataset::Fineweb, "fineweb"), + (BenchmarkDataset::GhArchive, "gharchive"), + ] { + let (ds, variant, sf) = benchmark_dataset_dims(&case); + assert_eq!(ds, expected, "dataset for {case:?}"); + assert_eq!(variant, None, "dataset_variant for {case:?}"); + assert_eq!(sf, None, "scale_factor for {case:?}"); + } + } + #[test] fn jsonl_round_trips_one_record_per_line() -> anyhow::Result<()> { let record = compression_size_record("taxi", None, Format::Parquet, 100);