From 1ef109ae35c59d8b13268ec4cacc62778dfe3ac8 Mon Sep 17 00:00:00 2001 From: Sewer56 Date: Sat, 28 Feb 2026 15:03:58 +0000 Subject: [PATCH 01/22] Added: llm-models-dev crate skeleton - Create llm-coding-tools-models-dev crate (v0.1.0) - Add public API: ModelsDevCatalog::load(), load_at() - Define CatalogError, CatalogLoadResult, CatalogLoadSource - Add cache path helpers and documentation - Add workspace integration and CI workflow updates - Include comprehensive documentation for all public items --- .github/workflows/rust.yml | 7 +- src/.cargo/verify.ps1 | 6 +- src/.cargo/verify.sh | 6 +- src/Cargo.lock | 158 ++++++++++++++++-- src/Cargo.toml | 2 +- src/llm-coding-tools-models-dev/Cargo.toml | 48 ++++++ src/llm-coding-tools-models-dev/README.md | 64 +++++++ .../src/cache/mod.rs | 8 + .../src/cache/path.rs | 48 ++++++ .../src/catalog/load_result.rs | 52 ++++++ .../src/catalog/mod.rs | 113 +++++++++++++ src/llm-coding-tools-models-dev/src/error.rs | 36 ++++ src/llm-coding-tools-models-dev/src/lib.rs | 9 + 13 files changed, 542 insertions(+), 15 deletions(-) create mode 100644 src/llm-coding-tools-models-dev/Cargo.toml create mode 100644 src/llm-coding-tools-models-dev/README.md create mode 100644 src/llm-coding-tools-models-dev/src/cache/mod.rs create mode 100644 src/llm-coding-tools-models-dev/src/cache/path.rs create mode 100644 src/llm-coding-tools-models-dev/src/catalog/load_result.rs create mode 100644 src/llm-coding-tools-models-dev/src/catalog/mod.rs create mode 100644 src/llm-coding-tools-models-dev/src/error.rs create mode 100644 src/llm-coding-tools-models-dev/src/lib.rs diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 8ebd0db2..f24cfd1f 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -52,13 +52,13 @@ jobs: cargo +stable binstall --no-confirm cargo-semver-checks --force rustup +stable target add ${{ matrix.target }} - for CRATE in "llm-coding-tools-core" "llm-coding-tools-agents" 
"llm-coding-tools-serdesai"; do + for CRATE in "llm-coding-tools-core" "llm-coding-tools-agents" "llm-coding-tools-serdesai" "llm-coding-tools-models-dev"; do SEARCH_RESULT=$(cargo search "^${CRATE}$" --limit 1) if echo "$SEARCH_RESULT" | grep -q "^${CRATE} "; then echo "Running semver checks for ${CRATE}..." # Note: llm-coding-tools-core has mutually exclusive async/blocking features, # so we must use --only-explicit-features to avoid enabling all features. - # The serdesai crate is async-only and doesn't have the tokio feature. + # The serdesai and models-dev crates are async-only and don't have the tokio feature. if [ "${CRATE}" = "llm-coding-tools-core" ]; then cargo +stable semver-checks -p "${CRATE}" --target ${{ matrix.target }} --only-explicit-features --features tokio else @@ -79,6 +79,7 @@ jobs: cargo doc -p llm-coding-tools-core --features tokio --document-private-items --no-deps --target ${{ matrix.target }} cargo doc -p llm-coding-tools-agents --document-private-items --no-deps --target ${{ matrix.target }} cargo doc -p llm-coding-tools-serdesai --document-private-items --no-deps --target ${{ matrix.target }} + cargo doc -p llm-coding-tools-models-dev --document-private-items --no-deps --target ${{ matrix.target }} - name: Run linter if: github.event_name == 'pull_request' || startsWith(github.ref, 'refs/tags/') @@ -88,6 +89,7 @@ jobs: cargo clippy -p llm-coding-tools-core --features tokio --target ${{ matrix.target }} -- -D warnings cargo clippy -p llm-coding-tools-agents --target ${{ matrix.target }} -- -D warnings cargo clippy -p llm-coding-tools-serdesai --target ${{ matrix.target }} -- -D warnings + cargo clippy -p llm-coding-tools-models-dev --target ${{ matrix.target }} -- -D warnings - name: Run formatter check uses: actions-rust-lang/rustfmt@v1 @@ -165,6 +167,7 @@ jobs: src/llm-coding-tools-core src/llm-coding-tools-agents src/llm-coding-tools-serdesai + src/llm-coding-tools-models-dev compression-tool: 7z artifact-groups-file: 
.github/artifact-groups.yml changelog-enabled: "true" diff --git a/src/.cargo/verify.ps1 b/src/.cargo/verify.ps1 index 745bf015..b90d0507 100644 --- a/src/.cargo/verify.ps1 +++ b/src/.cargo/verify.ps1 @@ -2,7 +2,7 @@ # All steps must pass without warnings # Keep in sync with verify.sh # -# Note: llm-coding-tools-serdesai is async-only (implements async Tool traits). +# Note: llm-coding-tools-serdesai and llm-coding-tools-models-dev are async-only. # The blocking feature only applies to llm-coding-tools-core. $ErrorActionPreference = "Stop" @@ -35,16 +35,19 @@ try { Invoke-LoggedCommand "cargo" @("build", "-p", "llm-coding-tools-core", "--quiet") Invoke-LoggedCommand "cargo" @("build", "-p", "llm-coding-tools-agents", "--quiet") Invoke-LoggedCommand "cargo" @("build", "-p", "llm-coding-tools-serdesai", "--quiet") +Invoke-LoggedCommand "cargo" @("build", "-p", "llm-coding-tools-models-dev", "--quiet") Write-Host "Testing..." Invoke-LoggedCommand "cargo" @("test", "-p", "llm-coding-tools-core", "--quiet") Invoke-LoggedCommand "cargo" @("test", "-p", "llm-coding-tools-agents", "--quiet") Invoke-LoggedCommand "cargo" @("test", "-p", "llm-coding-tools-serdesai", "--quiet") +Invoke-LoggedCommand "cargo" @("test", "-p", "llm-coding-tools-models-dev", "--quiet") Write-Host "Clippy..." Invoke-LoggedCommand "cargo" @("clippy", "-p", "llm-coding-tools-core", "--quiet", "--", "-D", "warnings") Invoke-LoggedCommand "cargo" @("clippy", "-p", "llm-coding-tools-agents", "--quiet", "--", "-D", "warnings") Invoke-LoggedCommand "cargo" @("clippy", "-p", "llm-coding-tools-serdesai", "--quiet", "--", "-D", "warnings") +Invoke-LoggedCommand "cargo" @("clippy", "-p", "llm-coding-tools-models-dev", "--quiet", "--", "-D", "warnings") Write-Host "Testing blocking feature..." Invoke-LoggedCommand "cargo" @("test", "-p", "llm-coding-tools-core", "--no-default-features", "--features", "blocking", "--quiet") @@ -65,6 +68,7 @@ Write-Host "Publish dry-run..." 
Invoke-LoggedCommand "cargo" @("publish", "--dry-run", "--allow-dirty", "-p", "llm-coding-tools-core", "--quiet") Invoke-LoggedCommand "cargo" @("publish", "--dry-run", "--allow-dirty", "-p", "llm-coding-tools-agents", "--quiet") Invoke-LoggedCommand "cargo" @("publish", "--dry-run", "--allow-dirty", "-p", "llm-coding-tools-serdesai", "--quiet") +Invoke-LoggedCommand "cargo" @("publish", "--dry-run", "--allow-dirty", "-p", "llm-coding-tools-models-dev", "--quiet") Write-Host "All checks passed!" } diff --git a/src/.cargo/verify.sh b/src/.cargo/verify.sh index b51897cf..a6f4f416 100755 --- a/src/.cargo/verify.sh +++ b/src/.cargo/verify.sh @@ -3,7 +3,7 @@ # All steps must pass without warnings # Keep in sync with verify.ps1 # -# Note: llm-coding-tools-serdesai is async-only (implements async Tool traits). +# Note: llm-coding-tools-serdesai and llm-coding-tools-models-dev are async-only. # The blocking feature only applies to llm-coding-tools-core. set -e @@ -24,16 +24,19 @@ echo "Building..." run_cmd cargo build -p llm-coding-tools-core --quiet run_cmd cargo build -p llm-coding-tools-agents --quiet run_cmd cargo build -p llm-coding-tools-serdesai --quiet +run_cmd cargo build -p llm-coding-tools-models-dev --quiet echo "Testing..." run_cmd cargo test -p llm-coding-tools-core --quiet run_cmd cargo test -p llm-coding-tools-agents --quiet run_cmd cargo test -p llm-coding-tools-serdesai --quiet +run_cmd cargo test -p llm-coding-tools-models-dev --quiet echo "Clippy..." run_cmd cargo clippy -p llm-coding-tools-core --quiet -- -D warnings run_cmd cargo clippy -p llm-coding-tools-agents --quiet -- -D warnings run_cmd cargo clippy -p llm-coding-tools-serdesai --quiet -- -D warnings +run_cmd cargo clippy -p llm-coding-tools-models-dev --quiet -- -D warnings echo "Testing blocking feature..." run_cmd cargo test -p llm-coding-tools-core --no-default-features --features blocking --quiet @@ -48,5 +51,6 @@ echo "Publish dry-run..." 
run_cmd cargo publish --dry-run --allow-dirty -p llm-coding-tools-core --quiet run_cmd cargo publish --dry-run --allow-dirty -p llm-coding-tools-agents --quiet run_cmd cargo publish --dry-run --allow-dirty -p llm-coding-tools-serdesai --quiet +run_cmd cargo publish --dry-run --allow-dirty -p llm-coding-tools-models-dev --quiet echo "All checks passed!" diff --git a/src/Cargo.lock b/src/Cargo.lock index a9d43b94..08986572 100644 --- a/src/Cargo.lock +++ b/src/Cargo.lock @@ -74,6 +74,12 @@ version = "1.0.100" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" +[[package]] +name = "arrayvec" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" + [[package]] name = "assert-json-diff" version = "2.0.2" @@ -144,6 +150,30 @@ version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" +[[package]] +name = "bitcode" +version = "0.6.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a6ed1b54d8dc333e7be604d00fa9262f4635485ffea923647b6521a5fff045d" +dependencies = [ + "arrayvec", + "bitcode_derive", + "bytemuck", + "glam", + "serde", +] + +[[package]] +name = "bitcode_derive" +version = "0.6.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "238b90427dfad9da4a9abd60f3ec1cdee6b80454bde49ed37f1781dd8e9dc7f9" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "bitfields" version = "1.0.2" @@ -197,6 +227,12 @@ version = "3.19.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5dd9dc738b7a8311c7ade152424974d8115f2cdad61e8dab8dac9f2362298510" +[[package]] +name = "bytemuck" +version = "1.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "c8efb64bd706a16a1bdde310ae86b351e4d21550d98d056f22f8a7f7a2183fec" + [[package]] name = "bytes" version = "1.11.0" @@ -548,6 +584,27 @@ dependencies = [ "crypto-common", ] +[[package]] +name = "dirs" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3e8aa94d75141228480295a7d0e7feb620b1a5ad9f12bc40be62411e38cce4e" +dependencies = [ + "dirs-sys", +] + +[[package]] +name = "dirs-sys" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e01a3366d27ee9890022452ee61b2b63a67e6f13f58900b651ff5665f0bb1fab" +dependencies = [ + "libc", + "option-ext", + "redox_users", + "windows-sys 0.61.2", +] + [[package]] name = "displaydoc" version = "0.2.5" @@ -794,6 +851,12 @@ dependencies = [ "wasip3", ] +[[package]] +name = "glam" +version = "0.32.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34627c5158214743a374170fed714833fdf4e4b0cbcc1ea98417866a4c5d4441" + [[package]] name = "globset" version = "0.4.18" @@ -1315,15 +1378,24 @@ checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" [[package]] name = "libc" -version = "0.2.180" +version = "0.2.182" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bcc35a38544a891a5f7c865aca548a982ccb3b8650a5b06d0fd33a10283c56fc" +checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112" + +[[package]] +name = "libredox" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "062b52cd41eb8d929e81b592a47df833c33c15684933a9329440137a6d9f134c" +dependencies = [ + "libc", +] [[package]] name = "linux-raw-sys" -version = "0.11.0" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" +checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53" [[package]] name = "lite-strtab" @@ 
-1390,6 +1462,21 @@ dependencies = [ "wiremock", ] +[[package]] +name = "llm-coding-tools-models-dev" +version = "0.1.0" +dependencies = [ + "bitcode", + "dirs", + "llm-coding-tools-core", + "reqwest 0.13.1", + "serde", + "tempfile", + "thiserror 2.0.18", + "tokio", + "zstd", +] + [[package]] name = "llm-coding-tools-serdesai" version = "0.2.0" @@ -1500,9 +1587,9 @@ checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086" [[package]] name = "nix" -version = "0.31.1" +version = "0.31.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "225e7cfe711e0ba79a68baeddb2982723e4235247aefce1482f2f16c27865b66" +checksum = "5d6d0705320c1e6ba1d912b5e37cf18071b6c2e9b7fa8215a1e8a7651966f5d3" dependencies = [ "bitflags", "cfg-if", @@ -1547,6 +1634,12 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9f50d9b3dabb09ecd771ad0aa242ca6894994c130308ca3d7684634df8037391" +[[package]] +name = "option-ext" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" + [[package]] name = "page_size" version = "0.6.0" @@ -1637,6 +1730,12 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +[[package]] +name = "pkg-config" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" + [[package]] name = "plotters" version = "0.3.7" @@ -1886,6 +1985,17 @@ dependencies = [ "bitflags", ] +[[package]] +name = "redox_users" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4e608c6638b9c18977b00b475ac1f28d14e84b27d8d42f70e0bf1e3dec127ac" +dependencies = [ + "getrandom 0.2.17", + "libredox", + "thiserror 2.0.18", +] + [[package]] name = 
"ref-cast" version = "1.0.25" @@ -2036,9 +2146,9 @@ checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" [[package]] name = "rustix" -version = "1.1.3" +version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "146c9e247ccc180c1f61615433868c99f3de3ae256a30a43b49f67c2d9171f34" +checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190" dependencies = [ "bitflags", "errno", @@ -2642,9 +2752,9 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.25.0" +version = "3.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0136791f7c95b1f6dd99f9cc786b91bb81c3800b639b3478e561ddb7be95e5f1" +checksum = "82a72c767771b47409d2345987fda8628641887d5466101319899796367354a0" dependencies = [ "fastrand", "getrandom 0.3.4", @@ -3773,3 +3883,31 @@ name = "zmij" version = "1.0.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dfcd145825aace48cff44a8844de64bf75feec3080e0aa5cdbde72961ae51a65" + +[[package]] +name = "zstd" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "7.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d" +dependencies = [ + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "2.0.16+zstd.1.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e19ebc2adc8f83e43039e79776e3fda8ca919132d68a1fed6a5faca2683748" +dependencies = [ + "cc", + "pkg-config", +] diff --git a/src/Cargo.toml b/src/Cargo.toml index 0dbd669e..7429dbb9 100644 --- a/src/Cargo.toml +++ b/src/Cargo.toml @@ -1,7 +1,7 @@ [workspace] resolver = "2" -members = ["llm-coding-tools-core", "llm-coding-tools-serdesai", 
"llm-coding-tools-agents"] +members = ["llm-coding-tools-core", "llm-coding-tools-serdesai", "llm-coding-tools-agents", "llm-coding-tools-models-dev"] # Profile Build [profile.profile] diff --git a/src/llm-coding-tools-models-dev/Cargo.toml b/src/llm-coding-tools-models-dev/Cargo.toml new file mode 100644 index 00000000..5ee01d31 --- /dev/null +++ b/src/llm-coding-tools-models-dev/Cargo.toml @@ -0,0 +1,48 @@ +[package] +name = "llm-coding-tools-models-dev" +version = "0.1.0" +edition = "2021" +description = "models.dev catalog ingestion with online-first sync pipeline" +repository = "https://github.com/Sewer56/llm-coding-tools" +license = "Apache-2.0" +include = ["src/**/*", "README.md"] +readme = "README.md" + +[features] +default = ["tokio"] +# Async with tokio runtime +tokio = ["dep:tokio", "dep:reqwest"] + +[dependencies] +# Core library for ModelCatalog and related types +llm-coding-tools-core = { path = "../llm-coding-tools-core", version = "0.2.0", features = [ + "tokio", +] } + +# Cross-platform cache directory detection +dirs = "6.0.0" + +# HTTP client for conditional GET requests +reqwest = { version = "0.13", default-features = false, features = [ + "rustls", + "rustls-native-certs", +], optional = true } + +# Fast binary serialization +bitcode = "0.6.9" + +# Compression for cache payload +zstd = "0.13.3" + +# JSON parsing for models.dev API responses +serde = { version = "1.0.228", features = ["derive"] } + +# Ergonomic error definitions +thiserror = "2.0.18" + +# Async runtime (when tokio feature enabled) +tokio = { version = "1.49", features = ["fs", "io-util"], optional = true } + +[dev-dependencies] +tokio = { version = "1.49", features = ["rt", "macros"] } +tempfile = "3.26" diff --git a/src/llm-coding-tools-models-dev/README.md b/src/llm-coding-tools-models-dev/README.md new file mode 100644 index 00000000..8ea3ad47 --- /dev/null +++ b/src/llm-coding-tools-models-dev/README.md @@ -0,0 +1,64 @@ +# llm-coding-tools-models-dev + +models.dev catalog 
ingestion with online-first sync and local cache fallback. + +This crate loads provider/model data from models.dev and builds a +`llm_coding_tools_core::models::ModelCatalog`. + +## Why this exists + +If you run coding agents against many providers, you usually want all three: + +- Fresh data when online. +- A reliable fallback when offline. +- A compact cache that is cheap to load. + +That is the core goal here. + +## What it does + +- **Online-first sync**: Sends conditional requests with `If-None-Match` and reuses cache on `304 Not Modified`. +- **Implicit fallback**: If network sync fails, loads the last valid cache automatically. +- **Compact storage**: Stores cache as prelude + ETag + `zstd(bitcode(payload))`. +- **Minimal API**: Exposes `ModelsDevCatalog::load()` and `ModelsDevCatalog::load_at(...)`. + +## Usage + +```rust +use llm_coding_tools_models_dev::{CatalogLoadSource, ModelsDevCatalog}; + +async fn load_catalog() -> Result<(), Box<dyn std::error::Error>> { + let result = ModelsDevCatalog::load().await?; + + match result.source { + CatalogLoadSource::Downloaded => println!("Downloaded fresh snapshot."), + CatalogLoadSource::NotModifiedCache => println!("Cache is already up to date."), + CatalogLoadSource::FallbackCache => println!("Network unavailable, using cached snapshot."), + } + + if let Some(entry) = result.catalog.lookup("openai", "gpt-4") { + println!("provider api url: {}", entry.0.api_url); + println!("max input tokens: {}", entry.1.max_input); + } + + Ok(()) +} +``` + +## Cache location + +By default, cache is stored in the platform cache directory: + +- Linux: `~/.cache/llm-coding-tools/models.dev.catalog.v1.cache` +- macOS: `~/Library/Caches/llm-coding-tools/models.dev.catalog.v1.cache` +- Windows: `%LOCALAPPDATA%\llm-coding-tools\models.dev.catalog.v1.cache` + +Set `LLM_CODING_TOOLS_MODELS_DEV_CACHE_PATH` to override this path. + +## Feature flags + +- `tokio` (default): async runtime support.
+ +## License + +Apache-2.0 diff --git a/src/llm-coding-tools-models-dev/src/cache/mod.rs b/src/llm-coding-tools-models-dev/src/cache/mod.rs new file mode 100644 index 00000000..9150d79d --- /dev/null +++ b/src/llm-coding-tools-models-dev/src/cache/mod.rs @@ -0,0 +1,8 @@ +//! Cache path resolution and management. +//! +//! This module handles cross-platform cache directory detection and +//! the default cache file path for models.dev catalogs. + +mod path; + +pub use path::shared_cache_path; diff --git a/src/llm-coding-tools-models-dev/src/cache/path.rs b/src/llm-coding-tools-models-dev/src/cache/path.rs new file mode 100644 index 00000000..d8841147 --- /dev/null +++ b/src/llm-coding-tools-models-dev/src/cache/path.rs @@ -0,0 +1,48 @@ +//! Cross-platform cache path resolution. + +#![allow(dead_code)] + +use crate::error::CatalogError; +use std::path::PathBuf; + +/// Environment variable name for overriding the default cache path. +pub const CACHE_PATH_ENV_VAR: &str = "LLM_CODING_TOOLS_MODELS_DEV_CACHE_PATH"; + +/// Returns the shared cache path for the models.dev catalog. +/// +/// This function determines the appropriate cache location using the following +/// precedence: +/// +/// 1. `LLM_CODING_TOOLS_MODELS_DEV_CACHE_PATH` environment variable (if set) +/// 2. Platform cache directory + `llm-coding-tools/models.dev.catalog.v1.cache` +/// +/// # Platform Cache Locations +/// +/// - **Linux**: `~/.cache/llm-coding-tools/models.dev.catalog.v1.cache` +/// - **macOS**: `~/Library/Caches/llm-coding-tools/models.dev.catalog.v1.cache` +/// - **Windows**: `%LOCALAPPDATA%\llm-coding-tools\models.dev.catalog.v1.cache` +/// +/// # Returns +/// +/// The full path to the cache file. 
+/// +/// # Errors +/// +/// Returns [`CatalogError::CachePathNotFound`] when: +/// - The environment variable is not set AND +/// - The platform cache directory cannot be determined +/// +/// # Examples +/// +/// ``` +/// use llm_coding_tools_models_dev::shared_cache_path; +/// +/// # fn example() -> Result<(), Box<dyn std::error::Error>> { +/// let path = shared_cache_path()?; +/// println!("Cache location: {}", path.display()); +/// # Ok(()) +/// # } +/// ``` +pub fn shared_cache_path() -> Result<PathBuf, CatalogError> { + todo!("shared_cache_path() not yet implemented") +} diff --git a/src/llm-coding-tools-models-dev/src/catalog/load_result.rs b/src/llm-coding-tools-models-dev/src/catalog/load_result.rs new file mode 100644 index 00000000..aee2d46e --- /dev/null +++ b/src/llm-coding-tools-models-dev/src/catalog/load_result.rs @@ -0,0 +1,52 @@ +//! Result types for catalog load operations. + +use llm_coding_tools_core::models::ModelCatalog; + +/// Result of a successful catalog load operation. +/// +/// This struct provides both the loaded catalog and metadata about +/// how the catalog was obtained (fresh download, cached, etc.). +pub struct CatalogLoadResult { + /// The loaded model catalog ready for lookups. + pub catalog: ModelCatalog, + + /// Information about how the catalog was loaded. + pub source: CatalogLoadSource, +} + +/// Indicates how the catalog was loaded. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum CatalogLoadSource { + /// The catalog was downloaded fresh (HTTP 200 OK) and the cache was updated. + Downloaded, + + /// The cache was up to date (HTTP 304 Not Modified) and loaded from disk. + NotModifiedCache, + + /// A network failure occurred, but a valid cached copy was available + /// and loaded as a fallback. + FallbackCache, +} + +impl CatalogLoadSource { + /// Returns true if the catalog was loaded from the network (fresh download).
+ #[inline] + pub fn is_fresh(&self) -> bool { + matches!(self, CatalogLoadSource::Downloaded) + } + + /// Returns true if the catalog was loaded from cache (either fresh cache or fallback). + #[inline] + pub fn is_cached(&self) -> bool { + matches!( + self, + CatalogLoadSource::NotModifiedCache | CatalogLoadSource::FallbackCache + ) + } + + /// Returns true if this was a fallback load due to network failure. + #[inline] + pub fn is_fallback(&self) -> bool { + matches!(self, CatalogLoadSource::FallbackCache) + } +} diff --git a/src/llm-coding-tools-models-dev/src/catalog/mod.rs b/src/llm-coding-tools-models-dev/src/catalog/mod.rs new file mode 100644 index 00000000..2a826c81 --- /dev/null +++ b/src/llm-coding-tools-models-dev/src/catalog/mod.rs @@ -0,0 +1,113 @@ +//! Catalog loading and synchronization with models.dev. +//! +//! Flow is simple: +//! - Try online sync first using conditional HTTP (`If-None-Match`) +//! - Reuse cache on `304 Not Modified` +//! - Fall back to cached data if the network path fails + +mod load_result; + +pub use load_result::{CatalogLoadResult, CatalogLoadSource}; + +use crate::error::CatalogError; +use std::path::Path; + +/// Entry point for loading models.dev catalogs. +/// +/// This struct provides static methods for loading the catalog either +/// from the default shared cache location or from a custom path. +pub struct ModelsDevCatalog; + +impl ModelsDevCatalog { + /// Loads the catalog from the default shared cache location. + /// + /// This is the primary entry point for most use cases. It will: + /// 1. Check for an existing cache and extract its ETag + /// 2. Send a conditional GET request with `If-None-Match` + /// 3. On `200 OK`: download, normalize, cache, and return fresh data + /// 4. On `304 Not Modified`: decode and return cached data + /// 5. 
On network failure: fall back to cached data if available + /// + /// The cache location is determined by: + /// - `LLM_CODING_TOOLS_MODELS_DEV_CACHE_PATH` environment variable (if set) + /// - Platform cache directory + `llm-coding-tools/models.dev.catalog.v1.cache` + /// + /// # Returns + /// + /// A [`CatalogLoadResult`] containing the loaded catalog and information + /// about how it was loaded (downloaded fresh, from cache, or fallback). + /// + /// # Errors + /// + /// Returns [`CatalogError`] when: + /// - The cache path cannot be determined and no cache exists + /// - An HTTP error occurs and no cache is available for fallback + /// - The cache is corrupted and cannot be decoded + /// - Catalog construction from normalized data fails + /// + /// # Examples + /// + /// ``` + /// use llm_coding_tools_models_dev::ModelsDevCatalog; + /// + /// # async fn example() -> Result<(), Box<dyn std::error::Error>> { + /// let result = ModelsDevCatalog::load().await?; + /// + /// // Use the catalog + /// if let Some(entry) = result.catalog.lookup("openai", "gpt-4") { + /// println!("API URL: {}", entry.0.api_url); + /// } + /// # Ok(()) + /// # } + /// ``` + pub async fn load() -> Result<CatalogLoadResult, CatalogError> { + todo!("ModelsDevCatalog::load() not yet implemented") + } + + /// Loads the catalog from a specific cache file path. + /// + /// This method provides the same behavior as [`load`](Self::load), but + /// allows specifying a custom cache file path. This is useful for: + /// - Testing with temporary cache files + /// - Custom deployment scenarios + /// - Isolated cache locations + /// + /// # Parameters + /// + /// * `path` - The path to the cache file. Parent directories will be + /// created if they don't exist. + /// + /// # Returns + /// + /// A [`CatalogLoadResult`] containing the loaded catalog and source + /// information.
+ /// + /// # Errors + /// + /// Returns [`CatalogError`] under the same conditions as [`load`](Self::load), + /// plus: + /// - The parent directory cannot be created + /// - The path is not a valid file path + /// + /// # Examples + /// + /// ``` + /// use llm_coding_tools_models_dev::ModelsDevCatalog; + /// use std::path::PathBuf; + /// + /// # async fn example() -> Result<(), Box<dyn std::error::Error>> { + /// let cache_path = PathBuf::from("/tmp/my-cache.cache"); + /// let result = ModelsDevCatalog::load_at(&cache_path).await?; + /// + /// // Use the catalog + /// if let Some(entry) = result.catalog.lookup("openai", "gpt-4") { + /// println!("API URL: {}", entry.0.api_url); + /// } + /// # Ok(()) + /// # } + /// ``` + pub async fn load_at(path: impl AsRef<Path>) -> Result<CatalogLoadResult, CatalogError> { + let _path = path.as_ref(); + todo!("ModelsDevCatalog::load_at() not yet implemented") + } +} diff --git a/src/llm-coding-tools-models-dev/src/error.rs b/src/llm-coding-tools-models-dev/src/error.rs new file mode 100644 index 00000000..daad6d28 --- /dev/null +++ b/src/llm-coding-tools-models-dev/src/error.rs @@ -0,0 +1,36 @@ +//! Error types for models.dev catalog operations. + +use llm_coding_tools_core::models::ModelCatalogBuildError; +use thiserror::Error; + +/// Errors that can occur during catalog loading and synchronization. +#[derive(Debug, Error)] +pub enum CatalogError { + /// The platform's cache directory could not be determined. + #[error("cache directory not found on this platform")] + CachePathNotFound, + + /// A configuration error occurred (e.g., invalid environment variable). + #[error("configuration error: {0}")] + Configuration(String), + + /// An I/O error occurred while reading or writing the cache. + #[error("I/O error: {0}")] + Io(#[from] std::io::Error), + + /// An HTTP error occurred during the sync request. + #[error("HTTP error: {0}")] + Reqwest(#[from] reqwest::Error), + + /// A zstd decompression error occurred.
+ #[error("decompression error: {0}")] + Zstd(String), + + /// A bitcode deserialization error occurred. + #[error("decode error: {0}")] + BitcodeDecode(String), + + /// The catalog failed to build from source rows. + #[error("catalog build error: {0}")] + ModelCatalogBuild(#[from] ModelCatalogBuildError), +} diff --git a/src/llm-coding-tools-models-dev/src/lib.rs b/src/llm-coding-tools-models-dev/src/lib.rs new file mode 100644 index 00000000..82e92d1b --- /dev/null +++ b/src/llm-coding-tools-models-dev/src/lib.rs @@ -0,0 +1,9 @@ +#![doc = include_str!(concat!("../", env!("CARGO_PKG_README")))] + +pub mod cache; +pub mod catalog; +pub mod error; + +pub use cache::shared_cache_path; +pub use catalog::{CatalogLoadResult, CatalogLoadSource, ModelsDevCatalog}; +pub use error::CatalogError; From 4342fe1b8bb329f5df9ac96631d8c6864e2fc2d3 Mon Sep 17 00:00:00 2001 From: Sewer56 Date: Sat, 28 Feb 2026 15:56:13 +0000 Subject: [PATCH 02/22] Added: cache path resolution with env override and CatalogResult type alias --- .../src/cache/mod.rs | 1 + .../src/cache/path.rs | 17 ++++++++++++++--- src/llm-coding-tools-models-dev/src/error.rs | 3 +++ src/llm-coding-tools-models-dev/src/lib.rs | 2 +- 4 files changed, 19 insertions(+), 4 deletions(-) diff --git a/src/llm-coding-tools-models-dev/src/cache/mod.rs b/src/llm-coding-tools-models-dev/src/cache/mod.rs index 9150d79d..e1ae9be5 100644 --- a/src/llm-coding-tools-models-dev/src/cache/mod.rs +++ b/src/llm-coding-tools-models-dev/src/cache/mod.rs @@ -5,4 +5,5 @@ mod path; +pub use crate::error::CatalogResult; pub use path::shared_cache_path; diff --git a/src/llm-coding-tools-models-dev/src/cache/path.rs b/src/llm-coding-tools-models-dev/src/cache/path.rs index d8841147..06a43a7e 100644 --- a/src/llm-coding-tools-models-dev/src/cache/path.rs +++ b/src/llm-coding-tools-models-dev/src/cache/path.rs @@ -2,7 +2,7 @@ #![allow(dead_code)] -use crate::error::CatalogError; +use crate::{error::CatalogResult, CatalogError}; use 
std::path::PathBuf; /// Environment variable name for overriding the default cache path. @@ -43,6 +43,17 @@ pub const CACHE_PATH_ENV_VAR: &str = "LLM_CODING_TOOLS_MODELS_DEV_CACHE_PATH"; /// # Ok(()) /// # } /// ``` -pub fn shared_cache_path() -> Result<PathBuf, CatalogError> { - todo!("shared_cache_path() not yet implemented") +const CACHE_SUBDIR: &str = "llm-coding-tools"; +const CACHE_FILENAME: &str = "models.dev.catalog.v1.cache"; + +pub fn shared_cache_path() -> CatalogResult<PathBuf> { + // 1. Check env var first + if let Ok(path) = std::env::var(CACHE_PATH_ENV_VAR) { + return Ok(PathBuf::from(path)); + } + + // 2. Fall back to dirs::cache_dir() + let cache_dir = dirs::cache_dir().ok_or(CatalogError::CachePathNotFound)?; + + Ok(cache_dir.join(CACHE_SUBDIR).join(CACHE_FILENAME)) } diff --git a/src/llm-coding-tools-models-dev/src/error.rs b/src/llm-coding-tools-models-dev/src/error.rs index daad6d28..02f1b5c9 100644 --- a/src/llm-coding-tools-models-dev/src/error.rs +++ b/src/llm-coding-tools-models-dev/src/error.rs @@ -34,3 +34,6 @@ pub enum CatalogError { #[error("catalog build error: {0}")] ModelCatalogBuild(#[from] ModelCatalogBuildError), } + +/// Convenience type alias for catalog operations.
+pub type CatalogResult = Result; diff --git a/src/llm-coding-tools-models-dev/src/lib.rs b/src/llm-coding-tools-models-dev/src/lib.rs index 82e92d1b..a635ec71 100644 --- a/src/llm-coding-tools-models-dev/src/lib.rs +++ b/src/llm-coding-tools-models-dev/src/lib.rs @@ -6,4 +6,4 @@ pub mod error; pub use cache::shared_cache_path; pub use catalog::{CatalogLoadResult, CatalogLoadSource, ModelsDevCatalog}; -pub use error::CatalogError; +pub use error::{CatalogError, CatalogResult}; From cd905182eb398839dc9a5ce6bd394791f1ddc6d6 Mon Sep 17 00:00:00 2001 From: Sewer56 Date: Sat, 28 Feb 2026 20:50:47 +0000 Subject: [PATCH 03/22] Added: models-dev cache container and blocking runtime support --- .github/workflows/rust.yml | 3 + src/.cargo/verify.ps1 | 5 +- src/.cargo/verify.sh | 5 +- src/Cargo.lock | 51 +++ src/llm-coding-tools-models-dev/Cargo.toml | 22 +- src/llm-coding-tools-models-dev/README.md | 59 ++- .../src/cache/format.rs | 384 ++++++++++++++++++ .../src/cache/mod.rs | 17 +- .../src/cache/path.rs | 8 +- .../src/catalog/mod.rs | 24 ++ src/llm-coding-tools-models-dev/src/error.rs | 4 + .../src/fs/blocking_impl.rs | 34 ++ src/llm-coding-tools-models-dev/src/fs/mod.rs | 45 ++ .../src/fs/tokio_impl.rs | 35 ++ src/llm-coding-tools-models-dev/src/lib.rs | 8 + 15 files changed, 673 insertions(+), 31 deletions(-) create mode 100644 src/llm-coding-tools-models-dev/src/cache/format.rs create mode 100644 src/llm-coding-tools-models-dev/src/fs/blocking_impl.rs create mode 100644 src/llm-coding-tools-models-dev/src/fs/mod.rs create mode 100644 src/llm-coding-tools-models-dev/src/fs/tokio_impl.rs diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index f24cfd1f..ee44bdc9 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -127,6 +127,7 @@ jobs: use-cross: ${{ matrix.use-cross }} packages: | llm-coding-tools-core + llm-coding-tools-models-dev no-default-features: true features: "blocking" @@ -137,12 +138,14 @@ jobs: RUSTDOCFLAGS: "-D 
warnings" run: | cargo doc -p llm-coding-tools-core --no-default-features --features blocking --document-private-items --no-deps --target ${{ matrix.target }} + cargo doc -p llm-coding-tools-models-dev --no-default-features --features blocking --document-private-items --no-deps --target ${{ matrix.target }} - name: Run linter (Blocking) if: github.event_name == 'pull_request' || startsWith(github.ref, 'refs/tags/') working-directory: src run: | cargo clippy -p llm-coding-tools-core --no-default-features --features blocking --target ${{ matrix.target }} -- -D warnings + cargo clippy -p llm-coding-tools-models-dev --no-default-features --features blocking --target ${{ matrix.target }} -- -D warnings - name: Run formatter check uses: actions-rust-lang/rustfmt@v1 diff --git a/src/.cargo/verify.ps1 b/src/.cargo/verify.ps1 index b90d0507..912389f1 100644 --- a/src/.cargo/verify.ps1 +++ b/src/.cargo/verify.ps1 @@ -2,8 +2,8 @@ # All steps must pass without warnings # Keep in sync with verify.sh # -# Note: llm-coding-tools-serdesai and llm-coding-tools-models-dev are async-only. -# The blocking feature only applies to llm-coding-tools-core. +# Note: llm-coding-tools-serdesai is async-only. +# Blocking mode is validated for core and models-dev. $ErrorActionPreference = "Stop" @@ -51,6 +51,7 @@ Invoke-LoggedCommand "cargo" @("clippy", "-p", "llm-coding-tools-models-dev", "- Write-Host "Testing blocking feature..." Invoke-LoggedCommand "cargo" @("test", "-p", "llm-coding-tools-core", "--no-default-features", "--features", "blocking", "--quiet") +Invoke-LoggedCommand "cargo" @("test", "-p", "llm-coding-tools-models-dev", "--no-default-features", "--features", "blocking", "--quiet") Write-Host "Docs..." 
$originalRustdocFlags = $env:RUSTDOCFLAGS diff --git a/src/.cargo/verify.sh b/src/.cargo/verify.sh index a6f4f416..eefd0e0b 100755 --- a/src/.cargo/verify.sh +++ b/src/.cargo/verify.sh @@ -3,8 +3,8 @@ # All steps must pass without warnings # Keep in sync with verify.ps1 # -# Note: llm-coding-tools-serdesai and llm-coding-tools-models-dev are async-only. -# The blocking feature only applies to llm-coding-tools-core. +# Note: llm-coding-tools-serdesai is async-only. +# Blocking mode is validated for core and models-dev. set -e @@ -40,6 +40,7 @@ run_cmd cargo clippy -p llm-coding-tools-models-dev --quiet -- -D warnings echo "Testing blocking feature..." run_cmd cargo test -p llm-coding-tools-core --no-default-features --features blocking --quiet +run_cmd cargo test -p llm-coding-tools-models-dev --no-default-features --features blocking --quiet echo "Docs..." run_cmd env RUSTDOCFLAGS="-D warnings" cargo doc --workspace --no-deps --quiet diff --git a/src/Cargo.lock b/src/Cargo.lock index 08986572..25cb087f 100644 --- a/src/Cargo.lock +++ b/src/Cargo.lock @@ -360,6 +360,16 @@ dependencies = [ "memchr", ] +[[package]] +name = "concat-idents" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f76990911f2267d837d9d0ad060aa63aaad170af40904b29461734c339030d4d" +dependencies = [ + "quote", + "syn", +] + [[package]] name = "const-random" version = "0.1.18" @@ -652,6 +662,29 @@ dependencies = [ "encoding_rs", ] +[[package]] +name = "endian-writer" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5fba6714ed232b3a46d07255c9cb2d20e9a8aee06a20d5d2e3eb4e2b48d28ae" +dependencies = [ + "concat-idents", + "paste", +] + +[[package]] +name = "endian-writer-derive" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "160e7b32d1a63d6f02993f5ce2da2b7125480ae40c45d9a0b74d158f203f7e53" +dependencies = [ + "endian-writer", + "memoffset", + "proc-macro2", 
+ "quote", + "syn", +] + [[package]] name = "equivalent" version = "1.0.2" @@ -1468,7 +1501,10 @@ version = "0.1.0" dependencies = [ "bitcode", "dirs", + "endian-writer", + "endian-writer-derive", "llm-coding-tools-core", + "maybe-async", "reqwest 0.13.1", "serde", "tempfile", @@ -1562,6 +1598,15 @@ dependencies = [ "libc", ] +[[package]] +name = "memoffset" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a" +dependencies = [ + "autocfg", +] + [[package]] name = "mime" version = "0.3.17" @@ -1673,6 +1718,12 @@ dependencies = [ "windows-link", ] +[[package]] +name = "paste" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + [[package]] name = "percent-encoding" version = "2.3.2" diff --git a/src/llm-coding-tools-models-dev/Cargo.toml b/src/llm-coding-tools-models-dev/Cargo.toml index 5ee01d31..4aab2702 100644 --- a/src/llm-coding-tools-models-dev/Cargo.toml +++ b/src/llm-coding-tools-models-dev/Cargo.toml @@ -10,14 +10,21 @@ readme = "README.md" [features] default = ["tokio"] +# Base async signatures (enabled by runtime features) +async = [] # Async with tokio runtime -tokio = ["dep:tokio", "dep:reqwest"] +tokio = ["async", "dep:tokio", "dep:reqwest", "llm-coding-tools-core/tokio"] +# Blocking/sync mode - mutually exclusive with tokio/async +blocking = [ + "dep:reqwest", + "reqwest/blocking", + "llm-coding-tools-core/blocking", + "maybe-async/is_sync", +] [dependencies] # Core library for ModelCatalog and related types -llm-coding-tools-core = { path = "../llm-coding-tools-core", version = "0.2.0", features = [ - "tokio", -] } +llm-coding-tools-core = { path = "../llm-coding-tools-core", version = "0.2.0", default-features = false } # Cross-platform cache directory detection dirs = "6.0.0" @@ -34,6 +41,13 @@ bitcode = "0.6.9" # Compression 
for cache payload zstd = "0.13.3" +# Shared async/sync implementation for load/cache APIs +maybe-async = "0.2" + +# Endian-aware fixed-header serialization helpers +endian-writer = "2.2.0" +endian-writer-derive = "0.1.0" + # JSON parsing for models.dev API responses serde = { version = "1.0.228", features = ["derive"] } diff --git a/src/llm-coding-tools-models-dev/README.md b/src/llm-coding-tools-models-dev/README.md index 8ea3ad47..353a16e5 100644 --- a/src/llm-coding-tools-models-dev/README.md +++ b/src/llm-coding-tools-models-dev/README.md @@ -1,32 +1,32 @@ # llm-coding-tools-models-dev -models.dev catalog ingestion with online-first sync and local cache fallback. - -This crate loads provider/model data from models.dev and builds a -`llm_coding_tools_core::models::ModelCatalog`. +Reads the online models.dev catalog into llm-coding-tools-core; with support +for a cached fallback and caching via ETag(s). ## Why this exists -If you run coding agents against many providers, you usually want all three: +If you run coding agents against many providers, you want to have fresh data. +[models.dev][models.dev] is one such source of data. -- Fresh data when online. -- A reliable fallback when offline. -- A compact cache that is cheap to load. +This crate has the sufficient code to download from models.dev, distill down only +the relevant data we need; and create a llm_coding_tools_core `ModelCatalog`. -That is the core goal here. +## Usage -## What it does +### Load flow (simple) -- **Online-first sync**: Sends conditional requests with `If-None-Match` and reuses cache on `304 Not Modified`. -- **Implicit fallback**: If network sync fails, loads the last valid cache automatically. -- **Compact storage**: Stores cache as prelude + ETag + `zstd(bitcode(payload))`. -- **Minimal API**: Exposes `ModelsDevCatalog::load()` and `ModelsDevCatalog::load_at(...)`. +1. Read cache header (if present) and get the old ETag. +2. 
Send request to models.dev with `If-None-Match` when ETag exists. +3. If server returns `304 Not Modified`, load catalog from cache. +4. If server returns `200 OK`, parse and normalize JSON, write fresh cache, then build catalog. +5. If network fails, try cached data as fallback; if no valid cache exists, return an error. -## Usage +### Non-blocking (`tokio`) ```rust use llm_coding_tools_models_dev::{CatalogLoadSource, ModelsDevCatalog}; +#[cfg(feature = "tokio")] async fn load_catalog() -> Result<(), Box> { let result = ModelsDevCatalog::load().await?; @@ -45,6 +45,30 @@ async fn load_catalog() -> Result<(), Box> { } ``` +### Blocking (`blocking`) + +```rust +use llm_coding_tools_models_dev::{CatalogLoadSource, ModelsDevCatalog}; + +#[cfg(feature = "blocking")] +fn load_catalog() -> Result<(), Box> { + let result = ModelsDevCatalog::load()?; + + match result.source { + CatalogLoadSource::Downloaded => println!("Downloaded fresh snapshot."), + CatalogLoadSource::NotModifiedCache => println!("Cache is already up to date."), + CatalogLoadSource::FallbackCache => println!("Network unavailable, using cached snapshot."), + } + + if let Some(entry) = result.catalog.lookup("openai", "gpt-4") { + println!("provider api url: {}", entry.0.api_url); + println!("max input tokens: {}", entry.1.max_input); + } + + Ok(()) +} +``` + ## Cache location By default, cache is stored in the platform cache directory: @@ -58,7 +82,12 @@ Set `LLM_CODING_TOOLS_MODELS_DEV_CACHE_PATH` to override this path. ## Feature flags - `tokio` (default): async runtime support. +- `blocking`: synchronous runtime support. + +Exactly one runtime mode must be enabled. ## License Apache-2.0 + +[models.dev]: https://models.dev diff --git a/src/llm-coding-tools-models-dev/src/cache/format.rs b/src/llm-coding-tools-models-dev/src/cache/format.rs new file mode 100644 index 00000000..4f910404 --- /dev/null +++ b/src/llm-coding-tools-models-dev/src/cache/format.rs @@ -0,0 +1,384 @@ +//! 
Cache container layout and read/write helpers. +//! +//! The on-disk layout for `models.dev.catalog.v1.cache` is: +//! +//! ```text +//! [0..12) 12-byte fixed prelude: +//! - [0..4) etag_len: u32 little-endian +//! - [4..8) payload_len_compressed: u32 little-endian +//! - [8..12) payload_len_decompressed: u32 little-endian +//! [12..N) raw ETag bytes (etag_len bytes, may be 0) +//! [N..EOF) compressed payload (rest of file) +//! ``` +//! +//! Versioning is keyed by filename (`*.v1.cache`), so this prelude carries +//! lengths only and no magic marker. +//! `payload_len_compressed` is retained so reads can detect unexpected file +//! truncation before decode. +//! +//! Read path intentionally keeps payload compressed. We read the whole file in +//! one pre-sized allocation, then parse/slice into `prelude`, `etag`, and +//! `payload` views without additional copying. +//! +//! ## Safety +//! +//! Not a 'safe' parser. We assume the file was created by the user. +//! There's no validation for erroneous data; e.g. maliciously crafted headers. +//! Only validation for accidental corruption/truncation (e.g., from partial writes) is included. + +use crate::{ + error::{CatalogError, CatalogResult}, + fs, +}; +use endian_writer::{EndianReader, EndianWriter, HasSize, LittleEndianReader, LittleEndianWriter}; +use endian_writer_derive::EndianWritable; +use std::mem::size_of; +use std::path::Path; +use std::ptr::copy_nonoverlapping; + +/// Fixed v1 prelude, encoded little-endian. +#[derive(Debug, Clone, Copy, PartialEq, Eq, EndianWritable)] +#[repr(C)] +struct CachePreludeV1 { + /// Length in bytes of the optional ETag block. + etag_len: u32, + /// Length in bytes of compressed payload as written to disk. + payload_len_compressed: u32, + /// Length in bytes after decompression. + payload_len_decompressed: u32, +} + +/// Input parameters for writing a cache container.
+#[derive(Debug, Clone, PartialEq, Eq)] +pub(crate) struct CacheWriteInput<'a> { + /// Optional ETag bytes (e.g., HTTP ETag value). + pub(crate) etag: Option<&'a [u8]>, + /// Compressed payload bytes. + pub(crate) payload_compressed: &'a [u8], + /// Expected decompressed payload length in bytes. + pub(crate) payload_len_decompressed: usize, +} + +/// Fixed prelude size for v1. +const CACHE_HEADER_LEN: usize = ::SIZE; + +// SAFETY: All modern platforms have usize >= 32 bits. +// This lets us safely cast u32 lengths to usize without checked arithmetic. +const _: () = assert!(size_of::() >= size_of::()); + +/// Raw cache blocks extracted from disk. +#[derive(Debug, Clone, PartialEq, Eq)] +pub(crate) struct CacheFileData { + /// Prefix length of ETag bytes after the fixed prelude. + etag_len: u32, + /// Length in bytes of compressed payload from prelude. + payload_len_compressed: u32, + /// Size hint for the eventual decompressed payload allocation. + payload_len_decompressed: u32, + /// Full file bytes laid out as `prelude || etag || payload_compressed`. + file_bytes: Box<[u8]>, +} + +impl CacheFileData { + /// Returns the optional ETag as a borrowed byte slice. + #[inline] + pub(crate) fn etag_bytes(&self) -> Option<&[u8]> { + let etag_start = CACHE_HEADER_LEN; + let etag_end = CACHE_HEADER_LEN + self.etag_len as usize; + let etag = &self.file_bytes[etag_start..etag_end]; + if etag.is_empty() { + None + } else { + Some(etag) + } + } + + /// Returns compressed payload bytes as a borrowed slice. + #[inline] + pub(crate) fn payload_compressed(&self) -> &[u8] { + let payload_start = CACHE_HEADER_LEN + self.etag_len as usize; + &self.file_bytes[payload_start..] + } + + /// Returns compressed payload length in bytes. + #[inline] + pub(crate) fn payload_len_compressed(&self) -> u32 { + self.payload_len_compressed + } + + /// Returns expected decompressed payload length in bytes. 
+ #[inline] + pub(crate) fn payload_len_decompressed(&self) -> u32 { + self.payload_len_decompressed + } +} + +/// Reads a cache container from disk. +/// +/// This reads only the prelude + raw blocks and does not decompress payload. +/// Compressed payload length is validated against prelude metadata to catch +/// unexpected truncation or trailing bytes before decode. +/// +/// # Errors +/// +/// Returns [`CatalogError::CacheFormat`] when the prelude is truncated, when +/// encoded lengths overflow platform limits, or when declared block lengths do not +/// match file contents. +#[maybe_async::maybe_async] +pub(crate) async fn read_cache_file(path: &Path) -> CatalogResult<CacheFileData> { + let file_bytes = fs::read(path).await?; + if file_bytes.len() < CACHE_HEADER_LEN { + return Err(CatalogError::CacheFormat("cache prelude is truncated")); + } + + let prelude = decode_prelude(&file_bytes[..CACHE_HEADER_LEN]); + let etag_len = prelude.etag_len as usize; + let payload_len_compressed = prelude.payload_len_compressed as usize; + + // Header + two u32-derived lengths can exceed `usize::MAX` on 32-bit targets; + // use a checked sum so a corrupt prelude is reported instead of wrapping/panicking. + let expected_total = CACHE_HEADER_LEN + .checked_add(etag_len) + .and_then(|value| value.checked_add(payload_len_compressed)) + .ok_or(CatalogError::CacheFormat( + "cache file exceeds platform length limits", + ))?; + + // Exact match also guarantees later `etag`/`payload` slicing stays in bounds. + if file_bytes.len() != expected_total { + return Err(CatalogError::CacheFormat( + "cache file size mismatch (possible truncation or trailing data)", + )); + } + + Ok(CacheFileData { + etag_len: prelude.etag_len, + payload_len_compressed: prelude.payload_len_compressed, + payload_len_decompressed: prelude.payload_len_decompressed, + file_bytes, + }) +} + +/// Writes a cache container to disk. +/// +/// # Errors +/// +/// Returns [`CatalogError::CacheFormat`] if a block length exceeds v1 `u32` +/// limits.
+#[maybe_async::maybe_async] +pub(crate) async fn write_cache_file( + path: &Path, + input: &CacheWriteInput<'_>, +) -> CatalogResult<()> { + if let Some(parent) = path.parent() { + fs::create_dir_all(parent).await?; + } + + let etag_bytes = input.etag.unwrap_or(&[]); + let prelude = CachePreludeV1 { + etag_len: to_u32_limit(etag_bytes.len(), "etag exceeds v1 length limits")?, + payload_len_compressed: to_u32_limit( + input.payload_compressed.len(), + "compressed payload exceeds v1 length limits", + )?, + payload_len_decompressed: to_u32_limit( + input.payload_len_decompressed, + "decompressed payload exceeds v1 length limits", + )?, + }; + + let encoded_prelude = encode_prelude(prelude); + + let encoded_len = CACHE_HEADER_LEN + .checked_add(etag_bytes.len()) + .and_then(|value| value.checked_add(input.payload_compressed.len())) + .ok_or(CatalogError::CacheFormat( + "cache file exceeds platform length limits", + ))?; + + let mut uninit = fs::alloc_uninit_u8_slice(encoded_len); + let ptr = uninit.as_mut_ptr().cast::(); + + unsafe { + copy_nonoverlapping(encoded_prelude.as_ptr(), ptr, CACHE_HEADER_LEN); + copy_nonoverlapping( + etag_bytes.as_ptr(), + ptr.add(CACHE_HEADER_LEN), + etag_bytes.len(), + ); + copy_nonoverlapping( + input.payload_compressed.as_ptr(), + ptr.add(CACHE_HEADER_LEN + etag_bytes.len()), + input.payload_compressed.len(), + ); + } + + let file_bytes = fs::assume_init_u8_slice(uninit); + fs::write(path, &file_bytes).await?; + Ok(()) +} + +#[inline] +fn to_u32_limit(value: usize, msg: &'static str) -> CatalogResult { + u32::try_from(value).map_err(|_| CatalogError::CacheFormat(msg)) +} + +/// Encodes prelude into little-endian bytes. +#[inline] +fn encode_prelude(prelude: CachePreludeV1) -> [u8; CACHE_HEADER_LEN] { + let mut bytes = [0_u8; CACHE_HEADER_LEN]; + // SAFETY: `bytes` has exactly the derived serialized size of `CachePreludeV1`. 
+ unsafe { + let mut writer = LittleEndianWriter::new(bytes.as_mut_ptr()); + writer.write(&prelude); + } + bytes +} + +/// Decodes prelude from little-endian bytes. +#[inline] +fn decode_prelude(bytes: &[u8]) -> CachePreludeV1 { + // SAFETY: Caller guarantees `bytes` is at least `CACHE_HEADER_LEN` bytes long. + unsafe { + let mut reader = LittleEndianReader::new(bytes.as_ptr()); + reader.read() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::TempDir; + + // Verifies prelude encoding/decoding preserves all fields. + #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))] + async fn prelude_layout_round_trips() { + let prelude = CachePreludeV1 { + etag_len: 13, + payload_len_compressed: 44, + payload_len_decompressed: 333, + }; + + let round_trip = decode_prelude(&encode_prelude(prelude)); + assert_eq!(round_trip, prelude); + } + + // Verifies full round-trip with ETag included. + #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))] + async fn write_then_read_round_trips_with_etag() { + let temp = TempDir::new().expect("tempdir"); + let path = temp.path().join("models.dev.catalog.v1.cache"); + + let input = CacheWriteInput { + etag: Some(b"etag-123"), + payload_compressed: b"payload-zstd-bytes", + payload_len_decompressed: 2048, + }; + write_cache_file(&path, &input).await.expect("write cache"); + let data = read_cache_file(&path).await.expect("read cache"); + + assert_eq!(data.etag_bytes(), input.etag); + assert_eq!(data.payload_compressed(), input.payload_compressed); + assert_eq!( + data.payload_len_compressed(), + input.payload_compressed.len() as u32 + ); + assert_eq!( + data.payload_len_decompressed(), + input.payload_len_decompressed as u32 + ); + } + + // Verifies full round-trip without ETag.
+ #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))] + async fn write_then_read_round_trips_without_etag() { + let temp = TempDir::new().expect("tempdir"); + let path = temp.path().join("models.dev.catalog.v1.cache"); + + let input = CacheWriteInput { + etag: None, + payload_compressed: b"payload-only", + payload_len_decompressed: 1024, + }; + write_cache_file(&path, &input).await.expect("write cache"); + let data = read_cache_file(&path).await.expect("read cache"); + + assert_eq!(data.etag_bytes(), input.etag); + assert_eq!(data.payload_compressed(), input.payload_compressed); + assert_eq!( + data.payload_len_decompressed(), + input.payload_len_decompressed as u32 + ); + } + + // Rejects files shorter than the fixed header. + #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))] + async fn read_rejects_truncated_prelude() { + // File is 1 byte shorter than required header + let temp = TempDir::new().expect("tempdir"); + let path = temp.path().join("short-prelude.cache"); + + std::fs::write(&path, [0_u8; CACHE_HEADER_LEN - 1]).expect("write fixture"); + let error = read_cache_file(&path) + .await + .expect_err("truncated prelude should fail"); + assert!(matches!(error, CatalogError::CacheFormat(_))); + } + + // Rejects when file ends before etag_len bytes after header. 
+ #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))] + async fn read_rejects_short_etag_length() { + // Header claims 12 bytes of etag but only 4 provided + let temp = TempDir::new().expect("tempdir"); + let path = temp.path().join("short-etag.cache"); + + let prelude = CachePreludeV1 { + etag_len: 12, + payload_len_compressed: 0, + payload_len_decompressed: 0, + }; + let mut bytes = encode_prelude(prelude).to_vec(); + bytes.extend_from_slice(b"tiny"); // 'tiny' etag is 4 bytes + std::fs::write(&path, bytes).expect("write fixture"); + + // Header claims 12 bytes of etag but only 4 'tiny' provided, so 8 bytes short. + let error = read_cache_file(&path) + .await + .expect_err("short etag should fail"); + assert!(matches!(error, CatalogError::CacheFormat(_))); + } + + // Accepts minimal valid file with all zero-length fields. + #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))] + async fn read_supports_empty_etag_and_payload() { + let temp = TempDir::new().expect("tempdir"); + let path = temp.path().join("empty.cache"); + + let prelude = CachePreludeV1 { + etag_len: 0, + payload_len_compressed: 0, + payload_len_decompressed: 0, + }; + std::fs::write(&path, encode_prelude(prelude)).expect("write fixture"); + let data = read_cache_file(&path).await.expect("read empty cache"); + + assert_eq!(data.etag_bytes(), None); + assert!(data.payload_compressed().is_empty()); + assert_eq!(data.payload_len_compressed(), 0); + assert_eq!(data.payload_len_decompressed(), 0); + } + + // Rejects when declared compressed payload length does not match file size. 
+ #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))] + async fn read_rejects_mismatched_payload_length() { + // Header claims 10 bytes payload but only 5 provided + let temp = TempDir::new().expect("tempdir"); + let path = temp.path().join("mismatched-payload-len.cache"); + + let prelude = CachePreludeV1 { + etag_len: 4, + payload_len_compressed: 10, + payload_len_decompressed: 0, + }; + let mut bytes = encode_prelude(prelude).to_vec(); + bytes.extend_from_slice(b"etag"); + bytes.extend_from_slice(b"short"); // only 5 bytes, not 10 here. + std::fs::write(&path, bytes).expect("write fixture"); + + let error = read_cache_file(&path) + .await + .expect_err("payload length mismatch should fail"); + assert!(matches!(error, CatalogError::CacheFormat(_))); + } +} diff --git a/src/llm-coding-tools-models-dev/src/cache/mod.rs b/src/llm-coding-tools-models-dev/src/cache/mod.rs index e1ae9be5..5043b9a5 100644 --- a/src/llm-coding-tools-models-dev/src/cache/mod.rs +++ b/src/llm-coding-tools-models-dev/src/cache/mod.rs @@ -1,8 +1,19 @@ -//! Cache path resolution and management. +//! Cache path and container utilities for models.dev snapshots. //! -//! This module handles cross-platform cache directory detection and -//! the default cache file path for models.dev catalogs. +//! Responsibilities are split by concern: +//! +//! - `path` resolves the shared cache location. +//! - `format` defines the cache container layout and read/write helpers. +//! +//! Runtime behavior follows crate features: +//! - `tokio` (default): async file I/O APIs. +//! - `blocking`: sync file I/O APIs. +//! +//! The public API currently exposes path resolution only; container helpers are +//! crate-internal until the sync/load flow is wired. 
+#[allow(dead_code)] // Wired into the load/sync path down the road +pub(crate) mod format; mod path; pub use crate::error::CatalogResult; diff --git a/src/llm-coding-tools-models-dev/src/cache/path.rs b/src/llm-coding-tools-models-dev/src/cache/path.rs index 06a43a7e..9aa5bf58 100644 --- a/src/llm-coding-tools-models-dev/src/cache/path.rs +++ b/src/llm-coding-tools-models-dev/src/cache/path.rs @@ -1,13 +1,14 @@ //! Cross-platform cache path resolution. -#![allow(dead_code)] - use crate::{error::CatalogResult, CatalogError}; use std::path::PathBuf; /// Environment variable name for overriding the default cache path. pub const CACHE_PATH_ENV_VAR: &str = "LLM_CODING_TOOLS_MODELS_DEV_CACHE_PATH"; +const CACHE_SUBDIR: &str = "llm-coding-tools"; +const CACHE_FILENAME: &str = "models.dev.catalog.v1.cache"; + /// Returns the shared cache path for the models.dev catalog. /// /// This function determines the appropriate cache location using the following @@ -43,9 +44,6 @@ pub const CACHE_PATH_ENV_VAR: &str = "LLM_CODING_TOOLS_MODELS_DEV_CACHE_PATH"; /// # Ok(()) /// # } /// ``` -const CACHE_SUBDIR: &str = "llm-coding-tools"; -const CACHE_FILENAME: &str = "models.dev.catalog.v1.cache"; - pub fn shared_cache_path() -> CatalogResult { // 1. 
Check env var first if let Ok(path) = std::env::var(CACHE_PATH_ENV_VAR) { diff --git a/src/llm-coding-tools-models-dev/src/catalog/mod.rs b/src/llm-coding-tools-models-dev/src/catalog/mod.rs index 2a826c81..af2a864e 100644 --- a/src/llm-coding-tools-models-dev/src/catalog/mod.rs +++ b/src/llm-coding-tools-models-dev/src/catalog/mod.rs @@ -50,6 +50,7 @@ impl ModelsDevCatalog { /// ``` /// use llm_coding_tools_models_dev::ModelsDevCatalog; /// + /// # #[cfg(feature = "tokio")] /// # async fn example() -> Result<(), Box> { /// let result = ModelsDevCatalog::load().await?; /// @@ -59,7 +60,18 @@ impl ModelsDevCatalog { /// } /// # Ok(()) /// # } + /// + /// # #[cfg(feature = "blocking")] + /// # fn example() -> Result<(), Box> { + /// # let result = ModelsDevCatalog::load()?; + /// // Use the catalog + /// # if let Some(entry) = result.catalog.lookup("openai", "gpt-4") { + /// # println!("API URL: {}", entry.0.api_url); + /// # } + /// # Ok(()) + /// # } /// ``` + #[maybe_async::maybe_async] pub async fn load() -> Result { todo!("ModelsDevCatalog::load() not yet implemented") } @@ -95,6 +107,7 @@ impl ModelsDevCatalog { /// use llm_coding_tools_models_dev::ModelsDevCatalog; /// use std::path::PathBuf; /// + /// # #[cfg(feature = "tokio")] /// # async fn example() -> Result<(), Box> { /// let cache_path = PathBuf::from("/tmp/my-cache.cache"); /// let result = ModelsDevCatalog::load_at(&cache_path).await?; @@ -105,7 +118,18 @@ impl ModelsDevCatalog { /// } /// # Ok(()) /// # } + /// + /// # #[cfg(feature = "blocking")] + /// # fn example() -> Result<(), Box> { + /// # let cache_path = PathBuf::from("/tmp/my-cache.cache"); + /// # let result = ModelsDevCatalog::load_at(&cache_path)?; + /// # if let Some(entry) = result.catalog.lookup("openai", "gpt-4") { + /// # println!("API URL: {}", entry.0.api_url); + /// # } + /// # Ok(()) + /// # } /// ``` + #[maybe_async::maybe_async] pub async fn load_at(path: impl AsRef) -> Result { let _path = path.as_ref(); 
todo!("ModelsDevCatalog::load_at() not yet implemented") diff --git a/src/llm-coding-tools-models-dev/src/error.rs b/src/llm-coding-tools-models-dev/src/error.rs index 02f1b5c9..c4da4a20 100644 --- a/src/llm-coding-tools-models-dev/src/error.rs +++ b/src/llm-coding-tools-models-dev/src/error.rs @@ -30,6 +30,10 @@ pub enum CatalogError { #[error("decode error: {0}")] BitcodeDecode(String), + /// The on-disk cache file is malformed or incompatible. + #[error("cache format error: {0}")] + CacheFormat(&'static str), + /// The catalog failed to build from source rows. #[error("catalog build error: {0}")] ModelCatalogBuild(#[from] ModelCatalogBuildError), diff --git a/src/llm-coding-tools-models-dev/src/fs/blocking_impl.rs b/src/llm-coding-tools-models-dev/src/fs/blocking_impl.rs new file mode 100644 index 00000000..5bfbc808 --- /dev/null +++ b/src/llm-coding-tools-models-dev/src/fs/blocking_impl.rs @@ -0,0 +1,34 @@ +//! Blocking/sync filesystem operations. + +use std::io::{ErrorKind, Read as _}; +use std::path::Path; + +/// Reads a file into memory in one pre-sized allocation. +#[inline] +pub(crate) fn read(path: impl AsRef) -> std::io::Result> { + let mut file = std::fs::File::open(path)?; + let file_len_u64 = file.metadata()?.len(); + let file_len = usize::try_from(file_len_u64).map_err(|_| { + std::io::Error::new(ErrorKind::InvalidData, "file is too large to fit in memory") + })?; + + let mut bytes = super::alloc_uninit_u8_slice(file_len); + if file_len != 0 { + let buf = super::uninit_u8_slice_as_mut_bytes(&mut bytes); + file.read_exact(buf)?; + } + + Ok(super::assume_init_u8_slice(bytes)) +} + +/// Writes all bytes to a file, creating or truncating it. +#[inline] +pub(crate) fn write(path: impl AsRef, bytes: &[u8]) -> std::io::Result<()> { + std::fs::write(path, bytes) +} + +/// Creates a directory and all parent directories. 
+#[inline] +pub(crate) fn create_dir_all(path: impl AsRef) -> std::io::Result<()> { + std::fs::create_dir_all(path) +} diff --git a/src/llm-coding-tools-models-dev/src/fs/mod.rs b/src/llm-coding-tools-models-dev/src/fs/mod.rs new file mode 100644 index 00000000..e08a6304 --- /dev/null +++ b/src/llm-coding-tools-models-dev/src/fs/mod.rs @@ -0,0 +1,45 @@ +//! Filesystem abstraction layer. +//! +//! Provides unified APIs that work with both sync and async runtimes. +//! Exactly one runtime feature must be enabled: +//! - `tokio`: Async operations using the tokio runtime +//! - `blocking`: Synchronous operations + +use std::mem::MaybeUninit; + +#[cfg(all(feature = "tokio", feature = "blocking"))] +compile_error!("Features `tokio` and `blocking` are mutually exclusive."); + +#[cfg(not(any(feature = "tokio", feature = "blocking")))] +compile_error!("Either `tokio` or `blocking` feature must be enabled for the fs module."); + +/// Allocates an uninitialized boxed byte slice with logical length `len`. +#[inline] +pub(crate) fn alloc_uninit_u8_slice(len: usize) -> Box<[MaybeUninit]> { + Box::<[u8]>::new_uninit_slice(len) +} + +/// Views an uninitialized `u8` slice as mutable bytes for initialization. +#[inline] +pub(crate) fn uninit_u8_slice_as_mut_bytes(bytes: &mut [MaybeUninit]) -> &mut [u8] { + // SAFETY: `MaybeUninit` has identical layout to `u8`; caller only uses + // returned slice for writes before reading. + unsafe { std::slice::from_raw_parts_mut(bytes.as_mut_ptr().cast::(), bytes.len()) } +} + +/// Converts a fully-initialized boxed uninitialized slice into initialized bytes. +#[inline] +pub(crate) fn assume_init_u8_slice(bytes: Box<[MaybeUninit]>) -> Box<[u8]> { + // SAFETY: caller guarantees all bytes were initialized. 
+ unsafe { bytes.assume_init() } +} + +#[cfg(feature = "tokio")] +mod tokio_impl; +#[cfg(feature = "tokio")] +pub(crate) use tokio_impl::*; + +#[cfg(feature = "blocking")] +mod blocking_impl; +#[cfg(feature = "blocking")] +pub(crate) use blocking_impl::*; diff --git a/src/llm-coding-tools-models-dev/src/fs/tokio_impl.rs b/src/llm-coding-tools-models-dev/src/fs/tokio_impl.rs new file mode 100644 index 00000000..53474ce1 --- /dev/null +++ b/src/llm-coding-tools-models-dev/src/fs/tokio_impl.rs @@ -0,0 +1,35 @@ +//! Tokio-based async filesystem operations. + +use std::io::ErrorKind; +use std::path::Path; +use tokio::io::AsyncReadExt as _; + +/// Reads a file into memory in one pre-sized allocation. +#[inline] +pub(crate) async fn read(path: impl AsRef) -> std::io::Result> { + let mut file = tokio::fs::File::open(path).await?; + let file_len_u64 = file.metadata().await?.len(); + let file_len = usize::try_from(file_len_u64).map_err(|_| { + std::io::Error::new(ErrorKind::InvalidData, "file is too large to fit in memory") + })?; + + let mut bytes = super::alloc_uninit_u8_slice(file_len); + if file_len != 0 { + let buf = super::uninit_u8_slice_as_mut_bytes(&mut bytes); + file.read_exact(buf).await?; + } + + Ok(super::assume_init_u8_slice(bytes)) +} + +/// Writes all bytes to a file, creating or truncating it. +#[inline] +pub(crate) async fn write(path: impl AsRef, bytes: &[u8]) -> std::io::Result<()> { + tokio::fs::write(path, bytes).await +} + +/// Creates a directory and all parent directories. 
+#[inline] +pub(crate) async fn create_dir_all(path: impl AsRef) -> std::io::Result<()> { + tokio::fs::create_dir_all(path).await +} diff --git a/src/llm-coding-tools-models-dev/src/lib.rs b/src/llm-coding-tools-models-dev/src/lib.rs index a635ec71..0cdd10f5 100644 --- a/src/llm-coding-tools-models-dev/src/lib.rs +++ b/src/llm-coding-tools-models-dev/src/lib.rs @@ -1,8 +1,16 @@ #![doc = include_str!(concat!("../", env!("CARGO_PKG_README")))] +// Validate feature combinations at compile time. +#[cfg(all(feature = "async", feature = "blocking"))] +compile_error!("Features `async` and `blocking` are mutually exclusive."); + +#[cfg(not(any(feature = "async", feature = "blocking")))] +compile_error!("Either an async runtime (e.g., `tokio`) or `blocking` feature must be enabled."); + pub mod cache; pub mod catalog; pub mod error; +mod fs; pub use cache::shared_cache_path; pub use catalog::{CatalogLoadResult, CatalogLoadSource, ModelsDevCatalog}; From a4eeca37db971813cce4323b1ae875ab718a5e9d Mon Sep 17 00:00:00 2001 From: Sewer56 Date: Fri, 6 Mar 2026 14:23:27 +0000 Subject: [PATCH 04/22] Refactor: Use vec![] instead of with_capacity + push in test --- .../src/models/catalog/internal/builder.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/llm-coding-tools-core/src/models/catalog/internal/builder.rs b/src/llm-coding-tools-core/src/models/catalog/internal/builder.rs index eb6b1fe5..3605bede 100644 --- a/src/llm-coding-tools-core/src/models/catalog/internal/builder.rs +++ b/src/llm-coding-tools-core/src/models/catalog/internal/builder.rs @@ -730,12 +730,11 @@ mod tests { )); } let last_provider_key = format!("provider_{}", 5461usize); - let mut provider_models = Vec::with_capacity(1); - provider_models.push(provider_model_source( + let provider_models = vec![ProviderModelSource::new( &last_provider_key, "m1", info(4096, 512), - )); + )]; let catalog = build_from_source(&providers, &provider_models).expect("boundary case should pass"); From 
d44a4d2e4699803e1b5eee3319dc8ebe66d26d0c Mon Sep 17 00:00:00 2001 From: Sewer56 Date: Fri, 6 Mar 2026 18:19:41 +0000 Subject: [PATCH 05/22] Changed: Use borrowed strings in ProviderModelSource and add models.dev API mapping Optimize catalog construction by borrowing string keys instead of allocating, and add direct API parsing for models.dev data. Changes: - ProviderModelSource now borrows `&'a str` keys instead of owning `String`s - Added api module with schema parsing and catalog source mapping - Added serde_json dependency for API parsing - Updated benchmarks to work with borrowed strings - Refined documentation to reflect mapping approach Benefits: - Reduces allocations during catalog construction - Enables direct mapping from models.dev API to catalog - Keeps parse memory bounded by ignoring unused fields --- src/Cargo.lock | 1 + .../benches/model_catalog_builder.rs | 48 +- .../src/models/catalog/internal/builder.rs | 31 +- .../src/models/catalog/mod.rs | 4 +- .../models/catalog/public/builder_types.rs | 31 +- src/llm-coding-tools-models-dev/Cargo.toml | 1 + src/llm-coding-tools-models-dev/README.md | 10 +- .../src/api/catalog_sources.rs | 526 ++++++++++++++++++ .../src/api/mod.rs | 12 + .../src/api/schema.rs | 153 +++++ .../src/cache/mod.rs | 2 +- .../src/catalog/mod.rs | 5 +- src/llm-coding-tools-models-dev/src/error.rs | 4 + src/llm-coding-tools-models-dev/src/lib.rs | 2 + 14 files changed, 778 insertions(+), 52 deletions(-) create mode 100644 src/llm-coding-tools-models-dev/src/api/catalog_sources.rs create mode 100644 src/llm-coding-tools-models-dev/src/api/mod.rs create mode 100644 src/llm-coding-tools-models-dev/src/api/schema.rs diff --git a/src/Cargo.lock b/src/Cargo.lock index 25cb087f..7dee5401 100644 --- a/src/Cargo.lock +++ b/src/Cargo.lock @@ -1507,6 +1507,7 @@ dependencies = [ "maybe-async", "reqwest 0.13.1", "serde", + "serde_json", "tempfile", "thiserror 2.0.18", "tokio", diff --git a/src/llm-coding-tools-core/benches/model_catalog_builder.rs 
b/src/llm-coding-tools-core/benches/model_catalog_builder.rs index 4aedbac3..b9fcdd7d 100644 --- a/src/llm-coding-tools-core/benches/model_catalog_builder.rs +++ b/src/llm-coding-tools-core/benches/model_catalog_builder.rs @@ -7,9 +7,32 @@ use llm_coding_tools_core::models::{ ProviderType, }; +struct ProviderModelSpec { + provider_idx: usize, + model_key: String, + model: ModelInfo, +} + struct Dataset { providers: Vec, - provider_models: Vec, + provider_models: Vec, +} + +impl Dataset { + fn provider_model_sources(&self) -> Vec> { + let mut sources = Vec::with_capacity(self.provider_models.len()); + for provider_model in &self.provider_models { + let provider_key = self.providers[provider_model.provider_idx] + .provider_key + .as_str(); + sources.push(ProviderModelSource::new( + provider_key, + provider_model.model_key.as_str(), + provider_model.model, + )); + } + sources + } } fn make_dataset(provider_count: usize, model_count: usize) -> Dataset { @@ -47,17 +70,17 @@ fn make_dataset(provider_count: usize, model_count: usize) -> Dataset { None }; - provider_models.push(ProviderModelSource::new( - format!("provider-{provider_idx}"), - format!("org-{}/model-{i}", i % 17), - ModelInfo { + provider_models.push(ProviderModelSpec { + provider_idx, + model_key: format!("org-{}/model-{i}", i % 17), + model: ModelInfo { modalities: Modality::TEXT, max_input: 4096 + ((cfg as u32) * 32), max_output: 512 + ((cfg as u32) * 8), temperature, top_p, }, - )); + }); } Dataset { @@ -66,9 +89,8 @@ fn make_dataset(provider_count: usize, model_count: usize) -> Dataset { } } -fn construct_batch(dataset: &Dataset) { - let catalog = - ModelCatalog::build(&dataset.providers, &dataset.provider_models).expect("batch build"); +fn construct_batch(providers: &[ProviderSource], provider_models: &[ProviderModelSource<'_>]) { + let catalog = ModelCatalog::build(providers, provider_models).expect("batch build"); black_box(( catalog.provider_count(), @@ -85,12 +107,18 @@ fn 
benchmark_builder_construction(c: &mut Criterion) { ("max", 16384usize, 65535usize), ] { let dataset = make_dataset(provider_count, model_count); + let provider_model_sources = dataset.provider_model_sources(); group.throughput(Throughput::Elements( (provider_count + dataset.provider_models.len()) as u64, )); group.bench_with_input(BenchmarkId::new("batch", name), &dataset, |b, input| { - b.iter(|| construct_batch(black_box(input))) + b.iter(|| { + construct_batch( + black_box(&input.providers), + black_box(&provider_model_sources), + ) + }) }); } diff --git a/src/llm-coding-tools-core/src/models/catalog/internal/builder.rs b/src/llm-coding-tools-core/src/models/catalog/internal/builder.rs index 3605bede..02a412d4 100644 --- a/src/llm-coding-tools-core/src/models/catalog/internal/builder.rs +++ b/src/llm-coding-tools-core/src/models/catalog/internal/builder.rs @@ -68,7 +68,7 @@ fn build_state_with_capacity( #[inline] pub(crate) fn build_from_source( providers: &[ProviderSource], - provider_models: &[ProviderModelSource], + provider_models: &[ProviderModelSource<'_>], ) -> Result { let provider_stats = analyze_provider_sources(providers)?; let mut state = build_state_with_capacity(provider_stats.provider_count, provider_models.len()); @@ -90,7 +90,7 @@ pub(crate) fn build_from_source( fn populate_tables_once( state: &mut BuildState, providers: &[ProviderSource], - provider_models: &[ProviderModelSource], + provider_models: &[ProviderModelSource<'_>], ) -> Result<(), ModelCatalogBuildError> { let mut env_start: u16 = 0; let mut provider_idx_by_key: AHashMap<&str, ProviderIdx> = @@ -127,18 +127,15 @@ fn populate_tables_once( for provider_model in provider_models { // Validate provider exists before inserting model. 
- if !provider_idx_by_key.contains_key(provider_model.provider_key.as_str()) { + if !provider_idx_by_key.contains_key(provider_model.provider_key) { return Err(ModelCatalogBuildError::ProviderKeyNotFoundForModel { - provider_key: provider_model.provider_key.clone(), - model_key: provider_model.model_key.clone(), + provider_key: provider_model.provider_key.to_owned(), + model_key: provider_model.model_key.to_owned(), }); } // Check for duplicate (provider_key, model_key) pair. - let key = ( - provider_model.provider_key.as_str(), - provider_model.model_key.as_str(), - ); + let key = (provider_model.provider_key, provider_model.model_key); if !seen_provider_models.insert(key) { return Err(ModelCatalogBuildError::DuplicateKey { table: LookupTableKind::ProviderModel, @@ -197,7 +194,7 @@ fn insert_provider( #[inline] fn insert_provider_model( state: &mut BuildState, - provider_model: &ProviderModelSource, + provider_model: &ProviderModelSource<'_>, ) -> Result<(), ModelCatalogBuildError> { let info = provider_model.model; @@ -240,8 +237,8 @@ fn insert_provider_model( let key = hash_provider_model_key( &state.hash_state, - &provider_model.provider_key, - &provider_model.model_key, + provider_model.provider_key, + provider_model.model_key, ); let hash48 = PackedProviderModelTableEntry::truncate_hash48(key.as_u64()); @@ -454,15 +451,15 @@ mod tests { ProviderSource::new(provider_key, provider) } - fn provider_model_source( - provider_key: &str, - model_key: &str, + fn provider_model_source<'a>( + provider_key: &'a str, + model_key: &'a str, model: ModelInfo, - ) -> ProviderModelSource { + ) -> ProviderModelSource<'a> { ProviderModelSource::new(provider_key, model_key, model) } - fn test_sources() -> (Vec, Vec) { + fn test_sources() -> (Vec, Vec>) { ( vec![provider_source( "alpha", diff --git a/src/llm-coding-tools-core/src/models/catalog/mod.rs b/src/llm-coding-tools-core/src/models/catalog/mod.rs index 9d99f090..4d7095c6 100644 --- 
a/src/llm-coding-tools-core/src/models/catalog/mod.rs +++ b/src/llm-coding-tools-core/src/models/catalog/mod.rs @@ -298,7 +298,7 @@ impl ModelCatalog { #[inline] pub fn build( providers: &[ProviderSource], - provider_models: &[ProviderModelSource], + provider_models: &[ProviderModelSource<'_>], ) -> Result { build_from_source(providers, provider_models) } @@ -562,7 +562,7 @@ mod tests { .into_iter() .map(|(key, info)| ProviderSource::new(key, info)) .collect(); - let provider_model_sources: Vec = provider_models + let provider_model_sources: Vec> = provider_models .into_iter() .map(|(provider_key, model_key, info)| { ProviderModelSource::new(provider_key, model_key, info) diff --git a/src/llm-coding-tools-core/src/models/catalog/public/builder_types.rs b/src/llm-coding-tools-core/src/models/catalog/public/builder_types.rs index 7167c3cc..28b45227 100644 --- a/src/llm-coding-tools-core/src/models/catalog/public/builder_types.rs +++ b/src/llm-coding-tools-core/src/models/catalog/public/builder_types.rs @@ -80,17 +80,22 @@ impl From<(String, ProviderInfo)> for ProviderSource { /// /// This wrapper keeps builder input self-documenting and avoids tuple-position /// ambiguity at call sites. +/// +/// The keys are borrowed because the catalog builder hashes them during +/// construction and does not retain them afterward. Callers must therefore keep +/// the referenced strings alive until [`crate::models::catalog::ModelCatalog::build`] +/// returns. #[derive(Debug, Clone, PartialEq)] -pub struct ProviderModelSource { - /// Provider identifier used by lookups (for example, `"openai"`). - pub provider_key: String, - /// Model identifier used by lookups (for example, `"gpt-4"`). - pub model_key: String, +pub struct ProviderModelSource<'a> { + /// Borrowed provider identifier used by lookups (for example, `"openai"`). + pub provider_key: &'a str, + /// Borrowed model identifier used by lookups (for example, `"gpt-4"`). 
+ pub model_key: &'a str, /// Model metadata associated with [`Self::model_key`]. pub model: ModelInfo, } -impl ProviderModelSource { +impl<'a> ProviderModelSource<'a> { /// Creates a provider model source. /// /// # Parameters @@ -103,22 +108,18 @@ impl ProviderModelSource { /// /// A new [`ProviderModelSource`]. #[inline] - pub fn new( - provider_key: impl Into, - model_key: impl Into, - model: ModelInfo, - ) -> Self { + pub fn new(provider_key: &'a str, model_key: &'a str, model: ModelInfo) -> Self { Self { - provider_key: provider_key.into(), - model_key: model_key.into(), + provider_key, + model_key, model, } } } -impl From<(String, String, ModelInfo)> for ProviderModelSource { +impl<'a> From<(&'a str, &'a str, ModelInfo)> for ProviderModelSource<'a> { #[inline] - fn from((provider_key, model_key, model): (String, String, ModelInfo)) -> Self { + fn from((provider_key, model_key, model): (&'a str, &'a str, ModelInfo)) -> Self { Self { provider_key, model_key, diff --git a/src/llm-coding-tools-models-dev/Cargo.toml b/src/llm-coding-tools-models-dev/Cargo.toml index 4aab2702..934588c7 100644 --- a/src/llm-coding-tools-models-dev/Cargo.toml +++ b/src/llm-coding-tools-models-dev/Cargo.toml @@ -50,6 +50,7 @@ endian-writer-derive = "0.1.0" # JSON parsing for models.dev API responses serde = { version = "1.0.228", features = ["derive"] } +serde_json = "1.0.145" # Ergonomic error definitions thiserror = "2.0.18" diff --git a/src/llm-coding-tools-models-dev/README.md b/src/llm-coding-tools-models-dev/README.md index 353a16e5..a0818b1e 100644 --- a/src/llm-coding-tools-models-dev/README.md +++ b/src/llm-coding-tools-models-dev/README.md @@ -18,7 +18,7 @@ the relevant data we need; and create a llm_coding_tools_core `ModelCatalog`. 1. Read cache header (if present) and get the old ETag. 2. Send request to models.dev with `If-None-Match` when ETag exists. 3. If server returns `304 Not Modified`, load catalog from cache. -4. 
If server returns `200 OK`, parse and normalize JSON, write fresh cache, then build catalog. +4. If server returns `200 OK`, parse JSON, map it into catalog sources, write fresh cache, then build catalog. 5. If network fails, try cached data as fallback; if no valid cache exists, return an error. ### Non-blocking (`tokio`) @@ -31,9 +31,9 @@ async fn load_catalog() -> Result<(), Box> { let result = ModelsDevCatalog::load().await?; match result.source { - CatalogLoadSource::Downloaded => println!("Downloaded fresh snapshot."), + CatalogLoadSource::Downloaded => println!("Downloaded fresh catalog data."), CatalogLoadSource::NotModifiedCache => println!("Cache is already up to date."), - CatalogLoadSource::FallbackCache => println!("Network unavailable, using cached snapshot."), + CatalogLoadSource::FallbackCache => println!("Network unavailable, using cached catalog data."), } if let Some(entry) = result.catalog.lookup("openai", "gpt-4") { @@ -55,9 +55,9 @@ fn load_catalog() -> Result<(), Box> { let result = ModelsDevCatalog::load()?; match result.source { - CatalogLoadSource::Downloaded => println!("Downloaded fresh snapshot."), + CatalogLoadSource::Downloaded => println!("Downloaded fresh catalog data."), CatalogLoadSource::NotModifiedCache => println!("Cache is already up to date."), - CatalogLoadSource::FallbackCache => println!("Network unavailable, using cached snapshot."), + CatalogLoadSource::FallbackCache => println!("Network unavailable, using cached catalog data."), } if let Some(entry) = result.catalog.lookup("openai", "gpt-4") { diff --git a/src/llm-coding-tools-models-dev/src/api/catalog_sources.rs b/src/llm-coding-tools-models-dev/src/api/catalog_sources.rs new file mode 100644 index 00000000..3593c33f --- /dev/null +++ b/src/llm-coding-tools-models-dev/src/api/catalog_sources.rs @@ -0,0 +1,526 @@ +//! models.dev API -> `ModelCatalog` mapping. +//! +//! This module parses models.dev `api.json`, maps provider/model metadata into +//! 
transient core builder inputs, and immediately constructs a [`ModelCatalog`]. +//! +//! Mapping policy: +//! - missing limits default to `0`; +//! - model modalities are mapped from `modalities.input[]`/`modalities.output[]` +//! into directional [`Modality`] flags; +//! - unknown npm package identifiers map to [`ProviderType::Unknown`]; +//! - unknown modality labels are ignored; if nothing maps, modalities default to +//! [`Modality::TEXT`]; +//! - model rows remain provider-scoped; shared configurations are deduplicated by +//! core during catalog build. + +use super::schema::{parse_api_json, ApiModelEntry, ApiModelLimit, ApiModelModalities}; +use crate::error::CatalogResult; +use llm_coding_tools_core::models::{ + Modality, ModelCatalog, ModelInfo, ProviderInfo, ProviderModelSource, ProviderSource, + ProviderType, +}; + +/// Parses models.dev `api.json` bytes and builds a [`ModelCatalog`]. +pub(crate) fn catalog_from_api_json_bytes(json_bytes: &[u8]) -> CatalogResult { + let provider_entries = parse_api_json(json_bytes)?; + let mut provider_model_count = 0usize; + for provider in provider_entries.values() { + provider_model_count = provider_model_count.saturating_add(provider.models.len()); + } + + let mut provider_rows = Vec::with_capacity(provider_entries.len()); + let mut model_rows = Vec::with_capacity(provider_model_count); + + for (provider_key, provider) in &provider_entries { + debug_assert!(provider.id.is_empty() || provider.id == *provider_key); + + let api_type = provider_type_from_models_dev_npm(provider.npm.as_deref()); + for (model_key, model_entry) in &provider.models { + model_rows.push(ProviderModelSource::new( + provider_key.as_str(), + model_key.as_str(), + model_info_from_entry(model_entry), + )); + } + + provider_rows.push(ProviderSource::new( + provider_key.as_str(), + ProviderInfo { + api_url: provider.api.clone().unwrap_or_default(), + env_vars: provider.env.clone(), + api_type, + }, + )); + } + + Ok(ModelCatalog::build(&provider_rows, 
&model_rows)?) +} + +#[inline] +fn model_info_from_entry(model_entry: &ApiModelEntry) -> ModelInfo { + let (max_input, max_output) = match model_entry.limit.as_ref() { + Some(limit) => (model_max_input(limit), limit.output), + None => (0, 0), + }; + let modalities = model_modalities(model_entry.modalities.as_ref()); + + ModelInfo { + modalities, + max_input, + max_output, + temperature: None, + top_p: None, + } +} + +#[inline] +fn model_modalities(raw: Option<&ApiModelModalities>) -> Modality { + let Some(raw) = raw else { + return Modality::TEXT; + }; + + let mut modalities = Modality::empty(); + for label in &raw.input { + modalities |= input_modality_flag(label.as_str()); + } + for label in &raw.output { + modalities |= output_modality_flag(label.as_str()); + } + + if modalities.is_empty() { + Modality::TEXT + } else { + modalities + } +} + +#[inline] +fn input_modality_flag(label: &str) -> Modality { + match label { + "text" => Modality::TEXT_INPUT, + "image" => Modality::IMAGE_INPUT, + "audio" => Modality::AUDIO_INPUT, + "video" => Modality::VIDEO_INPUT, + // `pdf` appears in models.dev input modalities. Core has no PDF bit yet, + // so map it to text-input capability as closest supported fallback. 
+ "pdf" => Modality::TEXT_INPUT, + _ => Modality::empty(), + } +} + +#[inline] +fn output_modality_flag(label: &str) -> Modality { + match label { + "text" => Modality::TEXT_OUTPUT, + "image" => Modality::IMAGE_OUTPUT, + "audio" => Modality::AUDIO_OUTPUT, + "video" => Modality::VIDEO_OUTPUT, + _ => Modality::empty(), + } +} + +#[inline] +fn model_max_input(limit: &ApiModelLimit) -> u32 { + if limit.input == 0 { + limit.context + } else { + limit.input + } +} + +#[inline] +fn provider_type_from_models_dev_npm(npm_package: Option<&str>) -> ProviderType { + match npm_package { + Some("@ai-sdk/openai") => ProviderType::OpenAiCompletions, + Some("@ai-sdk/openai-responses") => ProviderType::OpenAiResponses, + Some("@ai-sdk/anthropic") => ProviderType::Anthropic, + Some("@ai-sdk/google") => ProviderType::Google, + Some("@ai-sdk/groq") => ProviderType::Groq, + Some("@ai-sdk/mistral") => ProviderType::Mistral, + Some("@ai-sdk/ollama") => ProviderType::Ollama, + Some("@ai-sdk/amazon-bedrock") => ProviderType::Bedrock, + Some("@ai-sdk/azure") => ProviderType::Azure, + Some("@openrouter/ai-sdk-provider") => ProviderType::OpenRouter, + Some("@ai-sdk/huggingface") => ProviderType::HuggingFace, + Some("@ai-sdk/cohere") => ProviderType::Cohere, + Some("@ai-sdk/chatgpt-oauth") => ProviderType::ChatGptOAuth, + Some("@ai-sdk/claude-code-oauth") => ProviderType::ClaudeCodeOAuth, + Some("@ai-sdk/antigravity") => ProviderType::Antigravity, + Some(_) | None => ProviderType::Unknown, + } +} + +#[cfg(test)] +mod tests { + use super::{catalog_from_api_json_bytes, provider_type_from_models_dev_npm}; + use llm_coding_tools_core::models::{Modality, ModelCatalog, ProviderType}; + + fn catalog(json: &[u8]) -> ModelCatalog { + catalog_from_api_json_bytes(json).expect("API payload should map") + } + + fn provider_snapshot( + catalog: &ModelCatalog, + provider_key: &str, + ) -> (String, Vec, ProviderType) { + let provider = catalog + .lookup_provider(provider_key) + .expect("provider should 
exist"); + ( + provider.api_url.to_string(), + provider + .env_vars() + .iter() + .map(|env_var| (*env_var).to_string()) + .collect(), + provider.api_type, + ) + } + + fn model_snapshot( + catalog: &ModelCatalog, + provider_key: &str, + model_key: &str, + ) -> (Modality, u32, u32, Option, Option) { + let model = catalog + .lookup_provider_model(provider_key, model_key) + .expect("provider model should exist"); + ( + model.modalities, + model.max_input, + model.max_output, + model.temperature(), + model.top_p(), + ) + } + + #[test] + fn catalog_source_mapping_maps_provider_rows() { + let api_json = br#" + { + "alpha": { + "id": "alpha", + "npm": "@ai-sdk/openai-responses", + "api": "https://alpha.example/v1", + "env": ["ALPHA_KEY"], + "models": {} + } + } + "#; + let catalog = catalog(api_json); + + assert_eq!(catalog.provider_count(), 1); + let provider = catalog + .lookup_provider("alpha") + .expect("alpha provider should exist"); + assert_eq!(provider.api_url, "https://alpha.example/v1"); + assert_eq!(provider.env_vars(), ["ALPHA_KEY"]); + assert_eq!(provider.api_type, ProviderType::OpenAiResponses); + } + + #[test] + fn catalog_source_mapping_defaults_missing_limits_to_zero() { + let api_json = br#" + { + "alpha": { + "id": "alpha", + "npm": null, + "api": null, + "env": [], + "models": { + "m1": {} + } + } + } + "#; + let catalog = catalog(api_json); + + assert_eq!(catalog.provider_model_count(), 1); + let model = catalog + .lookup_provider_model("alpha", "m1") + .expect("alpha/m1 should exist"); + assert_eq!(model.modalities, Modality::TEXT); + assert_eq!(model.max_input, 0); + assert_eq!(model.max_output, 0); + } + + #[test] + fn catalog_source_mapping_uses_limit_input_when_present() { + let api_json = br#" + { + "alpha": { + "id": "alpha", + "npm": null, + "api": null, + "env": [], + "models": { + "m1": { + "limit": { + "context": 128000, + "input": 124000, + "output": 4096 + } + } + } + } + } + "#; + let catalog = catalog(api_json); + + let model = catalog 
+ .lookup_provider_model("alpha", "m1") + .expect("alpha/m1 should exist"); + assert_eq!(model.max_input, 124000); + assert_eq!(model.max_output, 4096); + } + + #[test] + fn catalog_source_mapping_maps_directional_modalities() { + let api_json = br#" + { + "alpha": { + "id": "alpha", + "npm": null, + "api": null, + "env": [], + "models": { + "m1": { + "modalities": { + "input": ["text", "image", "pdf"], + "output": ["text", "audio"] + }, + "limit": { "context": 4096, "output": 512 } + } + } + } + } + "#; + + let catalog = catalog(api_json); + let model = catalog + .lookup_provider_model("alpha", "m1") + .expect("alpha/m1 should exist"); + assert_eq!( + model.modalities, + Modality::TEXT_INPUT + | Modality::TEXT_OUTPUT + | Modality::IMAGE_INPUT + | Modality::AUDIO_OUTPUT + ); + } + + #[test] + fn catalog_source_mapping_maps_pdf_input_to_text_input() { + let api_json = br#" + { + "alpha": { + "id": "alpha", + "npm": null, + "api": null, + "env": [], + "models": { + "m1": { + "modalities": { + "input": ["pdf"], + "output": [] + } + } + } + } + } + "#; + + let catalog = catalog(api_json); + let model = catalog + .lookup_provider_model("alpha", "m1") + .expect("alpha/m1 should exist"); + assert_eq!(model.modalities, Modality::TEXT_INPUT); + } + + #[test] + fn catalog_source_mapping_falls_back_to_text_for_unknown_modalities() { + let api_json = br#" + { + "alpha": { + "id": "alpha", + "npm": null, + "api": null, + "env": [], + "models": { + "m1": { + "modalities": { + "input": ["binary"], + "output": ["embedding"] + } + } + } + } + } + "#; + + let catalog = catalog(api_json); + let model = catalog + .lookup_provider_model("alpha", "m1") + .expect("alpha/m1 should exist"); + assert_eq!(model.modalities, Modality::TEXT); + } + + #[test] + fn catalog_source_mapping_keeps_duplicate_model_ids_per_provider() { + let api_json = br#" + { + "alpha": { + "id": "alpha", + "npm": "@ai-sdk/openai", + "api": null, + "env": [], + "models": { + "m1": { + "modalities": { + "input": 
["image"], + "output": ["text"] + }, + "limit": { "context": 4096, "output": 512 } + } + } + }, + "beta": { + "id": "beta", + "npm": "@ai-sdk/anthropic", + "api": null, + "env": [], + "models": { + "m1": { + "modalities": { + "input": ["audio"], + "output": ["video"] + }, + "limit": { "context": 8192, "output": 256 } + } + } + } + } + "#; + let catalog = catalog(api_json); + + assert_eq!(catalog.provider_model_count(), 2); + + let alpha_model = catalog + .lookup_provider_model("alpha", "m1") + .expect("alpha/m1 should exist"); + assert_eq!(alpha_model.max_input, 4096); + assert_eq!(alpha_model.max_output, 512); + assert_eq!( + alpha_model.modalities, + Modality::IMAGE_INPUT | Modality::TEXT_OUTPUT + ); + + let beta_model = catalog + .lookup_provider_model("beta", "m1") + .expect("beta/m1 should exist"); + assert_eq!(beta_model.max_input, 8192); + assert_eq!(beta_model.max_output, 256); + assert_eq!( + beta_model.modalities, + Modality::AUDIO_INPUT | Modality::VIDEO_OUTPUT + ); + } + + #[test] + fn catalog_source_mapping_keeps_same_data_for_different_input_key_order() { + let api_json_a = br#" + { + "beta": { + "id": "beta", + "npm": "@ai-sdk/anthropic", + "api": null, + "env": [], + "models": { + "m2": { "limit": { "context": 2048, "output": 512 } } + } + }, + "alpha": { + "id": "alpha", + "npm": "@ai-sdk/openai", + "api": null, + "env": [], + "models": { + "m1": { "limit": { "context": 1024, "output": 256 } } + } + } + } + "#; + + let api_json_b = br#" + { + "alpha": { + "id": "alpha", + "npm": "@ai-sdk/openai", + "api": null, + "env": [], + "models": { + "m1": { "limit": { "context": 1024, "output": 256 } } + } + }, + "beta": { + "id": "beta", + "npm": "@ai-sdk/anthropic", + "api": null, + "env": [], + "models": { + "m2": { "limit": { "context": 2048, "output": 512 } } + } + } + } + "#; + + let catalog_a = catalog(api_json_a); + let catalog_b = catalog(api_json_b); + + assert_eq!(catalog_a.provider_count(), catalog_b.provider_count()); + assert_eq!( + 
catalog_a.provider_model_count(), + catalog_b.provider_model_count() + ); + assert_eq!( + catalog_a.model_config_count(), + catalog_b.model_config_count() + ); + assert_eq!( + provider_snapshot(&catalog_a, "alpha"), + provider_snapshot(&catalog_b, "alpha") + ); + assert_eq!( + provider_snapshot(&catalog_a, "beta"), + provider_snapshot(&catalog_b, "beta") + ); + assert_eq!( + model_snapshot(&catalog_a, "alpha", "m1"), + model_snapshot(&catalog_b, "alpha", "m1") + ); + assert_eq!( + model_snapshot(&catalog_a, "beta", "m2"), + model_snapshot(&catalog_b, "beta", "m2") + ); + } + + #[test] + fn provider_type_mapping_handles_known_and_unknown_packages() { + assert_eq!( + provider_type_from_models_dev_npm(Some("@ai-sdk/openai")), + ProviderType::OpenAiCompletions + ); + assert_eq!( + provider_type_from_models_dev_npm(Some("@ai-sdk/google")), + ProviderType::Google + ); + assert_eq!( + provider_type_from_models_dev_npm(Some("@ai-sdk/openai-compatible")), + ProviderType::Unknown + ); + assert_eq!( + provider_type_from_models_dev_npm(None), + ProviderType::Unknown + ); + } +} diff --git a/src/llm-coding-tools-models-dev/src/api/mod.rs b/src/llm-coding-tools-models-dev/src/api/mod.rs new file mode 100644 index 00000000..730624ee --- /dev/null +++ b/src/llm-coding-tools-models-dev/src/api/mod.rs @@ -0,0 +1,12 @@ +//! models.dev API parsing and catalog-source mapping. +//! +//! - [`schema`] parses upstream `https://models.dev/api.json` into a minimal +//! serde representation. +//! - [`catalog_sources`] maps parsed data into a +//! [`llm_coding_tools_core::models::ModelCatalog`]. +//! +//! Both modules intentionally keep only fields required by core catalog +//! construction so ingest stays fast and memory-bounded. 
+ +pub(crate) mod catalog_sources; +pub(crate) mod schema; diff --git a/src/llm-coding-tools-models-dev/src/api/schema.rs b/src/llm-coding-tools-models-dev/src/api/schema.rs new file mode 100644 index 00000000..add3092e --- /dev/null +++ b/src/llm-coding-tools-models-dev/src/api/schema.rs @@ -0,0 +1,153 @@ +//! Minimal models.dev API schema used by this crate. +//! +//! We deserialize only fields needed for catalog-source mapping: +//! provider metadata (`id`, `npm`, `api`, `env`) and model token limits +//! (`limit.context`, `limit.input`, `limit.output`) plus directional modalities +//! (`modalities.input[]`, `modalities.output[]`). +//! +//! Representative payload shape from `https://models.dev/api.json`: +//! +//! ```json +//! { +//! "openai": { +//! "id": "openai", +//! "npm": "@ai-sdk/openai", +//! "api": null, +//! "env": ["OPENAI_API_KEY"], +//! "models": { +//! "gpt-4o": { +//! "id": "gpt-4o", +//! "modalities": { +//! "input": ["text", "image"], +//! "output": ["text"] +//! }, +//! "limit": { +//! "context": 128000, +//! "output": 16384 +//! } +//! } +//! } +//! } +//! } +//! ``` +//! +//! Mapping into local structs: +//! - top-level provider map entry -> [`ApiProviderEntry`] +//! - `models.<model_id>` object -> [`ApiModelEntry`] +//! - `models.<model_id>.modalities` object -> [`ApiModelModalities`] +//! - `models.<model_id>.limit` object -> [`ApiModelLimit`] +//! +//! Unknown fields are intentionally ignored so we can drop large unused sections +//! early and keep parse memory bounded. 
+ +use crate::error::CatalogResult; +use serde::Deserialize; +use std::collections::HashMap; + +#[derive(Debug, Deserialize)] +pub(crate) struct ApiProviderEntry { + pub(crate) id: String, + #[serde(default)] + pub(crate) npm: Option, + #[serde(default)] + pub(crate) api: Option, + #[serde(default)] + pub(crate) env: Vec, + #[serde(default)] + pub(crate) models: HashMap, +} + +#[derive(Debug, Deserialize)] +pub(crate) struct ApiModelEntry { + #[serde(default)] + pub(crate) limit: Option, + #[serde(default)] + pub(crate) modalities: Option, +} + +#[derive(Debug, Deserialize)] +pub(crate) struct ApiModelModalities { + #[serde(default)] + pub(crate) input: Vec, + #[serde(default)] + pub(crate) output: Vec, +} + +#[derive(Debug, Deserialize)] +pub(crate) struct ApiModelLimit { + #[serde(default)] + pub(crate) context: u32, + #[serde(default)] + pub(crate) input: u32, + #[serde(default)] + pub(crate) output: u32, +} + +/// Parses upstream `api.json` bytes into a provider map. +/// +/// Input must match the current models.dev shape: a flat top-level object where +/// each key is a provider id and each value is a provider entry. +#[inline] +pub(crate) fn parse_api_json( + json_bytes: &[u8], +) -> CatalogResult> { + Ok(serde_json::from_slice(json_bytes)?) 
+} + +#[cfg(test)] +mod tests { + use super::parse_api_json; + + #[test] + fn parse_api_json_supports_flat_provider_map() { + let api_json = br#"{"alpha":{"id":"alpha","npm":"@ai-sdk/openai","api":null,"env":["ALPHA_KEY"],"models":{"m1":{"modalities":{"input":["text","image"],"output":["text"]},"limit":{"context":4096,"output":512}}}}}"#; + let providers = parse_api_json(api_json).expect("API payload should parse"); + let provider = providers.get("alpha").expect("provider should exist"); + + assert_eq!(provider.id, "alpha"); + assert_eq!(provider.npm.as_deref(), Some("@ai-sdk/openai")); + assert_eq!(provider.env.as_slice(), ["ALPHA_KEY"]); + + let model = provider.models.get("m1").expect("model should exist"); + let modalities = model.modalities.as_ref().expect("modalities should exist"); + let limit = model.limit.as_ref().expect("limit should exist"); + assert_eq!(modalities.input.as_slice(), ["text", "image"]); + assert_eq!(modalities.output.as_slice(), ["text"]); + assert_eq!(limit.context, 4096); + assert_eq!(limit.output, 512); + } + + #[test] + fn parse_api_json_ignores_unknown_fields() { + let api_json = br#" + { + "alpha": { + "id": "alpha", + "name": "Alpha", + "npm": "@ai-sdk/openai", + "api": "https://alpha.example/v1", + "env": ["ALPHA_KEY"], + "models": { + "m1": { + "description": "ignored", + "limit": { + "context": 128000, + "input": 124000, + "output": 4096 + } + } + } + } + } + "#; + + let providers = parse_api_json(api_json).expect("API payload should parse"); + let provider = providers.get("alpha").expect("provider should exist"); + let model = provider.models.get("m1").expect("model should exist"); + let limit = model.limit.as_ref().expect("limit should exist"); + + assert_eq!(limit.context, 128000); + assert_eq!(limit.input, 124000); + assert_eq!(limit.output, 4096); + } +} diff --git a/src/llm-coding-tools-models-dev/src/cache/mod.rs b/src/llm-coding-tools-models-dev/src/cache/mod.rs index 5043b9a5..966419c9 100644 --- 
a/src/llm-coding-tools-models-dev/src/cache/mod.rs +++ b/src/llm-coding-tools-models-dev/src/cache/mod.rs @@ -1,4 +1,4 @@ -//! Cache path and container utilities for models.dev snapshots. +//! Cache path and container utilities for models.dev catalog data. //! //! Responsibilities are split by concern: //! diff --git a/src/llm-coding-tools-models-dev/src/catalog/mod.rs b/src/llm-coding-tools-models-dev/src/catalog/mod.rs index af2a864e..dca3b904 100644 --- a/src/llm-coding-tools-models-dev/src/catalog/mod.rs +++ b/src/llm-coding-tools-models-dev/src/catalog/mod.rs @@ -24,7 +24,8 @@ impl ModelsDevCatalog { /// This is the primary entry point for most use cases. It will: /// 1. Check for an existing cache and extract its ETag /// 2. Send a conditional GET request with `If-None-Match` - /// 3. On `200 OK`: download, normalize, cache, and return fresh data + /// 3. On `200 OK`: download, map the API payload into catalog sources, + /// cache it, and return fresh data /// 4. On `304 Not Modified`: decode and return cached data /// 5. On network failure: fall back to cached data if available /// @@ -43,7 +44,7 @@ impl ModelsDevCatalog { /// - The cache path cannot be determined and no cache exists /// - An HTTP error occurs and no cache is available for fallback /// - The cache is corrupted and cannot be decoded - /// - Catalog construction from normalized data fails + /// - Catalog construction from mapped catalog sources fails /// /// # Examples /// diff --git a/src/llm-coding-tools-models-dev/src/error.rs b/src/llm-coding-tools-models-dev/src/error.rs index c4da4a20..ba0b2b21 100644 --- a/src/llm-coding-tools-models-dev/src/error.rs +++ b/src/llm-coding-tools-models-dev/src/error.rs @@ -22,6 +22,10 @@ pub enum CatalogError { #[error("HTTP error: {0}")] Reqwest(#[from] reqwest::Error), + /// A JSON parse error occurred while decoding models.dev API JSON. + #[error("JSON parse error: {0}")] + Json(#[from] serde_json::Error), + /// A zstd decompression error occurred. 
#[error("decompression error: {0}")] Zstd(String), diff --git a/src/llm-coding-tools-models-dev/src/lib.rs b/src/llm-coding-tools-models-dev/src/lib.rs index 0cdd10f5..1b4eedae 100644 --- a/src/llm-coding-tools-models-dev/src/lib.rs +++ b/src/llm-coding-tools-models-dev/src/lib.rs @@ -7,6 +7,8 @@ compile_error!("Features `async` and `blocking` are mutually exclusive."); #[cfg(not(any(feature = "async", feature = "blocking")))] compile_error!("Either an async runtime (e.g., `tokio`) or `blocking` feature must be enabled."); +#[allow(dead_code)] // Wired into catalog build/load slices +mod api; pub mod cache; pub mod catalog; pub mod error; From 388c4a7a26b09f90267654d005c16e22318c23c4 Mon Sep 17 00:00:00 2001 From: Sewer56 Date: Fri, 6 Mar 2026 20:38:42 +0000 Subject: [PATCH 06/22] Added: ETag-based catalog caching with conditional GET and schema updates Adds HTTP caching with ETags, plus API schema cleanup and modality handling fixes. Changes: - Added `CatalogCachePayload` schema in `cache/payload.rs` for efficient bitcode serialization - Added `cache_payload_from_api_json_bytes()` for JSON-to-payload mapping - Added `load_catalog_from_cache_file_data()` for 304 Not Modified fast path - Added atomic cache writes via temp-file-plus-rename in `fs` module - Added `sync.rs` with conditional GET using `If-None-Match` header - Wired `ModelsDevCatalog::load()` and `load_at()` to delegate to sync layer - Added crate-private test URL override and mock server for unit testing - Removed unused `id` field from `ApiProviderEntry` (not in models.dev API) - Changed PDF input modality to return empty instead of mapping to TEXT_INPUT - Changed empty modalities to return empty instead of defaulting to TEXT - Added `bitcode` dependency for fast binary serialization Benefits: - Keeps public API minimal while enabling efficient caching - Aligns schema with actual models.dev API response structure --- src/Cargo.lock | 45 +++- src/llm-coding-tools-core/Cargo.toml | 3 + 
.../src/models/catalog/public/provider_idx.rs | 2 +- src/llm-coding-tools-core/src/models/mod.rs | 2 +- .../src/models/provider_type.rs | 2 +- src/llm-coding-tools-models-dev/Cargo.toml | 1 + src/llm-coding-tools-models-dev/README.md | 2 +- .../src/api/catalog_sources.rs | 154 ++++++++----- .../src/api/schema.rs | 4 +- .../src/cache/format.rs | 58 ++++- .../src/cache/mod.rs | 3 +- .../src/cache/payload.rs | 205 +++++++++++++++++ .../src/catalog/load_cache.rs | 89 ++++++++ .../src/catalog/mod.rs | 83 ++++++- .../src/catalog/sync.rs | 210 ++++++++++++++++++ .../src/catalog/test_utils.rs | 77 +++++++ .../src/fs/blocking_impl.rs | 6 + .../src/fs/tokio_impl.rs | 6 + 18 files changed, 879 insertions(+), 73 deletions(-) create mode 100644 src/llm-coding-tools-models-dev/src/cache/payload.rs create mode 100644 src/llm-coding-tools-models-dev/src/catalog/load_cache.rs create mode 100644 src/llm-coding-tools-models-dev/src/catalog/sync.rs create mode 100644 src/llm-coding-tools-models-dev/src/catalog/test_utils.rs diff --git a/src/Cargo.lock b/src/Cargo.lock index 7dee5401..68757b47 100644 --- a/src/Cargo.lock +++ b/src/Cargo.lock @@ -1469,6 +1469,7 @@ name = "llm-coding-tools-core" version = "0.2.0" dependencies = [ "ahash", + "bitcode", "bitfields", "bitflags", "criterion", @@ -1508,6 +1509,7 @@ dependencies = [ "reqwest 0.13.1", "serde", "serde_json", + "serial_test", "tempfile", "thiserror 2.0.18", "tokio", @@ -2306,6 +2308,15 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "scc" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46e6f046b7fef48e2660c57ed794263155d713de679057f2d0c169bfc6e756cc" +dependencies = [ + "sdd", +] + [[package]] name = "schannel" version = "0.1.28" @@ -2346,6 +2357,12 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "sdd" +version = "3.0.10" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "490dcfcbfef26be6800d11870ff2df8774fa6e86d047e3e8c8a76b25655e41ca" + [[package]] name = "security-framework" version = "3.5.1" @@ -2674,6 +2691,32 @@ dependencies = [ "tracing", ] +[[package]] +name = "serial_test" +version = "3.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "911bd979bf1070a3f3aa7b691a3b3e9968f339ceeec89e08c280a8a22207a32f" +dependencies = [ + "futures-executor", + "futures-util", + "log", + "once_cell", + "parking_lot", + "scc", + "serial_test_derive", +] + +[[package]] +name = "serial_test_derive" +version = "3.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a7d91949b85b0d2fb687445e448b40d322b6b3e4af6b44a29b21d9a5f33e6d9" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "sha2" version = "0.10.9" @@ -2809,7 +2852,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "82a72c767771b47409d2345987fda8628641887d5466101319899796367354a0" dependencies = [ "fastrand", - "getrandom 0.3.4", + "getrandom 0.4.2", "once_cell", "rustix", "windows-sys 0.61.2", diff --git a/src/llm-coding-tools-core/Cargo.toml b/src/llm-coding-tools-core/Cargo.toml index 3238bfa4..b88ce699 100644 --- a/src/llm-coding-tools-core/Cargo.toml +++ b/src/llm-coding-tools-core/Cargo.toml @@ -40,6 +40,9 @@ serde_json = "1.0" # Zero overhead compile time bitflag generation bitflags = "2.11.0" +# Fast binary serialization for catalog cache types +bitcode = "0.6.9" + # Compile-time generated packed bitfield structs for model metadata bitfields = "1.0.2" diff --git a/src/llm-coding-tools-core/src/models/catalog/public/provider_idx.rs b/src/llm-coding-tools-core/src/models/catalog/public/provider_idx.rs index ba3951e7..d82121a9 100644 --- a/src/llm-coding-tools-core/src/models/catalog/public/provider_idx.rs +++ b/src/llm-coding-tools-core/src/models/catalog/public/provider_idx.rs @@ -4,7 +4,7 @@ 
/// /// Used to reference a specific provider in the catalog's /// packed provider entry tables and string tables. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, bitcode::Encode, bitcode::Decode)] #[repr(transparent)] pub struct ProviderIdx(pub(crate) u16); diff --git a/src/llm-coding-tools-core/src/models/mod.rs b/src/llm-coding-tools-core/src/models/mod.rs index 17137b62..495de325 100644 --- a/src/llm-coding-tools-core/src/models/mod.rs +++ b/src/llm-coding-tools-core/src/models/mod.rs @@ -5,6 +5,6 @@ mod provider_type; pub use catalog::{ LookupTableKind, Modality, Model, ModelCatalog, ModelCatalogBuildError, ModelInfo, Provider, - ProviderInfo, ProviderModelSource, ProviderSource, + ProviderIdx, ProviderInfo, ProviderModelSource, ProviderSource, }; pub use provider_type::ProviderType; diff --git a/src/llm-coding-tools-core/src/models/provider_type.rs b/src/llm-coding-tools-core/src/models/provider_type.rs index af7c5d24..1b18e262 100644 --- a/src/llm-coding-tools-core/src/models/provider_type.rs +++ b/src/llm-coding-tools-core/src/models/provider_type.rs @@ -1,5 +1,5 @@ /// Provider behavior profile used by model resolver logic. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default, bitcode::Encode, bitcode::Decode)] #[repr(u8)] pub enum ProviderType { /// Unknown or unsupported provider package. 
diff --git a/src/llm-coding-tools-models-dev/Cargo.toml b/src/llm-coding-tools-models-dev/Cargo.toml index 934588c7..26ef2550 100644 --- a/src/llm-coding-tools-models-dev/Cargo.toml +++ b/src/llm-coding-tools-models-dev/Cargo.toml @@ -61,3 +61,4 @@ tokio = { version = "1.49", features = ["fs", "io-util"], optional = true } [dev-dependencies] tokio = { version = "1.49", features = ["rt", "macros"] } tempfile = "3.26" +serial_test = "3" diff --git a/src/llm-coding-tools-models-dev/README.md b/src/llm-coding-tools-models-dev/README.md index a0818b1e..9a31b444 100644 --- a/src/llm-coding-tools-models-dev/README.md +++ b/src/llm-coding-tools-models-dev/README.md @@ -8,7 +8,7 @@ for a cached fallback and caching via ETag(s). If you run coding agents against many providers, you want to have fresh data. [models.dev][models.dev] is one such source of data. -This crate has the sufficient code to download from models.dev, distill down only +This crate has sufficient code to download from models.dev, distill down only the relevant data we need; and create a llm_coding_tools_core `ModelCatalog`. ## Usage diff --git a/src/llm-coding-tools-models-dev/src/api/catalog_sources.rs b/src/llm-coding-tools-models-dev/src/api/catalog_sources.rs index 3593c33f..2a3c1ee7 100644 --- a/src/llm-coding-tools-models-dev/src/api/catalog_sources.rs +++ b/src/llm-coding-tools-models-dev/src/api/catalog_sources.rs @@ -14,46 +14,69 @@ //! core during catalog build. 
use super::schema::{parse_api_json, ApiModelEntry, ApiModelLimit, ApiModelModalities}; -use crate::error::CatalogResult; +use crate::cache::payload::{ + catalog_from_cache_payload, CachedModelRow, CachedProviderRow, CatalogCachePayload, +}; +use crate::error::{CatalogError, CatalogResult}; use llm_coding_tools_core::models::{ - Modality, ModelCatalog, ModelInfo, ProviderInfo, ProviderModelSource, ProviderSource, - ProviderType, + Modality, ModelCatalog, ModelCatalogBuildError, ModelInfo, ProviderIdx, ProviderType, }; -/// Parses models.dev `api.json` bytes and builds a [`ModelCatalog`]. -pub(crate) fn catalog_from_api_json_bytes(json_bytes: &[u8]) -> CatalogResult { +pub(crate) fn cache_payload_from_api_json_bytes( + json_bytes: &[u8], +) -> CatalogResult { let provider_entries = parse_api_json(json_bytes)?; - let mut provider_model_count = 0usize; - for provider in provider_entries.values() { - provider_model_count = provider_model_count.saturating_add(provider.models.len()); - } - let mut provider_rows = Vec::with_capacity(provider_entries.len()); - let mut model_rows = Vec::with_capacity(provider_model_count); + let provider_count = provider_entries.len(); + if provider_count > (u16::MAX as usize) + 1 { + return Err(CatalogError::ModelCatalogBuild( + ModelCatalogBuildError::TooManyProviders { + count: provider_count, + max: (u16::MAX as usize) + 1, + }, + )); + } - for (provider_key, provider) in &provider_entries { - debug_assert!(provider.id.is_empty() || provider.id == *provider_key); + let mut providers = Vec::with_capacity(provider_count); + let mut models = Vec::with_capacity( + provider_entries + .values() + .map(|provider| provider.models.len()) + .sum(), + ); + for (provider_key, provider) in provider_entries { + let provider_idx = ProviderIdx::new(providers.len() as u16); let api_type = provider_type_from_models_dev_npm(provider.npm.as_deref()); - for (model_key, model_entry) in &provider.models { - model_rows.push(ProviderModelSource::new( - 
provider_key.as_str(), - model_key.as_str(), - model_info_from_entry(model_entry), - )); - } - provider_rows.push(ProviderSource::new( - provider_key.as_str(), - ProviderInfo { - api_url: provider.api.clone().unwrap_or_default(), - env_vars: provider.env.clone(), - api_type, - }, - )); + providers.push(CachedProviderRow { + provider_key, + api_url: provider.api.unwrap_or_default(), + env_vars: provider.env, + api_type, + }); + + for (model_key, model_entry) in provider.models { + let model = model_info_from_entry(&model_entry); + models.push(CachedModelRow { + provider_idx, + model_key, + modalities_bits: model.modalities.bits(), + max_input: model.max_input, + max_output: model.max_output, + temperature: model.temperature, + top_p: model.top_p, + }); + } } - Ok(ModelCatalog::build(&provider_rows, &model_rows)?) + Ok(CatalogCachePayload { providers, models }) +} + +/// Parses models.dev `api.json` bytes and builds a [`ModelCatalog`]. +pub(crate) fn catalog_from_api_json_bytes(json_bytes: &[u8]) -> CatalogResult { + let payload = cache_payload_from_api_json_bytes(json_bytes)?; + catalog_from_cache_payload(payload) } #[inline] @@ -87,11 +110,7 @@ fn model_modalities(raw: Option<&ApiModelModalities>) -> Modality { modalities |= output_modality_flag(label.as_str()); } - if modalities.is_empty() { - Modality::TEXT - } else { - modalities - } + modalities } #[inline] @@ -101,10 +120,7 @@ fn input_modality_flag(label: &str) -> Modality { "image" => Modality::IMAGE_INPUT, "audio" => Modality::AUDIO_INPUT, "video" => Modality::VIDEO_INPUT, - // `pdf` appears in models.dev input modalities. Core has no PDF bit yet, - // so map it to text-input capability as closest supported fallback. 
- "pdf" => Modality::TEXT_INPUT, - _ => Modality::empty(), + _ => Modality::empty(), // pdf not supported } } @@ -152,8 +168,11 @@ fn provider_type_from_models_dev_npm(npm_package: Option<&str>) -> ProviderType #[cfg(test)] mod tests { - use super::{catalog_from_api_json_bytes, provider_type_from_models_dev_npm}; - use llm_coding_tools_core::models::{Modality, ModelCatalog, ProviderType}; + use super::{ + cache_payload_from_api_json_bytes, catalog_from_api_json_bytes, + provider_type_from_models_dev_npm, + }; + use llm_coding_tools_core::models::{Modality, ModelCatalog, ProviderIdx, ProviderType}; fn catalog(json: &[u8]) -> ModelCatalog { catalog_from_api_json_bytes(json).expect("API payload should map") @@ -194,12 +213,46 @@ mod tests { ) } + #[test] + fn cache_payload_maps_single_provider_with_models() { + let api_json = br#" + { + "openai": { + "npm": "@ai-sdk/openai", + "api": "https://api.openai.com/v1", + "env": ["OPENAI_API_KEY"], + "models": { + "gpt-4": { + "modalities": { "input": ["text"], "output": ["text"] }, + "limit": { "context": 8192, "output": 4096 } + } + } + } + } + "#; + + let payload = cache_payload_from_api_json_bytes(api_json).expect("payload should build"); + assert_eq!(payload.providers.len(), 1); + assert_eq!(payload.models.len(), 1); + + assert_eq!(payload.providers[0].provider_key, "openai"); + assert_eq!( + payload.providers[0].api_type, + ProviderType::OpenAiCompletions + ); + + assert_eq!(payload.models[0].provider_idx, ProviderIdx::new(0)); + assert_eq!(payload.models[0].model_key, "gpt-4"); + assert_eq!(payload.models[0].modalities_bits, Modality::TEXT.bits()); + assert_eq!(payload.models[0].max_input, 8192); + assert_eq!(payload.models[0].max_output, 4096); + } + #[test] fn catalog_source_mapping_maps_provider_rows() { let api_json = br#" { "alpha": { - "id": "alpha", "npm": "@ai-sdk/openai-responses", "api": "https://alpha.example/v1", "env": ["ALPHA_KEY"], @@ -223,7 +276,6 @@ mod tests { let api_json = br#" { "alpha": { - "id": 
"alpha", "npm": null, "api": null, "env": [], @@ -249,7 +301,6 @@ mod tests { let api_json = br#" { "alpha": { - "id": "alpha", "npm": null, "api": null, "env": [], @@ -279,7 +330,6 @@ mod tests { let api_json = br#" { "alpha": { - "id": "alpha", "npm": null, "api": null, "env": [], @@ -310,11 +360,10 @@ mod tests { } #[test] - fn catalog_source_mapping_maps_pdf_input_to_text_input() { + fn catalog_source_mapping_maps_pdf_input_to_empty() { let api_json = br#" { "alpha": { - "id": "alpha", "npm": null, "api": null, "env": [], @@ -334,15 +383,14 @@ mod tests { let model = catalog .lookup_provider_model("alpha", "m1") .expect("alpha/m1 should exist"); - assert_eq!(model.modalities, Modality::TEXT_INPUT); + assert_eq!(model.modalities, Modality::empty()); } #[test] - fn catalog_source_mapping_falls_back_to_text_for_unknown_modalities() { + fn catalog_source_mapping_falls_back_to_empty_for_unknown_modalities() { let api_json = br#" { "alpha": { - "id": "alpha", "npm": null, "api": null, "env": [], @@ -362,7 +410,7 @@ mod tests { let model = catalog .lookup_provider_model("alpha", "m1") .expect("alpha/m1 should exist"); - assert_eq!(model.modalities, Modality::TEXT); + assert_eq!(model.modalities, Modality::empty()); } #[test] @@ -370,7 +418,6 @@ mod tests { let api_json = br#" { "alpha": { - "id": "alpha", "npm": "@ai-sdk/openai", "api": null, "env": [], @@ -385,7 +432,6 @@ mod tests { } }, "beta": { - "id": "beta", "npm": "@ai-sdk/anthropic", "api": null, "env": [], @@ -431,7 +477,6 @@ mod tests { let api_json_a = br#" { "beta": { - "id": "beta", "npm": "@ai-sdk/anthropic", "api": null, "env": [], @@ -440,7 +485,6 @@ mod tests { } }, "alpha": { - "id": "alpha", "npm": "@ai-sdk/openai", "api": null, "env": [], @@ -454,7 +498,6 @@ mod tests { let api_json_b = br#" { "alpha": { - "id": "alpha", "npm": "@ai-sdk/openai", "api": null, "env": [], @@ -463,7 +506,6 @@ mod tests { } }, "beta": { - "id": "beta", "npm": "@ai-sdk/anthropic", "api": null, "env": [], diff --git 
a/src/llm-coding-tools-models-dev/src/api/schema.rs b/src/llm-coding-tools-models-dev/src/api/schema.rs index add3092e..3e0f4c12 100644 --- a/src/llm-coding-tools-models-dev/src/api/schema.rs +++ b/src/llm-coding-tools-models-dev/src/api/schema.rs @@ -1,7 +1,7 @@ //! Minimal models.dev API schema used by this crate. //! //! We deserialize only fields needed for catalog-source mapping: -//! provider metadata (`id`, `npm`, `api`, `env`) and model token limits +//! provider metadata (`npm`, `api`, `env`) and model token limits //! (`limit.context`, `limit.input`, `limit.output`) plus directional modalities //! (`modalities.input[]`, `modalities.output[]`). //! @@ -46,7 +46,6 @@ use std::collections::HashMap; #[derive(Debug, Deserialize)] pub(crate) struct ApiProviderEntry { - pub(crate) id: String, #[serde(default)] pub(crate) npm: Option, #[serde(default)] @@ -104,7 +103,6 @@ mod tests { let providers = parse_api_json(api_json).expect("API payload should parse"); let provider = providers.get("alpha").expect("provider should exist"); - assert_eq!(provider.id, "alpha"); assert_eq!(provider.npm.as_deref(), Some("@ai-sdk/openai")); assert_eq!(provider.env.as_slice(), ["ALPHA_KEY"]); diff --git a/src/llm-coding-tools-models-dev/src/cache/format.rs b/src/llm-coding-tools-models-dev/src/cache/format.rs index 4f910404..fbfad751 100644 --- a/src/llm-coding-tools-models-dev/src/cache/format.rs +++ b/src/llm-coding-tools-models-dev/src/cache/format.rs @@ -23,7 +23,7 @@ //! ## Safety //! //! Not a 'safe' parser. We assume the file was created by the user. -//! There's no validation for erroneous data; e.g. malociously crafted headers. +//! There's no validation for erroneous data; e.g. maliciously crafted headers. //! Only validation for accidental corruption/truncation (e.g., from partial writes) is included. 
use crate::{ @@ -33,7 +33,7 @@ use crate::{ use endian_writer::{EndianReader, EndianWriter, HasSize, LittleEndianReader, LittleEndianWriter}; use endian_writer_derive::EndianWritable; use std::mem::size_of; -use std::path::Path; +use std::path::{Path, PathBuf}; use std::ptr::copy_nonoverlapping; /// Fixed v1 prelude, encoded little-endian. @@ -79,6 +79,13 @@ pub(crate) struct CacheFileData { file_bytes: Box<[u8]>, } +/// Returns a temporary path for atomic cache writes. +fn temp_cache_path(path: &Path) -> PathBuf { + let mut temp = path.as_os_str().to_os_string(); + temp.push(".tmp"); + PathBuf::from(temp) +} + impl CacheFileData { /// Returns the optional ETag as a borrowed byte slice. #[inline] @@ -134,7 +141,7 @@ pub(crate) async fn read_cache_file(path: &Path) -> CatalogResult let prelude = decode_prelude(&file_bytes[..CACHE_HEADER_LEN]); let etag_len = prelude.etag_len as usize; let payload_len_compressed = prelude.payload_len_compressed as usize; - let expected_total = CACHE_HEADER_LEN + etag_len + payload_len_compressed; + let expected_total = CACHE_HEADER_LEN + etag_len + payload_len_compressed; // unlikely to overflow. file is trusted. if file_bytes.len() != expected_total { return Err(CatalogError::CacheFormat( @@ -205,7 +212,9 @@ pub(crate) async fn write_cache_file( } let file_bytes = fs::assume_init_u8_slice(uninit); - fs::write(path, &file_bytes).await?; + let temp_path = temp_cache_path(path); + fs::write(&temp_path, &file_bytes).await?; + fs::rename(&temp_path, path).await?; Ok(()) } @@ -229,7 +238,7 @@ fn encode_prelude(prelude: CachePreludeV1) -> [u8; CACHE_HEADER_LEN] { /// Decodes prelude from little-endian bytes. #[inline] fn decode_prelude(bytes: &[u8]) -> CachePreludeV1 { - // SAFETY: Caller guarantees `bytes` is at least `CACHE_PRELUDE_LEN`. + // SAFETY: Caller guarantees `bytes` is at least `CACHE_HEADER_LEN`. 
unsafe { let mut reader = LittleEndianReader::new(bytes.as_ptr()); reader.read() @@ -381,4 +390,43 @@ mod tests { .expect_err("payload length mismatch should fail"); assert!(matches!(error, CatalogError::CacheFormat(_))); } + + // Verifies atomic replacement replaces existing cache file content. + #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))] + async fn write_replaces_existing_cache_atomically() { + let temp = TempDir::new().expect("tempdir"); + let path = temp.path().join("atomic-test.cache"); + + // Write first payload + let first_input = CacheWriteInput { + etag: Some(b"etag-1"), + payload_compressed: b"first-payload", + payload_len_decompressed: 100, + }; + write_cache_file(&path, &first_input) + .await + .expect("write first"); + + let first_data = read_cache_file(&path).await.expect("read first"); + assert_eq!(first_data.etag_bytes(), Some(b"etag-1".as_slice())); + assert_eq!(first_data.payload_compressed(), b"first-payload"); + + // Write second payload (atomic replacement) + let second_input = CacheWriteInput { + etag: Some(b"etag-2"), + payload_compressed: b"second-payload-different", + payload_len_decompressed: 200, + }; + write_cache_file(&path, &second_input) + .await + .expect("write second"); + + let second_data = read_cache_file(&path).await.expect("read second"); + assert_eq!(second_data.etag_bytes(), Some(b"etag-2".as_slice())); + assert_eq!( + second_data.payload_compressed(), + b"second-payload-different" + ); + assert_eq!(second_data.payload_len_decompressed(), 200); + } } diff --git a/src/llm-coding-tools-models-dev/src/cache/mod.rs b/src/llm-coding-tools-models-dev/src/cache/mod.rs index 966419c9..695f7660 100644 --- a/src/llm-coding-tools-models-dev/src/cache/mod.rs +++ b/src/llm-coding-tools-models-dev/src/cache/mod.rs @@ -15,6 +15,7 @@ #[allow(dead_code)] // Wired into the load/sync path down the road pub(crate) mod format; mod path; +pub(crate) mod payload; pub use crate::error::CatalogResult; -pub use 
path::shared_cache_path; +pub use path::{shared_cache_path, CACHE_PATH_ENV_VAR}; diff --git a/src/llm-coding-tools-models-dev/src/cache/payload.rs b/src/llm-coding-tools-models-dev/src/cache/payload.rs new file mode 100644 index 00000000..bf5bf25a --- /dev/null +++ b/src/llm-coding-tools-models-dev/src/cache/payload.rs @@ -0,0 +1,205 @@ +use crate::error::{CatalogError, CatalogResult}; +use llm_coding_tools_core::models::{ + Modality, ModelCatalog, ModelInfo, ProviderIdx, ProviderInfo, ProviderModelSource, + ProviderSource, ProviderType, +}; + +#[derive(Debug, Clone, PartialEq, bitcode::Encode, bitcode::Decode)] +pub(crate) struct CatalogCachePayload { + pub(crate) providers: Vec, + pub(crate) models: Vec, +} + +#[derive(Debug, Clone, PartialEq, Eq, bitcode::Encode, bitcode::Decode)] +pub(crate) struct CachedProviderRow { + pub(crate) provider_key: String, + pub(crate) api_url: String, + pub(crate) env_vars: Vec, + pub(crate) api_type: ProviderType, +} + +#[derive(Debug, Clone, PartialEq, bitcode::Encode, bitcode::Decode)] +pub(crate) struct CachedModelRow { + pub(crate) provider_idx: ProviderIdx, + pub(crate) model_key: String, + pub(crate) modalities_bits: u8, + pub(crate) max_input: u32, + pub(crate) max_output: u32, + pub(crate) temperature: Option, + pub(crate) top_p: Option, +} + +pub(crate) fn encode_cache_payload(payload: &CatalogCachePayload) -> Vec { + bitcode::encode(payload) +} + +pub(crate) fn decode_cache_payload(bytes: &[u8]) -> CatalogResult { + bitcode::decode(bytes).map_err(|error| CatalogError::BitcodeDecode(error.to_string())) +} + +pub(crate) fn catalog_from_cache_payload( + payload: CatalogCachePayload, +) -> CatalogResult { + let CatalogCachePayload { providers, models } = payload; + + let mut provider_sources = Vec::with_capacity(providers.len()); + for row in providers { + provider_sources.push(ProviderSource { + provider_key: row.provider_key, + provider: ProviderInfo { + api_url: row.api_url, + env_vars: row.env_vars, + api_type: 
row.api_type, + }, + }); + } + + let mut model_sources = Vec::with_capacity(models.len()); + for row in &models { + let provider_source = + provider_sources + .get(row.provider_idx.as_usize()) + .ok_or(CatalogError::CacheFormat( + "provider index out of range in cache payload", + ))?; + + model_sources.push(ProviderModelSource { + provider_key: provider_source.provider_key.as_str(), + model_key: row.model_key.as_str(), + model: ModelInfo { + modalities: Modality::from_bits_retain(row.modalities_bits), + max_input: row.max_input, + max_output: row.max_output, + temperature: row.temperature, + top_p: row.top_p, + }, + }); + } + + Ok(ModelCatalog::build(&provider_sources, &model_sources)?) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn sample_payload() -> CatalogCachePayload { + CatalogCachePayload { + providers: vec![CachedProviderRow { + provider_key: "openai".to_string(), + api_url: "https://api.openai.com/v1".to_string(), + env_vars: vec!["OPENAI_API_KEY".to_string()], + api_type: ProviderType::OpenAiCompletions, + }], + models: vec![CachedModelRow { + provider_idx: ProviderIdx::new(0), + model_key: "gpt-4".to_string(), + modalities_bits: Modality::TEXT.bits(), + max_input: 8192, + max_output: 4096, + temperature: Some(0.7), + top_p: Some(0.9), + }], + } + } + + #[test] + fn payload_round_trip() { + let original = sample_payload(); + let encoded = encode_cache_payload(&original); + let decoded = decode_cache_payload(&encoded).expect("decode should succeed"); + assert_eq!(original, decoded); + } + + #[test] + fn catalog_from_payload_reconstructs_provider() { + let payload = sample_payload(); + let catalog = catalog_from_cache_payload(payload).expect("catalog build should succeed"); + + let provider = catalog + .lookup_provider("openai") + .expect("provider should exist"); + assert_eq!(provider.api_url, "https://api.openai.com/v1"); + assert_eq!(provider.api_type, ProviderType::OpenAiCompletions); + } + + #[test] + fn 
catalog_from_payload_reconstructs_model() { + let payload = sample_payload(); + let catalog = catalog_from_cache_payload(payload).expect("catalog build should succeed"); + + let model = catalog + .lookup_provider_model("openai", "gpt-4") + .expect("model should exist"); + assert_eq!(model.max_input, 8192); + assert_eq!(model.max_output, 4096); + assert_eq!(model.modalities, Modality::TEXT); + } + + #[test] + fn catalog_from_payload_rejects_out_of_range_provider_idx() { + let payload = CatalogCachePayload { + providers: vec![CachedProviderRow { + provider_key: "test".to_string(), + api_url: "".to_string(), + env_vars: vec![], + api_type: ProviderType::Unknown, + }], + models: vec![CachedModelRow { + provider_idx: ProviderIdx::new(999), + model_key: "model".to_string(), + modalities_bits: Modality::TEXT.bits(), + max_input: 0, + max_output: 0, + temperature: None, + top_p: None, + }], + }; + + let result = catalog_from_cache_payload(payload); + assert!(matches!(result, Err(CatalogError::CacheFormat(_)))); + } + + #[test] + fn all_known_provider_types_round_trip() { + let types = [ + ProviderType::Unknown, + ProviderType::OpenAiCompletions, + ProviderType::OpenAiResponses, + ProviderType::Anthropic, + ProviderType::Google, + ProviderType::Groq, + ProviderType::Mistral, + ProviderType::Ollama, + ProviderType::Bedrock, + ProviderType::Azure, + ProviderType::OpenRouter, + ProviderType::HuggingFace, + ProviderType::Cohere, + ProviderType::ChatGptOAuth, + ProviderType::ClaudeCodeOAuth, + ProviderType::Antigravity, + ]; + + for provider_type in types { + let payload = CatalogCachePayload { + providers: vec![CachedProviderRow { + provider_key: "test".to_string(), + api_url: "".to_string(), + env_vars: vec![], + api_type: provider_type, + }], + models: vec![], + }; + + let catalog = catalog_from_cache_payload(payload).expect("should succeed"); + let provider = catalog + .lookup_provider("test") + .expect("provider should exist"); + assert_eq!( + provider.api_type, 
provider_type, + "provider type should round-trip correctly" + ); + } + } +} diff --git a/src/llm-coding-tools-models-dev/src/catalog/load_cache.rs b/src/llm-coding-tools-models-dev/src/catalog/load_cache.rs new file mode 100644 index 00000000..bc4f79c3 --- /dev/null +++ b/src/llm-coding-tools-models-dev/src/catalog/load_cache.rs @@ -0,0 +1,89 @@ +use crate::cache::format::CacheFileData; +use crate::cache::payload::{catalog_from_cache_payload, decode_cache_payload}; +use crate::catalog::load_result::{CatalogLoadResult, CatalogLoadSource}; +use crate::error::{CatalogError, CatalogResult}; + +pub(crate) fn load_catalog_from_cache_file_data( + cache_file: &CacheFileData, + source: CatalogLoadSource, +) -> CatalogResult { + let expected_len = cache_file.payload_len_decompressed() as usize; + let decoded = zstd::bulk::decompress(cache_file.payload_compressed(), expected_len) + .map_err(|error| CatalogError::Zstd(error.to_string()))?; + if decoded.len() != expected_len { + return Err(CatalogError::CacheFormat( + "cache payload length mismatch after decompression", + )); + } + + let payload = decode_cache_payload(&decoded)?; + let catalog = catalog_from_cache_payload(payload)?; + Ok(CatalogLoadResult { catalog, source }) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::cache::format::{write_cache_file, CacheWriteInput}; + use crate::cache::payload::{ + encode_cache_payload, CachedModelRow, CachedProviderRow, CatalogCachePayload, + }; + use llm_coding_tools_core::models::{Modality, ProviderIdx, ProviderType}; + use tempfile::TempDir; + + fn sample_payload() -> CatalogCachePayload { + CatalogCachePayload { + providers: vec![CachedProviderRow { + provider_key: "test".to_string(), + api_url: "https://test.example".to_string(), + env_vars: vec![], + api_type: ProviderType::OpenAiCompletions, + }], + models: vec![CachedModelRow { + provider_idx: ProviderIdx::new(0), + model_key: "model1".to_string(), + modalities_bits: Modality::TEXT.bits(), + max_input: 4096, + 
max_output: 2048, + temperature: None, + top_p: None, + }], + } + } + + #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))] + async fn round_trip_through_cache_file() { + let temp = TempDir::new().expect("tempdir"); + let path = temp.path().join("test.cache"); + + let payload = sample_payload(); + let encoded = encode_cache_payload(&payload); + let compressed = + zstd::bulk::compress(&encoded, zstd::DEFAULT_COMPRESSION_LEVEL).expect("compress"); + + write_cache_file( + &path, + &CacheWriteInput { + etag: Some(b"test-etag"), + payload_compressed: &compressed, + payload_len_decompressed: encoded.len(), + }, + ) + .await + .expect("write cache"); + + let cache_file = crate::cache::format::read_cache_file(&path) + .await + .expect("read cache"); + let result = + load_catalog_from_cache_file_data(&cache_file, CatalogLoadSource::NotModifiedCache) + .expect("load from cache"); + + assert_eq!(result.source, CatalogLoadSource::NotModifiedCache); + let provider = result + .catalog + .lookup_provider("test") + .expect("provider should exist"); + assert_eq!(provider.api_type, ProviderType::OpenAiCompletions); + } +} diff --git a/src/llm-coding-tools-models-dev/src/catalog/mod.rs b/src/llm-coding-tools-models-dev/src/catalog/mod.rs index dca3b904..1764915d 100644 --- a/src/llm-coding-tools-models-dev/src/catalog/mod.rs +++ b/src/llm-coding-tools-models-dev/src/catalog/mod.rs @@ -5,10 +5,16 @@ //! - Reuse cache on `304 Not Modified` //! 
- Fall back to cached data if the network path fails +mod load_cache; mod load_result; +mod sync; + +#[cfg(test)] +mod test_utils; pub use load_result::{CatalogLoadResult, CatalogLoadSource}; +use crate::cache::shared_cache_path; use crate::error::CatalogError; use std::path::Path; @@ -74,7 +80,8 @@ impl ModelsDevCatalog { /// ``` #[maybe_async::maybe_async] pub async fn load() -> Result { - todo!("ModelsDevCatalog::load() not yet implemented") + let path = shared_cache_path()?; + Self::load_at(path).await } /// Loads the catalog from a specific cache file path. @@ -132,7 +139,77 @@ impl ModelsDevCatalog { /// ``` #[maybe_async::maybe_async] pub async fn load_at(path: impl AsRef) -> Result { - let _path = path.as_ref(); - todo!("ModelsDevCatalog::load_at() not yet implemented") + sync::load_catalog_at_path(path.as_ref()).await + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::cache::CACHE_PATH_ENV_VAR; + use llm_coding_tools_core::models::ProviderType; + use tempfile::TempDir; + + /// Guard that restores environment variables on drop + struct EnvGuard { + cache_path_var: Option, + } + + impl EnvGuard { + fn new(value: Option<&str>) -> Self { + let cache_path_var = std::env::var(CACHE_PATH_ENV_VAR).ok(); + match value { + Some(v) => std::env::set_var(CACHE_PATH_ENV_VAR, v), + None => std::env::remove_var(CACHE_PATH_ENV_VAR), + } + Self { cache_path_var } + } + } + + impl Drop for EnvGuard { + fn drop(&mut self) { + // Clear test URL override + super::sync::set_test_models_dev_api_url(None); + + // Restore or remove cache path env var + match &self.cache_path_var { + Some(v) => std::env::set_var(CACHE_PATH_ENV_VAR, v), + None => std::env::remove_var(CACHE_PATH_ENV_VAR), + } + } + } + + use super::test_utils::{sample_api_json, start_mock_server, MockResponse}; + + #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))] + #[serial_test::serial] + async fn facade_load_uses_shared_cache_path() { + let temp = 
TempDir::new().expect("tempdir"); + let cache_path = temp.path().join("facade-test.cache"); + let _guard = EnvGuard::new(Some(cache_path.to_str().unwrap())); + + // Start mock server and set URL override + let body = String::from_utf8_lossy(sample_api_json()).to_string(); + let (_handle, url) = start_mock_server(MockResponse::Ok { + etag: "\"facade-test-etag\"", + body, + }); + super::sync::set_test_models_dev_api_url(Some(url)); + + // Call public facade + let result = ModelsDevCatalog::load().await.expect("load should succeed"); + + assert_eq!(result.source, CatalogLoadSource::Downloaded); + let provider = result + .catalog + .lookup_provider("openai") + .expect("openai provider should exist"); + assert_eq!(provider.api_type, ProviderType::OpenAiCompletions); + + // Verify cache was written + assert!( + cache_path.exists(), + "cache file should exist at shared path" + ); } } diff --git a/src/llm-coding-tools-models-dev/src/catalog/sync.rs b/src/llm-coding-tools-models-dev/src/catalog/sync.rs new file mode 100644 index 00000000..8d2b7cc5 --- /dev/null +++ b/src/llm-coding-tools-models-dev/src/catalog/sync.rs @@ -0,0 +1,210 @@ +use crate::api::catalog_sources::cache_payload_from_api_json_bytes; +use crate::cache::format::{read_cache_file, write_cache_file, CacheWriteInput}; +use crate::cache::payload::{catalog_from_cache_payload, encode_cache_payload}; +use crate::catalog::load_cache::load_catalog_from_cache_file_data; +use crate::catalog::load_result::{CatalogLoadResult, CatalogLoadSource}; +use crate::error::{CatalogError, CatalogResult}; +use reqwest::header::{ETAG, IF_NONE_MATCH}; +use reqwest::StatusCode; +use std::borrow::Cow; +use std::io::ErrorKind; +use std::path::Path; + +const MODELS_DEV_API_URL: &str = "https://models.dev/api.json"; + +#[cfg(test)] +static TEST_MODELS_DEV_API_URL: std::sync::Mutex> = std::sync::Mutex::new(None); + +#[cfg(test)] +pub(crate) fn set_test_models_dev_api_url(url: Option) { + *TEST_MODELS_DEV_API_URL.lock().unwrap() = url; +} 
+ +fn models_dev_api_url() -> Cow<'static, str> { + #[cfg(test)] + if let Some(url) = TEST_MODELS_DEV_API_URL.lock().unwrap().clone() { + return Cow::Owned(url); + } + + Cow::Borrowed(MODELS_DEV_API_URL) +} + +#[maybe_async::maybe_async] +pub(crate) async fn load_catalog_at_path(path: &Path) -> CatalogResult { + let url = models_dev_api_url(); + load_catalog_from_url(path, url.as_ref()).await +} + +#[maybe_async::maybe_async] +pub(crate) async fn load_catalog_from_url( + path: &Path, + url: &str, +) -> CatalogResult { + let mut cache_file = None; + let mut cache_error = None; + match read_cache_file(path).await { + Ok(file) => cache_file = Some(file), + Err(CatalogError::Io(error)) if error.kind() == ErrorKind::NotFound => {} + Err(error) => cache_error = Some(error), + } + + #[cfg(feature = "tokio")] + let client = reqwest::Client::new(); + #[cfg(feature = "blocking")] + let client = reqwest::blocking::Client::new(); + + let mut request = client.get(url); + if let Some(etag) = cache_file.as_ref().and_then(|file| file.etag_bytes()) { + request = request.header(IF_NONE_MATCH, etag); + } + + let response = request.send().await?; + match response.status() { + StatusCode::OK => { + let response_etag: Option> = response + .headers() + .get(ETAG) + .map(|value| value.as_bytes().to_vec()); + let body = response.bytes().await?; + let payload = cache_payload_from_api_json_bytes(body.as_ref())?; + let payload_encoded = encode_cache_payload(&payload); + let catalog = catalog_from_cache_payload(payload)?; + let payload_compressed = + zstd::bulk::compress(payload_encoded.as_slice(), zstd::DEFAULT_COMPRESSION_LEVEL) + .map_err(|error| CatalogError::Zstd(error.to_string()))?; + + write_cache_file( + path, + &CacheWriteInput { + etag: response_etag.as_deref(), + payload_compressed: &payload_compressed, + payload_len_decompressed: payload_encoded.len(), + }, + ) + .await?; + + Ok(CatalogLoadResult { + catalog, + source: CatalogLoadSource::Downloaded, + }) + } + 
StatusCode::NOT_MODIFIED => { + if let Some(cache_file) = cache_file.as_ref() { + load_catalog_from_cache_file_data(cache_file, CatalogLoadSource::NotModifiedCache) + } else if let Some(error) = cache_error { + Err(error) + } else { + Err(CatalogError::CacheFormat( + "received 304 but no cached payload is available", + )) + } + } + status => Err(CatalogError::Configuration(format!( + "unexpected catalog sync status: {status}", + ))), + } +} + +#[cfg(test)] +mod tests { + use super::super::test_utils::{sample_api_json, start_mock_server, MockResponse}; + use super::*; + use crate::cache::format::CacheWriteInput; + use crate::cache::payload::{ + encode_cache_payload, CachedModelRow, CachedProviderRow, CatalogCachePayload, + }; + use llm_coding_tools_core::models::{Modality, ProviderIdx, ProviderType}; + use tempfile::TempDir; + + #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))] + async fn sync_returns_downloaded_on_200() { + let temp = TempDir::new().expect("tempdir"); + let cache_path = temp.path().join("test.cache"); + + // Start mock server returning 200 OK with fresh catalog data + let body = String::from_utf8_lossy(sample_api_json()).to_string(); + let (_handle, url) = start_mock_server(MockResponse::Ok { + etag: "\"test-etag-123\"", + body, + }); + + let result = load_catalog_from_url(&cache_path, &url) + .await + .expect("sync should succeed"); + + // Verify source is Downloaded (not from cache) + assert_eq!(result.source, CatalogLoadSource::Downloaded); + let provider = result + .catalog + .lookup_provider("openai") + .expect("openai provider should exist"); + assert_eq!(provider.api_type, ProviderType::OpenAiCompletions); + assert_eq!(provider.api_url, "https://api.openai.com/v1"); + + // Verify cache file was written with the ETag from response + let cache_file = read_cache_file(&cache_path) + .await + .expect("cache should exist"); + assert_eq!( + cache_file.etag_bytes(), + Some(b"\"test-etag-123\"".as_slice()) + ); + } + + 
#[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))] + async fn sync_returns_cached_on_304_with_if_none_match() { + let temp = TempDir::new().expect("tempdir"); + let cache_path = temp.path().join("test.cache"); + + // Pre-seed cache with a valid catalog payload + let payload = CatalogCachePayload { + providers: vec![CachedProviderRow { + provider_key: "openai".to_string(), + api_url: "https://api.openai.com/v1".to_string(), + env_vars: vec!["OPENAI_API_KEY".to_string()], + api_type: ProviderType::OpenAiCompletions, + }], + models: vec![CachedModelRow { + provider_idx: ProviderIdx::new(0), + model_key: "gpt-4".to_string(), + modalities_bits: Modality::TEXT.bits(), + max_input: 8192, + max_output: 4096, + temperature: None, + top_p: None, + }], + }; + let encoded = encode_cache_payload(&payload); + let compressed = + zstd::bulk::compress(&encoded, zstd::DEFAULT_COMPRESSION_LEVEL).expect("compress"); + + // Write the seeded cache file with ETag + crate::cache::format::write_cache_file( + &cache_path, + &CacheWriteInput { + etag: Some(b"\"cached-etag-456\""), + payload_compressed: &compressed, + payload_len_decompressed: encoded.len(), + }, + ) + .await + .expect("seed cache"); + + // Server returns 304 Not Modified (ETag matches If-None-Match) + let (_handle, url) = start_mock_server(MockResponse::NotModified { + etag: "\"cached-etag-456\"", + }); + + let result = load_catalog_from_url(&cache_path, &url) + .await + .expect("sync should succeed"); + + // Verify source is NotModifiedCache (loaded from local file) + assert_eq!(result.source, CatalogLoadSource::NotModifiedCache); + let provider = result + .catalog + .lookup_provider("openai") + .expect("openai provider should exist"); + assert_eq!(provider.api_type, ProviderType::OpenAiCompletions); + } +} diff --git a/src/llm-coding-tools-models-dev/src/catalog/test_utils.rs b/src/llm-coding-tools-models-dev/src/catalog/test_utils.rs new file mode 100644 index 00000000..c6bec11b --- 
/dev/null +++ b/src/llm-coding-tools-models-dev/src/catalog/test_utils.rs @@ -0,0 +1,77 @@ +use std::io::{BufRead, Write}; + +pub enum MockResponse { + Ok { etag: &'static str, body: String }, + NotModified { etag: &'static str }, +} + +pub fn sample_api_json() -> &'static [u8] { + br#" + { + "openai": { + "id": "openai", + "npm": "@ai-sdk/openai", + "api": "https://api.openai.com/v1", + "env": ["OPENAI_API_KEY"], + "models": { + "gpt-4": { + "modalities": { + "input": ["text"], + "output": ["text"] + }, + "limit": { + "context": 8192, + "input": 8192, + "output": 4096 + } + } + } + } + } + "# +} + +pub fn start_mock_server(response: MockResponse) -> (std::thread::JoinHandle<()>, String) { + let listener = std::net::TcpListener::bind("127.0.0.1:0").expect("bind"); + let port = listener.local_addr().unwrap().port(); + let url = format!("http://127.0.0.1:{}/api.json", port); + + let handle = std::thread::spawn(move || { + let (mut stream, _) = listener.accept().expect("accept"); + let mut reader = std::io::BufReader::new(&stream); + let mut request = String::new(); + + loop { + let mut line = String::new(); + if reader.read_line(&mut line).expect("read line") == 0 { + break; + } + if line == "\r\n" || line.is_empty() { + break; + } + request.push_str(&line); + } + + let _has_if_none_match = request.contains("If-None-Match"); + + match response { + MockResponse::Ok { etag, body } => { + let response = format!( + "HTTP/1.1 200 OK\r\nContent-Type: application/json\r\nETag: {}\r\nContent-Length: {}\r\n\r\n{}", + etag, + body.len(), + body + ); + stream.write_all(response.as_bytes()).expect("write"); + stream.flush().expect("flush"); + } + MockResponse::NotModified { etag } => { + let response = format!("HTTP/1.1 304 Not Modified\r\nETag: {}\r\n\r\n", etag); + stream.write_all(response.as_bytes()).expect("write"); + stream.flush().expect("flush"); + } + } + }); + + (handle, url) +} diff --git a/src/llm-coding-tools-models-dev/src/fs/blocking_impl.rs 
b/src/llm-coding-tools-models-dev/src/fs/blocking_impl.rs index 5bfbc808..d35ffbec 100644 --- a/src/llm-coding-tools-models-dev/src/fs/blocking_impl.rs +++ b/src/llm-coding-tools-models-dev/src/fs/blocking_impl.rs @@ -32,3 +32,9 @@ pub(crate) fn write(path: impl AsRef, bytes: &[u8]) -> std::io::Result<()> pub(crate) fn create_dir_all(path: impl AsRef) -> std::io::Result<()> { std::fs::create_dir_all(path) } + +/// Renames a file, replacing the destination if it exists. +#[inline] +pub(crate) fn rename(from: impl AsRef, to: impl AsRef) -> std::io::Result<()> { + std::fs::rename(from, to) +} diff --git a/src/llm-coding-tools-models-dev/src/fs/tokio_impl.rs b/src/llm-coding-tools-models-dev/src/fs/tokio_impl.rs index 53474ce1..830d29e9 100644 --- a/src/llm-coding-tools-models-dev/src/fs/tokio_impl.rs +++ b/src/llm-coding-tools-models-dev/src/fs/tokio_impl.rs @@ -33,3 +33,9 @@ pub(crate) async fn write(path: impl AsRef, bytes: &[u8]) -> std::io::Resu pub(crate) async fn create_dir_all(path: impl AsRef) -> std::io::Result<()> { tokio::fs::create_dir_all(path).await } + +/// Renames a file, replacing the destination if it exists. +#[inline] +pub(crate) async fn rename(from: impl AsRef, to: impl AsRef) -> std::io::Result<()> { + tokio::fs::rename(from, to).await +} From 2929b8ee193b937226dedc6943c21fc0556162f6 Mon Sep 17 00:00:00 2001 From: Sewer56 Date: Sat, 7 Mar 2026 00:01:17 +0000 Subject: [PATCH 07/22] Changed: Replace ProviderModelSource.provider_key with ProviderIdx Replace string-based provider lookup with index-based lookup in the catalog builder hot path, moving the lookup cost out of the builder. 
Changes: - ProviderModelSource now stores provider_idx: ProviderIdx instead of provider_key: &'a str - Builder validates provider existence via index bounds check instead of HashMap lookup - Duplicate detection uses (ProviderIdx, &str) instead of (&str, &str) - Renamed ProviderKeyNotFoundForModel error to ProviderIdxOutOfRangeForModel - Updated benchmark to pass ProviderIdx directly - Updated models-dev cache payload to use stored provider_idx directly Benefits: - Eliminates per-model HashMap lookup in builder hot path - ~16-20% faster catalog construction (measured via benchmark) - Simpler validation logic using slice bounds checking --- .../benches/model_catalog_builder.rs | 13 +- .../src/models/catalog/internal/builder.rs | 121 +++++++++--------- .../src/models/catalog/mod.rs | 17 ++- .../models/catalog/public/builder_types.rs | 42 +++--- .../src/cache/payload.rs | 18 +-- 5 files changed, 116 insertions(+), 95 deletions(-) diff --git a/src/llm-coding-tools-core/benches/model_catalog_builder.rs b/src/llm-coding-tools-core/benches/model_catalog_builder.rs index b9fcdd7d..d772ec34 100644 --- a/src/llm-coding-tools-core/benches/model_catalog_builder.rs +++ b/src/llm-coding-tools-core/benches/model_catalog_builder.rs @@ -3,12 +3,12 @@ use core::hint::black_box; use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput}; use llm_coding_tools_core::models::{ - Modality, ModelCatalog, ModelInfo, ProviderInfo, ProviderModelSource, ProviderSource, - ProviderType, + Modality, ModelCatalog, ModelInfo, ProviderIdx, ProviderInfo, ProviderModelSource, + ProviderSource, ProviderType, }; struct ProviderModelSpec { - provider_idx: usize, + provider_idx: ProviderIdx, model_key: String, model: ModelInfo, } @@ -22,11 +22,8 @@ impl Dataset { fn provider_model_sources(&self) -> Vec> { let mut sources = Vec::with_capacity(self.provider_models.len()); for provider_model in &self.provider_models { - let provider_key = self.providers[provider_model.provider_idx] 
- .provider_key - .as_str(); sources.push(ProviderModelSource::new( - provider_key, + provider_model.provider_idx, provider_model.model_key.as_str(), provider_model.model, )); @@ -57,7 +54,7 @@ fn make_dataset(provider_count: usize, model_count: usize) -> Dataset { let mut provider_models = Vec::with_capacity(model_count); let unique_cfg_count = (model_count / 5).max(1); for i in 0..model_count { - let provider_idx = i % provider_count; + let provider_idx = ProviderIdx::new((i % provider_count) as u16); let cfg = i % unique_cfg_count; let temperature = if (cfg & 1) == 0 { Some(1.0 + ((cfg % 5000) as f32 * 0.001)) diff --git a/src/llm-coding-tools-core/src/models/catalog/internal/builder.rs b/src/llm-coding-tools-core/src/models/catalog/internal/builder.rs index 02a412d4..d00b333c 100644 --- a/src/llm-coding-tools-core/src/models/catalog/internal/builder.rs +++ b/src/llm-coding-tools-core/src/models/catalog/internal/builder.rs @@ -93,59 +93,50 @@ fn populate_tables_once( provider_models: &[ProviderModelSource<'_>], ) -> Result<(), ModelCatalogBuildError> { let mut env_start: u16 = 0; - let mut provider_idx_by_key: AHashMap<&str, ProviderIdx> = - AHashMap::with_capacity(providers.len()); - let mut seen_provider_models: AHashSet<(&str, &str)> = + let mut seen_provider_keys: AHashSet<&str> = AHashSet::with_capacity(providers.len()); + let mut seen_provider_models: AHashSet<(ProviderIdx, &str)> = AHashSet::with_capacity(provider_models.len()); for provider in providers { let provider_info = &provider.provider; let env_count = provider_info.env_vars.len() as u8; - match provider_idx_by_key.entry(provider.provider_key.as_str()) { - MapEntry::Occupied(_) => { - return Err(ModelCatalogBuildError::DuplicateKey { - table: LookupTableKind::Provider, - key: provider.provider_key.clone(), - }); - } - MapEntry::Vacant(e) => { - let provider_idx = insert_provider( - state, - &provider.provider_key, - env_start, - env_count, - provider_info.api_type, - )?; - e.insert(provider_idx); 
- } + if !seen_provider_keys.insert(provider.provider_key.as_str()) { + return Err(ModelCatalogBuildError::DuplicateKey { + table: LookupTableKind::Provider, + key: provider.provider_key.clone(), + }); } + insert_provider( + state, + &provider.provider_key, + env_start, + env_count, + provider_info.api_type, + )?; + // SAFETY: analyze_provider_sources bounds env_start and env_count (<= 3). env_start += u16::from(env_count); } for provider_model in provider_models { - // Validate provider exists before inserting model. - if !provider_idx_by_key.contains_key(provider_model.provider_key) { - return Err(ModelCatalogBuildError::ProviderKeyNotFoundForModel { - provider_key: provider_model.provider_key.to_owned(), + let provider = providers + .get(provider_model.provider_idx.as_usize()) + .ok_or(ModelCatalogBuildError::ProviderIdxOutOfRangeForModel { + provider_idx: provider_model.provider_idx, model_key: provider_model.model_key.to_owned(), - }); - } + })?; - // Check for duplicate (provider_key, model_key) pair. - let key = (provider_model.provider_key, provider_model.model_key); + // Check for duplicate (provider_idx, model_key) pair. 
+ let key = (provider_model.provider_idx, provider_model.model_key); if !seen_provider_models.insert(key) { return Err(ModelCatalogBuildError::DuplicateKey { table: LookupTableKind::ProviderModel, - key: format!( - "{}/{}", - provider_model.provider_key, provider_model.model_key - ), + key: format!("{}/{}", provider.provider_key, provider_model.model_key), }); } - insert_provider_model(state, provider_model)?; + insert_provider_model(state, provider.provider_key.as_str(), provider_model)?; } Ok(()) @@ -194,6 +185,7 @@ fn insert_provider( #[inline] fn insert_provider_model( state: &mut BuildState, + provider_key: &str, provider_model: &ProviderModelSource<'_>, ) -> Result<(), ModelCatalogBuildError> { let info = provider_model.model; @@ -235,11 +227,7 @@ fn insert_provider_model( } }; - let key = hash_provider_model_key( - &state.hash_state, - provider_model.provider_key, - provider_model.model_key, - ); + let key = hash_provider_model_key(&state.hash_state, provider_key, provider_model.model_key); let hash48 = PackedProviderModelTableEntry::truncate_hash48(key.as_u64()); // Insert provider-model entry. 
@@ -424,7 +412,7 @@ fn build_provider_env_key_table( mod tests { use super::build_from_source; use crate::models::catalog::{ - LookupTableKind, Modality, ModelCatalogBuildError, ModelInfo, ProviderInfo, + LookupTableKind, Modality, ModelCatalogBuildError, ModelInfo, ProviderIdx, ProviderInfo, ProviderModelSource, ProviderSource, }; use crate::models::ProviderType; @@ -452,11 +440,11 @@ mod tests { } fn provider_model_source<'a>( - provider_key: &'a str, + provider_idx: ProviderIdx, model_key: &'a str, model: ModelInfo, ) -> ProviderModelSource<'a> { - ProviderModelSource::new(provider_key, model_key, model) + ProviderModelSource::new(provider_idx, model_key, model) } fn test_sources() -> (Vec, Vec>) { @@ -469,7 +457,11 @@ mod tests { ProviderType::OpenAiCompletions, ), )], - vec![provider_model_source("alpha", "m1", info(4096, 512))], + vec![provider_model_source( + ProviderIdx::new(0), + "m1", + info(4096, 512), + )], ) } @@ -496,7 +488,11 @@ mod tests { provider("https://beta.example", &["BETA_KEY"], ProviderType::Azure), ), ]; - let provider_models = vec![provider_model_source("alpha", "m1", info(4096, 512))]; + let provider_models = vec![provider_model_source( + ProviderIdx::new(0), + "m1", + info(4096, 512), + )]; match build_from_source(&providers, &provider_models) { Err(err) => { @@ -519,8 +515,8 @@ mod tests { provider("https://alpha.example", &["ALPHA_KEY"], ProviderType::Azure), )]; let provider_models = vec![ - provider_model_source("alpha", "m1", info(4096, 512)), - provider_model_source("alpha", "m1", info(4096, 512)), + provider_model_source(ProviderIdx::new(0), "m1", info(4096, 512)), + provider_model_source(ProviderIdx::new(0), "m1", info(4096, 512)), ]; match build_from_source(&providers, &provider_models) { @@ -551,7 +547,7 @@ mod tests { ]; let provider_models = vec![ provider_model_source( - "alpha", + ProviderIdx::new(0), "m1", ModelInfo { modalities: Modality::TEXT, @@ -562,7 +558,7 @@ mod tests { }, ), provider_model_source( - "beta", + 
ProviderIdx::new(1), "m1", ModelInfo { modalities: Modality::TEXT, @@ -589,14 +585,18 @@ mod tests { "alpha", provider("https://alpha.example", &["ALPHA_KEY"], ProviderType::Azure), )]; - let provider_models = vec![provider_model_source("beta", "m1", info(4096, 512))]; + let provider_models = vec![provider_model_source( + ProviderIdx::new(1), + "m1", + info(4096, 512), + )]; match build_from_source(&providers, &provider_models) { Err(err) => { assert_eq!( err, - ModelCatalogBuildError::ProviderKeyNotFoundForModel { - provider_key: "beta".to_string(), + ModelCatalogBuildError::ProviderIdxOutOfRangeForModel { + provider_idx: ProviderIdx::new(1), model_key: "m1".to_string(), } ); @@ -615,7 +615,11 @@ mod tests { ProviderType::Azure, ), )]; - let provider_models = vec![provider_model_source("alpha", "m1", info(4096, 512))]; + let provider_models = vec![provider_model_source( + ProviderIdx::new(0), + "m1", + info(4096, 512), + )]; match build_from_source(&providers, &provider_models) { Err(err) => { @@ -636,7 +640,7 @@ mod tests { let (providers, _) = test_sources(); let max_output = super::MAX_OUTPUT_TOKENS; let provider_models = vec![provider_model_source( - "alpha", + ProviderIdx::new(0), "m1", info(4096, max_output.saturating_add(1)), )]; @@ -660,7 +664,7 @@ mod tests { let (providers, _) = test_sources(); let max_input = super::MAX_INPUT_TOKENS; let provider_models = vec![provider_model_source( - "alpha", + ProviderIdx::new(0), "m1", info(max_input.saturating_add(1), 512), )]; @@ -695,7 +699,11 @@ mod tests { )); } let mut provider_models = Vec::with_capacity(1); - provider_models.push(provider_model_source("provider_0", "m1", info(4096, 512))); + provider_models.push(provider_model_source( + ProviderIdx::new(0), + "m1", + info(4096, 512), + )); match build_from_source(&providers, &provider_models) { Err(err) => { @@ -726,9 +734,8 @@ mod tests { ), )); } - let last_provider_key = format!("provider_{}", 5461usize); let provider_models = 
vec![ProviderModelSource::new( - &last_provider_key, + ProviderIdx::new(5461), "m1", info(4096, 512), )]; @@ -736,7 +743,7 @@ mod tests { let catalog = build_from_source(&providers, &provider_models).expect("boundary case should pass"); let (provider, _) = catalog - .lookup(&last_provider_key, "m1") + .lookup("provider_5461", "m1") .expect("last provider should be addressable"); assert_eq!(provider.env_vars(), &["VAR1", "VAR2", "VAR3"]); diff --git a/src/llm-coding-tools-core/src/models/catalog/mod.rs b/src/llm-coding-tools-core/src/models/catalog/mod.rs index 4d7095c6..d369d843 100644 --- a/src/llm-coding-tools-core/src/models/catalog/mod.rs +++ b/src/llm-coding-tools-core/src/models/catalog/mod.rs @@ -27,7 +27,7 @@ //! //! - [`ModelCatalog::build`] - Batch builder entry point //! - [`ProviderSource`] - Provider key + metadata input -//! - [`ProviderModelSource`] - Model key + metadata input for a provider +//! - [`ProviderModelSource`] - Model key + metadata input keyed by [`ProviderIdx`] and model key //! - [`ModelInfo`] - Model metadata input (modalities, token limits, sampling) //! - [`ProviderInfo`] - Provider metadata input (API URL, env vars, type) //! - [`Modality`] - Content modality flags (text, image, audio, video) @@ -286,14 +286,15 @@ impl ModelCatalog { /// # Parameters /// /// * `providers` - [`ProviderSource`] values keyed by provider identifier. - /// * `provider_models` - [`ProviderModelSource`] values keyed by provider and model. + /// * `provider_models` - [`ProviderModelSource`] values keyed by [`ProviderIdx`] and model key. + /// The `provider_idx` must point at an element in the `providers` slice. 
/// /// # Errors /// /// Returns [`ModelCatalogBuildError`] when: /// - input exceeds supported numeric limits, /// - token limits cannot be represented in packed model entries, - /// - provider model sources reference unknown providers, + /// - provider model sources reference out-of-range provider indices, /// - or all seed-retry attempts still result in collisions. #[inline] pub fn build( @@ -518,7 +519,7 @@ impl ModelCatalog { mod tests { use super::*; use crate::models::catalog::{ - Modality, ModelInfo, ProviderInfo, ProviderModelSource, ProviderSource, + Modality, ModelInfo, ProviderIdx, ProviderInfo, ProviderModelSource, ProviderSource, }; fn provider(api_url: &str, env_vars: &[&str], api_type: ProviderType) -> ProviderInfo { @@ -565,7 +566,13 @@ mod tests { let provider_model_sources: Vec> = provider_models .into_iter() .map(|(provider_key, model_key, info)| { - ProviderModelSource::new(provider_key, model_key, info) + let provider_idx = ProviderIdx::new( + provider_sources + .iter() + .position(|provider| provider.provider_key == provider_key) + .expect("provider key should exist") as u16, + ); + ProviderModelSource::new(provider_idx, model_key, info) }) .collect(); ModelCatalog::build(&provider_sources, &provider_model_sources) diff --git a/src/llm-coding-tools-core/src/models/catalog/public/builder_types.rs b/src/llm-coding-tools-core/src/models/catalog/public/builder_types.rs index 28b45227..014b17b6 100644 --- a/src/llm-coding-tools-core/src/models/catalog/public/builder_types.rs +++ b/src/llm-coding-tools-core/src/models/catalog/public/builder_types.rs @@ -3,6 +3,7 @@ //! [`ModelCatalog`]: crate::models::catalog::ModelCatalog use super::Modality; +use super::ProviderIdx; use crate::models::ProviderType; use thiserror::Error; @@ -81,14 +82,21 @@ impl From<(String, ProviderInfo)> for ProviderSource { /// This wrapper keeps builder input self-documenting and avoids tuple-position /// ambiguity at call sites. 
/// -/// The keys are borrowed because the catalog builder hashes them during -/// construction and does not retain them afterward. Callers must therefore keep -/// the referenced strings alive until [`crate::models::catalog::ModelCatalog::build`] +/// The `model_key` is borrowed because the catalog builder hashes it during +/// construction and does not retain it afterward. Callers must therefore keep +/// the referenced string alive until [`crate::models::catalog::ModelCatalog::build`] /// returns. +/// +/// The `provider_idx` must correspond to an entry in the `providers` slice passed +/// to [`ModelCatalog::build`]. +/// +/// [`ModelCatalog::build`]: crate::models::catalog::ModelCatalog::build #[derive(Debug, Clone, PartialEq)] pub struct ProviderModelSource<'a> { - /// Borrowed provider identifier used by lookups (for example, `"openai"`). - pub provider_key: &'a str, + /// Index into the `providers` slice passed to [`ModelCatalog::build`]. + /// + /// [`ModelCatalog::build`]: crate::models::catalog::ModelCatalog::build + pub provider_idx: ProviderIdx, /// Borrowed model identifier used by lookups (for example, `"gpt-4"`). pub model_key: &'a str, /// Model metadata associated with [`Self::model_key`]. @@ -100,28 +108,30 @@ impl<'a> ProviderModelSource<'a> { /// /// # Parameters /// - /// * `provider_key` - Provider identifier used during provider lookup. + /// * `provider_idx` - Index into the `providers` slice passed to [`ModelCatalog::build`]. /// * `model_key` - Model identifier used during model lookup for this provider. /// * `model` - Model metadata for this provider model. /// /// # Returns /// /// A new [`ProviderModelSource`]. 
+ /// + /// [`ModelCatalog::build`]: crate::models::catalog::ModelCatalog::build #[inline] - pub fn new(provider_key: &'a str, model_key: &'a str, model: ModelInfo) -> Self { + pub fn new(provider_idx: ProviderIdx, model_key: &'a str, model: ModelInfo) -> Self { Self { - provider_key, + provider_idx, model_key, model, } } } -impl<'a> From<(&'a str, &'a str, ModelInfo)> for ProviderModelSource<'a> { +impl<'a> From<(ProviderIdx, &'a str, ModelInfo)> for ProviderModelSource<'a> { #[inline] - fn from((provider_key, model_key, model): (&'a str, &'a str, ModelInfo)) -> Self { + fn from((provider_idx, model_key, model): (ProviderIdx, &'a str, ModelInfo)) -> Self { Self { - provider_key, + provider_idx, model_key, model, } @@ -173,11 +183,11 @@ pub enum ModelCatalogBuildError { /// Maximum supported env vars for one provider. max: usize, }, - /// A provider model source references a provider key that does not exist. - #[error("provider model source references unknown provider_key={provider_key:?} for model_key={model_key:?}")] - ProviderKeyNotFoundForModel { - /// Provider key from the provider model source. - provider_key: String, + /// A provider model source references a provider index that does not exist. + #[error("provider model source references out-of-range provider_idx={} for model_key={model_key:?}", provider_idx.as_usize())] + ProviderIdxOutOfRangeForModel { + /// Provider index from the provider model source. + provider_idx: ProviderIdx, /// Model key from the provider model source. 
model_key: String, }, diff --git a/src/llm-coding-tools-models-dev/src/cache/payload.rs b/src/llm-coding-tools-models-dev/src/cache/payload.rs index bf5bf25a..d86d7caf 100644 --- a/src/llm-coding-tools-models-dev/src/cache/payload.rs +++ b/src/llm-coding-tools-models-dev/src/cache/payload.rs @@ -56,15 +56,8 @@ pub(crate) fn catalog_from_cache_payload( let mut model_sources = Vec::with_capacity(models.len()); for row in &models { - let provider_source = - provider_sources - .get(row.provider_idx.as_usize()) - .ok_or(CatalogError::CacheFormat( - "provider index out of range in cache payload", - ))?; - model_sources.push(ProviderModelSource { - provider_key: provider_source.provider_key.as_str(), + provider_idx: row.provider_idx, model_key: row.model_key.as_str(), model: ModelInfo { modalities: Modality::from_bits_retain(row.modalities_bits), @@ -138,6 +131,8 @@ mod tests { #[test] fn catalog_from_payload_rejects_out_of_range_provider_idx() { + use llm_coding_tools_core::models::ModelCatalogBuildError; + let payload = CatalogCachePayload { providers: vec![CachedProviderRow { provider_key: "test".to_string(), @@ -157,7 +152,12 @@ mod tests { }; let result = catalog_from_cache_payload(payload); - assert!(matches!(result, Err(CatalogError::CacheFormat(_)))); + assert!(matches!( + result, + Err(CatalogError::ModelCatalogBuild( + ModelCatalogBuildError::ProviderIdxOutOfRangeForModel { .. } + )) + )); } #[test] From 8b84247a662e38bd29bf05a2dfa4b7c6121a3a75 Mon Sep 17 00:00:00 2001 From: Sewer56 Date: Fri, 6 Mar 2026 21:10:55 +0000 Subject: [PATCH 08/22] Added: Implicit fallback behavior for models.dev catalog loading Implements cache-based fallback when network requests fail, improving catalog loading reliability for offline or intermittent connectivity. 
Changes: - Added `load_after_request_failure` helper to handle network failures - Modified `load_catalog_from_url` to catch send() failures and attempt cache fallback - Added test: sync_returns_fallback_cache_on_request_failure_with_valid_cache - Added test: sync_returns_request_error_when_request_fails_without_cache - Added test: sync_returns_cache_error_when_request_fails_with_corrupt_cache Benefits: - On transient network/contact failure, load cache if available - CatalogLoadSource populated accurately (FallbackCache) - Automated tests for fallback success and no-cache failure - All 31 tests pass in both tokio and blocking modes --- .../src/catalog/sync.rs | 100 +++++++++++++++++- 1 file changed, 98 insertions(+), 2 deletions(-) diff --git a/src/llm-coding-tools-models-dev/src/catalog/sync.rs b/src/llm-coding-tools-models-dev/src/catalog/sync.rs index 8d2b7cc5..49b32098 100644 --- a/src/llm-coding-tools-models-dev/src/catalog/sync.rs +++ b/src/llm-coding-tools-models-dev/src/catalog/sync.rs @@ -1,5 +1,5 @@ use crate::api::catalog_sources::cache_payload_from_api_json_bytes; -use crate::cache::format::{read_cache_file, write_cache_file, CacheWriteInput}; +use crate::cache::format::{read_cache_file, write_cache_file, CacheFileData, CacheWriteInput}; use crate::cache::payload::{catalog_from_cache_payload, encode_cache_payload}; use crate::catalog::load_cache::load_catalog_from_cache_file_data; use crate::catalog::load_result::{CatalogLoadResult, CatalogLoadSource}; @@ -29,6 +29,22 @@ fn models_dev_api_url() -> Cow<'static, str> { Cow::Borrowed(MODELS_DEV_API_URL) } +fn load_after_request_failure( + request_error: reqwest::Error, + cache_file: Option<&CacheFileData>, + cache_error: Option, +) -> CatalogResult { + if let Some(cache_file) = cache_file { + return load_catalog_from_cache_file_data(cache_file, CatalogLoadSource::FallbackCache); + } + + if let Some(cache_error) = cache_error { + return Err(cache_error); + } + + Err(CatalogError::Reqwest(request_error)) +} 
+ #[maybe_async::maybe_async] pub(crate) async fn load_catalog_at_path(path: &Path) -> CatalogResult { let url = models_dev_api_url(); @@ -58,7 +74,12 @@ pub(crate) async fn load_catalog_from_url( request = request.header(IF_NONE_MATCH, etag); } - let response = request.send().await?; + let response = match request.send().await { + Ok(response) => response, + Err(error) => { + return load_after_request_failure(error, cache_file.as_ref(), cache_error); + } + }; match response.status() { StatusCode::OK => { let response_etag: Option> = response @@ -207,4 +228,79 @@ mod tests { .expect("openai provider should exist"); assert_eq!(provider.api_type, ProviderType::OpenAiCompletions); } + + fn refused_local_url() -> String { + let listener = std::net::TcpListener::bind("127.0.0.1:0").expect("bind"); + let port = listener.local_addr().expect("local addr").port(); + drop(listener); + format!("http://127.0.0.1:{port}/api.json") + } + + #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))] + async fn sync_returns_fallback_cache_on_request_failure_with_valid_cache() { + let temp = TempDir::new().expect("tempdir"); + let cache_path = temp.path().join("test.cache"); + + let payload = CatalogCachePayload { + providers: vec![CachedProviderRow { + provider_key: "openai".to_string(), + api_url: "https://api.openai.com/v1".to_string(), + env_vars: vec!["OPENAI_API_KEY".to_string()], + api_type: ProviderType::OpenAiCompletions, + }], + models: vec![CachedModelRow { + provider_idx: ProviderIdx::new(0), + model_key: "gpt-4".to_string(), + modalities_bits: Modality::TEXT.bits(), + max_input: 8192, + max_output: 4096, + temperature: None, + top_p: None, + }], + }; + let encoded = encode_cache_payload(&payload); + let compressed = + zstd::bulk::compress(&encoded, zstd::DEFAULT_COMPRESSION_LEVEL).expect("compress"); + crate::cache::format::write_cache_file( + &cache_path, + &CacheWriteInput { + etag: Some(b"\"cached-etag-456\""), + payload_compressed: 
&compressed, + payload_len_decompressed: encoded.len(), + }, + ) + .await + .expect("seed cache"); + + let result = load_catalog_from_url(&cache_path, &refused_local_url()) + .await + .expect("fallback should succeed"); + + assert_eq!(result.source, CatalogLoadSource::FallbackCache); + assert!(result.catalog.lookup_provider("openai").is_some()); + } + + #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))] + async fn sync_returns_request_error_when_request_fails_without_cache() { + let temp = TempDir::new().expect("tempdir"); + let cache_path = temp.path().join("missing.cache"); + + match load_catalog_from_url(&cache_path, &refused_local_url()).await { + Err(error) => assert!(matches!(error, CatalogError::Reqwest(_))), + Ok(_) => panic!("request failure without cache should error"), + } + } + + #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))] + async fn sync_returns_cache_error_when_request_fails_with_corrupt_cache() { + let temp = TempDir::new().expect("tempdir"); + let cache_path = temp.path().join("corrupt.cache"); + + std::fs::write(&cache_path, [0_u8; 11]).expect("write corrupt cache"); + + match load_catalog_from_url(&cache_path, &refused_local_url()).await { + Err(error) => assert!(matches!(error, CatalogError::CacheFormat(_))), + Ok(_) => panic!("request failure with corrupt cache should error"), + } + } } From 1e5497afc2c57aad1e058a00d54dc5efad8f5d88 Mon Sep 17 00:00:00 2001 From: Sewer56 Date: Sat, 7 Mar 2026 00:23:25 +0000 Subject: [PATCH 09/22] Changed: Optimize zstd compression level to 17 for catalog caching Benchmarked compression ratios and times across all zstd levels (0-22) using a models.dev api.json snapshot. Level 17 provides a good balance of compression ratio (1.84% of JSON, ~27ms) vs higher levels with diminishing returns (level 22 takes 207ms for only marginally better compression). 
Changes: - Use zstd level 17 for production cache compression (was default ~3) - Use zstd level 1 in tests for faster execution - Add compression benchmark table to payload.rs module docs Benefits: - Smaller cache files (~22% smaller than level 3) - Reasonable compression time (~27ms for 1.26 MB JSON) - Documented tradeoffs for future reference --- .../src/cache/payload.rs | 38 +++++++++++++++++++ .../src/catalog/load_cache.rs | 2 +- .../src/catalog/sync.rs | 6 +-- 3 files changed, 42 insertions(+), 4 deletions(-) diff --git a/src/llm-coding-tools-models-dev/src/cache/payload.rs b/src/llm-coding-tools-models-dev/src/cache/payload.rs index d86d7caf..05644c50 100644 --- a/src/llm-coding-tools-models-dev/src/cache/payload.rs +++ b/src/llm-coding-tools-models-dev/src/cache/payload.rs @@ -1,3 +1,41 @@ +//! Cache payload serialization for models.dev catalog data. +//! +//! ## Compression Benchmark +//! +//! Using a 1.26 MB `api.json` snapshot (models.dev), converted to bitcode +//! then compressed with zstd at various levels: +//! +//! | Level | Size | % of JSON | Time | +//! |----------------|-----------|-----------|---------| +//! | JSON | 1260.7 KB | 100.00% | - | +//! | (raw bitcode) | 105.7 KB | 8.39% | - | +//! | 0 | 29.7 KB | 2.36% | 1.4ms | +//! | 1 | 32.1 KB | 2.55% | 1.0ms | +//! | 2 | 31.7 KB | 2.51% | 1.0ms | +//! | 3 | 29.7 KB | 2.36% | 1.1ms | +//! | 4 | 29.7 KB | 2.36% | 1.9ms | +//! | 5 | 27.5 KB | 2.18% | 2.9ms | +//! | 6 | 27.1 KB | 2.15% | 3.6ms | +//! | 7 | 26.6 KB | 2.11% | 4.8ms | +//! | 8 | 26.7 KB | 2.12% | 5.0ms | +//! | 9 | 26.7 KB | 2.12% | 6.3ms | +//! | 10 | 26.4 KB | 2.09% | 9.1ms | +//! | 11 | 26.1 KB | 2.07% | 8.5ms | +//! | 12 | 26.1 KB | 2.07% | 14.4ms | +//! | 13 | 26.0 KB | 2.06% | 12.0ms | +//! | 14 | 26.0 KB | 2.06% | 16.4ms | +//! | 15 | 25.9 KB | 2.06% | 21.6ms | +//! | 16 | 23.6 KB | 1.87% | 24.2ms | +//! | 17 | 23.2 KB | 1.84% | 27.6ms | +//! | 18 | 23.2 KB | 1.84% | 42.6ms | +//! | 19 | 23.1 KB | 1.83% | 81.3ms | +//! 
| 20 | 23.1 KB | 1.83% | 96.3ms | +//! | 21 | 23.1 KB | 1.83% | 125.4ms | +//! | 22 | 23.1 KB | 1.83% | 207.5ms | +//! +//! Levels 1-3 are fastest (~1ms, 2.4-2.6% of JSON); level 17, used for cache writes, +//! reaches 1.84% in ~28ms. Levels 19-22 save only ~0.1 KB more but take 80-200ms. + use crate::error::{CatalogError, CatalogResult}; use llm_coding_tools_core::models::{ Modality, ModelCatalog, ModelInfo, ProviderIdx, ProviderInfo, ProviderModelSource, diff --git a/src/llm-coding-tools-models-dev/src/catalog/load_cache.rs b/src/llm-coding-tools-models-dev/src/catalog/load_cache.rs index bc4f79c3..23869d30 100644 --- a/src/llm-coding-tools-models-dev/src/catalog/load_cache.rs +++ b/src/llm-coding-tools-models-dev/src/catalog/load_cache.rs @@ -59,7 +59,7 @@ mod tests { let payload = sample_payload(); let encoded = encode_cache_payload(&payload); let compressed = - zstd::bulk::compress(&encoded, zstd::DEFAULT_COMPRESSION_LEVEL).expect("compress"); + zstd::bulk::compress(&encoded, 1).expect("compress"); write_cache_file( &path, diff --git a/src/llm-coding-tools-models-dev/src/catalog/sync.rs b/src/llm-coding-tools-models-dev/src/catalog/sync.rs index 49b32098..199f1cce 100644 --- a/src/llm-coding-tools-models-dev/src/catalog/sync.rs +++ b/src/llm-coding-tools-models-dev/src/catalog/sync.rs @@ -91,7 +91,7 @@ pub(crate) async fn load_catalog_from_url( let payload_encoded = encode_cache_payload(&payload); let catalog = catalog_from_cache_payload(payload)?; let payload_compressed = - zstd::bulk::compress(payload_encoded.as_slice(), zstd::DEFAULT_COMPRESSION_LEVEL) + zstd::bulk::compress(payload_encoded.as_slice(), 17) .map_err(|error| CatalogError::Zstd(error.to_string()))?; write_cache_file( @@ -197,7 +197,7 @@ mod tests { }; let encoded = encode_cache_payload(&payload); let compressed = - zstd::bulk::compress(&encoded, zstd::DEFAULT_COMPRESSION_LEVEL).expect("compress"); + zstd::bulk::compress(&encoded, 1).expect("compress"); // Write the seeded cache file with ETag
crate::cache::format::write_cache_file( @@ -260,7 +260,7 @@ mod tests { }; let encoded = encode_cache_payload(&payload); let compressed = - zstd::bulk::compress(&encoded, zstd::DEFAULT_COMPRESSION_LEVEL).expect("compress"); + zstd::bulk::compress(&encoded, 1).expect("compress"); crate::cache::format::write_cache_file( &cache_path, &CacheWriteInput { From e7d0eb8b0abec95077add7e79f8aedfcf02e804b Mon Sep 17 00:00:00 2001 From: Sewer56 Date: Sat, 7 Mar 2026 00:31:23 +0000 Subject: [PATCH 10/22] Changed: Document models.dev cache internals and cache read behavior Clarify the internal catalog and cache docs so the sync flow and cache format explain the whole-file read path and why it is the right tradeoff here. Changes: - Added module and helper docs across sync, cache loading, and payload modules - Documented that cache reads load the whole file and referenced typical compressed payload sizes - Noted that models.dev changes infrequently, so cache hits are common and one sequential read is generally faster Benefits: - Makes the models.dev internals easier to follow and maintain - Preserves the performance rationale for the current cache I/O strategy where it is used --- .../src/cache/format.rs | 19 +++++++- .../src/cache/payload.rs | 33 ++++++++++++++ .../src/catalog/load_cache.rs | 13 ++++++ .../src/catalog/sync.rs | 43 +++++++++++++++++++ 4 files changed, 107 insertions(+), 1 deletion(-) diff --git a/src/llm-coding-tools-models-dev/src/cache/format.rs b/src/llm-coding-tools-models-dev/src/cache/format.rs index fbfad751..e131810b 100644 --- a/src/llm-coding-tools-models-dev/src/cache/format.rs +++ b/src/llm-coding-tools-models-dev/src/cache/format.rs @@ -20,6 +20,14 @@ //! one pre-sized allocation, then parse/slice into `prelude`, `etag`, and //! `payload` views without additional copying. //! +//! ## Performance +//! +//! models.dev changes infrequently, so cache hits are expected to be common. +//! 
[`crate::cache::payload`] documents typical compressed payload sizes of about +//! 23-32 kB, which keeps the whole container small enough that a single +//! sequential read is generally the faster, simpler hot path on modern +//! NVMe-backed systems. +//! //! ## Safety //! //! Not a 'safe' parser. We assume the file was created by the user. @@ -122,10 +130,19 @@ impl CacheFileData { /// Reads a cache container from disk. /// -/// This reads only the prelude + raw blocks and does not decompress payload. +/// This reads the entire cache file into memory in one shot, then parses only +/// the prelude + raw blocks and does not decompress payload. /// Compressed payload length is validated against prelude metadata to catch /// unexpected truncation or trailing bytes before decode. /// +/// # Performance +/// +/// This intentionally performs one whole-file read. models.dev changes +/// infrequently, so cache hits are expected to be common, and +/// [`crate::cache::payload`] documents typical compressed payload sizes of about +/// 23-32 kB. Reading a file that small in one go is generally faster in practice than a +/// streaming path, and its cost is effectively negligible on modern NVMe-backed systems. +/// /// # Errors /// /// Returns [`CatalogError::CacheFormat`] when the prelude is truncated, when diff --git a/src/llm-coding-tools-models-dev/src/cache/payload.rs b/src/llm-coding-tools-models-dev/src/cache/payload.rs index 05644c50..37bc5fd7 100644 --- a/src/llm-coding-tools-models-dev/src/cache/payload.rs +++ b/src/llm-coding-tools-models-dev/src/cache/payload.rs @@ -1,5 +1,9 @@ //! Cache payload serialization for models.dev catalog data. //! +//! The payload is stored as simple owned rows so it can be encoded compactly +//! with bitcode and rebuilt into a [`ModelCatalog`](llm_coding_tools_core::models::ModelCatalog) +//! without reparsing the original JSON. +//! //! ## Compression Benchmark //! //! 
Using a 1.26 MB `api.json` snapshot (models.dev), converted to bitcode @@ -42,39 +46,68 @@ use llm_coding_tools_core::models::{ ProviderSource, ProviderType, }; +/// Serializable cache representation of the models.dev catalog. #[derive(Debug, Clone, PartialEq, bitcode::Encode, bitcode::Decode)] pub(crate) struct CatalogCachePayload { + /// Provider rows in catalog order. pub(crate) providers: Vec, + /// Model rows that reference providers by index. pub(crate) models: Vec, } +/// Serializable provider row stored in the cache payload. #[derive(Debug, Clone, PartialEq, Eq, bitcode::Encode, bitcode::Decode)] pub(crate) struct CachedProviderRow { + /// Stable provider lookup key. pub(crate) provider_key: String, + /// Base API URL for requests to this provider. pub(crate) api_url: String, + /// Environment variables that can supply credentials. pub(crate) env_vars: Vec, + /// Provider protocol or API shape. pub(crate) api_type: ProviderType, } +/// Serializable model row stored in the cache payload. #[derive(Debug, Clone, PartialEq, bitcode::Encode, bitcode::Decode)] pub(crate) struct CachedModelRow { + /// Index into [`CatalogCachePayload::providers`]. pub(crate) provider_idx: ProviderIdx, + /// Stable model lookup key within the provider. pub(crate) model_key: String, + /// Serialized [`Modality`] bitflags. pub(crate) modalities_bits: u8, + /// Maximum supported input tokens. pub(crate) max_input: u32, + /// Maximum supported output tokens. pub(crate) max_output: u32, + /// Optional default temperature. pub(crate) temperature: Option, + /// Optional default top-p value. pub(crate) top_p: Option, } +/// Encodes a cache payload into bitcode bytes. pub(crate) fn encode_cache_payload(payload: &CatalogCachePayload) -> Vec { bitcode::encode(payload) } +/// Decodes bitcode bytes into an owned cache payload. +/// +/// # Errors +/// +/// Returns [`CatalogError::BitcodeDecode`] when the bytes are not a valid cache +/// payload encoding. 
pub(crate) fn decode_cache_payload(bytes: &[u8]) -> CatalogResult { bitcode::decode(bytes).map_err(|error| CatalogError::BitcodeDecode(error.to_string())) } +/// Rebuilds a [`ModelCatalog`] from decoded cache rows. +/// +/// # Errors +/// +/// Returns [`CatalogError`] when any cached row data cannot be used to build a +/// valid catalog, such as when a model references an out-of-range provider. pub(crate) fn catalog_from_cache_payload( payload: CatalogCachePayload, ) -> CatalogResult { diff --git a/src/llm-coding-tools-models-dev/src/catalog/load_cache.rs b/src/llm-coding-tools-models-dev/src/catalog/load_cache.rs index 23869d30..338f6ee5 100644 --- a/src/llm-coding-tools-models-dev/src/catalog/load_cache.rs +++ b/src/llm-coding-tools-models-dev/src/catalog/load_cache.rs @@ -1,8 +1,21 @@ +//! Loading a model catalog from cached on-disk data. +//! +//! This module handles the offline half of catalog loading: it decompresses the +//! stored payload, decodes the serialized rows, and rebuilds a +//! [`ModelCatalog`](llm_coding_tools_core::models::ModelCatalog). + use crate::cache::format::CacheFileData; use crate::cache::payload::{catalog_from_cache_payload, decode_cache_payload}; use crate::catalog::load_result::{CatalogLoadResult, CatalogLoadSource}; use crate::error::{CatalogError, CatalogResult}; +/// Decompresses cache file data and rebuilds a catalog from it. +/// +/// # Errors +/// +/// Returns [`CatalogError`] when zstd decompression fails, the decompressed +/// length does not match the cache metadata, the serialized payload cannot be +/// decoded, or catalog reconstruction fails. 
pub(crate) fn load_catalog_from_cache_file_data( cache_file: &CacheFileData, source: CatalogLoadSource, diff --git a/src/llm-coding-tools-models-dev/src/catalog/sync.rs b/src/llm-coding-tools-models-dev/src/catalog/sync.rs index 199f1cce..bacc8a57 100644 --- a/src/llm-coding-tools-models-dev/src/catalog/sync.rs +++ b/src/llm-coding-tools-models-dev/src/catalog/sync.rs @@ -1,3 +1,11 @@ +//! Catalog synchronization against the remote models.dev API. +//! +//! This module owns the online-first load path used by +//! [`ModelsDevCatalog`](crate::catalog::ModelsDevCatalog). It reads any cached +//! container in one shot, sends a conditional request with the cached ETag when +//! available, refreshes the cache on `200 OK`, reuses it on `304 Not Modified`, +//! and falls back to cached data when the request fails. + use crate::api::catalog_sources::cache_payload_from_api_json_bytes; use crate::cache::format::{read_cache_file, write_cache_file, CacheFileData, CacheWriteInput}; use crate::cache::payload::{catalog_from_cache_payload, encode_cache_payload}; @@ -10,16 +18,19 @@ use std::borrow::Cow; use std::io::ErrorKind; use std::path::Path; +/// Default production endpoint for the models.dev catalog snapshot. const MODELS_DEV_API_URL: &str = "https://models.dev/api.json"; #[cfg(test)] static TEST_MODELS_DEV_API_URL: std::sync::Mutex> = std::sync::Mutex::new(None); #[cfg(test)] +/// Overrides the remote catalog URL for sync tests. pub(crate) fn set_test_models_dev_api_url(url: Option) { *TEST_MODELS_DEV_API_URL.lock().unwrap() = url; } +/// Returns the active catalog endpoint, including the test override when set. fn models_dev_api_url() -> Cow<'static, str> { #[cfg(test)] if let Some(url) = TEST_MODELS_DEV_API_URL.lock().unwrap().clone() { @@ -29,6 +40,10 @@ fn models_dev_api_url() -> Cow<'static, str> { Cow::Borrowed(MODELS_DEV_API_URL) } +/// Resolves the result to return after a request failure. 
+/// +/// Cached data takes precedence over surfacing the request error so callers can +/// continue with the last known-good catalog when possible. fn load_after_request_failure( request_error: reqwest::Error, cache_file: Option<&CacheFileData>, @@ -46,12 +61,40 @@ fn load_after_request_failure( } #[maybe_async::maybe_async] +/// Loads the catalog at `path` using the default models.dev endpoint. +/// +/// # Errors +/// +/// Returns the same errors as [`load_catalog_from_url`] while targeting the +/// default production URL. pub(crate) async fn load_catalog_at_path(path: &Path) -> CatalogResult { let url = models_dev_api_url(); load_catalog_from_url(path, url.as_ref()).await } #[maybe_async::maybe_async] +/// Synchronizes the cache at `path` against `url` and returns a catalog. +/// +/// The sync flow is: +/// - read any existing cache file in one whole-file read +/// - send `If-None-Match` when the cache includes an ETag +/// - on `200 OK`, decode the response and rewrite the cache +/// - on `304 Not Modified`, load the existing cache +/// - on request failure, fall back to cache when available +/// +/// # Performance +/// +/// Cache probing performs one up-front whole-file read through +/// [`read_cache_file`]. models.dev changes infrequently, so cache hits are +/// expected to be common, and [`crate::cache::payload`] documents typical +/// compressed payload sizes of about 23-32 kB. That makes a single sequential +/// read generally the faster hot path on modern NVMe-backed systems. +/// +/// # Errors +/// +/// Returns [`CatalogError`] when cache I/O fails without a usable fallback, +/// response data cannot be decoded, the cache cannot be written, or the server +/// responds with an unexpected status. 
pub(crate) async fn load_catalog_from_url( path: &Path, url: &str, From 82e7695474da7d9e21b146854f3694e2a82a1052 Mon Sep 17 00:00:00 2001 From: Sewer56 Date: Sun, 8 Mar 2026 23:26:10 +0000 Subject: [PATCH 11/22] Changed: Finalize Slice 10 docs and agent model integration Refresh the workspace docs and restore agent-side model parsing so the public guidance matches the current API surface and workflow behavior. Changes: - updated the root README with all workspace crates and current setup guidance - expanded the agents and models-dev READMEs around catalog integration - restored `AgentConfig::model_parts()` with coverage for valid and invalid input - corrected the workflow note for models-dev runtime support - kept rustfmt-only cleanup in models-dev cache loading tests Benefits: - makes the workspace easier to adopt without stale examples or missing docs - keeps `provider/model` parsing close to the agent config that owns it --- .github/workflows/rust.yml | 2 +- README.MD | 70 ++++++++++--------- src/llm-coding-tools-agents/README.md | 36 +++++++--- .../src/types/config.rs | 60 ++++++++++++++++ src/llm-coding-tools-models-dev/README.md | 67 +++++++++++++++--- .../src/catalog/load_cache.rs | 3 +- .../src/catalog/sync.rs | 11 ++- src/llm-coding-tools-models-dev/src/lib.rs | 5 +- 8 files changed, 192 insertions(+), 62 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index ee44bdc9..37f6b714 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -58,7 +58,7 @@ jobs: echo "Running semver checks for ${CRATE}..." # Note: llm-coding-tools-core has mutually exclusive async/blocking features, # so we must use --only-explicit-features to avoid enabling all features. - # The serdesai and models-dev crates are async-only and don't have the tokio feature. + # llm-coding-tools-serdesai is async-only. models-dev supports both tokio and blocking. 
if [ "${CRATE}" = "llm-coding-tools-core" ]; then cargo +stable semver-checks -p "${CRATE}" --target ${{ matrix.target }} --only-explicit-features --features tokio else diff --git a/README.MD b/README.MD index 2d728aac..303acd67 100644 --- a/README.MD +++ b/README.MD @@ -1,18 +1,30 @@ # llm-coding-tools [![Crates.io - llm-coding-tools-core](https://img.shields.io/crates/v/llm-coding-tools-core.svg)](https://crates.io/crates/llm-coding-tools-core) +[![Crates.io - llm-coding-tools-agents](https://img.shields.io/crates/v/llm-coding-tools-agents.svg)](https://crates.io/crates/llm-coding-tools-agents) [![Crates.io - llm-coding-tools-serdesai](https://img.shields.io/crates/v/llm-coding-tools-serdesai.svg)](https://crates.io/crates/llm-coding-tools-serdesai) +[![Crates.io - llm-coding-tools-models-dev](https://img.shields.io/crates/v/llm-coding-tools-models-dev.svg)](https://crates.io/crates/llm-coding-tools-models-dev) [![Docs.rs](https://docs.rs/llm-coding-tools-serdesai/badge.svg)](https://docs.rs/llm-coding-tools-serdesai) [![CI](https://github.com/Sewer56/llm-coding-tools/actions/workflows/rust.yml/badge.svg)](https://github.com/Sewer56/llm-coding-tools/actions) -Lightweight, high-performance coding tool implementations for LLM-powered development agents. Plug and play into your favourite frameworks. +Lightweight, heavily optimized coding tool implementations for LLM-powered +development agents. + +Suitable for server use (<3 MiB), or as building blocks for your own TUI coding agent. 
## About This Workspace -This workspace contains multiple Rust crates for integrating coding tools with LLM agents: +This workspace contains multiple Rust crates for integrating coding tools with +LLM agents: -- **[llm-coding-tools-core](./src/llm-coding-tools-core/)**: Framework-agnostic core operations and utilities -- **[llm-coding-tools-serdesai](./src/llm-coding-tools-serdesai/)**: serdesAI framework-specific Tool implementations +- **[llm-coding-tools-core](./src/llm-coding-tools-core/)**: + Framework-agnostic core operations and utilities +- **[llm-coding-tools-agents](./src/llm-coding-tools-agents/)**: + OpenCode agent markdown loader and typed catalogue +- **[llm-coding-tools-serdesai](./src/llm-coding-tools-serdesai/)**: + serdesAI framework-specific Tool implementations +- **[llm-coding-tools-models-dev](./src/llm-coding-tools-models-dev/)**: + models.dev catalog sync with cached fallback and ETag refresh ## Features @@ -22,6 +34,9 @@ This workspace contains multiple Rust crates for integrating coding tools with L - **Web**: URL fetching with HTML-to-markdown conversion - **Path Security**: Choose between unrestricted or sandboxed file access - **Context Strings**: Embedded LLM guidance for tool usage +- **Agent Loading**: Parse OpenCode-compatible agent markdown into typed configs +- **Model Catalog Sync**: Download and cache the models.dev catalog for + provider/model lookups ## Feature Flags (llm-coding-tools-core) @@ -30,41 +45,24 @@ This workspace contains multiple Rust crates for integrating coding tools with L ## Quick Start -Add to your `Cargo.toml`: +Pick the crate that matches your use case: ```toml [dependencies] -llm-coding-tools-serdesai = "0.1" +llm-coding-tools-core = "0.2" # Framework-agnostic tool implementations +llm-coding-tools-agents = "0.1" # OpenCode agent markdown loader +llm-coding-tools-models-dev = "0.1" # models.dev catalog sync and cache +llm-coding-tools-serdesai = "0.2" # serdesAI integration ``` -```rust,no_run -use 
llm_coding_tools_serdesai::{AgentBuilder, BashTool, TodoTools}; -use llm_coding_tools_serdesai::absolute::{ReadTool, WriteTool, EditTool, GlobTool, GrepTool}; - -let mut builder = AgentBuilder::new(); -let todos = TodoTools::new(); - -builder - .track(ReadTool::::new()) - .track(WriteTool::new()) - .track(EditTool::::new()) - .track(GlobTool::new()) - .track(GrepTool::::new()) - .track(BashTool::new()) - .track(&todos.read) - .track(&todos.write); - -let mut agent = builder.build(); - -// Use the agent -// let response = agent.invoke("List all files").await?; -``` +For a runnable agent setup, start with `llm-coding-tools-serdesai` and the +examples below. ## Examples ```bash # serdesAI framework - Basic agent setup -cargo run --example serdesai-agents -p llm-coding-tools-serdesai +cargo run --example serdesai-basic -p llm-coding-tools-serdesai # serdesAI framework - Sandboxed file access cargo run --example serdesai-sandboxed -p llm-coding-tools-serdesai @@ -73,18 +71,26 @@ cargo run --example serdesai-sandboxed -p llm-coding-tools-serdesai ## Documentation - [llm-coding-tools-core README](./src/llm-coding-tools-core/README.md) +- [llm-coding-tools-agents README](./src/llm-coding-tools-agents/README.md) - [llm-coding-tools-serdesai README](./src/llm-coding-tools-serdesai/README.md) +- [llm-coding-tools-models-dev README](./src/llm-coding-tools-models-dev/README.md) - [Developer Guidelines](./src/AGENTS.md) ## Contributing -Contributions are welcome! Please ensure all tests pass and the code follows our guidelines. +Contributions are welcome! Please ensure all tests pass and the code follows +our guidelines. ## Deprecation Notice -**Rig framework support (`llm-coding-tools-rig`) has been removed** (commit 17158db) due to library bugs that prevented examples from running reliably. +**Rig framework support (`llm-coding-tools-rig`) has been removed** +(commit 17158db) due to library bugs that prevented examples from running +reliably. 
-You're welcome to submit a PR re-adding rig support if you're willing to maintain it. Since I don't use rig personally, I'm not able to actively maintain that integration. Alternatively, you can create your own crate building on `llm-coding-tools-core` directly. +You're welcome to submit a PR re-adding rig support if you're willing to +maintain it. Since I don't use rig personally, I'm not able to actively +maintain that integration. Alternatively, you can create your own crate +building on `llm-coding-tools-core` directly. ## License diff --git a/src/llm-coding-tools-agents/README.md b/src/llm-coding-tools-agents/README.md index b17ce11d..e6d6bb85 100644 --- a/src/llm-coding-tools-agents/README.md +++ b/src/llm-coding-tools-agents/README.md @@ -4,13 +4,16 @@ Load OpenCode agent markdown files into a typed Rust catalogue. This crate is a loader for the [OpenCode agent schema](https://opencode.ai/docs/agents/). -It is a drop-in replacement for OpenCode agent files: agents you create for OpenCode should load here unchanged. +It is a drop-in replacement for OpenCode agent files: agents you create for +OpenCode should load here unchanged. ## What it provides -- [`AgentLoader`] for loading agent configs from directories, files, or in-memory markdown. +- [`AgentLoader`] for loading agent configs from directories, files, or + in-memory markdown. - [`AgentCatalog`] for storing and looking up loaded [`AgentConfig`] entries. -- [`RulesetExt`] for converting frontmatter `permission` data into runtime [`Ruleset`]s. +- [`RulesetExt`] for converting frontmatter `permission` data into runtime + [`Ruleset`]s. ## Quick start @@ -43,19 +46,34 @@ permission: Prompt body here... ``` -For field behaviour, see OpenCode docs for [`mode`](https://opencode.ai/docs/agents#mode), [`model`](https://opencode.ai/docs/agents#model), and [`permissions`](https://opencode.ai/docs/agents#permissions). 
+For field behaviour, see OpenCode docs for +[`mode`](https://opencode.ai/docs/agents#mode), +[`model`](https://opencode.ai/docs/agents#model), and +[`permissions`](https://opencode.ai/docs/agents#permissions). ## Compatibility notes -This library does not provide interactive UX extensions (for example, TUI approval flows). -To avoid false expectations, settings that require interaction are rejected, while settings with no runtime effect are accepted and ignored: +This library does not provide interactive UX extensions (for example, TUI +approval flows). -- [`permission.task`](https://opencode.ai/docs/agents#task-permissions): `ask` is rejected with a schema validation error (`allow`/`deny` only), because `ask` is an interactive approval mode in OpenCode ([docs](https://opencode.ai/docs/permissions#what-ask-does)). -- [`hidden`](https://opencode.ai/docs/agents#hidden) is accepted for compatibility, but ignored at runtime. +To avoid false expectations, settings that require interaction are rejected, +while settings with no runtime effect are accepted and ignored: + +- [`permission.task`](https://opencode.ai/docs/agents#task-permissions): + `ask` is rejected with a schema validation error (`allow`/`deny` only), + because `ask` is an interactive approval mode in OpenCode + ([docs](https://opencode.ai/docs/permissions#what-ask-does)). +- [`hidden`](https://opencode.ai/docs/agents#hidden) is accepted for + compatibility, but ignored at runtime. ## Integration This crate only loads and validates agent configs. -Pass [`AgentCatalog`] to your runtime adapter (for example, `llm-coding-tools-serdesai`) to build registries and Task tooling. +Pass [`AgentCatalog`] to your runtime adapter (for example, +`llm-coding-tools-serdesai`) to build registries and Task tooling. + +If you want to validate `model` strings against a catalog, call +[`AgentConfig::model_parts`] and pass the returned `(provider, model)` into +your lookup layer. 
[`Ruleset`]: llm_coding_tools_core::permissions::Ruleset diff --git a/src/llm-coding-tools-agents/src/types/config.rs b/src/llm-coding-tools-agents/src/types/config.rs index 78010529..1d4a74ab 100644 --- a/src/llm-coding-tools-agents/src/types/config.rs +++ b/src/llm-coding-tools-agents/src/types/config.rs @@ -108,6 +108,8 @@ pub struct AgentConfig { #[serde(default)] pub description: String, /// Optional model override (format: "provider/model-id"). + /// + /// Use [`AgentConfig::model_parts`] before catalog lookup. #[serde(default)] pub model: Option, /// Legacy visibility flag accepted for compatibility only. @@ -136,6 +138,18 @@ pub struct AgentConfig { } impl AgentConfig { + /// Returns the configured model split into `(provider, model)` parts. + #[inline] + pub fn model_parts(&self) -> Option<(&str, &str)> { + let value = self.model.as_deref()?; + let (provider, model) = value.split_once('/')?; + if provider.is_empty() || model.is_empty() { + return None; + } + + Some((provider, model)) + } + /// Creates an [`AgentConfig`] from raw frontmatter and parsed prompt body. 
pub(crate) fn from_raw(default_name: String, raw: RawFrontmatter, prompt: String) -> Self { Self { @@ -152,3 +166,49 @@ impl AgentConfig { } } } + +#[cfg(test)] +mod tests { + use super::{AgentConfig, AgentMode}; + use ahash::AHashMap; + use indexmap::IndexMap; + + fn config_with_model(model: Option<&str>) -> AgentConfig { + AgentConfig { + name: "example".to_string(), + mode: AgentMode::All, + description: String::new(), + model: model.map(str::to_string), + hidden: false, + temperature: None, + top_p: None, + permission: IndexMap::new(), + options: AHashMap::new(), + prompt: String::new(), + } + } + + #[test] + fn model_parts_returns_provider_and_model() { + let config = config_with_model(Some("synthetic/hf:moonshotai/Kimi-K2.5")); + + assert_eq!( + config.model_parts(), + Some(("synthetic", "hf:moonshotai/Kimi-K2.5")) + ); + } + + #[test] + fn model_parts_rejects_missing_separator() { + let config = config_with_model(Some("synthetic-only")); + + assert_eq!(config.model_parts(), None); + } + + #[test] + fn model_parts_handles_absent_model() { + let config = config_with_model(None); + + assert_eq!(config.model_parts(), None); + } +} diff --git a/src/llm-coding-tools-models-dev/README.md b/src/llm-coding-tools-models-dev/README.md index 9a31b444..cf2e0d7a 100644 --- a/src/llm-coding-tools-models-dev/README.md +++ b/src/llm-coding-tools-models-dev/README.md @@ -8,8 +8,8 @@ for a cached fallback and caching via ETag(s). If you run coding agents against many providers, you want to have fresh data. [models.dev][models.dev] is one such source of data. -This crate has sufficient code to download from models.dev, distill down only -the relevant data we need; and create a llm_coding_tools_core `ModelCatalog`. +This crate downloads from models.dev, keeps only the fields we need, and +builds a `llm_coding_tools_core::models::ModelCatalog`. ## Usage @@ -18,8 +18,10 @@ the relevant data we need; and create a llm_coding_tools_core `ModelCatalog`. 1. 
Read cache header (if present) and get the old ETag. 2. Send request to models.dev with `If-None-Match` when ETag exists. 3. If server returns `304 Not Modified`, load catalog from cache. -4. If server returns `200 OK`, parse JSON, map it into catalog sources, write fresh cache, then build catalog. -5. If network fails, try cached data as fallback; if no valid cache exists, return an error. +4. If server returns `200 OK`, parse JSON, map it into catalog sources, + write fresh cache, then build catalog. +5. If network fails, try cached data as fallback; if no valid cache exists, + return an error. ### Non-blocking (`tokio`) @@ -31,9 +33,15 @@ async fn load_catalog() -> Result<(), Box<dyn std::error::Error>> { let result = ModelsDevCatalog::load().await?; match result.source { - CatalogLoadSource::Downloaded => println!("Downloaded fresh catalog data."), - CatalogLoadSource::NotModifiedCache => println!("Cache is already up to date."), - CatalogLoadSource::FallbackCache => println!("Network unavailable, using cached catalog data."), + CatalogLoadSource::Downloaded => { + println!("Downloaded fresh catalog data.") + } + CatalogLoadSource::NotModifiedCache => { + println!("Cache is already up to date.") + } + CatalogLoadSource::FallbackCache => { + println!("Network unavailable, using cached catalog data.") + } } if let Some(entry) = result.catalog.lookup("openai", "gpt-4") { @@ -55,9 +63,15 @@ fn load_catalog() -> Result<(), Box<dyn std::error::Error>> { let result = ModelsDevCatalog::load()?; match result.source { - CatalogLoadSource::Downloaded => println!("Downloaded fresh catalog data."), - CatalogLoadSource::NotModifiedCache => println!("Cache is already up to date."), - CatalogLoadSource::FallbackCache => println!("Network unavailable, using cached catalog data."), + CatalogLoadSource::Downloaded => { + println!("Downloaded fresh catalog data.") + } + CatalogLoadSource::NotModifiedCache => { + println!("Cache is already up to date.") + } + CatalogLoadSource::FallbackCache => { + println!("Network
unavailable, using cached catalog data.") + } } if let Some(entry) = result.catalog.lookup("openai", "gpt-4") { @@ -69,6 +83,39 @@ fn load_catalog() -> Result<(), Box<dyn std::error::Error>> { } ``` +### Load from a custom cache path + +```rust +use llm_coding_tools_models_dev::ModelsDevCatalog; +use std::path::PathBuf; + +#[cfg(feature = "tokio")] +async fn load_catalog() -> Result<(), Box<dyn std::error::Error>> { + let cache_path = PathBuf::from("/tmp/models-dev.cache"); + let _result = ModelsDevCatalog::load_at(&cache_path).await?; + Ok(()) +} + +#[cfg(feature = "blocking")] +fn load_catalog() -> Result<(), Box<dyn std::error::Error>> { + let cache_path = PathBuf::from("/tmp/models-dev.cache"); + let _result = ModelsDevCatalog::load_at(&cache_path)?; + Ok(()) +} +``` + +### Resolve the shared cache path + +```rust +use llm_coding_tools_models_dev::shared_cache_path; + +fn print_cache_path() -> Result<(), Box<dyn std::error::Error>> { + let path = shared_cache_path()?; + println!("{}", path.display()); + Ok(()) +} +``` + ## Cache location By default, cache is stored in the platform cache directory: diff --git a/src/llm-coding-tools-models-dev/src/catalog/load_cache.rs b/src/llm-coding-tools-models-dev/src/catalog/load_cache.rs index 338f6ee5..8e807411 100644 --- a/src/llm-coding-tools-models-dev/src/catalog/load_cache.rs +++ b/src/llm-coding-tools-models-dev/src/catalog/load_cache.rs @@ -71,8 +71,7 @@ mod tests { let payload = sample_payload(); let encoded = encode_cache_payload(&payload); - let compressed = - zstd::bulk::compress(&encoded, 1).expect("compress"); + let compressed = zstd::bulk::compress(&encoded, 1).expect("compress"); write_cache_file( &path, diff --git a/src/llm-coding-tools-models-dev/src/catalog/sync.rs b/src/llm-coding-tools-models-dev/src/catalog/sync.rs index bacc8a57..26d9005e 100644 --- a/src/llm-coding-tools-models-dev/src/catalog/sync.rs +++ b/src/llm-coding-tools-models-dev/src/catalog/sync.rs @@ -133,9 +133,8 @@ pub(crate) async fn load_catalog_from_url( let payload = cache_payload_from_api_json_bytes(body.as_ref())?; let
payload_encoded = encode_cache_payload(&payload); let catalog = catalog_from_cache_payload(payload)?; - let payload_compressed = - zstd::bulk::compress(payload_encoded.as_slice(), 17) - .map_err(|error| CatalogError::Zstd(error.to_string()))?; + let payload_compressed = zstd::bulk::compress(payload_encoded.as_slice(), 17) + .map_err(|error| CatalogError::Zstd(error.to_string()))?; write_cache_file( path, @@ -239,8 +238,7 @@ mod tests { }], }; let encoded = encode_cache_payload(&payload); - let compressed = - zstd::bulk::compress(&encoded, 1).expect("compress"); + let compressed = zstd::bulk::compress(&encoded, 1).expect("compress"); // Write the seeded cache file with ETag crate::cache::format::write_cache_file( @@ -302,8 +300,7 @@ mod tests { }], }; let encoded = encode_cache_payload(&payload); - let compressed = - zstd::bulk::compress(&encoded, 1).expect("compress"); + let compressed = zstd::bulk::compress(&encoded, 1).expect("compress"); crate::cache::format::write_cache_file( &cache_path, &CacheWriteInput { diff --git a/src/llm-coding-tools-models-dev/src/lib.rs b/src/llm-coding-tools-models-dev/src/lib.rs index 1b4eedae..29e3bb49 100644 --- a/src/llm-coding-tools-models-dev/src/lib.rs +++ b/src/llm-coding-tools-models-dev/src/lib.rs @@ -5,7 +5,10 @@ compile_error!("Features `async` and `blocking` are mutually exclusive."); #[cfg(not(any(feature = "async", feature = "blocking")))] -compile_error!("Either an async runtime (e.g., `tokio`) or `blocking` feature must be enabled."); +compile_error!(concat!( + "Either an async runtime (e.g., `tokio`) or `blocking` feature ", + "must be enabled." +)); #[allow(dead_code)] // Wired into catalog build/load slices mod api; From 5aa4f5e097570acf8632b01a7e67b22be05eda91 Mon Sep 17 00:00:00 2001 From: Sewer56 Date: Sun, 8 Mar 2026 23:38:15 +0000 Subject: [PATCH 12/22] Changed: Clean up dead code directives and organize test-only code Remove unnecessary dead_code annotations and move test-only code into test modules. 
Changes: - Removed `#[allow(dead_code)]` from actually-used hash64/hash63 methods - Removed `#[allow(dead_code)]` from used api and format modules - Moved test-only `catalog_from_api_json_bytes` into tests module - Moved test-only imports into tests module - Added `#[allow(dead_code)]` with "public API" comment for `payload_len_compressed` Benefits: - Cleaner codebase without misleading dead_code suppressions - Better code organization with test code properly isolated - Accurate documentation of intentional public API surface --- .../src/internal/hash63.rs | 2 -- .../src/internal/hash64.rs | 4 ---- .../src/api/catalog_sources.rs | 23 ++++++++----------- .../src/cache/format.rs | 1 + .../src/cache/mod.rs | 1 - src/llm-coding-tools-models-dev/src/lib.rs | 1 - 6 files changed, 10 insertions(+), 22 deletions(-) diff --git a/src/llm-coding-tools-core/src/internal/hash63.rs b/src/llm-coding-tools-core/src/internal/hash63.rs index 56df018f..a81934bc 100644 --- a/src/llm-coding-tools-core/src/internal/hash63.rs +++ b/src/llm-coding-tools-core/src/internal/hash63.rs @@ -18,14 +18,12 @@ impl Hash63 { /// /// The caller is responsible for ensuring bit 63 is 0. #[inline] - #[allow(dead_code)] // internal public API pub(crate) const fn from_u64(value: u64) -> Self { Self(value) } /// Returns the underlying u64 value. #[inline] - #[allow(dead_code)] // internal public API pub(crate) const fn as_u64(&self) -> u64 { self.0 } diff --git a/src/llm-coding-tools-core/src/internal/hash64.rs b/src/llm-coding-tools-core/src/internal/hash64.rs index e880c044..6c96ea78 100644 --- a/src/llm-coding-tools-core/src/internal/hash64.rs +++ b/src/llm-coding-tools-core/src/internal/hash64.rs @@ -11,14 +11,12 @@ pub(crate) struct Hash64(u64); impl Hash64 { /// Creates a new Hash64 from a raw u64 value. #[inline] - #[allow(dead_code)] // internal public API pub(crate) fn from_u64(value: u64) -> Self { Self(value) } /// Returns the underlying u64 value. 
#[inline] - #[allow(dead_code)] // internal public API pub(crate) fn as_u64(&self) -> u64 { self.0 } @@ -26,14 +24,12 @@ impl Hash64 { /// Hashes a string to Hash64 using ahash64. #[inline(always)] -#[allow(dead_code)] // internal public API pub(crate) fn hash_u64(s: &str) -> Hash64 { hash_u64_bytes(s.as_bytes()) } /// Hashes raw bytes to Hash64 using ahash64. #[inline(always)] -#[allow(dead_code)] // internal public API pub(crate) fn hash_u64_bytes(bytes: &[u8]) -> Hash64 { Hash64(ahash::RandomState::with_seed(0xDEAD_CAFE).hash_one(bytes)) } diff --git a/src/llm-coding-tools-models-dev/src/api/catalog_sources.rs b/src/llm-coding-tools-models-dev/src/api/catalog_sources.rs index 2a3c1ee7..e8b53ccf 100644 --- a/src/llm-coding-tools-models-dev/src/api/catalog_sources.rs +++ b/src/llm-coding-tools-models-dev/src/api/catalog_sources.rs @@ -14,12 +14,10 @@ //! core during catalog build. use super::schema::{parse_api_json, ApiModelEntry, ApiModelLimit, ApiModelModalities}; -use crate::cache::payload::{ - catalog_from_cache_payload, CachedModelRow, CachedProviderRow, CatalogCachePayload, -}; +use crate::cache::payload::{CachedModelRow, CachedProviderRow, CatalogCachePayload}; use crate::error::{CatalogError, CatalogResult}; use llm_coding_tools_core::models::{ - Modality, ModelCatalog, ModelCatalogBuildError, ModelInfo, ProviderIdx, ProviderType, + Modality, ModelCatalogBuildError, ModelInfo, ProviderIdx, ProviderType, }; pub(crate) fn cache_payload_from_api_json_bytes( @@ -73,12 +71,6 @@ pub(crate) fn cache_payload_from_api_json_bytes( Ok(CatalogCachePayload { providers, models }) } -/// Parses models.dev `api.json` bytes and builds a [`ModelCatalog`]. 
-pub(crate) fn catalog_from_api_json_bytes(json_bytes: &[u8]) -> CatalogResult { - let payload = cache_payload_from_api_json_bytes(json_bytes)?; - catalog_from_cache_payload(payload) -} - #[inline] fn model_info_from_entry(model_entry: &ApiModelEntry) -> ModelInfo { let (max_input, max_output) = match model_entry.limit.as_ref() { @@ -168,12 +160,15 @@ fn provider_type_from_models_dev_npm(npm_package: Option<&str>) -> ProviderType #[cfg(test)] mod tests { - use super::{ - cache_payload_from_api_json_bytes, catalog_from_api_json_bytes, - provider_type_from_models_dev_npm, - }; + use super::{cache_payload_from_api_json_bytes, provider_type_from_models_dev_npm}; + use crate::cache::payload::catalog_from_cache_payload; use llm_coding_tools_core::models::{Modality, ModelCatalog, ProviderIdx, ProviderType}; + fn catalog_from_api_json_bytes(json_bytes: &[u8]) -> crate::error::CatalogResult { + let payload = cache_payload_from_api_json_bytes(json_bytes)?; + catalog_from_cache_payload(payload) + } + fn catalog(json: &[u8]) -> ModelCatalog { catalog_from_api_json_bytes(json).expect("API payload should map") } diff --git a/src/llm-coding-tools-models-dev/src/cache/format.rs b/src/llm-coding-tools-models-dev/src/cache/format.rs index e131810b..1e1db3f1 100644 --- a/src/llm-coding-tools-models-dev/src/cache/format.rs +++ b/src/llm-coding-tools-models-dev/src/cache/format.rs @@ -116,6 +116,7 @@ impl CacheFileData { } /// Returns compressed payload length in bytes. + #[allow(dead_code)] // public API #[inline] pub(crate) fn payload_len_compressed(&self) -> u32 { self.payload_len_compressed diff --git a/src/llm-coding-tools-models-dev/src/cache/mod.rs b/src/llm-coding-tools-models-dev/src/cache/mod.rs index 695f7660..43af19b8 100644 --- a/src/llm-coding-tools-models-dev/src/cache/mod.rs +++ b/src/llm-coding-tools-models-dev/src/cache/mod.rs @@ -12,7 +12,6 @@ //! The public API currently exposes path resolution only; container helpers are //! 
crate-internal until the sync/load flow is wired. -#[allow(dead_code)] // Wired into the load/sync path down the road pub(crate) mod format; mod path; pub(crate) mod payload; diff --git a/src/llm-coding-tools-models-dev/src/lib.rs b/src/llm-coding-tools-models-dev/src/lib.rs index 29e3bb49..60fef51e 100644 --- a/src/llm-coding-tools-models-dev/src/lib.rs +++ b/src/llm-coding-tools-models-dev/src/lib.rs @@ -10,7 +10,6 @@ compile_error!(concat!( "must be enabled." )); -#[allow(dead_code)] // Wired into catalog build/load slices mod api; pub mod cache; pub mod catalog; From 7e42c9c56aa41a242f5c70370a7db386c396eaa2 Mon Sep 17 00:00:00 2001 From: Sewer56 Date: Mon, 9 Mar 2026 00:06:11 +0000 Subject: [PATCH 13/22] Fixed: Resolve intra-doc link errors in models-dev crate CI was failing due to rustdoc treating warnings as errors with RUSTDOCFLAGS="-D warnings". The local verify scripts did not catch these because they weren't documenting private modules. Changes: - Fixed unresolved link to ModelCatalog in catalog_sources.rs using full path - Fixed redundant explicit link target in payload.rs (ModelCatalog is in scope) - Added --document-private-items to verify.sh and verify.ps1 Benefits: - Local verify scripts now match CI doc validation behavior - Catches intra-doc link errors before pushing --- src/.cargo/verify.ps1 | 2 +- src/.cargo/verify.sh | 2 +- src/llm-coding-tools-models-dev/src/api/catalog_sources.rs | 2 +- src/llm-coding-tools-models-dev/src/cache/payload.rs | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/.cargo/verify.ps1 b/src/.cargo/verify.ps1 index 912389f1..481ad21b 100644 --- a/src/.cargo/verify.ps1 +++ b/src/.cargo/verify.ps1 @@ -57,7 +57,7 @@ Write-Host "Docs..." 
$originalRustdocFlags = $env:RUSTDOCFLAGS $env:RUSTDOCFLAGS = "-D warnings" try { - Invoke-LoggedCommand "cargo" @("doc", "--workspace", "--no-deps", "--quiet") + Invoke-LoggedCommand "cargo" @("doc", "--workspace", "--document-private-items", "--no-deps", "--quiet") } finally { $env:RUSTDOCFLAGS = $originalRustdocFlags } diff --git a/src/.cargo/verify.sh b/src/.cargo/verify.sh index eefd0e0b..36ff4728 100755 --- a/src/.cargo/verify.sh +++ b/src/.cargo/verify.sh @@ -43,7 +43,7 @@ run_cmd cargo test -p llm-coding-tools-core --no-default-features --features blo run_cmd cargo test -p llm-coding-tools-models-dev --no-default-features --features blocking --quiet echo "Docs..." -run_cmd env RUSTDOCFLAGS="-D warnings" cargo doc --workspace --no-deps --quiet +run_cmd env RUSTDOCFLAGS="-D warnings" cargo doc --workspace --document-private-items --no-deps --quiet echo "Formatting..." run_cmd cargo fmt --all --check --quiet diff --git a/src/llm-coding-tools-models-dev/src/api/catalog_sources.rs b/src/llm-coding-tools-models-dev/src/api/catalog_sources.rs index e8b53ccf..0f012ef3 100644 --- a/src/llm-coding-tools-models-dev/src/api/catalog_sources.rs +++ b/src/llm-coding-tools-models-dev/src/api/catalog_sources.rs @@ -1,7 +1,7 @@ //! models.dev API -> `ModelCatalog` mapping. //! //! This module parses models.dev `api.json`, maps provider/model metadata into -//! transient core builder inputs, and immediately constructs a [`ModelCatalog`]. +//! transient core builder inputs, and immediately constructs a [`ModelCatalog`](llm_coding_tools_core::models::ModelCatalog). //! //! Mapping policy: //! - missing limits default to `0`; diff --git a/src/llm-coding-tools-models-dev/src/cache/payload.rs b/src/llm-coding-tools-models-dev/src/cache/payload.rs index 37bc5fd7..3a1e65b0 100644 --- a/src/llm-coding-tools-models-dev/src/cache/payload.rs +++ b/src/llm-coding-tools-models-dev/src/cache/payload.rs @@ -1,7 +1,7 @@ //! Cache payload serialization for models.dev catalog data. //! //! 
The payload is stored as simple owned rows so it can be encoded compactly -//! with bitcode and rebuilt into a [`ModelCatalog`](llm_coding_tools_core::models::ModelCatalog) +//! with bitcode and rebuilt into a [`ModelCatalog`] //! without reparsing the original JSON. //! //! ## Compression Benchmark From 3dccb9b10dd6a5610975bcade31925da8fd40faa Mon Sep 17 00:00:00 2001 From: Sewer56 Date: Mon, 9 Mar 2026 00:16:59 +0000 Subject: [PATCH 14/22] Changed: Document models.dev cache size and load characteristics Add concise README guidance for the cache footprint and hot-path timings so users can estimate runtime costs without reading the implementation. Changes: - Added cache size guidance based on a recent models.dev snapshot - Documented compression, decompression, and ModelCatalog load timings - Noted the single-core Ryzen 9950X3D measurement context Benefits: - Makes cache tradeoffs easier to understand at a glance - Helps consumers estimate disk and load overhead more confidently --- src/llm-coding-tools-models-dev/README.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/llm-coding-tools-models-dev/README.md b/src/llm-coding-tools-models-dev/README.md index cf2e0d7a..45698787 100644 --- a/src/llm-coding-tools-models-dev/README.md +++ b/src/llm-coding-tools-models-dev/README.md @@ -126,6 +126,17 @@ By default, cache is stored in the platform cache directory: Set `LLM_CODING_TOOLS_MODELS_DEV_CACHE_PATH` to override this path. 
+## Cache size and performance + +Current ballpark from a recent `models.dev/api.json` snapshot: + +- Size: about `1.31 MiB` JSON -> `109 KiB` serialized payload -> `23.7 KiB` compressed cache +- Compression: about `10.1 ms` with current `zstd` level `17` +- Decompression: about `0.057 ms` (`57 us`) in `--release` +- Cache load into `ModelCatalog`: about `0.31 ms` (`read + decompress + decode + build`) + +Measured on a single core of a Ryzen `9950X3D`; these are rough guidance numbers and will drift as the upstream catalog changes. + ## Feature flags - `tokio` (default): async runtime support. From 3d27fa27c2b62c4242d12992bc0e4c99320e872c Mon Sep 17 00:00:00 2001 From: Sewer56 Date: Mon, 9 Mar 2026 09:19:26 +0000 Subject: [PATCH 15/22] Changed: Expand provider env-var capacity to support models.dev Support providers with up to 7 env vars (e.g., amazon-bedrock with 4) while maintaining 2-byte packed storage and memory efficiency. Changes: - Repacked PackedEnvRange from 14+2 bits to 13+3 bits (start+count) - Max env vars per provider: 3 -> 7 - Max global env-var pool: 16384 -> 8192 - Provider lookup env vars: fixed array -> TinyVec (2 inline slots) - Updated boundary tests for new limits and simplified to provider-only - Benchmarks: max-provider test skips env vars to avoid pool limit Benefits: - Loads current models.dev payload (amazon-bedrock has 4 env vars) - Keeps packed range at 2 bytes with minimal trade-offs - Maintains memory-conscious lookup for common 1-2 env var providers --- src/Cargo.lock | 1 + src/llm-coding-tools-core/Cargo.toml | 1 + .../benches/model_catalog_builder.rs | 16 +++-- .../src/models/catalog/internal/builder.rs | 69 ++++++++++--------- .../catalog/internal/packed_env_range.rs | 20 +++--- .../src/models/catalog/mod.rs | 24 +++---- .../models/catalog/public/builder_types.rs | 2 +- .../src/models/catalog/public/entry.rs | 15 ++-- .../src/models/catalog/public/mod.rs | 1 + 9 files changed, 80 insertions(+), 69 deletions(-) diff --git 
a/src/Cargo.lock b/src/Cargo.lock index 68757b47..2b708bc7 100644 --- a/src/Cargo.lock +++ b/src/Cargo.lock @@ -1491,6 +1491,7 @@ dependencies = [ "serde_json", "tempfile", "thiserror 2.0.18", + "tinyvec", "tinyvec_string", "tokio", "wiremock", diff --git a/src/llm-coding-tools-core/Cargo.toml b/src/llm-coding-tools-core/Cargo.toml index b88ce699..3f7ec206 100644 --- a/src/llm-coding-tools-core/Cargo.toml +++ b/src/llm-coding-tools-core/Cargo.toml @@ -54,6 +54,7 @@ hashbrown = "0.16" # Inline string storage for patterns tinyvec_string = { version = "0.3", features = ["alloc"] } +tinyvec = { version = "1.10", features = ["alloc"] } # Efficient immutable string table for provider URLs and env vars lite-strtab = "0.2" diff --git a/src/llm-coding-tools-core/benches/model_catalog_builder.rs b/src/llm-coding-tools-core/benches/model_catalog_builder.rs index d772ec34..cc83be5e 100644 --- a/src/llm-coding-tools-core/benches/model_catalog_builder.rs +++ b/src/llm-coding-tools-core/benches/model_catalog_builder.rs @@ -32,7 +32,7 @@ impl Dataset { } } -fn make_dataset(provider_count: usize, model_count: usize) -> Dataset { +fn make_dataset(provider_count: usize, model_count: usize, with_env_vars: bool) -> Dataset { debug_assert!(provider_count > 0); let mut providers = Vec::with_capacity(provider_count); @@ -41,7 +41,11 @@ fn make_dataset(provider_count: usize, model_count: usize) -> Dataset { format!("provider-{i}"), ProviderInfo { api_url: format!("https://provider-{i}.example/v1"), - env_vars: vec![format!("PROVIDER_{i}_API_KEY")], + env_vars: if with_env_vars { + vec![format!("PROVIDER_{i}_API_KEY")] + } else { + Vec::new() + }, api_type: if (i & 1) == 0 { ProviderType::OpenAiCompletions } else { @@ -99,11 +103,11 @@ fn construct_batch(providers: &[ProviderSource], provider_models: &[ProviderMode fn benchmark_builder_construction(c: &mut Criterion) { let mut group = c.benchmark_group("model_catalog_builder_construct"); - for (name, provider_count, model_count) in [ - 
("models_dev_snapshot", 96usize, 3031usize), - ("max", 16384usize, 65535usize), + for (name, provider_count, model_count, with_env_vars) in [ + ("models_dev_snapshot", 96usize, 3031usize, true), + ("max", 16384usize, 65535usize, false), ] { - let dataset = make_dataset(provider_count, model_count); + let dataset = make_dataset(provider_count, model_count, with_env_vars); let provider_model_sources = dataset.provider_model_sources(); group.throughput(Throughput::Elements( (provider_count + dataset.provider_models.len()) as u64, diff --git a/src/llm-coding-tools-core/src/models/catalog/internal/builder.rs b/src/llm-coding-tools-core/src/models/catalog/internal/builder.rs index d00b333c..db1f59cf 100644 --- a/src/llm-coding-tools-core/src/models/catalog/internal/builder.rs +++ b/src/llm-coding-tools-core/src/models/catalog/internal/builder.rs @@ -116,7 +116,7 @@ fn populate_tables_once( provider_info.api_type, )?; - // SAFETY: analyze_provider_sources bounds env_start and env_count (<= 3). + // SAFETY: analyze_provider_sources bounds env_start and env_count (<= 7). env_start += u16::from(env_count); } @@ -331,7 +331,7 @@ fn analyze_provider_sources( for provider in providers { // SAFETY: total_env_keys is the start index for this provider. - // It must fit the 14-bit PackedEnvRange start field. + // It must fit the 13-bit PackedEnvRange start field. if total_env_keys > max_env_start { return Err(ModelCatalogBuildError::TooManyEnvVarKeys { count: total_env_keys, @@ -341,7 +341,7 @@ fn analyze_provider_sources( let provider_info = &provider.provider; let env_count = provider_info.env_vars.len(); - // SAFETY: per-provider count must fit the 2-bit count field. + // SAFETY: per-provider count must fit the 3-bit count field. 
if env_count > max_env_count { return Err( ModelCatalogBuildError::TooManyProviderEnvVarsForOneProvider { @@ -611,7 +611,7 @@ mod tests { "alpha", provider( "https://alpha.example", - &["A", "B", "C", "D"], + &["A", "B", "C", "D", "E", "F", "G", "H"], ProviderType::Azure, ), )]; @@ -626,8 +626,8 @@ mod tests { assert_eq!( err, ModelCatalogBuildError::TooManyProviderEnvVarsForOneProvider { - count: 4, - max: 3, + count: 8, + max: 7, } ); } @@ -685,17 +685,13 @@ mod tests { #[test] fn too_many_total_env_vars_returns_error() { - // 5462 providers * 3 env vars = 16386, so the 5463rd provider would have - // a start index of 16386, which exceeds MAX_ENV_START (16383). - let mut providers = Vec::with_capacity(5463); - for i in 0..5463usize { + // 8192 providers * 1 env var = 8192, so the 8193rd provider would have + // a start index of 8192, which exceeds MAX_ENV_START (8191). + let mut providers = Vec::with_capacity(8193); + for i in 0..8193usize { providers.push(provider_source( &format!("provider_{}", i), - provider( - "https://example.com", - &["VAR1", "VAR2", "VAR3"], - ProviderType::Azure, - ), + provider("https://example.com", &["VAR1"], ProviderType::Azure), )); } let mut provider_models = Vec::with_capacity(1); @@ -710,8 +706,8 @@ mod tests { assert_eq!( err, ModelCatalogBuildError::TooManyEnvVarKeys { - count: 16_386, - max: 16_383, + count: 8_192, + max: 8_191, } ); } @@ -720,32 +716,43 @@ mod tests { } #[test] - fn max_14bit_start_with_tail_entries_succeeds() { - // The last provider's start index can be 16383 and still be valid when it - // contributes 3 keys at indices 16383, 16384, and 16385. - let mut providers = Vec::with_capacity(5462); - for i in 0..5462usize { + fn max_13bit_start_with_tail_entries_succeeds() { + // The last provider's start index can be 8191 and still be valid when it + // contributes keys at indices 8191 through 8197. 
+ let mut providers = Vec::with_capacity(1172); + for i in 0..1170usize { providers.push(provider_source( &format!("provider_{}", i), provider( "https://example.com", - &["VAR1", "VAR2", "VAR3"], + &["VAR1", "VAR2", "VAR3", "VAR4", "VAR5", "VAR6", "VAR7"], ProviderType::Azure, ), )); } - let provider_models = vec![ProviderModelSource::new( - ProviderIdx::new(5461), - "m1", - info(4096, 512), - )]; + providers.push(provider_source( + "provider_1170", + provider("https://example.com", &["VAR1"], ProviderType::Azure), + )); + providers.push(provider_source( + "provider_1171", + provider( + "https://example.com", + &["VAR1", "VAR2", "VAR3", "VAR4", "VAR5", "VAR6", "VAR7"], + ProviderType::Azure, + ), + )); + let provider_models = Vec::new(); let catalog = build_from_source(&providers, &provider_models).expect("boundary case should pass"); - let (provider, _) = catalog - .lookup("provider_5461", "m1") + let provider = catalog + .provider_from_index(ProviderIdx::new(1171)) .expect("last provider should be addressable"); - assert_eq!(provider.env_vars(), &["VAR1", "VAR2", "VAR3"]); + assert_eq!( + provider.env_vars(), + &["VAR1", "VAR2", "VAR3", "VAR4", "VAR5", "VAR6", "VAR7"] + ); } } diff --git a/src/llm-coding-tools-core/src/models/catalog/internal/packed_env_range.rs b/src/llm-coding-tools-core/src/models/catalog/internal/packed_env_range.rs index bf7b982b..6122010f 100644 --- a/src/llm-coding-tools-core/src/models/catalog/internal/packed_env_range.rs +++ b/src/llm-coding-tools-core/src/models/catalog/internal/packed_env_range.rs @@ -1,15 +1,15 @@ //! Packed env-var range entry for provider-to-env-key mapping. //! //! Layout (`u16`): -//! - `14` bits: start index into provider_env_keys StringTable -//! - `2` bits: count of env keys for this provider (0..=3) +//! - `13` bits: start index into provider_env_keys StringTable +//! 
- `3` bits: count of env keys for this provider (0..=7) use bitfields::bitfield; /// Maximum env-var count per provider representable by PackedEnvRange. -pub const MAX_ENV_RANGE_COUNT: u8 = 3; -/// Maximum start index representable by PackedEnvRange (14 bits). -pub const MAX_ENV_START: u16 = (1u16 << 14) - 1; // 16383 +pub const MAX_ENV_RANGE_COUNT: u8 = 7; +/// Maximum start index representable by PackedEnvRange (13 bits). +pub const MAX_ENV_START: u16 = (1u16 << 13) - 1; // 8191 /// Packed env-var range entry. /// @@ -17,9 +17,9 @@ pub const MAX_ENV_START: u16 = (1u16 << 14) - 1; // 16383 #[bitfield(u16)] #[derive(Clone, Copy, PartialEq, Eq, Hash)] pub struct PackedEnvRange { - #[bits(14)] + #[bits(13)] start: u16, - #[bits(2)] + #[bits(3)] count: u8, } @@ -27,7 +27,7 @@ impl PackedEnvRange { /// Creates one packed env-var range entry. /// /// SAFETY: The `start` parameter is not validated here. The caller must ensure - /// `start` fits within 14 bits (max 16383). This invariant is enforced in + /// `start` fits within 13 bits (max 8191). This invariant is enforced in /// `analyze_provider_sources` before `populate_tables_once` calls this function. #[inline] pub fn from_parts(start: u16, count: u8) -> Self { @@ -56,7 +56,7 @@ mod tests { #[test] fn count_capped_at_max() { - let packed = PackedEnvRange::from_parts(0, 5); - assert_eq!(packed.count(), 3); // capped to MAX_ENV_RANGE_COUNT + let packed = PackedEnvRange::from_parts(0, 8); + assert_eq!(packed.count(), 7); // capped to MAX_ENV_RANGE_COUNT } } diff --git a/src/llm-coding-tools-core/src/models/catalog/mod.rs b/src/llm-coding-tools-core/src/models/catalog/mod.rs index d369d843..79ae717f 100644 --- a/src/llm-coding-tools-core/src/models/catalog/mod.rs +++ b/src/llm-coding-tools-core/src/models/catalog/mod.rs @@ -151,8 +151,8 @@ //! | ------------------------- | ----------: | ------------------------------------------------ | //! | Max providers | 65,536 | Addressable by 16-bit provider index | //! 
| Max model configs | 65,536 | Addressable by 16-bit model configuration index | -//! | Max provider env vars | 16,384 | Global env-var pool offset (14-bit) | -//! | Max env vars per provider | 3 | Count field in provider range entry (2-bit) | +//! | Max provider env vars | 8,192 | Global env-var pool offset (13-bit) | +//! | Max env vars per provider | 7 | Count field in provider range entry (3-bit) | //! | Max input tokens | 536,870,911 | 29-bit packed field (≈536M) | //! | Max output tokens | 134,217,727 | 27-bit packed field (≈134M) | //! | Hash bits retained | 48 | Truncated from 64-bit hash output | @@ -231,6 +231,7 @@ use internal::{ PackedEnvRange, PackedModelEntry, PackedProviderModelTableEntry, PackedProviderTableEntry, }; use lite_strtab::{StringId, StringTable}; +use public::{ProviderEnvVars, INLINE_PROVIDER_ENV_VARS}; pub use public::builder_types::{ModelCatalogBuildError, ProviderModelSource, ProviderSource}; pub use public::*; @@ -472,21 +473,16 @@ impl ModelCatalog { let start = range.start(); let count = range.count() as usize; - let mut env_vars = ["", "", ""]; - #[allow(clippy::needless_range_loop)] + let mut env_vars: ProviderEnvVars<'_> = + ProviderEnvVars::with_capacity(count.max(INLINE_PROVIDER_ENV_VARS)); for x in 0..count { - env_vars[x] = self - .provider_env_keys - .get(StringId::new(ProviderIdx::new(start + x as u16)))?; + env_vars.push( + self.provider_env_keys + .get(StringId::new(ProviderIdx::new(start + x as u16)))?, + ); } - Some(Provider::new( - provider_idx, - api_url, - env_vars, - count as u8, - api_type, - )) + Some(Provider::new(provider_idx, api_url, env_vars, api_type)) } /// Looks up a model by its configuration index. 
diff --git a/src/llm-coding-tools-core/src/models/catalog/public/builder_types.rs b/src/llm-coding-tools-core/src/models/catalog/public/builder_types.rs index 014b17b6..b1749660 100644 --- a/src/llm-coding-tools-core/src/models/catalog/public/builder_types.rs +++ b/src/llm-coding-tools-core/src/models/catalog/public/builder_types.rs @@ -175,7 +175,7 @@ pub enum ModelCatalogBuildError { /// Maximum supported unique model configuration count. max: usize, }, - /// One provider has too many env vars for the packed count field (max 3). + /// One provider has too many env vars for the packed count field (max 7). #[error("provider env-var count {count} exceeds supported maximum {max}")] TooManyProviderEnvVarsForOneProvider { /// Number of env vars supplied for one provider. diff --git a/src/llm-coding-tools-core/src/models/catalog/public/entry.rs b/src/llm-coding-tools-core/src/models/catalog/public/entry.rs index fc4f4060..e747381d 100644 --- a/src/llm-coding-tools-core/src/models/catalog/public/entry.rs +++ b/src/llm-coding-tools-core/src/models/catalog/public/entry.rs @@ -14,6 +14,11 @@ use super::{Modality, ModelIdx, ProviderIdx}; use crate::models::catalog::internal::Fixed4; use crate::models::ProviderType; +use tinyvec::TinyVec; + +pub(crate) const INLINE_PROVIDER_ENV_VARS: usize = 2; + +pub(crate) type ProviderEnvVars<'a> = TinyVec<[&'a str; INLINE_PROVIDER_ENV_VARS]>; /// Provider lookup result. #[derive(Debug, Clone, PartialEq, Eq)] @@ -23,9 +28,7 @@ pub struct Provider<'a> { /// Provider base URL. pub api_url: &'a str, /// Candidate environment variables used to resolve API keys. - env_vars: [&'a str; 3], - /// Number of valid entries in `env_vars`. - env_vars_count: u8, + env_vars: ProviderEnvVars<'a>, /// Type of API used by the provider. 
pub api_type: ProviderType, } @@ -36,15 +39,13 @@ impl<'a> Provider<'a> { pub(crate) fn new( provider_idx: ProviderIdx, api_url: &'a str, - env_vars: [&'a str; 3], - env_vars_count: u8, + env_vars: ProviderEnvVars<'a>, api_type: ProviderType, ) -> Self { Self { provider_idx, api_url, env_vars, - env_vars_count, api_type, } } @@ -52,7 +53,7 @@ impl<'a> Provider<'a> { /// Returns the candidate environment variables used to resolve API keys. #[inline] pub fn env_vars(&self) -> &[&'a str] { - &self.env_vars[..self.env_vars_count as usize] + self.env_vars.as_slice() } } diff --git a/src/llm-coding-tools-core/src/models/catalog/public/mod.rs b/src/llm-coding-tools-core/src/models/catalog/public/mod.rs index c6b0c044..adf55b2e 100644 --- a/src/llm-coding-tools-core/src/models/catalog/public/mod.rs +++ b/src/llm-coding-tools-core/src/models/catalog/public/mod.rs @@ -7,6 +7,7 @@ pub use builder_types::{LookupTableKind, ModelInfo, ProviderInfo}; pub use entry::{Model, Provider}; +pub(crate) use entry::{ProviderEnvVars, INLINE_PROVIDER_ENV_VARS}; pub use modality::Modality; pub use model_idx::ModelIdx; pub use provider_idx::ProviderIdx; From cb327f9eb5292ed555a2cf730a620434cf59e081 Mon Sep 17 00:00:00 2001 From: Sewer56 Date: Mon, 9 Mar 2026 10:30:36 +0000 Subject: [PATCH 16/22] Fixed: Correct modality fallback documentation in catalog_sources.rs The documentation incorrectly stated that unmapped modality labels default to Modality::TEXT. The actual behavior returns Modality::empty(), which correctly indicates a data error that should be fixed at the source. 
Changes: - Updated doc comment to reflect actual Modality::empty() fallback behavior - Aligned documentation with existing test assertions Benefits: - Documentation now matches implementation - Clarifies that empty modalities indicate data issues, not defaults --- src/llm-coding-tools-models-dev/src/api/catalog_sources.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/llm-coding-tools-models-dev/src/api/catalog_sources.rs b/src/llm-coding-tools-models-dev/src/api/catalog_sources.rs index 0f012ef3..a0968293 100644 --- a/src/llm-coding-tools-models-dev/src/api/catalog_sources.rs +++ b/src/llm-coding-tools-models-dev/src/api/catalog_sources.rs @@ -8,8 +8,8 @@ //! - model modalities are mapped from `modalities.input[]`/`modalities.output[]` //! into directional [`Modality`] flags; //! - unknown npm package identifiers map to [`ProviderType::Unknown`]; -//! - unknown modality labels are ignored; if nothing maps, modalities default to -//! [`Modality::TEXT`]; +//! - unknown modality labels are ignored; if nothing maps, modalities remain +//! [`Modality::empty()`]; //! - model rows remain provider-scoped; shared configurations are deduplicated by //! core during catalog build. From 709489f191f3e0df4620e4ea2bad27c9fe85f475 Mon Sep 17 00:00:00 2001 From: Sewer56 Date: Mon, 9 Mar 2026 10:45:47 +0000 Subject: [PATCH 17/22] Fixed: Concurrent cache writers collision and Windows rename failure Use tempfile::NamedTempFile with persist() for atomic cache writes instead of fixed ".tmp" suffix that caused collisions and cross-platform issues. 
Changes: - Use NamedTempFile::new_in() for unique temp file names per write - Use persist() for cross-platform atomic replacement (handles Windows) - Move tempfile from dev-dependencies to production dependencies - Add JoinHandle error variant for tokio spawn_blocking errors - Remove unused write/rename fs helpers Benefits: - Eliminates race condition between concurrent cache writers - Fixes atomic replace on Windows (delete+rename fallback) - Guarantees each write uses its own temp file --- src/llm-coding-tools-models-dev/Cargo.toml | 4 +- .../src/cache/format.rs | 49 +++++++++++++------ src/llm-coding-tools-models-dev/src/error.rs | 5 ++ .../src/fs/blocking_impl.rs | 12 ----- .../src/fs/tokio_impl.rs | 12 ----- 5 files changed, 42 insertions(+), 40 deletions(-) diff --git a/src/llm-coding-tools-models-dev/Cargo.toml b/src/llm-coding-tools-models-dev/Cargo.toml index 26ef2550..85c65cd7 100644 --- a/src/llm-coding-tools-models-dev/Cargo.toml +++ b/src/llm-coding-tools-models-dev/Cargo.toml @@ -55,10 +55,12 @@ serde_json = "1.0.145" # Ergonomic error definitions thiserror = "2.0.18" +# Temp file with atomic rename support +tempfile = "3.26" + # Async runtime (when tokio feature enabled) tokio = { version = "1.49", features = ["fs", "io-util"], optional = true } [dev-dependencies] tokio = { version = "1.49", features = ["rt", "macros"] } -tempfile = "3.26" serial_test = "3" diff --git a/src/llm-coding-tools-models-dev/src/cache/format.rs b/src/llm-coding-tools-models-dev/src/cache/format.rs index 1e1db3f1..ca1ac88e 100644 --- a/src/llm-coding-tools-models-dev/src/cache/format.rs +++ b/src/llm-coding-tools-models-dev/src/cache/format.rs @@ -41,7 +41,7 @@ use crate::{ use endian_writer::{EndianReader, EndianWriter, HasSize, LittleEndianReader, LittleEndianWriter}; use endian_writer_derive::EndianWritable; use std::mem::size_of; -use std::path::{Path, PathBuf}; +use std::path::Path; use std::ptr::copy_nonoverlapping; /// Fixed v1 prelude, encoded little-endian. 
@@ -87,12 +87,7 @@ pub(crate) struct CacheFileData { file_bytes: Box<[u8]>, } -/// Returns a temporary path for atomic cache writes. -fn temp_cache_path(path: &Path) -> PathBuf { - let mut temp = path.as_os_str().to_os_string(); - temp.push(".tmp"); - PathBuf::from(temp) -} + impl CacheFileData { /// Returns the optional ETag as a borrowed byte slice. @@ -175,20 +170,24 @@ pub(crate) async fn read_cache_file(path: &Path) -> CatalogResult }) } -/// Writes a cache container to disk. +/// Writes a cache container to disk atomically. +/// +/// Uses `tempfile::NamedTempFile` to ensure unique temp files for concurrent +/// writers and cross-platform atomic replacement via `persist()`. /// /// # Errors /// /// Returns [`CatalogError::CacheFormat`] if a block length exceeds v1 `u32` -/// limits. +/// limits, or [`CatalogError::Io`] on I/O failure. #[maybe_async::maybe_async] pub(crate) async fn write_cache_file( path: &Path, input: &CacheWriteInput<'_>, ) -> CatalogResult<()> { - if let Some(parent) = path.parent() { - fs::create_dir_all(parent).await?; - } + let parent = path.parent().ok_or_else(|| { + CatalogError::CacheFormat("cache path has no parent directory") + })?; + fs::create_dir_all(parent).await?; let etag_bytes = input.etag.unwrap_or(&[]); let prelude = CachePreludeV1 { @@ -230,9 +229,29 @@ pub(crate) async fn write_cache_file( } let file_bytes = fs::assume_init_u8_slice(uninit); - let temp_path = temp_cache_path(path); - fs::write(&temp_path, &file_bytes).await?; - fs::rename(&temp_path, path).await?; + + #[cfg(feature = "blocking")] + { + use std::io::Write as _; + let mut temp = tempfile::NamedTempFile::new_in(parent)?; + temp.write_all(&file_bytes)?; + temp.persist(path).map_err(|e| e.error)?; + } + + #[cfg(feature = "tokio")] + { + let file_bytes: Box<[u8]> = file_bytes; + let path = path.to_path_buf(); + let parent = parent.to_path_buf(); + tokio::task::spawn_blocking(move || { + use std::io::Write as _; + let mut temp = 
tempfile::NamedTempFile::new_in(&parent)?; + temp.write_all(&file_bytes)?; + temp.persist(&path).map_err(|e| e.error) + }) + .await??; + } + Ok(()) } diff --git a/src/llm-coding-tools-models-dev/src/error.rs b/src/llm-coding-tools-models-dev/src/error.rs index ba0b2b21..3b3fdfb4 100644 --- a/src/llm-coding-tools-models-dev/src/error.rs +++ b/src/llm-coding-tools-models-dev/src/error.rs @@ -41,6 +41,11 @@ pub enum CatalogError { /// The catalog failed to build from source rows. #[error("catalog build error: {0}")] ModelCatalogBuild(#[from] ModelCatalogBuildError), + + /// A spawn_blocking task failed. + #[cfg(feature = "tokio")] + #[error("blocking task failed: {0}")] + JoinHandle(#[from] tokio::task::JoinError), } /// Convenience type alias for catalog operations. diff --git a/src/llm-coding-tools-models-dev/src/fs/blocking_impl.rs b/src/llm-coding-tools-models-dev/src/fs/blocking_impl.rs index d35ffbec..b087b7b1 100644 --- a/src/llm-coding-tools-models-dev/src/fs/blocking_impl.rs +++ b/src/llm-coding-tools-models-dev/src/fs/blocking_impl.rs @@ -21,20 +21,8 @@ pub(crate) fn read(path: impl AsRef) -> std::io::Result> { Ok(super::assume_init_u8_slice(bytes)) } -/// Writes all bytes to a file, creating or truncating it. -#[inline] -pub(crate) fn write(path: impl AsRef, bytes: &[u8]) -> std::io::Result<()> { - std::fs::write(path, bytes) -} - /// Creates a directory and all parent directories. #[inline] pub(crate) fn create_dir_all(path: impl AsRef) -> std::io::Result<()> { std::fs::create_dir_all(path) } - -/// Renames a file, replacing the destination if it exists. 
-#[inline] -pub(crate) fn rename(from: impl AsRef, to: impl AsRef) -> std::io::Result<()> { - std::fs::rename(from, to) -} diff --git a/src/llm-coding-tools-models-dev/src/fs/tokio_impl.rs b/src/llm-coding-tools-models-dev/src/fs/tokio_impl.rs index 830d29e9..77f304dd 100644 --- a/src/llm-coding-tools-models-dev/src/fs/tokio_impl.rs +++ b/src/llm-coding-tools-models-dev/src/fs/tokio_impl.rs @@ -22,20 +22,8 @@ pub(crate) async fn read(path: impl AsRef) -> std::io::Result> { Ok(super::assume_init_u8_slice(bytes)) } -/// Writes all bytes to a file, creating or truncating it. -#[inline] -pub(crate) async fn write(path: impl AsRef, bytes: &[u8]) -> std::io::Result<()> { - tokio::fs::write(path, bytes).await -} - /// Creates a directory and all parent directories. #[inline] pub(crate) async fn create_dir_all(path: impl AsRef) -> std::io::Result<()> { tokio::fs::create_dir_all(path).await } - -/// Renames a file, replacing the destination if it exists. -#[inline] -pub(crate) async fn rename(from: impl AsRef, to: impl AsRef) -> std::io::Result<()> { - tokio::fs::rename(from, to).await -} From 4814497c13fece7d2bef551440f643bf7fafcafd Mon Sep 17 00:00:00 2001 From: Sewer56 Date: Wed, 11 Mar 2026 21:55:31 +0000 Subject: [PATCH 18/22] Fixed: Apply rustfmt formatting to cache module Correct formatting issues that caused CI failure. 
Changes: - Removed extra blank lines after struct definition - Reformatted method chain for parent directory extraction Benefits: - CI passes consistently - Code follows project style guidelines --- src/llm-coding-tools-models-dev/src/cache/format.rs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/llm-coding-tools-models-dev/src/cache/format.rs b/src/llm-coding-tools-models-dev/src/cache/format.rs index ca1ac88e..55e62dd6 100644 --- a/src/llm-coding-tools-models-dev/src/cache/format.rs +++ b/src/llm-coding-tools-models-dev/src/cache/format.rs @@ -87,8 +87,6 @@ pub(crate) struct CacheFileData { file_bytes: Box<[u8]>, } - - impl CacheFileData { /// Returns the optional ETag as a borrowed byte slice. #[inline] @@ -184,9 +182,9 @@ pub(crate) async fn write_cache_file( path: &Path, input: &CacheWriteInput<'_>, ) -> CatalogResult<()> { - let parent = path.parent().ok_or_else(|| { - CatalogError::CacheFormat("cache path has no parent directory") - })?; + let parent = path + .parent() + .ok_or_else(|| CatalogError::CacheFormat("cache path has no parent directory"))?; fs::create_dir_all(parent).await?; let etag_bytes = input.etag.unwrap_or(&[]); From 7da6d4d8c30b7029b9e1db598159e7d0cf05dfb3 Mon Sep 17 00:00:00 2001 From: Sewer56 Date: Wed, 11 Mar 2026 22:10:58 +0000 Subject: [PATCH 19/22] Fixed: Improve robustness of cache path handling and format parsing Use var_os for environment variable to correctly handle non-UTF8 OS paths and reject empty values explicitly. Add checked arithmetic for cache file size calculations to prevent overflow. 
Changes: - path.rs: Replace var with var_os for non-UTF8 path support - path.rs: Return Configuration error for empty env var values - format.rs: Use checked_add to prevent integer overflow in size calc Benefits: - Correctly handles paths with arbitrary bytes on Unix systems - Provides clear error for misconfigured empty cache path - Prevents potential panic from integer overflow on malformed cache --- src/llm-coding-tools-models-dev/src/cache/format.rs | 7 ++++++- src/llm-coding-tools-models-dev/src/cache/path.rs | 10 ++++++++-- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/src/llm-coding-tools-models-dev/src/cache/format.rs b/src/llm-coding-tools-models-dev/src/cache/format.rs index 55e62dd6..8059ac1b 100644 --- a/src/llm-coding-tools-models-dev/src/cache/format.rs +++ b/src/llm-coding-tools-models-dev/src/cache/format.rs @@ -152,7 +152,12 @@ pub(crate) async fn read_cache_file(path: &Path) -> CatalogResult let prelude = decode_prelude(&file_bytes[..CACHE_HEADER_LEN]); let etag_len = prelude.etag_len as usize; let payload_len_compressed = prelude.payload_len_compressed as usize; - let expected_total = CACHE_HEADER_LEN + etag_len + payload_len_compressed; // unlikely to overflow. file is trusted. + let expected_total = CACHE_HEADER_LEN + .checked_add(etag_len) + .and_then(|v| v.checked_add(payload_len_compressed)) + .ok_or(CatalogError::CacheFormat( + "cache file size exceeds platform limits", + ))?; if file_bytes.len() != expected_total { return Err(CatalogError::CacheFormat( diff --git a/src/llm-coding-tools-models-dev/src/cache/path.rs b/src/llm-coding-tools-models-dev/src/cache/path.rs index 9aa5bf58..e43d256e 100644 --- a/src/llm-coding-tools-models-dev/src/cache/path.rs +++ b/src/llm-coding-tools-models-dev/src/cache/path.rs @@ -46,8 +46,14 @@ const CACHE_FILENAME: &str = "models.dev.catalog.v1.cache"; /// ``` pub fn shared_cache_path() -> CatalogResult { // 1. 
Check env var first - if let Ok(path) = std::env::var(CACHE_PATH_ENV_VAR) { - return Ok(PathBuf::from(path)); + if let Some(os_str) = std::env::var_os(CACHE_PATH_ENV_VAR) { + if os_str.is_empty() { + return Err(CatalogError::Configuration(format!( + "{} is set but empty", + CACHE_PATH_ENV_VAR + ))); + } + return Ok(PathBuf::from(&os_str)); } // 2. Fall back to dirs::cache_dir() From b07462ed20ffb14495d16ac54636f85fc0ddcf90 Mon Sep 17 00:00:00 2001 From: Sewer56 Date: Wed, 11 Mar 2026 22:15:56 +0000 Subject: [PATCH 20/22] Security: Add request timeouts to catalog sync HTTP clients Prevent indefinite hangs on network issues by setting connect and overall request timeouts on both async (tokio) and blocking reqwest clients. Changes: - Added 30-second timeout constant for HTTP operations - Configured connect_timeout and timeout on async client builder - Configured connect_timeout and timeout on blocking client builder Benefits: - Prevents indefinite hangs on slow/unresponsive servers - Provides predictable failure behavior for network issues - Improves reliability of catalog synchronization --- .../src/catalog/sync.rs | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/llm-coding-tools-models-dev/src/catalog/sync.rs b/src/llm-coding-tools-models-dev/src/catalog/sync.rs index 26d9005e..629c9bf7 100644 --- a/src/llm-coding-tools-models-dev/src/catalog/sync.rs +++ b/src/llm-coding-tools-models-dev/src/catalog/sync.rs @@ -21,6 +21,9 @@ use std::path::Path; /// Default production endpoint for the models.dev catalog snapshot. const MODELS_DEV_API_URL: &str = "https://models.dev/api.json"; +/// Timeout for HTTP connections and requests in seconds. 
+const REQUEST_TIMEOUT_SECS: u64 = 30; + #[cfg(test)] static TEST_MODELS_DEV_API_URL: std::sync::Mutex> = std::sync::Mutex::new(None); @@ -108,9 +111,17 @@ pub(crate) async fn load_catalog_from_url( } #[cfg(feature = "tokio")] - let client = reqwest::Client::new(); + let client = reqwest::Client::builder() + .connect_timeout(std::time::Duration::from_secs(REQUEST_TIMEOUT_SECS)) + .timeout(std::time::Duration::from_secs(REQUEST_TIMEOUT_SECS)) + .build() + .expect("client builder should not fail with valid config"); #[cfg(feature = "blocking")] - let client = reqwest::blocking::Client::new(); + let client = reqwest::blocking::Client::builder() + .connect_timeout(std::time::Duration::from_secs(REQUEST_TIMEOUT_SECS)) + .timeout(std::time::Duration::from_secs(REQUEST_TIMEOUT_SECS)) + .build() + .expect("client builder should not fail with valid config"); let mut request = client.get(url); if let Some(etag) = cache_file.as_ref().and_then(|file| file.etag_bytes()) { From 28434f52c96a2701b357b7e9bb71fea57a735938 Mon Sep 17 00:00:00 2001 From: Sewer56 Date: Wed, 11 Mar 2026 22:26:22 +0000 Subject: [PATCH 21/22] Fixed: Reuse cached catalog on transient sync failures Catalog sync now keeps the last known-good payload when reading the response body fails or the server returns retryable statuses. 
Changes: - Fallback to cached catalog when reading a 200 response body fails - Treat 429 and 5xx responses as retryable when cache data is available - Add sync tests and mock server responses for transient status and truncated body cases Benefits: - Avoids dropping usable cached catalog data during temporary upstream issues - Keeps catalog loading stable across transient HTTP and transport failures --- .../src/catalog/sync.rs | 110 ++++++++++++++++-- .../src/catalog/test_utils.rs | 35 +++++- 2 files changed, 134 insertions(+), 11 deletions(-) diff --git a/src/llm-coding-tools-models-dev/src/catalog/sync.rs b/src/llm-coding-tools-models-dev/src/catalog/sync.rs index 629c9bf7..d63023d6 100644 --- a/src/llm-coding-tools-models-dev/src/catalog/sync.rs +++ b/src/llm-coding-tools-models-dev/src/catalog/sync.rs @@ -43,7 +43,7 @@ fn models_dev_api_url() -> Cow<'static, str> { Cow::Borrowed(MODELS_DEV_API_URL) } -/// Resolves the result to return after a request failure. +/// Resolves the result to return after a transient request failure. /// /// Cached data takes precedence over surfacing the request error so callers can /// continue with the last known-good catalog when possible. @@ -63,6 +63,11 @@ fn load_after_request_failure( Err(CatalogError::Reqwest(request_error)) } +#[inline] +fn is_transient_status(status: StatusCode) -> bool { + status == StatusCode::TOO_MANY_REQUESTS || status.is_server_error() +} + #[maybe_async::maybe_async] /// Loads the catalog at `path` using the default models.dev endpoint. 
/// @@ -83,7 +88,7 @@ pub(crate) async fn load_catalog_at_path(path: &Path) -> CatalogResult body, + Err(error) => { + return load_after_request_failure(error, cache_file.as_ref(), cache_error); + } + }; let payload = cache_payload_from_api_json_bytes(body.as_ref())?; let payload_encoded = encode_cache_payload(&payload); let catalog = catalog_from_cache_payload(payload)?; @@ -173,6 +183,17 @@ pub(crate) async fn load_catalog_from_url( )) } } + status if is_transient_status(status) => { + if let Some(cache_file) = cache_file.as_ref() { + load_catalog_from_cache_file_data(cache_file, CatalogLoadSource::FallbackCache) + } else if let Some(error) = cache_error { + Err(error) + } else { + Err(CatalogError::Configuration(format!( + "unexpected catalog sync status: {status}", + ))) + } + } status => Err(CatalogError::Configuration(format!( "unexpected catalog sync status: {status}", ))), @@ -288,11 +309,8 @@ mod tests { format!("http://127.0.0.1:{port}/api.json") } - #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))] - async fn sync_returns_fallback_cache_on_request_failure_with_valid_cache() { - let temp = TempDir::new().expect("tempdir"); - let cache_path = temp.path().join("test.cache"); - + #[maybe_async::maybe_async] + async fn seed_cache(cache_path: &Path) { let payload = CatalogCachePayload { providers: vec![CachedProviderRow { provider_key: "openai".to_string(), @@ -313,7 +331,7 @@ mod tests { let encoded = encode_cache_payload(&payload); let compressed = zstd::bulk::compress(&encoded, 1).expect("compress"); crate::cache::format::write_cache_file( - &cache_path, + cache_path, &CacheWriteInput { etag: Some(b"\"cached-etag-456\""), payload_compressed: &compressed, @@ -322,6 +340,21 @@ mod tests { ) .await .expect("seed cache"); + } + + #[test] + fn transient_status_detection_matches_retryable_responses() { + assert!(is_transient_status(StatusCode::TOO_MANY_REQUESTS)); + assert!(is_transient_status(StatusCode::SERVICE_UNAVAILABLE)); + 
assert!(!is_transient_status(StatusCode::NOT_FOUND)); + } + + #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))] + async fn sync_returns_fallback_cache_on_request_failure_with_valid_cache() { + let temp = TempDir::new().expect("tempdir"); + let cache_path = temp.path().join("test.cache"); + + seed_cache(&cache_path).await; let result = load_catalog_from_url(&cache_path, &refused_local_url()) .await @@ -331,6 +364,65 @@ mod tests { assert!(result.catalog.lookup_provider("openai").is_some()); } + #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))] + async fn sync_returns_fallback_cache_on_transient_status_with_valid_cache() { + let temp = TempDir::new().expect("tempdir"); + let cache_path = temp.path().join("test.cache"); + + seed_cache(&cache_path).await; + + let (_handle, url) = start_mock_server(MockResponse::Status { + code: 503, + reason: "Service Unavailable", + }); + + let result = load_catalog_from_url(&cache_path, &url) + .await + .expect("fallback should succeed"); + + assert_eq!(result.source, CatalogLoadSource::FallbackCache); + assert!(result.catalog.lookup_provider("openai").is_some()); + } + + #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))] + async fn sync_returns_cache_error_on_transient_status_with_corrupt_cache() { + let temp = TempDir::new().expect("tempdir"); + let cache_path = temp.path().join("corrupt.cache"); + + std::fs::write(&cache_path, [0_u8; 11]).expect("write corrupt cache"); + + let (_handle, url) = start_mock_server(MockResponse::Status { + code: 429, + reason: "Too Many Requests", + }); + + match load_catalog_from_url(&cache_path, &url).await { + Err(error) => assert!(matches!(error, CatalogError::CacheFormat(_))), + Ok(_) => panic!("transient status with corrupt cache should error"), + } + } + + #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))] + async fn 
sync_returns_fallback_cache_on_body_read_failure_with_valid_cache() { + let temp = TempDir::new().expect("tempdir"); + let cache_path = temp.path().join("test.cache"); + + seed_cache(&cache_path).await; + + let (_handle, url) = start_mock_server(MockResponse::PartialOk { + etag: "\"fresh-etag\"", + body: "{".to_string(), + content_length: 32, + }); + + let result = load_catalog_from_url(&cache_path, &url) + .await + .expect("fallback should succeed"); + + assert_eq!(result.source, CatalogLoadSource::FallbackCache); + assert!(result.catalog.lookup_provider("openai").is_some()); + } + #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))] async fn sync_returns_request_error_when_request_fails_without_cache() { let temp = TempDir::new().expect("tempdir"); diff --git a/src/llm-coding-tools-models-dev/src/catalog/test_utils.rs b/src/llm-coding-tools-models-dev/src/catalog/test_utils.rs index c6bec11b..a7fec883 100644 --- a/src/llm-coding-tools-models-dev/src/catalog/test_utils.rs +++ b/src/llm-coding-tools-models-dev/src/catalog/test_utils.rs @@ -1,8 +1,22 @@ use std::io::{BufRead, Write}; pub enum MockResponse { - Ok { etag: &'static str, body: String }, - NotModified { etag: &'static str }, + Ok { + etag: &'static str, + body: String, + }, + PartialOk { + etag: &'static str, + body: String, + content_length: usize, + }, + NotModified { + etag: &'static str, + }, + Status { + code: u16, + reason: &'static str, + }, } pub fn sample_api_json() -> &'static [u8] { @@ -65,11 +79,28 @@ pub fn start_mock_server(response: MockResponse) -> (std::thread::JoinHandle<()> stream.write_all(response.as_bytes()).expect("write"); stream.flush().expect("flush"); } + MockResponse::PartialOk { + etag, + body, + content_length, + } => { + let response = format!( + "HTTP/1.1 200 OK\r\nContent-Type: application/json\r\nETag: {}\r\nContent-Length: {}\r\n\r\n{}", + etag, content_length, body + ); + stream.write_all(response.as_bytes()).expect("write"); + 
stream.flush().expect("flush"); + } MockResponse::NotModified { etag } => { let response = format!("HTTP/1.1 304 Not Modified\r\nETag: {}\r\n\r\n", etag); stream.write_all(response.as_bytes()).expect("write"); stream.flush().expect("flush"); } + MockResponse::Status { code, reason } => { + let response = format!("HTTP/1.1 {code} {reason}\r\nContent-Length: 0\r\n\r\n"); + stream.write_all(response.as_bytes()).expect("write"); + stream.flush().expect("flush"); + } } }); From df24a978e8a9da77886bf20180464c73978672b6 Mon Sep 17 00:00:00 2001 From: Sewer56 Date: Wed, 11 Mar 2026 22:43:24 +0000 Subject: [PATCH 22/22] Changed: Document safety of read_exact with snapshot length Add # Safety section to read functions explaining why the pattern of snapshotting file length then calling read_exact is safe in this codebase. Changes: - Added safety documentation to blocking_impl.rs read function - Added safety documentation to tokio_impl.rs read function Benefits: - Clarifies for future reviewers why this pattern cannot cause data loss - Documents the atomic write pattern used throughout the codebase --- src/llm-coding-tools-models-dev/src/fs/blocking_impl.rs | 8 ++++++++ src/llm-coding-tools-models-dev/src/fs/tokio_impl.rs | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/src/llm-coding-tools-models-dev/src/fs/blocking_impl.rs b/src/llm-coding-tools-models-dev/src/fs/blocking_impl.rs index b087b7b1..01252a9b 100644 --- a/src/llm-coding-tools-models-dev/src/fs/blocking_impl.rs +++ b/src/llm-coding-tools-models-dev/src/fs/blocking_impl.rs @@ -4,6 +4,14 @@ use std::io::{ErrorKind, Read as _}; use std::path::Path; /// Reads a file into memory in one pre-sized allocation. +/// +/// # Safety +/// +/// We snapshot file length then call `read_exact`, which would miss data appended after +/// the metadata call if the file grew mid-read. However, within this codebase all +/// writes go to a temp file first, then rename to target — so files are never +/// appended to in place. 
+/// Therefore this race cannot occur. #[inline] pub(crate) fn read(path: impl AsRef) -> std::io::Result> { let mut file = std::fs::File::open(path)?; diff --git a/src/llm-coding-tools-models-dev/src/fs/tokio_impl.rs b/src/llm-coding-tools-models-dev/src/fs/tokio_impl.rs index 77f304dd..29d04d2c 100644 --- a/src/llm-coding-tools-models-dev/src/fs/tokio_impl.rs +++ b/src/llm-coding-tools-models-dev/src/fs/tokio_impl.rs @@ -5,6 +5,14 @@ use std::path::Path; use tokio::io::AsyncReadExt as _; /// Reads a file into memory in one pre-sized allocation. +/// +/// # Safety +/// +/// We snapshot file length then call `read_exact`, which would miss data appended after +/// the metadata call if the file grew mid-read. However, within this codebase all +/// writes go to a temp file first, then rename to target — so files are never +/// appended to in place. +/// Therefore this race cannot occur. #[inline] pub(crate) async fn read(path: impl AsRef) -> std::io::Result> { let mut file = tokio::fs::File::open(path).await?;