diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 8ebd0db2..37f6b714 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -52,13 +52,13 @@ jobs: cargo +stable binstall --no-confirm cargo-semver-checks --force rustup +stable target add ${{ matrix.target }} - for CRATE in "llm-coding-tools-core" "llm-coding-tools-agents" "llm-coding-tools-serdesai"; do + for CRATE in "llm-coding-tools-core" "llm-coding-tools-agents" "llm-coding-tools-serdesai" "llm-coding-tools-models-dev"; do SEARCH_RESULT=$(cargo search "^${CRATE}$" --limit 1) if echo "$SEARCH_RESULT" | grep -q "^${CRATE} "; then echo "Running semver checks for ${CRATE}..." # Note: llm-coding-tools-core has mutually exclusive async/blocking features, # so we must use --only-explicit-features to avoid enabling all features. - # The serdesai crate is async-only and doesn't have the tokio feature. + # llm-coding-tools-serdesai is async-only. models-dev supports both tokio and blocking. if [ "${CRATE}" = "llm-coding-tools-core" ]; then cargo +stable semver-checks -p "${CRATE}" --target ${{ matrix.target }} --only-explicit-features --features tokio else @@ -79,6 +79,7 @@ jobs: cargo doc -p llm-coding-tools-core --features tokio --document-private-items --no-deps --target ${{ matrix.target }} cargo doc -p llm-coding-tools-agents --document-private-items --no-deps --target ${{ matrix.target }} cargo doc -p llm-coding-tools-serdesai --document-private-items --no-deps --target ${{ matrix.target }} + cargo doc -p llm-coding-tools-models-dev --document-private-items --no-deps --target ${{ matrix.target }} - name: Run linter if: github.event_name == 'pull_request' || startsWith(github.ref, 'refs/tags/') @@ -88,6 +89,7 @@ jobs: cargo clippy -p llm-coding-tools-core --features tokio --target ${{ matrix.target }} -- -D warnings cargo clippy -p llm-coding-tools-agents --target ${{ matrix.target }} -- -D warnings cargo clippy -p llm-coding-tools-serdesai --target ${{ matrix.target }} 
-- -D warnings + cargo clippy -p llm-coding-tools-models-dev --target ${{ matrix.target }} -- -D warnings - name: Run formatter check uses: actions-rust-lang/rustfmt@v1 @@ -125,6 +127,7 @@ jobs: use-cross: ${{ matrix.use-cross }} packages: | llm-coding-tools-core + llm-coding-tools-models-dev no-default-features: true features: "blocking" @@ -135,12 +138,14 @@ jobs: RUSTDOCFLAGS: "-D warnings" run: | cargo doc -p llm-coding-tools-core --no-default-features --features blocking --document-private-items --no-deps --target ${{ matrix.target }} + cargo doc -p llm-coding-tools-models-dev --no-default-features --features blocking --document-private-items --no-deps --target ${{ matrix.target }} - name: Run linter (Blocking) if: github.event_name == 'pull_request' || startsWith(github.ref, 'refs/tags/') working-directory: src run: | cargo clippy -p llm-coding-tools-core --no-default-features --features blocking --target ${{ matrix.target }} -- -D warnings + cargo clippy -p llm-coding-tools-models-dev --no-default-features --features blocking --target ${{ matrix.target }} -- -D warnings - name: Run formatter check uses: actions-rust-lang/rustfmt@v1 @@ -165,6 +170,7 @@ jobs: src/llm-coding-tools-core src/llm-coding-tools-agents src/llm-coding-tools-serdesai + src/llm-coding-tools-models-dev compression-tool: 7z artifact-groups-file: .github/artifact-groups.yml changelog-enabled: "true" diff --git a/README.MD b/README.MD index 2d728aac..303acd67 100644 --- a/README.MD +++ b/README.MD @@ -1,18 +1,30 @@ # llm-coding-tools [![Crates.io - llm-coding-tools-core](https://img.shields.io/crates/v/llm-coding-tools-core.svg)](https://crates.io/crates/llm-coding-tools-core) +[![Crates.io - llm-coding-tools-agents](https://img.shields.io/crates/v/llm-coding-tools-agents.svg)](https://crates.io/crates/llm-coding-tools-agents) [![Crates.io - llm-coding-tools-serdesai](https://img.shields.io/crates/v/llm-coding-tools-serdesai.svg)](https://crates.io/crates/llm-coding-tools-serdesai) 
+[![Crates.io - llm-coding-tools-models-dev](https://img.shields.io/crates/v/llm-coding-tools-models-dev.svg)](https://crates.io/crates/llm-coding-tools-models-dev) [![Docs.rs](https://docs.rs/llm-coding-tools-serdesai/badge.svg)](https://docs.rs/llm-coding-tools-serdesai) [![CI](https://github.com/Sewer56/llm-coding-tools/actions/workflows/rust.yml/badge.svg)](https://github.com/Sewer56/llm-coding-tools/actions) -Lightweight, high-performance coding tool implementations for LLM-powered development agents. Plug and play into your favourite frameworks. +Lightweight, heavily optimized coding tool implementations for LLM-powered +development agents. + +Suitable for server use (<3 MiB), or as building blocks for your own TUI coding agent. ## About This Workspace -This workspace contains multiple Rust crates for integrating coding tools with LLM agents: +This workspace contains multiple Rust crates for integrating coding tools with +LLM agents: -- **[llm-coding-tools-core](./src/llm-coding-tools-core/)**: Framework-agnostic core operations and utilities -- **[llm-coding-tools-serdesai](./src/llm-coding-tools-serdesai/)**: serdesAI framework-specific Tool implementations +- **[llm-coding-tools-core](./src/llm-coding-tools-core/)**: + Framework-agnostic core operations and utilities +- **[llm-coding-tools-agents](./src/llm-coding-tools-agents/)**: + OpenCode agent markdown loader and typed catalogue +- **[llm-coding-tools-serdesai](./src/llm-coding-tools-serdesai/)**: + serdesAI framework-specific Tool implementations +- **[llm-coding-tools-models-dev](./src/llm-coding-tools-models-dev/)**: + models.dev catalog sync with cached fallback and ETag refresh ## Features @@ -22,6 +34,9 @@ This workspace contains multiple Rust crates for integrating coding tools with L - **Web**: URL fetching with HTML-to-markdown conversion - **Path Security**: Choose between unrestricted or sandboxed file access - **Context Strings**: Embedded LLM guidance for tool usage +- **Agent Loading**: 
Parse OpenCode-compatible agent markdown into typed configs +- **Model Catalog Sync**: Download and cache the models.dev catalog for + provider/model lookups ## Feature Flags (llm-coding-tools-core) @@ -30,41 +45,24 @@ This workspace contains multiple Rust crates for integrating coding tools with L ## Quick Start -Add to your `Cargo.toml`: +Pick the crate that matches your use case: ```toml [dependencies] -llm-coding-tools-serdesai = "0.1" +llm-coding-tools-core = "0.2" # Framework-agnostic tool implementations +llm-coding-tools-agents = "0.1" # OpenCode agent markdown loader +llm-coding-tools-models-dev = "0.1" # models.dev catalog sync and cache +llm-coding-tools-serdesai = "0.2" # serdesAI integration ``` -```rust,no_run -use llm_coding_tools_serdesai::{AgentBuilder, BashTool, TodoTools}; -use llm_coding_tools_serdesai::absolute::{ReadTool, WriteTool, EditTool, GlobTool, GrepTool}; - -let mut builder = AgentBuilder::new(); -let todos = TodoTools::new(); - -builder - .track(ReadTool::::new()) - .track(WriteTool::new()) - .track(EditTool::::new()) - .track(GlobTool::new()) - .track(GrepTool::::new()) - .track(BashTool::new()) - .track(&todos.read) - .track(&todos.write); - -let mut agent = builder.build(); - -// Use the agent -// let response = agent.invoke("List all files").await?; -``` +For a runnable agent setup, start with `llm-coding-tools-serdesai` and the +examples below. 
## Examples ```bash # serdesAI framework - Basic agent setup -cargo run --example serdesai-agents -p llm-coding-tools-serdesai +cargo run --example serdesai-basic -p llm-coding-tools-serdesai # serdesAI framework - Sandboxed file access cargo run --example serdesai-sandboxed -p llm-coding-tools-serdesai @@ -73,18 +71,26 @@ cargo run --example serdesai-sandboxed -p llm-coding-tools-serdesai ## Documentation - [llm-coding-tools-core README](./src/llm-coding-tools-core/README.md) +- [llm-coding-tools-agents README](./src/llm-coding-tools-agents/README.md) - [llm-coding-tools-serdesai README](./src/llm-coding-tools-serdesai/README.md) +- [llm-coding-tools-models-dev README](./src/llm-coding-tools-models-dev/README.md) - [Developer Guidelines](./src/AGENTS.md) ## Contributing -Contributions are welcome! Please ensure all tests pass and the code follows our guidelines. +Contributions are welcome! Please ensure all tests pass and the code follows +our guidelines. ## Deprecation Notice -**Rig framework support (`llm-coding-tools-rig`) has been removed** (commit 17158db) due to library bugs that prevented examples from running reliably. +**Rig framework support (`llm-coding-tools-rig`) has been removed** +(commit 17158db) due to library bugs that prevented examples from running +reliably. -You're welcome to submit a PR re-adding rig support if you're willing to maintain it. Since I don't use rig personally, I'm not able to actively maintain that integration. Alternatively, you can create your own crate building on `llm-coding-tools-core` directly. +You're welcome to submit a PR re-adding rig support if you're willing to +maintain it. Since I don't use rig personally, I'm not able to actively +maintain that integration. Alternatively, you can create your own crate +building on `llm-coding-tools-core` directly. 
## License diff --git a/src/.cargo/verify.ps1 b/src/.cargo/verify.ps1 index 745bf015..481ad21b 100644 --- a/src/.cargo/verify.ps1 +++ b/src/.cargo/verify.ps1 @@ -2,8 +2,8 @@ # All steps must pass without warnings # Keep in sync with verify.sh # -# Note: llm-coding-tools-serdesai is async-only (implements async Tool traits). -# The blocking feature only applies to llm-coding-tools-core. +# Note: llm-coding-tools-serdesai is async-only. +# Blocking mode is validated for core and models-dev. $ErrorActionPreference = "Stop" @@ -35,25 +35,29 @@ try { Invoke-LoggedCommand "cargo" @("build", "-p", "llm-coding-tools-core", "--quiet") Invoke-LoggedCommand "cargo" @("build", "-p", "llm-coding-tools-agents", "--quiet") Invoke-LoggedCommand "cargo" @("build", "-p", "llm-coding-tools-serdesai", "--quiet") +Invoke-LoggedCommand "cargo" @("build", "-p", "llm-coding-tools-models-dev", "--quiet") Write-Host "Testing..." Invoke-LoggedCommand "cargo" @("test", "-p", "llm-coding-tools-core", "--quiet") Invoke-LoggedCommand "cargo" @("test", "-p", "llm-coding-tools-agents", "--quiet") Invoke-LoggedCommand "cargo" @("test", "-p", "llm-coding-tools-serdesai", "--quiet") +Invoke-LoggedCommand "cargo" @("test", "-p", "llm-coding-tools-models-dev", "--quiet") Write-Host "Clippy..." Invoke-LoggedCommand "cargo" @("clippy", "-p", "llm-coding-tools-core", "--quiet", "--", "-D", "warnings") Invoke-LoggedCommand "cargo" @("clippy", "-p", "llm-coding-tools-agents", "--quiet", "--", "-D", "warnings") Invoke-LoggedCommand "cargo" @("clippy", "-p", "llm-coding-tools-serdesai", "--quiet", "--", "-D", "warnings") +Invoke-LoggedCommand "cargo" @("clippy", "-p", "llm-coding-tools-models-dev", "--quiet", "--", "-D", "warnings") Write-Host "Testing blocking feature..." 
Invoke-LoggedCommand "cargo" @("test", "-p", "llm-coding-tools-core", "--no-default-features", "--features", "blocking", "--quiet") +Invoke-LoggedCommand "cargo" @("test", "-p", "llm-coding-tools-models-dev", "--no-default-features", "--features", "blocking", "--quiet") Write-Host "Docs..." $originalRustdocFlags = $env:RUSTDOCFLAGS $env:RUSTDOCFLAGS = "-D warnings" try { - Invoke-LoggedCommand "cargo" @("doc", "--workspace", "--no-deps", "--quiet") + Invoke-LoggedCommand "cargo" @("doc", "--workspace", "--document-private-items", "--no-deps", "--quiet") } finally { $env:RUSTDOCFLAGS = $originalRustdocFlags } @@ -65,6 +69,7 @@ Write-Host "Publish dry-run..." Invoke-LoggedCommand "cargo" @("publish", "--dry-run", "--allow-dirty", "-p", "llm-coding-tools-core", "--quiet") Invoke-LoggedCommand "cargo" @("publish", "--dry-run", "--allow-dirty", "-p", "llm-coding-tools-agents", "--quiet") Invoke-LoggedCommand "cargo" @("publish", "--dry-run", "--allow-dirty", "-p", "llm-coding-tools-serdesai", "--quiet") +Invoke-LoggedCommand "cargo" @("publish", "--dry-run", "--allow-dirty", "-p", "llm-coding-tools-models-dev", "--quiet") Write-Host "All checks passed!" } diff --git a/src/.cargo/verify.sh b/src/.cargo/verify.sh index b51897cf..36ff4728 100755 --- a/src/.cargo/verify.sh +++ b/src/.cargo/verify.sh @@ -3,8 +3,8 @@ # All steps must pass without warnings # Keep in sync with verify.ps1 # -# Note: llm-coding-tools-serdesai is async-only (implements async Tool traits). -# The blocking feature only applies to llm-coding-tools-core. +# Note: llm-coding-tools-serdesai is async-only. +# Blocking mode is validated for core and models-dev. set -e @@ -24,22 +24,26 @@ echo "Building..." run_cmd cargo build -p llm-coding-tools-core --quiet run_cmd cargo build -p llm-coding-tools-agents --quiet run_cmd cargo build -p llm-coding-tools-serdesai --quiet +run_cmd cargo build -p llm-coding-tools-models-dev --quiet echo "Testing..." 
run_cmd cargo test -p llm-coding-tools-core --quiet run_cmd cargo test -p llm-coding-tools-agents --quiet run_cmd cargo test -p llm-coding-tools-serdesai --quiet +run_cmd cargo test -p llm-coding-tools-models-dev --quiet echo "Clippy..." run_cmd cargo clippy -p llm-coding-tools-core --quiet -- -D warnings run_cmd cargo clippy -p llm-coding-tools-agents --quiet -- -D warnings run_cmd cargo clippy -p llm-coding-tools-serdesai --quiet -- -D warnings +run_cmd cargo clippy -p llm-coding-tools-models-dev --quiet -- -D warnings echo "Testing blocking feature..." run_cmd cargo test -p llm-coding-tools-core --no-default-features --features blocking --quiet +run_cmd cargo test -p llm-coding-tools-models-dev --no-default-features --features blocking --quiet echo "Docs..." -run_cmd env RUSTDOCFLAGS="-D warnings" cargo doc --workspace --no-deps --quiet +run_cmd env RUSTDOCFLAGS="-D warnings" cargo doc --workspace --document-private-items --no-deps --quiet echo "Formatting..." run_cmd cargo fmt --all --check --quiet @@ -48,5 +52,6 @@ echo "Publish dry-run..." run_cmd cargo publish --dry-run --allow-dirty -p llm-coding-tools-core --quiet run_cmd cargo publish --dry-run --allow-dirty -p llm-coding-tools-agents --quiet run_cmd cargo publish --dry-run --allow-dirty -p llm-coding-tools-serdesai --quiet +run_cmd cargo publish --dry-run --allow-dirty -p llm-coding-tools-models-dev --quiet echo "All checks passed!" 
diff --git a/src/Cargo.lock b/src/Cargo.lock index a9d43b94..2b708bc7 100644 --- a/src/Cargo.lock +++ b/src/Cargo.lock @@ -74,6 +74,12 @@ version = "1.0.100" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" +[[package]] +name = "arrayvec" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" + [[package]] name = "assert-json-diff" version = "2.0.2" @@ -144,6 +150,30 @@ version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" +[[package]] +name = "bitcode" +version = "0.6.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a6ed1b54d8dc333e7be604d00fa9262f4635485ffea923647b6521a5fff045d" +dependencies = [ + "arrayvec", + "bitcode_derive", + "bytemuck", + "glam", + "serde", +] + +[[package]] +name = "bitcode_derive" +version = "0.6.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "238b90427dfad9da4a9abd60f3ec1cdee6b80454bde49ed37f1781dd8e9dc7f9" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "bitfields" version = "1.0.2" @@ -197,6 +227,12 @@ version = "3.19.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5dd9dc738b7a8311c7ade152424974d8115f2cdad61e8dab8dac9f2362298510" +[[package]] +name = "bytemuck" +version = "1.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8efb64bd706a16a1bdde310ae86b351e4d21550d98d056f22f8a7f7a2183fec" + [[package]] name = "bytes" version = "1.11.0" @@ -324,6 +360,16 @@ dependencies = [ "memchr", ] +[[package]] +name = "concat-idents" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"f76990911f2267d837d9d0ad060aa63aaad170af40904b29461734c339030d4d" +dependencies = [ + "quote", + "syn", +] + [[package]] name = "const-random" version = "0.1.18" @@ -548,6 +594,27 @@ dependencies = [ "crypto-common", ] +[[package]] +name = "dirs" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3e8aa94d75141228480295a7d0e7feb620b1a5ad9f12bc40be62411e38cce4e" +dependencies = [ + "dirs-sys", +] + +[[package]] +name = "dirs-sys" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e01a3366d27ee9890022452ee61b2b63a67e6f13f58900b651ff5665f0bb1fab" +dependencies = [ + "libc", + "option-ext", + "redox_users", + "windows-sys 0.61.2", +] + [[package]] name = "displaydoc" version = "0.2.5" @@ -595,6 +662,29 @@ dependencies = [ "encoding_rs", ] +[[package]] +name = "endian-writer" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5fba6714ed232b3a46d07255c9cb2d20e9a8aee06a20d5d2e3eb4e2b48d28ae" +dependencies = [ + "concat-idents", + "paste", +] + +[[package]] +name = "endian-writer-derive" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "160e7b32d1a63d6f02993f5ce2da2b7125480ae40c45d9a0b74d158f203f7e53" +dependencies = [ + "endian-writer", + "memoffset", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "equivalent" version = "1.0.2" @@ -794,6 +884,12 @@ dependencies = [ "wasip3", ] +[[package]] +name = "glam" +version = "0.32.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34627c5158214743a374170fed714833fdf4e4b0cbcc1ea98417866a4c5d4441" + [[package]] name = "globset" version = "0.4.18" @@ -1315,15 +1411,24 @@ checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" [[package]] name = "libc" -version = "0.2.180" +version = "0.2.182" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"bcc35a38544a891a5f7c865aca548a982ccb3b8650a5b06d0fd33a10283c56fc" +checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112" + +[[package]] +name = "libredox" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "062b52cd41eb8d929e81b592a47df833c33c15684933a9329440137a6d9f134c" +dependencies = [ + "libc", +] [[package]] name = "linux-raw-sys" -version = "0.11.0" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" +checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53" [[package]] name = "lite-strtab" @@ -1364,6 +1469,7 @@ name = "llm-coding-tools-core" version = "0.2.0" dependencies = [ "ahash", + "bitcode", "bitfields", "bitflags", "criterion", @@ -1385,11 +1491,32 @@ dependencies = [ "serde_json", "tempfile", "thiserror 2.0.18", + "tinyvec", "tinyvec_string", "tokio", "wiremock", ] +[[package]] +name = "llm-coding-tools-models-dev" +version = "0.1.0" +dependencies = [ + "bitcode", + "dirs", + "endian-writer", + "endian-writer-derive", + "llm-coding-tools-core", + "maybe-async", + "reqwest 0.13.1", + "serde", + "serde_json", + "serial_test", + "tempfile", + "thiserror 2.0.18", + "tokio", + "zstd", +] + [[package]] name = "llm-coding-tools-serdesai" version = "0.2.0" @@ -1475,6 +1602,15 @@ dependencies = [ "libc", ] +[[package]] +name = "memoffset" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a" +dependencies = [ + "autocfg", +] + [[package]] name = "mime" version = "0.3.17" @@ -1500,9 +1636,9 @@ checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086" [[package]] name = "nix" -version = "0.31.1" +version = "0.31.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"225e7cfe711e0ba79a68baeddb2982723e4235247aefce1482f2f16c27865b66" +checksum = "5d6d0705320c1e6ba1d912b5e37cf18071b6c2e9b7fa8215a1e8a7651966f5d3" dependencies = [ "bitflags", "cfg-if", @@ -1547,6 +1683,12 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9f50d9b3dabb09ecd771ad0aa242ca6894994c130308ca3d7684634df8037391" +[[package]] +name = "option-ext" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" + [[package]] name = "page_size" version = "0.6.0" @@ -1580,6 +1722,12 @@ dependencies = [ "windows-link", ] +[[package]] +name = "paste" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + [[package]] name = "percent-encoding" version = "2.3.2" @@ -1637,6 +1785,12 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +[[package]] +name = "pkg-config" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" + [[package]] name = "plotters" version = "0.3.7" @@ -1886,6 +2040,17 @@ dependencies = [ "bitflags", ] +[[package]] +name = "redox_users" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4e608c6638b9c18977b00b475ac1f28d14e84b27d8d42f70e0bf1e3dec127ac" +dependencies = [ + "getrandom 0.2.17", + "libredox", + "thiserror 2.0.18", +] + [[package]] name = "ref-cast" version = "1.0.25" @@ -2036,9 +2201,9 @@ checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" [[package]] name = "rustix" -version = "1.1.3" +version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"146c9e247ccc180c1f61615433868c99f3de3ae256a30a43b49f67c2d9171f34" +checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190" dependencies = [ "bitflags", "errno", @@ -2144,6 +2309,15 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "scc" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46e6f046b7fef48e2660c57ed794263155d713de679057f2d0c169bfc6e756cc" +dependencies = [ + "sdd", +] + [[package]] name = "schannel" version = "0.1.28" @@ -2184,6 +2358,12 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "sdd" +version = "3.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "490dcfcbfef26be6800d11870ff2df8774fa6e86d047e3e8c8a76b25655e41ca" + [[package]] name = "security-framework" version = "3.5.1" @@ -2512,6 +2692,32 @@ dependencies = [ "tracing", ] +[[package]] +name = "serial_test" +version = "3.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "911bd979bf1070a3f3aa7b691a3b3e9968f339ceeec89e08c280a8a22207a32f" +dependencies = [ + "futures-executor", + "futures-util", + "log", + "once_cell", + "parking_lot", + "scc", + "serial_test_derive", +] + +[[package]] +name = "serial_test_derive" +version = "3.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a7d91949b85b0d2fb687445e448b40d322b6b3e4af6b44a29b21d9a5f33e6d9" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "sha2" version = "0.10.9" @@ -2642,12 +2848,12 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.25.0" +version = "3.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0136791f7c95b1f6dd99f9cc786b91bb81c3800b639b3478e561ddb7be95e5f1" +checksum = "82a72c767771b47409d2345987fda8628641887d5466101319899796367354a0" dependencies = [ 
"fastrand", - "getrandom 0.3.4", + "getrandom 0.4.2", "once_cell", "rustix", "windows-sys 0.61.2", @@ -3773,3 +3979,31 @@ name = "zmij" version = "1.0.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dfcd145825aace48cff44a8844de64bf75feec3080e0aa5cdbde72961ae51a65" + +[[package]] +name = "zstd" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "7.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d" +dependencies = [ + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "2.0.16+zstd.1.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e19ebc2adc8f83e43039e79776e3fda8ca919132d68a1fed6a5faca2683748" +dependencies = [ + "cc", + "pkg-config", +] diff --git a/src/Cargo.toml b/src/Cargo.toml index 0dbd669e..7429dbb9 100644 --- a/src/Cargo.toml +++ b/src/Cargo.toml @@ -1,7 +1,7 @@ [workspace] resolver = "2" -members = ["llm-coding-tools-core", "llm-coding-tools-serdesai", "llm-coding-tools-agents"] +members = ["llm-coding-tools-core", "llm-coding-tools-serdesai", "llm-coding-tools-agents", "llm-coding-tools-models-dev"] # Profile Build [profile.profile] diff --git a/src/llm-coding-tools-agents/README.md b/src/llm-coding-tools-agents/README.md index b17ce11d..e6d6bb85 100644 --- a/src/llm-coding-tools-agents/README.md +++ b/src/llm-coding-tools-agents/README.md @@ -4,13 +4,16 @@ Load OpenCode agent markdown files into a typed Rust catalogue. This crate is a loader for the [OpenCode agent schema](https://opencode.ai/docs/agents/). -It is a drop-in replacement for OpenCode agent files: agents you create for OpenCode should load here unchanged. 
+It is a drop-in replacement for OpenCode agent files: agents you create for +OpenCode should load here unchanged. ## What it provides -- [`AgentLoader`] for loading agent configs from directories, files, or in-memory markdown. +- [`AgentLoader`] for loading agent configs from directories, files, or + in-memory markdown. - [`AgentCatalog`] for storing and looking up loaded [`AgentConfig`] entries. -- [`RulesetExt`] for converting frontmatter `permission` data into runtime [`Ruleset`]s. +- [`RulesetExt`] for converting frontmatter `permission` data into runtime + [`Ruleset`]s. ## Quick start @@ -43,19 +46,34 @@ permission: Prompt body here... ``` -For field behaviour, see OpenCode docs for [`mode`](https://opencode.ai/docs/agents#mode), [`model`](https://opencode.ai/docs/agents#model), and [`permissions`](https://opencode.ai/docs/agents#permissions). +For field behaviour, see OpenCode docs for +[`mode`](https://opencode.ai/docs/agents#mode), +[`model`](https://opencode.ai/docs/agents#model), and +[`permissions`](https://opencode.ai/docs/agents#permissions). ## Compatibility notes -This library does not provide interactive UX extensions (for example, TUI approval flows). -To avoid false expectations, settings that require interaction are rejected, while settings with no runtime effect are accepted and ignored: +This library does not provide interactive UX extensions (for example, TUI +approval flows). -- [`permission.task`](https://opencode.ai/docs/agents#task-permissions): `ask` is rejected with a schema validation error (`allow`/`deny` only), because `ask` is an interactive approval mode in OpenCode ([docs](https://opencode.ai/docs/permissions#what-ask-does)). -- [`hidden`](https://opencode.ai/docs/agents#hidden) is accepted for compatibility, but ignored at runtime. 
+To avoid false expectations, settings that require interaction are rejected, +while settings with no runtime effect are accepted and ignored: + +- [`permission.task`](https://opencode.ai/docs/agents#task-permissions): + `ask` is rejected with a schema validation error (`allow`/`deny` only), + because `ask` is an interactive approval mode in OpenCode + ([docs](https://opencode.ai/docs/permissions#what-ask-does)). +- [`hidden`](https://opencode.ai/docs/agents#hidden) is accepted for + compatibility, but ignored at runtime. ## Integration This crate only loads and validates agent configs. -Pass [`AgentCatalog`] to your runtime adapter (for example, `llm-coding-tools-serdesai`) to build registries and Task tooling. +Pass [`AgentCatalog`] to your runtime adapter (for example, +`llm-coding-tools-serdesai`) to build registries and Task tooling. + +If you want to validate `model` strings against a catalog, call +[`AgentConfig::model_parts`] and pass the returned `(provider, model)` into +your lookup layer. [`Ruleset`]: llm_coding_tools_core::permissions::Ruleset diff --git a/src/llm-coding-tools-agents/src/types/config.rs b/src/llm-coding-tools-agents/src/types/config.rs index 78010529..1d4a74ab 100644 --- a/src/llm-coding-tools-agents/src/types/config.rs +++ b/src/llm-coding-tools-agents/src/types/config.rs @@ -108,6 +108,8 @@ pub struct AgentConfig { #[serde(default)] pub description: String, /// Optional model override (format: "provider/model-id"). + /// + /// Use [`AgentConfig::model_parts`] before catalog lookup. #[serde(default)] pub model: Option, /// Legacy visibility flag accepted for compatibility only. @@ -136,6 +138,18 @@ pub struct AgentConfig { } impl AgentConfig { + /// Returns the configured model split into `(provider, model)` parts. 
+ #[inline] + pub fn model_parts(&self) -> Option<(&str, &str)> { + let value = self.model.as_deref()?; + let (provider, model) = value.split_once('/')?; + if provider.is_empty() || model.is_empty() { + return None; + } + + Some((provider, model)) + } + /// Creates an [`AgentConfig`] from raw frontmatter and parsed prompt body. pub(crate) fn from_raw(default_name: String, raw: RawFrontmatter, prompt: String) -> Self { Self { @@ -152,3 +166,49 @@ impl AgentConfig { } } } + +#[cfg(test)] +mod tests { + use super::{AgentConfig, AgentMode}; + use ahash::AHashMap; + use indexmap::IndexMap; + + fn config_with_model(model: Option<&str>) -> AgentConfig { + AgentConfig { + name: "example".to_string(), + mode: AgentMode::All, + description: String::new(), + model: model.map(str::to_string), + hidden: false, + temperature: None, + top_p: None, + permission: IndexMap::new(), + options: AHashMap::new(), + prompt: String::new(), + } + } + + #[test] + fn model_parts_returns_provider_and_model() { + let config = config_with_model(Some("synthetic/hf:moonshotai/Kimi-K2.5")); + + assert_eq!( + config.model_parts(), + Some(("synthetic", "hf:moonshotai/Kimi-K2.5")) + ); + } + + #[test] + fn model_parts_rejects_missing_separator() { + let config = config_with_model(Some("synthetic-only")); + + assert_eq!(config.model_parts(), None); + } + + #[test] + fn model_parts_handles_absent_model() { + let config = config_with_model(None); + + assert_eq!(config.model_parts(), None); + } +} diff --git a/src/llm-coding-tools-core/Cargo.toml b/src/llm-coding-tools-core/Cargo.toml index 3238bfa4..3f7ec206 100644 --- a/src/llm-coding-tools-core/Cargo.toml +++ b/src/llm-coding-tools-core/Cargo.toml @@ -40,6 +40,9 @@ serde_json = "1.0" # Zero overhead compile time bitflag generation bitflags = "2.11.0" +# Fast binary serialization for catalog cache types +bitcode = "0.6.9" + # Compile-time generated packed bitfield structs for model metadata bitfields = "1.0.2" @@ -51,6 +54,7 @@ hashbrown = "0.16" # 
Inline string storage for patterns tinyvec_string = { version = "0.3", features = ["alloc"] } +tinyvec = { version = "1.10", features = ["alloc"] } # Efficient immutable string table for provider URLs and env vars lite-strtab = "0.2" diff --git a/src/llm-coding-tools-core/benches/model_catalog_builder.rs b/src/llm-coding-tools-core/benches/model_catalog_builder.rs index 4aedbac3..cc83be5e 100644 --- a/src/llm-coding-tools-core/benches/model_catalog_builder.rs +++ b/src/llm-coding-tools-core/benches/model_catalog_builder.rs @@ -3,16 +3,36 @@ use core::hint::black_box; use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput}; use llm_coding_tools_core::models::{ - Modality, ModelCatalog, ModelInfo, ProviderInfo, ProviderModelSource, ProviderSource, - ProviderType, + Modality, ModelCatalog, ModelInfo, ProviderIdx, ProviderInfo, ProviderModelSource, + ProviderSource, ProviderType, }; +struct ProviderModelSpec { + provider_idx: ProviderIdx, + model_key: String, + model: ModelInfo, +} + struct Dataset { providers: Vec, - provider_models: Vec, + provider_models: Vec, +} + +impl Dataset { + fn provider_model_sources(&self) -> Vec> { + let mut sources = Vec::with_capacity(self.provider_models.len()); + for provider_model in &self.provider_models { + sources.push(ProviderModelSource::new( + provider_model.provider_idx, + provider_model.model_key.as_str(), + provider_model.model, + )); + } + sources + } } -fn make_dataset(provider_count: usize, model_count: usize) -> Dataset { +fn make_dataset(provider_count: usize, model_count: usize, with_env_vars: bool) -> Dataset { debug_assert!(provider_count > 0); let mut providers = Vec::with_capacity(provider_count); @@ -21,7 +41,11 @@ fn make_dataset(provider_count: usize, model_count: usize) -> Dataset { format!("provider-{i}"), ProviderInfo { api_url: format!("https://provider-{i}.example/v1"), - env_vars: vec![format!("PROVIDER_{i}_API_KEY")], + env_vars: if with_env_vars { + 
vec![format!("PROVIDER_{i}_API_KEY")] + } else { + Vec::new() + }, api_type: if (i & 1) == 0 { ProviderType::OpenAiCompletions } else { @@ -34,7 +58,7 @@ fn make_dataset(provider_count: usize, model_count: usize) -> Dataset { let mut provider_models = Vec::with_capacity(model_count); let unique_cfg_count = (model_count / 5).max(1); for i in 0..model_count { - let provider_idx = i % provider_count; + let provider_idx = ProviderIdx::new((i % provider_count) as u16); let cfg = i % unique_cfg_count; let temperature = if (cfg & 1) == 0 { Some(1.0 + ((cfg % 5000) as f32 * 0.001)) @@ -47,17 +71,17 @@ fn make_dataset(provider_count: usize, model_count: usize) -> Dataset { None }; - provider_models.push(ProviderModelSource::new( - format!("provider-{provider_idx}"), - format!("org-{}/model-{i}", i % 17), - ModelInfo { + provider_models.push(ProviderModelSpec { + provider_idx, + model_key: format!("org-{}/model-{i}", i % 17), + model: ModelInfo { modalities: Modality::TEXT, max_input: 4096 + ((cfg as u32) * 32), max_output: 512 + ((cfg as u32) * 8), temperature, top_p, }, - )); + }); } Dataset { @@ -66,9 +90,8 @@ fn make_dataset(provider_count: usize, model_count: usize) -> Dataset { } } -fn construct_batch(dataset: &Dataset) { - let catalog = - ModelCatalog::build(&dataset.providers, &dataset.provider_models).expect("batch build"); +fn construct_batch(providers: &[ProviderSource], provider_models: &[ProviderModelSource<'_>]) { + let catalog = ModelCatalog::build(providers, provider_models).expect("batch build"); black_box(( catalog.provider_count(), @@ -80,17 +103,23 @@ fn construct_batch(dataset: &Dataset) { fn benchmark_builder_construction(c: &mut Criterion) { let mut group = c.benchmark_group("model_catalog_builder_construct"); - for (name, provider_count, model_count) in [ - ("models_dev_snapshot", 96usize, 3031usize), - ("max", 16384usize, 65535usize), + for (name, provider_count, model_count, with_env_vars) in [ + ("models_dev_snapshot", 96usize, 3031usize, true), + 
("max", 16384usize, 65535usize, false), ] { - let dataset = make_dataset(provider_count, model_count); + let dataset = make_dataset(provider_count, model_count, with_env_vars); + let provider_model_sources = dataset.provider_model_sources(); group.throughput(Throughput::Elements( (provider_count + dataset.provider_models.len()) as u64, )); group.bench_with_input(BenchmarkId::new("batch", name), &dataset, |b, input| { - b.iter(|| construct_batch(black_box(input))) + b.iter(|| { + construct_batch( + black_box(&input.providers), + black_box(&provider_model_sources), + ) + }) }); } diff --git a/src/llm-coding-tools-core/src/internal/hash63.rs b/src/llm-coding-tools-core/src/internal/hash63.rs index 56df018f..a81934bc 100644 --- a/src/llm-coding-tools-core/src/internal/hash63.rs +++ b/src/llm-coding-tools-core/src/internal/hash63.rs @@ -18,14 +18,12 @@ impl Hash63 { /// /// The caller is responsible for ensuring bit 63 is 0. #[inline] - #[allow(dead_code)] // internal public API pub(crate) const fn from_u64(value: u64) -> Self { Self(value) } /// Returns the underlying u64 value. #[inline] - #[allow(dead_code)] // internal public API pub(crate) const fn as_u64(&self) -> u64 { self.0 } diff --git a/src/llm-coding-tools-core/src/internal/hash64.rs b/src/llm-coding-tools-core/src/internal/hash64.rs index e880c044..6c96ea78 100644 --- a/src/llm-coding-tools-core/src/internal/hash64.rs +++ b/src/llm-coding-tools-core/src/internal/hash64.rs @@ -11,14 +11,12 @@ pub(crate) struct Hash64(u64); impl Hash64 { /// Creates a new Hash64 from a raw u64 value. #[inline] - #[allow(dead_code)] // internal public API pub(crate) fn from_u64(value: u64) -> Self { Self(value) } /// Returns the underlying u64 value. #[inline] - #[allow(dead_code)] // internal public API pub(crate) fn as_u64(&self) -> u64 { self.0 } @@ -26,14 +24,12 @@ impl Hash64 { /// Hashes a string to Hash64 using ahash64. 
#[inline(always)] -#[allow(dead_code)] // internal public API pub(crate) fn hash_u64(s: &str) -> Hash64 { hash_u64_bytes(s.as_bytes()) } /// Hashes raw bytes to Hash64 using ahash64. #[inline(always)] -#[allow(dead_code)] // internal public API pub(crate) fn hash_u64_bytes(bytes: &[u8]) -> Hash64 { Hash64(ahash::RandomState::with_seed(0xDEAD_CAFE).hash_one(bytes)) } diff --git a/src/llm-coding-tools-core/src/models/catalog/internal/builder.rs b/src/llm-coding-tools-core/src/models/catalog/internal/builder.rs index eb6b1fe5..db1f59cf 100644 --- a/src/llm-coding-tools-core/src/models/catalog/internal/builder.rs +++ b/src/llm-coding-tools-core/src/models/catalog/internal/builder.rs @@ -68,7 +68,7 @@ fn build_state_with_capacity( #[inline] pub(crate) fn build_from_source( providers: &[ProviderSource], - provider_models: &[ProviderModelSource], + provider_models: &[ProviderModelSource<'_>], ) -> Result { let provider_stats = analyze_provider_sources(providers)?; let mut state = build_state_with_capacity(provider_stats.provider_count, provider_models.len()); @@ -90,65 +90,53 @@ pub(crate) fn build_from_source( fn populate_tables_once( state: &mut BuildState, providers: &[ProviderSource], - provider_models: &[ProviderModelSource], + provider_models: &[ProviderModelSource<'_>], ) -> Result<(), ModelCatalogBuildError> { let mut env_start: u16 = 0; - let mut provider_idx_by_key: AHashMap<&str, ProviderIdx> = - AHashMap::with_capacity(providers.len()); - let mut seen_provider_models: AHashSet<(&str, &str)> = + let mut seen_provider_keys: AHashSet<&str> = AHashSet::with_capacity(providers.len()); + let mut seen_provider_models: AHashSet<(ProviderIdx, &str)> = AHashSet::with_capacity(provider_models.len()); for provider in providers { let provider_info = &provider.provider; let env_count = provider_info.env_vars.len() as u8; - match provider_idx_by_key.entry(provider.provider_key.as_str()) { - MapEntry::Occupied(_) => { - return Err(ModelCatalogBuildError::DuplicateKey { - 
table: LookupTableKind::Provider, - key: provider.provider_key.clone(), - }); - } - MapEntry::Vacant(e) => { - let provider_idx = insert_provider( - state, - &provider.provider_key, - env_start, - env_count, - provider_info.api_type, - )?; - e.insert(provider_idx); - } + if !seen_provider_keys.insert(provider.provider_key.as_str()) { + return Err(ModelCatalogBuildError::DuplicateKey { + table: LookupTableKind::Provider, + key: provider.provider_key.clone(), + }); } - // SAFETY: analyze_provider_sources bounds env_start and env_count (<= 3). + insert_provider( + state, + &provider.provider_key, + env_start, + env_count, + provider_info.api_type, + )?; + + // SAFETY: analyze_provider_sources bounds env_start and env_count (<= 7). env_start += u16::from(env_count); } for provider_model in provider_models { - // Validate provider exists before inserting model. - if !provider_idx_by_key.contains_key(provider_model.provider_key.as_str()) { - return Err(ModelCatalogBuildError::ProviderKeyNotFoundForModel { - provider_key: provider_model.provider_key.clone(), - model_key: provider_model.model_key.clone(), - }); - } - - // Check for duplicate (provider_key, model_key) pair. - let key = ( - provider_model.provider_key.as_str(), - provider_model.model_key.as_str(), - ); + let provider = providers + .get(provider_model.provider_idx.as_usize()) + .ok_or(ModelCatalogBuildError::ProviderIdxOutOfRangeForModel { + provider_idx: provider_model.provider_idx, + model_key: provider_model.model_key.to_owned(), + })?; + + // Check for duplicate (provider_idx, model_key) pair. 
+ let key = (provider_model.provider_idx, provider_model.model_key); if !seen_provider_models.insert(key) { return Err(ModelCatalogBuildError::DuplicateKey { table: LookupTableKind::ProviderModel, - key: format!( - "{}/{}", - provider_model.provider_key, provider_model.model_key - ), + key: format!("{}/{}", provider.provider_key, provider_model.model_key), }); } - insert_provider_model(state, provider_model)?; + insert_provider_model(state, provider.provider_key.as_str(), provider_model)?; } Ok(()) @@ -197,7 +185,8 @@ fn insert_provider( #[inline] fn insert_provider_model( state: &mut BuildState, - provider_model: &ProviderModelSource, + provider_key: &str, + provider_model: &ProviderModelSource<'_>, ) -> Result<(), ModelCatalogBuildError> { let info = provider_model.model; @@ -238,11 +227,7 @@ fn insert_provider_model( } }; - let key = hash_provider_model_key( - &state.hash_state, - &provider_model.provider_key, - &provider_model.model_key, - ); + let key = hash_provider_model_key(&state.hash_state, provider_key, provider_model.model_key); let hash48 = PackedProviderModelTableEntry::truncate_hash48(key.as_u64()); // Insert provider-model entry. @@ -346,7 +331,7 @@ fn analyze_provider_sources( for provider in providers { // SAFETY: total_env_keys is the start index for this provider. - // It must fit the 14-bit PackedEnvRange start field. + // It must fit the 13-bit PackedEnvRange start field. if total_env_keys > max_env_start { return Err(ModelCatalogBuildError::TooManyEnvVarKeys { count: total_env_keys, @@ -356,7 +341,7 @@ fn analyze_provider_sources( let provider_info = &provider.provider; let env_count = provider_info.env_vars.len(); - // SAFETY: per-provider count must fit the 2-bit count field. + // SAFETY: per-provider count must fit the 3-bit count field. 
if env_count > max_env_count { return Err( ModelCatalogBuildError::TooManyProviderEnvVarsForOneProvider { @@ -427,7 +412,7 @@ fn build_provider_env_key_table( mod tests { use super::build_from_source; use crate::models::catalog::{ - LookupTableKind, Modality, ModelCatalogBuildError, ModelInfo, ProviderInfo, + LookupTableKind, Modality, ModelCatalogBuildError, ModelInfo, ProviderIdx, ProviderInfo, ProviderModelSource, ProviderSource, }; use crate::models::ProviderType; @@ -454,15 +439,15 @@ mod tests { ProviderSource::new(provider_key, provider) } - fn provider_model_source( - provider_key: &str, - model_key: &str, + fn provider_model_source<'a>( + provider_idx: ProviderIdx, + model_key: &'a str, model: ModelInfo, - ) -> ProviderModelSource { - ProviderModelSource::new(provider_key, model_key, model) + ) -> ProviderModelSource<'a> { + ProviderModelSource::new(provider_idx, model_key, model) } - fn test_sources() -> (Vec, Vec) { + fn test_sources() -> (Vec, Vec>) { ( vec![provider_source( "alpha", @@ -472,7 +457,11 @@ mod tests { ProviderType::OpenAiCompletions, ), )], - vec![provider_model_source("alpha", "m1", info(4096, 512))], + vec![provider_model_source( + ProviderIdx::new(0), + "m1", + info(4096, 512), + )], ) } @@ -499,7 +488,11 @@ mod tests { provider("https://beta.example", &["BETA_KEY"], ProviderType::Azure), ), ]; - let provider_models = vec![provider_model_source("alpha", "m1", info(4096, 512))]; + let provider_models = vec![provider_model_source( + ProviderIdx::new(0), + "m1", + info(4096, 512), + )]; match build_from_source(&providers, &provider_models) { Err(err) => { @@ -522,8 +515,8 @@ mod tests { provider("https://alpha.example", &["ALPHA_KEY"], ProviderType::Azure), )]; let provider_models = vec![ - provider_model_source("alpha", "m1", info(4096, 512)), - provider_model_source("alpha", "m1", info(4096, 512)), + provider_model_source(ProviderIdx::new(0), "m1", info(4096, 512)), + provider_model_source(ProviderIdx::new(0), "m1", info(4096, 512)), ]; 
match build_from_source(&providers, &provider_models) { @@ -554,7 +547,7 @@ mod tests { ]; let provider_models = vec![ provider_model_source( - "alpha", + ProviderIdx::new(0), "m1", ModelInfo { modalities: Modality::TEXT, @@ -565,7 +558,7 @@ mod tests { }, ), provider_model_source( - "beta", + ProviderIdx::new(1), "m1", ModelInfo { modalities: Modality::TEXT, @@ -592,14 +585,18 @@ mod tests { "alpha", provider("https://alpha.example", &["ALPHA_KEY"], ProviderType::Azure), )]; - let provider_models = vec![provider_model_source("beta", "m1", info(4096, 512))]; + let provider_models = vec![provider_model_source( + ProviderIdx::new(1), + "m1", + info(4096, 512), + )]; match build_from_source(&providers, &provider_models) { Err(err) => { assert_eq!( err, - ModelCatalogBuildError::ProviderKeyNotFoundForModel { - provider_key: "beta".to_string(), + ModelCatalogBuildError::ProviderIdxOutOfRangeForModel { + provider_idx: ProviderIdx::new(1), model_key: "m1".to_string(), } ); @@ -614,19 +611,23 @@ mod tests { "alpha", provider( "https://alpha.example", - &["A", "B", "C", "D"], + &["A", "B", "C", "D", "E", "F", "G", "H"], ProviderType::Azure, ), )]; - let provider_models = vec![provider_model_source("alpha", "m1", info(4096, 512))]; + let provider_models = vec![provider_model_source( + ProviderIdx::new(0), + "m1", + info(4096, 512), + )]; match build_from_source(&providers, &provider_models) { Err(err) => { assert_eq!( err, ModelCatalogBuildError::TooManyProviderEnvVarsForOneProvider { - count: 4, - max: 3, + count: 8, + max: 7, } ); } @@ -639,7 +640,7 @@ mod tests { let (providers, _) = test_sources(); let max_output = super::MAX_OUTPUT_TOKENS; let provider_models = vec![provider_model_source( - "alpha", + ProviderIdx::new(0), "m1", info(4096, max_output.saturating_add(1)), )]; @@ -663,7 +664,7 @@ mod tests { let (providers, _) = test_sources(); let max_input = super::MAX_INPUT_TOKENS; let provider_models = vec![provider_model_source( - "alpha", + ProviderIdx::new(0), "m1", 
info(max_input.saturating_add(1), 512), )]; @@ -684,29 +685,29 @@ mod tests { #[test] fn too_many_total_env_vars_returns_error() { - // 5462 providers * 3 env vars = 16386, so the 5463rd provider would have - // a start index of 16386, which exceeds MAX_ENV_START (16383). - let mut providers = Vec::with_capacity(5463); - for i in 0..5463usize { + // 8192 providers * 1 env var = 8192, so the 8193rd provider would have + // a start index of 8192, which exceeds MAX_ENV_START (8191). + let mut providers = Vec::with_capacity(8193); + for i in 0..8193usize { providers.push(provider_source( &format!("provider_{}", i), - provider( - "https://example.com", - &["VAR1", "VAR2", "VAR3"], - ProviderType::Azure, - ), + provider("https://example.com", &["VAR1"], ProviderType::Azure), )); } let mut provider_models = Vec::with_capacity(1); - provider_models.push(provider_model_source("provider_0", "m1", info(4096, 512))); + provider_models.push(provider_model_source( + ProviderIdx::new(0), + "m1", + info(4096, 512), + )); match build_from_source(&providers, &provider_models) { Err(err) => { assert_eq!( err, ModelCatalogBuildError::TooManyEnvVarKeys { - count: 16_386, - max: 16_383, + count: 8_192, + max: 8_191, } ); } @@ -715,34 +716,43 @@ mod tests { } #[test] - fn max_14bit_start_with_tail_entries_succeeds() { - // The last provider's start index can be 16383 and still be valid when it - // contributes 3 keys at indices 16383, 16384, and 16385. - let mut providers = Vec::with_capacity(5462); - for i in 0..5462usize { + fn max_13bit_start_with_tail_entries_succeeds() { + // The last provider's start index can be 8191 and still be valid when it + // contributes keys at indices 8191 through 8197. 
+ let mut providers = Vec::with_capacity(1172); + for i in 0..1170usize { providers.push(provider_source( &format!("provider_{}", i), provider( "https://example.com", - &["VAR1", "VAR2", "VAR3"], + &["VAR1", "VAR2", "VAR3", "VAR4", "VAR5", "VAR6", "VAR7"], ProviderType::Azure, ), )); } - let last_provider_key = format!("provider_{}", 5461usize); - let mut provider_models = Vec::with_capacity(1); - provider_models.push(provider_model_source( - &last_provider_key, - "m1", - info(4096, 512), + providers.push(provider_source( + "provider_1170", + provider("https://example.com", &["VAR1"], ProviderType::Azure), + )); + providers.push(provider_source( + "provider_1171", + provider( + "https://example.com", + &["VAR1", "VAR2", "VAR3", "VAR4", "VAR5", "VAR6", "VAR7"], + ProviderType::Azure, + ), )); + let provider_models = Vec::new(); let catalog = build_from_source(&providers, &provider_models).expect("boundary case should pass"); - let (provider, _) = catalog - .lookup(&last_provider_key, "m1") + let provider = catalog + .provider_from_index(ProviderIdx::new(1171)) .expect("last provider should be addressable"); - assert_eq!(provider.env_vars(), &["VAR1", "VAR2", "VAR3"]); + assert_eq!( + provider.env_vars(), + &["VAR1", "VAR2", "VAR3", "VAR4", "VAR5", "VAR6", "VAR7"] + ); } } diff --git a/src/llm-coding-tools-core/src/models/catalog/internal/packed_env_range.rs b/src/llm-coding-tools-core/src/models/catalog/internal/packed_env_range.rs index bf7b982b..6122010f 100644 --- a/src/llm-coding-tools-core/src/models/catalog/internal/packed_env_range.rs +++ b/src/llm-coding-tools-core/src/models/catalog/internal/packed_env_range.rs @@ -1,15 +1,15 @@ //! Packed env-var range entry for provider-to-env-key mapping. //! //! Layout (`u16`): -//! - `14` bits: start index into provider_env_keys StringTable -//! - `2` bits: count of env keys for this provider (0..=3) +//! - `13` bits: start index into provider_env_keys StringTable +//! 
- `3` bits: count of env keys for this provider (0..=7) use bitfields::bitfield; /// Maximum env-var count per provider representable by PackedEnvRange. -pub const MAX_ENV_RANGE_COUNT: u8 = 3; -/// Maximum start index representable by PackedEnvRange (14 bits). -pub const MAX_ENV_START: u16 = (1u16 << 14) - 1; // 16383 +pub const MAX_ENV_RANGE_COUNT: u8 = 7; +/// Maximum start index representable by PackedEnvRange (13 bits). +pub const MAX_ENV_START: u16 = (1u16 << 13) - 1; // 8191 /// Packed env-var range entry. /// @@ -17,9 +17,9 @@ pub const MAX_ENV_START: u16 = (1u16 << 14) - 1; // 16383 #[bitfield(u16)] #[derive(Clone, Copy, PartialEq, Eq, Hash)] pub struct PackedEnvRange { - #[bits(14)] + #[bits(13)] start: u16, - #[bits(2)] + #[bits(3)] count: u8, } @@ -27,7 +27,7 @@ impl PackedEnvRange { /// Creates one packed env-var range entry. /// /// SAFETY: The `start` parameter is not validated here. The caller must ensure - /// `start` fits within 14 bits (max 16383). This invariant is enforced in + /// `start` fits within 13 bits (max 8191). This invariant is enforced in /// `analyze_provider_sources` before `populate_tables_once` calls this function. #[inline] pub fn from_parts(start: u16, count: u8) -> Self { @@ -56,7 +56,7 @@ mod tests { #[test] fn count_capped_at_max() { - let packed = PackedEnvRange::from_parts(0, 5); - assert_eq!(packed.count(), 3); // capped to MAX_ENV_RANGE_COUNT + let packed = PackedEnvRange::from_parts(0, 8); + assert_eq!(packed.count(), 7); // capped to MAX_ENV_RANGE_COUNT } } diff --git a/src/llm-coding-tools-core/src/models/catalog/mod.rs b/src/llm-coding-tools-core/src/models/catalog/mod.rs index 9d99f090..79ae717f 100644 --- a/src/llm-coding-tools-core/src/models/catalog/mod.rs +++ b/src/llm-coding-tools-core/src/models/catalog/mod.rs @@ -27,7 +27,7 @@ //! //! - [`ModelCatalog::build`] - Batch builder entry point //! - [`ProviderSource`] - Provider key + metadata input -//! 
- [`ProviderModelSource`] - Model key + metadata input for a provider +//! - [`ProviderModelSource`] - Model key + metadata input keyed by [`ProviderIdx`] and model key //! - [`ModelInfo`] - Model metadata input (modalities, token limits, sampling) //! - [`ProviderInfo`] - Provider metadata input (API URL, env vars, type) //! - [`Modality`] - Content modality flags (text, image, audio, video) @@ -151,8 +151,8 @@ //! | ------------------------- | ----------: | ------------------------------------------------ | //! | Max providers | 65,536 | Addressable by 16-bit provider index | //! | Max model configs | 65,536 | Addressable by 16-bit model configuration index | -//! | Max provider env vars | 16,384 | Global env-var pool offset (14-bit) | -//! | Max env vars per provider | 3 | Count field in provider range entry (2-bit) | +//! | Max provider env vars | 8,192 | Global env-var pool offset (13-bit) | +//! | Max env vars per provider | 7 | Count field in provider range entry (3-bit) | //! | Max input tokens | 536,870,911 | 29-bit packed field (≈536M) | //! | Max output tokens | 134,217,727 | 27-bit packed field (≈134M) | //! | Hash bits retained | 48 | Truncated from 64-bit hash output | @@ -231,6 +231,7 @@ use internal::{ PackedEnvRange, PackedModelEntry, PackedProviderModelTableEntry, PackedProviderTableEntry, }; use lite_strtab::{StringId, StringTable}; +use public::{ProviderEnvVars, INLINE_PROVIDER_ENV_VARS}; pub use public::builder_types::{ModelCatalogBuildError, ProviderModelSource, ProviderSource}; pub use public::*; @@ -286,19 +287,20 @@ impl ModelCatalog { /// # Parameters /// /// * `providers` - [`ProviderSource`] values keyed by provider identifier. - /// * `provider_models` - [`ProviderModelSource`] values keyed by provider and model. + /// * `provider_models` - [`ProviderModelSource`] values keyed by [`ProviderIdx`] and model key. + /// The `provider_idx` must point at an element in the `providers` slice. 
/// /// # Errors /// /// Returns [`ModelCatalogBuildError`] when: /// - input exceeds supported numeric limits, /// - token limits cannot be represented in packed model entries, - /// - provider model sources reference unknown providers, + /// - provider model sources reference out-of-range provider indices, /// - or all seed-retry attempts still result in collisions. #[inline] pub fn build( providers: &[ProviderSource], - provider_models: &[ProviderModelSource], + provider_models: &[ProviderModelSource<'_>], ) -> Result { build_from_source(providers, provider_models) } @@ -471,21 +473,16 @@ impl ModelCatalog { let start = range.start(); let count = range.count() as usize; - let mut env_vars = ["", "", ""]; - #[allow(clippy::needless_range_loop)] + let mut env_vars: ProviderEnvVars<'_> = + ProviderEnvVars::with_capacity(count.max(INLINE_PROVIDER_ENV_VARS)); for x in 0..count { - env_vars[x] = self - .provider_env_keys - .get(StringId::new(ProviderIdx::new(start + x as u16)))?; + env_vars.push( + self.provider_env_keys + .get(StringId::new(ProviderIdx::new(start + x as u16)))?, + ); } - Some(Provider::new( - provider_idx, - api_url, - env_vars, - count as u8, - api_type, - )) + Some(Provider::new(provider_idx, api_url, env_vars, api_type)) } /// Looks up a model by its configuration index. 
@@ -518,7 +515,7 @@ impl ModelCatalog { mod tests { use super::*; use crate::models::catalog::{ - Modality, ModelInfo, ProviderInfo, ProviderModelSource, ProviderSource, + Modality, ModelInfo, ProviderIdx, ProviderInfo, ProviderModelSource, ProviderSource, }; fn provider(api_url: &str, env_vars: &[&str], api_type: ProviderType) -> ProviderInfo { @@ -562,10 +559,16 @@ mod tests { .into_iter() .map(|(key, info)| ProviderSource::new(key, info)) .collect(); - let provider_model_sources: Vec = provider_models + let provider_model_sources: Vec> = provider_models .into_iter() .map(|(provider_key, model_key, info)| { - ProviderModelSource::new(provider_key, model_key, info) + let provider_idx = ProviderIdx::new( + provider_sources + .iter() + .position(|provider| provider.provider_key == provider_key) + .expect("provider key should exist") as u16, + ); + ProviderModelSource::new(provider_idx, model_key, info) }) .collect(); ModelCatalog::build(&provider_sources, &provider_model_sources) diff --git a/src/llm-coding-tools-core/src/models/catalog/public/builder_types.rs b/src/llm-coding-tools-core/src/models/catalog/public/builder_types.rs index 7167c3cc..b1749660 100644 --- a/src/llm-coding-tools-core/src/models/catalog/public/builder_types.rs +++ b/src/llm-coding-tools-core/src/models/catalog/public/builder_types.rs @@ -3,6 +3,7 @@ //! [`ModelCatalog`]: crate::models::catalog::ModelCatalog use super::Modality; +use super::ProviderIdx; use crate::models::ProviderType; use thiserror::Error; @@ -80,47 +81,57 @@ impl From<(String, ProviderInfo)> for ProviderSource { /// /// This wrapper keeps builder input self-documenting and avoids tuple-position /// ambiguity at call sites. +/// +/// The `model_key` is borrowed because the catalog builder hashes it during +/// construction and does not retain it afterward. Callers must therefore keep +/// the referenced string alive until [`crate::models::catalog::ModelCatalog::build`] +/// returns. 
+/// +/// The `provider_idx` must correspond to an entry in the `providers` slice passed +/// to [`ModelCatalog::build`]. +/// +/// [`ModelCatalog::build`]: crate::models::catalog::ModelCatalog::build #[derive(Debug, Clone, PartialEq)] -pub struct ProviderModelSource { - /// Provider identifier used by lookups (for example, `"openai"`). - pub provider_key: String, - /// Model identifier used by lookups (for example, `"gpt-4"`). - pub model_key: String, +pub struct ProviderModelSource<'a> { + /// Index into the `providers` slice passed to [`ModelCatalog::build`]. + /// + /// [`ModelCatalog::build`]: crate::models::catalog::ModelCatalog::build + pub provider_idx: ProviderIdx, + /// Borrowed model identifier used by lookups (for example, `"gpt-4"`). + pub model_key: &'a str, /// Model metadata associated with [`Self::model_key`]. pub model: ModelInfo, } -impl ProviderModelSource { +impl<'a> ProviderModelSource<'a> { /// Creates a provider model source. /// /// # Parameters /// - /// * `provider_key` - Provider identifier used during provider lookup. + /// * `provider_idx` - Index into the `providers` slice passed to [`ModelCatalog::build`]. /// * `model_key` - Model identifier used during model lookup for this provider. /// * `model` - Model metadata for this provider model. /// /// # Returns /// /// A new [`ProviderModelSource`]. 
+ /// + /// [`ModelCatalog::build`]: crate::models::catalog::ModelCatalog::build #[inline] - pub fn new( - provider_key: impl Into, - model_key: impl Into, - model: ModelInfo, - ) -> Self { + pub fn new(provider_idx: ProviderIdx, model_key: &'a str, model: ModelInfo) -> Self { Self { - provider_key: provider_key.into(), - model_key: model_key.into(), + provider_idx, + model_key, model, } } } -impl From<(String, String, ModelInfo)> for ProviderModelSource { +impl<'a> From<(ProviderIdx, &'a str, ModelInfo)> for ProviderModelSource<'a> { #[inline] - fn from((provider_key, model_key, model): (String, String, ModelInfo)) -> Self { + fn from((provider_idx, model_key, model): (ProviderIdx, &'a str, ModelInfo)) -> Self { Self { - provider_key, + provider_idx, model_key, model, } @@ -164,7 +175,7 @@ pub enum ModelCatalogBuildError { /// Maximum supported unique model configuration count. max: usize, }, - /// One provider has too many env vars for the packed count field (max 3). + /// One provider has too many env vars for the packed count field (max 7). #[error("provider env-var count {count} exceeds supported maximum {max}")] TooManyProviderEnvVarsForOneProvider { /// Number of env vars supplied for one provider. @@ -172,11 +183,11 @@ pub enum ModelCatalogBuildError { /// Maximum supported env vars for one provider. max: usize, }, - /// A provider model source references a provider key that does not exist. - #[error("provider model source references unknown provider_key={provider_key:?} for model_key={model_key:?}")] - ProviderKeyNotFoundForModel { - /// Provider key from the provider model source. - provider_key: String, + /// A provider model source references a provider index that does not exist. + #[error("provider model source references out-of-range provider_idx={} for model_key={model_key:?}", provider_idx.as_usize())] + ProviderIdxOutOfRangeForModel { + /// Provider index from the provider model source. 
+ provider_idx: ProviderIdx, /// Model key from the provider model source. model_key: String, }, diff --git a/src/llm-coding-tools-core/src/models/catalog/public/entry.rs b/src/llm-coding-tools-core/src/models/catalog/public/entry.rs index fc4f4060..e747381d 100644 --- a/src/llm-coding-tools-core/src/models/catalog/public/entry.rs +++ b/src/llm-coding-tools-core/src/models/catalog/public/entry.rs @@ -14,6 +14,11 @@ use super::{Modality, ModelIdx, ProviderIdx}; use crate::models::catalog::internal::Fixed4; use crate::models::ProviderType; +use tinyvec::TinyVec; + +pub(crate) const INLINE_PROVIDER_ENV_VARS: usize = 2; + +pub(crate) type ProviderEnvVars<'a> = TinyVec<[&'a str; INLINE_PROVIDER_ENV_VARS]>; /// Provider lookup result. #[derive(Debug, Clone, PartialEq, Eq)] @@ -23,9 +28,7 @@ pub struct Provider<'a> { /// Provider base URL. pub api_url: &'a str, /// Candidate environment variables used to resolve API keys. - env_vars: [&'a str; 3], - /// Number of valid entries in `env_vars`. - env_vars_count: u8, + env_vars: ProviderEnvVars<'a>, /// Type of API used by the provider. pub api_type: ProviderType, } @@ -36,15 +39,13 @@ impl<'a> Provider<'a> { pub(crate) fn new( provider_idx: ProviderIdx, api_url: &'a str, - env_vars: [&'a str; 3], - env_vars_count: u8, + env_vars: ProviderEnvVars<'a>, api_type: ProviderType, ) -> Self { Self { provider_idx, api_url, env_vars, - env_vars_count, api_type, } } @@ -52,7 +53,7 @@ impl<'a> Provider<'a> { /// Returns the candidate environment variables used to resolve API keys. 
#[inline] pub fn env_vars(&self) -> &[&'a str] { - &self.env_vars[..self.env_vars_count as usize] + self.env_vars.as_slice() } } diff --git a/src/llm-coding-tools-core/src/models/catalog/public/mod.rs b/src/llm-coding-tools-core/src/models/catalog/public/mod.rs index c6b0c044..adf55b2e 100644 --- a/src/llm-coding-tools-core/src/models/catalog/public/mod.rs +++ b/src/llm-coding-tools-core/src/models/catalog/public/mod.rs @@ -7,6 +7,7 @@ pub use builder_types::{LookupTableKind, ModelInfo, ProviderInfo}; pub use entry::{Model, Provider}; +pub(crate) use entry::{ProviderEnvVars, INLINE_PROVIDER_ENV_VARS}; pub use modality::Modality; pub use model_idx::ModelIdx; pub use provider_idx::ProviderIdx; diff --git a/src/llm-coding-tools-core/src/models/catalog/public/provider_idx.rs b/src/llm-coding-tools-core/src/models/catalog/public/provider_idx.rs index ba3951e7..d82121a9 100644 --- a/src/llm-coding-tools-core/src/models/catalog/public/provider_idx.rs +++ b/src/llm-coding-tools-core/src/models/catalog/public/provider_idx.rs @@ -4,7 +4,7 @@ /// /// Used to reference a specific provider in the catalog's /// packed provider entry tables and string tables. 
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, bitcode::Encode, bitcode::Decode)] #[repr(transparent)] pub struct ProviderIdx(pub(crate) u16); diff --git a/src/llm-coding-tools-core/src/models/mod.rs b/src/llm-coding-tools-core/src/models/mod.rs index 17137b62..495de325 100644 --- a/src/llm-coding-tools-core/src/models/mod.rs +++ b/src/llm-coding-tools-core/src/models/mod.rs @@ -5,6 +5,6 @@ mod provider_type; pub use catalog::{ LookupTableKind, Modality, Model, ModelCatalog, ModelCatalogBuildError, ModelInfo, Provider, - ProviderInfo, ProviderModelSource, ProviderSource, + ProviderIdx, ProviderInfo, ProviderModelSource, ProviderSource, }; pub use provider_type::ProviderType; diff --git a/src/llm-coding-tools-core/src/models/provider_type.rs b/src/llm-coding-tools-core/src/models/provider_type.rs index af7c5d24..1b18e262 100644 --- a/src/llm-coding-tools-core/src/models/provider_type.rs +++ b/src/llm-coding-tools-core/src/models/provider_type.rs @@ -1,5 +1,5 @@ /// Provider behavior profile used by model resolver logic. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default, bitcode::Encode, bitcode::Decode)] #[repr(u8)] pub enum ProviderType { /// Unknown or unsupported provider package. 
diff --git a/src/llm-coding-tools-models-dev/Cargo.toml b/src/llm-coding-tools-models-dev/Cargo.toml new file mode 100644 index 00000000..85c65cd7 --- /dev/null +++ b/src/llm-coding-tools-models-dev/Cargo.toml @@ -0,0 +1,66 @@ +[package] +name = "llm-coding-tools-models-dev" +version = "0.1.0" +edition = "2021" +description = "models.dev catalog ingestion with online-first sync pipeline" +repository = "https://github.com/Sewer56/llm-coding-tools" +license = "Apache-2.0" +include = ["src/**/*", "README.md"] +readme = "README.md" + +[features] +default = ["tokio"] +# Base async signatures (enabled by runtime features) +async = [] +# Async with tokio runtime +tokio = ["async", "dep:tokio", "dep:reqwest", "llm-coding-tools-core/tokio"] +# Blocking/sync mode - mutually exclusive with tokio/async +blocking = [ + "dep:reqwest", + "reqwest/blocking", + "llm-coding-tools-core/blocking", + "maybe-async/is_sync", +] + +[dependencies] +# Core library for ModelCatalog and related types +llm-coding-tools-core = { path = "../llm-coding-tools-core", version = "0.2.0", default-features = false } + +# Cross-platform cache directory detection +dirs = "6.0.0" + +# HTTP client for conditional GET requests +reqwest = { version = "0.13", default-features = false, features = [ + "rustls", + "rustls-native-certs", +], optional = true } + +# Fast binary serialization +bitcode = "0.6.9" + +# Compression for cache payload +zstd = "0.13.3" + +# Shared async/sync implementation for load/cache APIs +maybe-async = "0.2" + +# Endian-aware fixed-header serialization helpers +endian-writer = "2.2.0" +endian-writer-derive = "0.1.0" + +# JSON parsing for models.dev API responses +serde = { version = "1.0.228", features = ["derive"] } +serde_json = "1.0.145" + +# Ergonomic error definitions +thiserror = "2.0.18" + +# Temp file with atomic rename support +tempfile = "3.26" + +# Async runtime (when tokio feature enabled) +tokio = { version = "1.49", features = ["fs", "io-util"], optional = true } + 
+[dev-dependencies] +tokio = { version = "1.49", features = ["rt", "macros"] } +serial_test = "3" diff --git a/src/llm-coding-tools-models-dev/README.md b/src/llm-coding-tools-models-dev/README.md new file mode 100644 index 00000000..45698787 --- /dev/null +++ b/src/llm-coding-tools-models-dev/README.md @@ -0,0 +1,151 @@ +# llm-coding-tools-models-dev + +Reads the online models.dev catalog into llm-coding-tools-core; with support +for a cached fallback and caching via ETag(s). + +## Why this exists + +If you run coding agents against many providers, you want to have fresh data. +[models.dev][models.dev] is one such source of data. + +This crate downloads from models.dev, keeps only the fields we need, and +builds a `llm_coding_tools_core::models::ModelCatalog`. + +## Usage + +### Load flow (simple) + +1. Read cache header (if present) and get the old ETag. +2. Send request to models.dev with `If-None-Match` when ETag exists. +3. If server returns `304 Not Modified`, load catalog from cache. +4. If server returns `200 OK`, parse JSON, map it into catalog sources, + write fresh cache, then build catalog. +5. If network fails, try cached data as fallback; if no valid cache exists, + return an error. 
+ +### Non-blocking (`tokio`) + +```rust +use llm_coding_tools_models_dev::{CatalogLoadSource, ModelsDevCatalog}; + +#[cfg(feature = "tokio")] +async fn load_catalog() -> Result<(), Box<dyn std::error::Error>> { + let result = ModelsDevCatalog::load().await?; + + match result.source { + CatalogLoadSource::Downloaded => { + println!("Downloaded fresh catalog data.") + } + CatalogLoadSource::NotModifiedCache => { + println!("Cache is already up to date.") + } + CatalogLoadSource::FallbackCache => { + println!("Network unavailable, using cached catalog data.") + } + } + + if let Some(entry) = result.catalog.lookup("openai", "gpt-4") { + println!("provider api url: {}", entry.0.api_url); + println!("max input tokens: {}", entry.1.max_input); + } + + Ok(()) +} +``` + +### Blocking (`blocking`) + +```rust +use llm_coding_tools_models_dev::{CatalogLoadSource, ModelsDevCatalog}; + +#[cfg(feature = "blocking")] +fn load_catalog() -> Result<(), Box<dyn std::error::Error>> { + let result = ModelsDevCatalog::load()?; + + match result.source { + CatalogLoadSource::Downloaded => { + println!("Downloaded fresh catalog data.") + } + CatalogLoadSource::NotModifiedCache => { + println!("Cache is already up to date.") + } + CatalogLoadSource::FallbackCache => { + println!("Network unavailable, using cached catalog data.") + } + } + + if let Some(entry) = result.catalog.lookup("openai", "gpt-4") { + println!("provider api url: {}", entry.0.api_url); + println!("max input tokens: {}", entry.1.max_input); + } + + Ok(()) +} +``` + +### Load from a custom cache path + +```rust +use llm_coding_tools_models_dev::ModelsDevCatalog; +use std::path::PathBuf; + +#[cfg(feature = "tokio")] +async fn load_catalog() -> Result<(), Box<dyn std::error::Error>> { + let cache_path = PathBuf::from("/tmp/models-dev.cache"); + let _result = ModelsDevCatalog::load_at(&cache_path).await?; + Ok(()) +} + +#[cfg(feature = "blocking")] +fn load_catalog() -> Result<(), Box<dyn std::error::Error>> { + let cache_path = PathBuf::from("/tmp/models-dev.cache"); + let _result = 
ModelsDevCatalog::load_at(&cache_path)?; + Ok(()) +} +``` + +### Resolve the shared cache path + +```rust +use llm_coding_tools_models_dev::shared_cache_path; + +fn print_cache_path() -> Result<(), Box<dyn std::error::Error>> { + let path = shared_cache_path()?; + println!("{}", path.display()); + Ok(()) +} +``` + +## Cache location + +By default, cache is stored in the platform cache directory: + +- Linux: `~/.cache/llm-coding-tools/models.dev.catalog.v1.cache` +- macOS: `~/Library/Caches/llm-coding-tools/models.dev.catalog.v1.cache` +- Windows: `%LOCALAPPDATA%\llm-coding-tools\models.dev.catalog.v1.cache` + +Set `LLM_CODING_TOOLS_MODELS_DEV_CACHE_PATH` to override this path. + +## Cache size and performance + +Current ballpark from a recent `models.dev/api.json` snapshot: + +- Size: about `1.31 MiB` JSON -> `109 KiB` serialized payload -> `23.7 KiB` compressed cache +- Compression: about `10.1 ms` with current `zstd` level `17` +- Decompression: about `0.057 ms` (`57 us`) in `--release` +- Cache load into `ModelCatalog`: about `0.31 ms` (`read + decompress + decode + build`) + +Measured on a single core of a Ryzen `9950X3D`; these are rough guidance numbers and will drift as the upstream catalog changes. + +## Feature flags + +- `tokio` (default): async runtime support. +- `blocking`: synchronous runtime support. + +Exactly one runtime mode must be enabled. + +## License + +Apache-2.0 + +[models.dev]: https://models.dev diff --git a/src/llm-coding-tools-models-dev/src/api/catalog_sources.rs b/src/llm-coding-tools-models-dev/src/api/catalog_sources.rs new file mode 100644 index 00000000..a0968293 --- /dev/null +++ b/src/llm-coding-tools-models-dev/src/api/catalog_sources.rs @@ -0,0 +1,563 @@ +//! models.dev API -> `ModelCatalog` mapping. +//! +//! This module parses models.dev `api.json`, maps provider/model metadata into +//! transient core builder inputs, and immediately constructs a [`ModelCatalog`](llm_coding_tools_core::models::ModelCatalog). +//! +//! Mapping policy: +//! 
- missing limits default to `0`; +//! - model modalities are mapped from `modalities.input[]`/`modalities.output[]` +//! into directional [`Modality`] flags; +//! - unknown npm package identifiers map to [`ProviderType::Unknown`]; +//! - unknown modality labels are ignored; if nothing maps, modalities remain +//! [`Modality::empty()`]; +//! - model rows remain provider-scoped; shared configurations are deduplicated by +//! core during catalog build. + +use super::schema::{parse_api_json, ApiModelEntry, ApiModelLimit, ApiModelModalities}; +use crate::cache::payload::{CachedModelRow, CachedProviderRow, CatalogCachePayload}; +use crate::error::{CatalogError, CatalogResult}; +use llm_coding_tools_core::models::{ + Modality, ModelCatalogBuildError, ModelInfo, ProviderIdx, ProviderType, +}; + +pub(crate) fn cache_payload_from_api_json_bytes( + json_bytes: &[u8], +) -> CatalogResult { + let provider_entries = parse_api_json(json_bytes)?; + + let provider_count = provider_entries.len(); + if provider_count > (u16::MAX as usize) + 1 { + return Err(CatalogError::ModelCatalogBuild( + ModelCatalogBuildError::TooManyProviders { + count: provider_count, + max: (u16::MAX as usize) + 1, + }, + )); + } + + let mut providers = Vec::with_capacity(provider_count); + let mut models = Vec::with_capacity( + provider_entries + .values() + .map(|provider| provider.models.len()) + .sum(), + ); + + for (provider_key, provider) in provider_entries { + let provider_idx = ProviderIdx::new(providers.len() as u16); + let api_type = provider_type_from_models_dev_npm(provider.npm.as_deref()); + + providers.push(CachedProviderRow { + provider_key, + api_url: provider.api.unwrap_or_default(), + env_vars: provider.env, + api_type, + }); + + for (model_key, model_entry) in provider.models { + let model = model_info_from_entry(&model_entry); + models.push(CachedModelRow { + provider_idx, + model_key, + modalities_bits: model.modalities.bits(), + max_input: model.max_input, + max_output: 
model.max_output, + temperature: model.temperature, + top_p: model.top_p, + }); + } + } + + Ok(CatalogCachePayload { providers, models }) +} + +#[inline] +fn model_info_from_entry(model_entry: &ApiModelEntry) -> ModelInfo { + let (max_input, max_output) = match model_entry.limit.as_ref() { + Some(limit) => (model_max_input(limit), limit.output), + None => (0, 0), + }; + let modalities = model_modalities(model_entry.modalities.as_ref()); + + ModelInfo { + modalities, + max_input, + max_output, + temperature: None, + top_p: None, + } +} + +#[inline] +fn model_modalities(raw: Option<&ApiModelModalities>) -> Modality { + let Some(raw) = raw else { + return Modality::TEXT; + }; + + let mut modalities = Modality::empty(); + for label in &raw.input { + modalities |= input_modality_flag(label.as_str()); + } + for label in &raw.output { + modalities |= output_modality_flag(label.as_str()); + } + + modalities +} + +#[inline] +fn input_modality_flag(label: &str) -> Modality { + match label { + "text" => Modality::TEXT_INPUT, + "image" => Modality::IMAGE_INPUT, + "audio" => Modality::AUDIO_INPUT, + "video" => Modality::VIDEO_INPUT, + _ => Modality::empty(), // pdf not supported + } +} + +#[inline] +fn output_modality_flag(label: &str) -> Modality { + match label { + "text" => Modality::TEXT_OUTPUT, + "image" => Modality::IMAGE_OUTPUT, + "audio" => Modality::AUDIO_OUTPUT, + "video" => Modality::VIDEO_OUTPUT, + _ => Modality::empty(), + } +} + +#[inline] +fn model_max_input(limit: &ApiModelLimit) -> u32 { + if limit.input == 0 { + limit.context + } else { + limit.input + } +} + +#[inline] +fn provider_type_from_models_dev_npm(npm_package: Option<&str>) -> ProviderType { + match npm_package { + Some("@ai-sdk/openai") => ProviderType::OpenAiCompletions, + Some("@ai-sdk/openai-responses") => ProviderType::OpenAiResponses, + Some("@ai-sdk/anthropic") => ProviderType::Anthropic, + Some("@ai-sdk/google") => ProviderType::Google, + Some("@ai-sdk/groq") => ProviderType::Groq, + 
Some("@ai-sdk/mistral") => ProviderType::Mistral, + Some("@ai-sdk/ollama") => ProviderType::Ollama, + Some("@ai-sdk/amazon-bedrock") => ProviderType::Bedrock, + Some("@ai-sdk/azure") => ProviderType::Azure, + Some("@openrouter/ai-sdk-provider") => ProviderType::OpenRouter, + Some("@ai-sdk/huggingface") => ProviderType::HuggingFace, + Some("@ai-sdk/cohere") => ProviderType::Cohere, + Some("@ai-sdk/chatgpt-oauth") => ProviderType::ChatGptOAuth, + Some("@ai-sdk/claude-code-oauth") => ProviderType::ClaudeCodeOAuth, + Some("@ai-sdk/antigravity") => ProviderType::Antigravity, + Some(_) | None => ProviderType::Unknown, + } +} + +#[cfg(test)] +mod tests { + use super::{cache_payload_from_api_json_bytes, provider_type_from_models_dev_npm}; + use crate::cache::payload::catalog_from_cache_payload; + use llm_coding_tools_core::models::{Modality, ModelCatalog, ProviderIdx, ProviderType}; + + fn catalog_from_api_json_bytes(json_bytes: &[u8]) -> crate::error::CatalogResult { + let payload = cache_payload_from_api_json_bytes(json_bytes)?; + catalog_from_cache_payload(payload) + } + + fn catalog(json: &[u8]) -> ModelCatalog { + catalog_from_api_json_bytes(json).expect("API payload should map") + } + + fn provider_snapshot( + catalog: &ModelCatalog, + provider_key: &str, + ) -> (String, Vec, ProviderType) { + let provider = catalog + .lookup_provider(provider_key) + .expect("provider should exist"); + ( + provider.api_url.to_string(), + provider + .env_vars() + .iter() + .map(|env_var| (*env_var).to_string()) + .collect(), + provider.api_type, + ) + } + + fn model_snapshot( + catalog: &ModelCatalog, + provider_key: &str, + model_key: &str, + ) -> (Modality, u32, u32, Option, Option) { + let model = catalog + .lookup_provider_model(provider_key, model_key) + .expect("provider model should exist"); + ( + model.modalities, + model.max_input, + model.max_output, + model.temperature(), + model.top_p(), + ) + } + + #[test] + fn cache_payload_maps_single_provider_with_models() { + let 
api_json = br#" + { + "openai": { + "npm": "@ai-sdk/openai", + "api": "https://api.openai.com/v1", + "env": ["OPENAI_API_KEY"], + "models": { + "gpt-4": { + "modalities": { "input": ["text"], "output": ["text"] }, + "limit": { "context": 8192, "output": 4096 } + } + } + } + } + "#; + + let payload = cache_payload_from_api_json_bytes(api_json).expect("payload should build"); + assert_eq!(payload.providers.len(), 1); + assert_eq!(payload.models.len(), 1); + + assert_eq!(payload.providers[0].provider_key, "openai"); + assert_eq!( + payload.providers[0].api_type, + ProviderType::OpenAiCompletions + ); + + assert_eq!(payload.models[0].provider_idx, ProviderIdx::new(0)); + assert_eq!(payload.models[0].model_key, "gpt-4"); + assert_eq!(payload.models[0].modalities_bits, Modality::TEXT.bits()); + assert_eq!(payload.models[0].max_input, 8192); + assert_eq!(payload.models[0].max_output, 4096); + } + + #[test] + fn catalog_source_mapping_maps_provider_rows() { + let api_json = br#" + { + "alpha": { + "npm": "@ai-sdk/openai-responses", + "api": "https://alpha.example/v1", + "env": ["ALPHA_KEY"], + "models": {} + } + } + "#; + let catalog = catalog(api_json); + + assert_eq!(catalog.provider_count(), 1); + let provider = catalog + .lookup_provider("alpha") + .expect("alpha provider should exist"); + assert_eq!(provider.api_url, "https://alpha.example/v1"); + assert_eq!(provider.env_vars(), ["ALPHA_KEY"]); + assert_eq!(provider.api_type, ProviderType::OpenAiResponses); + } + + #[test] + fn catalog_source_mapping_defaults_missing_limits_to_zero() { + let api_json = br#" + { + "alpha": { + "npm": null, + "api": null, + "env": [], + "models": { + "m1": {} + } + } + } + "#; + let catalog = catalog(api_json); + + assert_eq!(catalog.provider_model_count(), 1); + let model = catalog + .lookup_provider_model("alpha", "m1") + .expect("alpha/m1 should exist"); + assert_eq!(model.modalities, Modality::TEXT); + assert_eq!(model.max_input, 0); + assert_eq!(model.max_output, 0); + } + + 
#[test] + fn catalog_source_mapping_uses_limit_input_when_present() { + let api_json = br#" + { + "alpha": { + "npm": null, + "api": null, + "env": [], + "models": { + "m1": { + "limit": { + "context": 128000, + "input": 124000, + "output": 4096 + } + } + } + } + } + "#; + let catalog = catalog(api_json); + + let model = catalog + .lookup_provider_model("alpha", "m1") + .expect("alpha/m1 should exist"); + assert_eq!(model.max_input, 124000); + assert_eq!(model.max_output, 4096); + } + + #[test] + fn catalog_source_mapping_maps_directional_modalities() { + let api_json = br#" + { + "alpha": { + "npm": null, + "api": null, + "env": [], + "models": { + "m1": { + "modalities": { + "input": ["text", "image", "pdf"], + "output": ["text", "audio"] + }, + "limit": { "context": 4096, "output": 512 } + } + } + } + } + "#; + + let catalog = catalog(api_json); + let model = catalog + .lookup_provider_model("alpha", "m1") + .expect("alpha/m1 should exist"); + assert_eq!( + model.modalities, + Modality::TEXT_INPUT + | Modality::TEXT_OUTPUT + | Modality::IMAGE_INPUT + | Modality::AUDIO_OUTPUT + ); + } + + #[test] + fn catalog_source_mapping_maps_pdf_input_to_empty() { + let api_json = br#" + { + "alpha": { + "npm": null, + "api": null, + "env": [], + "models": { + "m1": { + "modalities": { + "input": ["pdf"], + "output": [] + } + } + } + } + } + "#; + + let catalog = catalog(api_json); + let model = catalog + .lookup_provider_model("alpha", "m1") + .expect("alpha/m1 should exist"); + assert_eq!(model.modalities, Modality::empty()); + } + + #[test] + fn catalog_source_mapping_falls_back_to_empty_for_unknown_modalities() { + let api_json = br#" + { + "alpha": { + "npm": null, + "api": null, + "env": [], + "models": { + "m1": { + "modalities": { + "input": ["binary"], + "output": ["embedding"] + } + } + } + } + } + "#; + + let catalog = catalog(api_json); + let model = catalog + .lookup_provider_model("alpha", "m1") + .expect("alpha/m1 should exist"); + assert_eq!(model.modalities, 
Modality::empty()); + } + + #[test] + fn catalog_source_mapping_keeps_duplicate_model_ids_per_provider() { + let api_json = br#" + { + "alpha": { + "npm": "@ai-sdk/openai", + "api": null, + "env": [], + "models": { + "m1": { + "modalities": { + "input": ["image"], + "output": ["text"] + }, + "limit": { "context": 4096, "output": 512 } + } + } + }, + "beta": { + "npm": "@ai-sdk/anthropic", + "api": null, + "env": [], + "models": { + "m1": { + "modalities": { + "input": ["audio"], + "output": ["video"] + }, + "limit": { "context": 8192, "output": 256 } + } + } + } + } + "#; + let catalog = catalog(api_json); + + assert_eq!(catalog.provider_model_count(), 2); + + let alpha_model = catalog + .lookup_provider_model("alpha", "m1") + .expect("alpha/m1 should exist"); + assert_eq!(alpha_model.max_input, 4096); + assert_eq!(alpha_model.max_output, 512); + assert_eq!( + alpha_model.modalities, + Modality::IMAGE_INPUT | Modality::TEXT_OUTPUT + ); + + let beta_model = catalog + .lookup_provider_model("beta", "m1") + .expect("beta/m1 should exist"); + assert_eq!(beta_model.max_input, 8192); + assert_eq!(beta_model.max_output, 256); + assert_eq!( + beta_model.modalities, + Modality::AUDIO_INPUT | Modality::VIDEO_OUTPUT + ); + } + + #[test] + fn catalog_source_mapping_keeps_same_data_for_different_input_key_order() { + let api_json_a = br#" + { + "beta": { + "npm": "@ai-sdk/anthropic", + "api": null, + "env": [], + "models": { + "m2": { "limit": { "context": 2048, "output": 512 } } + } + }, + "alpha": { + "npm": "@ai-sdk/openai", + "api": null, + "env": [], + "models": { + "m1": { "limit": { "context": 1024, "output": 256 } } + } + } + } + "#; + + let api_json_b = br#" + { + "alpha": { + "npm": "@ai-sdk/openai", + "api": null, + "env": [], + "models": { + "m1": { "limit": { "context": 1024, "output": 256 } } + } + }, + "beta": { + "npm": "@ai-sdk/anthropic", + "api": null, + "env": [], + "models": { + "m2": { "limit": { "context": 2048, "output": 512 } } + } + } + } + "#; + + let 
catalog_a = catalog(api_json_a); + let catalog_b = catalog(api_json_b); + + assert_eq!(catalog_a.provider_count(), catalog_b.provider_count()); + assert_eq!( + catalog_a.provider_model_count(), + catalog_b.provider_model_count() + ); + assert_eq!( + catalog_a.model_config_count(), + catalog_b.model_config_count() + ); + assert_eq!( + provider_snapshot(&catalog_a, "alpha"), + provider_snapshot(&catalog_b, "alpha") + ); + assert_eq!( + provider_snapshot(&catalog_a, "beta"), + provider_snapshot(&catalog_b, "beta") + ); + assert_eq!( + model_snapshot(&catalog_a, "alpha", "m1"), + model_snapshot(&catalog_b, "alpha", "m1") + ); + assert_eq!( + model_snapshot(&catalog_a, "beta", "m2"), + model_snapshot(&catalog_b, "beta", "m2") + ); + } + + #[test] + fn provider_type_mapping_handles_known_and_unknown_packages() { + assert_eq!( + provider_type_from_models_dev_npm(Some("@ai-sdk/openai")), + ProviderType::OpenAiCompletions + ); + assert_eq!( + provider_type_from_models_dev_npm(Some("@ai-sdk/google")), + ProviderType::Google + ); + assert_eq!( + provider_type_from_models_dev_npm(Some("@ai-sdk/openai-compatible")), + ProviderType::Unknown + ); + assert_eq!( + provider_type_from_models_dev_npm(None), + ProviderType::Unknown + ); + } +} diff --git a/src/llm-coding-tools-models-dev/src/api/mod.rs b/src/llm-coding-tools-models-dev/src/api/mod.rs new file mode 100644 index 00000000..730624ee --- /dev/null +++ b/src/llm-coding-tools-models-dev/src/api/mod.rs @@ -0,0 +1,12 @@ +//! models.dev API parsing and catalog-source mapping. +//! +//! - [`schema`] parses upstream `https://models.dev/api.json` into a minimal +//! serde representation. +//! - [`catalog_sources`] maps parsed data into a +//! [`llm_coding_tools_core::models::ModelCatalog`]. +//! +//! Both modules intentionally keep only fields required by core catalog +//! construction so ingest stays fast and memory-bounded. 
+ +pub(crate) mod catalog_sources; +pub(crate) mod schema; diff --git a/src/llm-coding-tools-models-dev/src/api/schema.rs b/src/llm-coding-tools-models-dev/src/api/schema.rs new file mode 100644 index 00000000..3e0f4c12 --- /dev/null +++ b/src/llm-coding-tools-models-dev/src/api/schema.rs @@ -0,0 +1,151 @@ +//! Minimal models.dev API schema used by this crate. +//! +//! We deserialize only fields needed for catalog-source mapping: +//! provider metadata (`npm`, `api`, `env`) and model token limits +//! (`limit.context`, `limit.input`, `limit.output`) plus directional modalities +//! (`modalities.input[]`, `modalities.output[]`). +//! +//! Representative payload shape from `https://models.dev/api.json`: +//! +//! ```json +//! { +//! "openai": { +//! "id": "openai", +//! "npm": "@ai-sdk/openai", +//! "api": null, +//! "env": ["OPENAI_API_KEY"], +//! "models": { +//! "gpt-4o": { +//! "id": "gpt-4o", +//! "modalities": { +//! "input": ["text", "image"], +//! "output": ["text"] +//! }, +//! "limit": { +//! "context": 128000, +//! "output": 16384 +//! } +//! } +//! } +//! } +//! } +//! ``` +//! +//! Mapping into local structs: +//! - top-level provider map entry -> [`ApiProviderEntry`] +//! - `models.<model_id>` object -> [`ApiModelEntry`] +//! - `models.<model_id>.modalities` object -> [`ApiModelModalities`] +//! - `models.<model_id>.limit` object -> [`ApiModelLimit`] +//! +//! Unknown fields are intentionally ignored so we can drop large unused sections +//! early and keep parse memory bounded. 
+ +use crate::error::CatalogResult; +use serde::Deserialize; +use std::collections::HashMap; + +#[derive(Debug, Deserialize)] +pub(crate) struct ApiProviderEntry { + #[serde(default)] + pub(crate) npm: Option, + #[serde(default)] + pub(crate) api: Option, + #[serde(default)] + pub(crate) env: Vec, + #[serde(default)] + pub(crate) models: HashMap, +} + +#[derive(Debug, Deserialize)] +pub(crate) struct ApiModelEntry { + #[serde(default)] + pub(crate) limit: Option, + #[serde(default)] + pub(crate) modalities: Option, +} + +#[derive(Debug, Deserialize)] +pub(crate) struct ApiModelModalities { + #[serde(default)] + pub(crate) input: Vec, + #[serde(default)] + pub(crate) output: Vec, +} + +#[derive(Debug, Deserialize)] +pub(crate) struct ApiModelLimit { + #[serde(default)] + pub(crate) context: u32, + #[serde(default)] + pub(crate) input: u32, + #[serde(default)] + pub(crate) output: u32, +} + +/// Parses upstream `api.json` bytes into a provider map. +/// +/// Input must match the current models.dev shape: a flat top-level object where +/// each key is a provider id and each value is a provider entry. +#[inline] +pub(crate) fn parse_api_json( + json_bytes: &[u8], +) -> CatalogResult> { + Ok(serde_json::from_slice(json_bytes)?) 
+} + +#[cfg(test)] +mod tests { + use super::parse_api_json; + + #[test] + fn parse_api_json_supports_flat_provider_map() { + let api_json = br#"{"alpha":{"id":"alpha","npm":"@ai-sdk/openai","api":null,"env":["ALPHA_KEY"],"models":{"m1":{"modalities":{"input":["text","image"],"output":["text"]},"limit":{"context":4096,"output":512}}}}}"#; + let providers = parse_api_json(api_json).expect("API payload should parse"); + let provider = providers.get("alpha").expect("provider should exist"); + + assert_eq!(provider.npm.as_deref(), Some("@ai-sdk/openai")); + assert_eq!(provider.env.as_slice(), ["ALPHA_KEY"]); + + let model = provider.models.get("m1").expect("model should exist"); + let modalities = model.modalities.as_ref().expect("modalities should exist"); + let limit = model.limit.as_ref().expect("limit should exist"); + assert_eq!(modalities.input.as_slice(), ["text", "image"]); + assert_eq!(modalities.output.as_slice(), ["text"]); + assert_eq!(limit.context, 4096); + assert_eq!(limit.output, 512); + } + + #[test] + fn parse_api_json_ignores_unknown_fields() { + let api_json = br#" + { + "alpha": { + "id": "alpha", + "name": "Alpha", + "npm": "@ai-sdk/openai", + "api": "https://alpha.example/v1", + "env": ["ALPHA_KEY"], + "models": { + "m1": { + "description": "ignored", + "limit": { + "context": 128000, + "input": 124000, + "output": 4096 + } + } + } + } + } + "#; + + let providers = parse_api_json(api_json).expect("API payload should parse"); + let provider = providers.get("alpha").expect("provider should exist"); + let model = provider.models.get("m1").expect("model should exist"); + let limit = model.limit.as_ref().expect("limit should exist"); + + assert_eq!(limit.context, 128000); + assert_eq!(limit.input, 124000); + assert_eq!(limit.output, 4096); + } +} diff --git a/src/llm-coding-tools-models-dev/src/cache/format.rs b/src/llm-coding-tools-models-dev/src/cache/format.rs new file mode 100644 index 00000000..8059ac1b --- /dev/null +++ 
b/src/llm-coding-tools-models-dev/src/cache/format.rs @@ -0,0 +1,472 @@ +//! Cache container layout and read/write helpers. +//! +//! The on-disk layout for `models.dev.catalog.v1.cache` is: +//! +//! ```text +//! [0..12) 12-byte fixed prelude: +//! - [0..4) etag_len: u32 little-endian +//! - [4..8) payload_len_compressed: u32 little-endian +//! - [8..12) payload_len_decompressed: u32 little-endian +//! [12..N) raw ETag bytes (etag_len bytes, may be 0) +//! [N..EOF) compressed payload (rest of file) +//! ``` +//! +//! Versioning is keyed by filename (`*.v1.cache`), so this prelude carries +//! lengths only and no magic marker. +//! `payload_len_compressed` is retained so reads can detect unexpected file +//! truncation before decode. +//! +//! Read path intentionally keeps payload compressed. We read the whole file in +//! one pre-sized allocation, then parse/slice into `prelude`, `etag`, and +//! `payload` views without additional copying. +//! +//! ## Performance +//! +//! models.dev changes infrequently, so cache hits are expected to be common. +//! [`crate::cache::payload`] documents typical compressed payload sizes of about +//! 23-32 kB, which keeps the whole container small enough that a single +//! sequential read is generally the faster, simpler hot path on modern +//! NVMe-backed systems. +//! +//! ## Safety +//! +//! Not a 'safe' parser. We assume the file was created by the user. +//! There's no validation for erroneous data; e.g. maliciously crafted headers. +//! Only validation for accidental corruption/truncation (e.g., from partial writes) is included. + +use crate::{ + error::{CatalogError, CatalogResult}, + fs, +}; +use endian_writer::{EndianReader, EndianWriter, HasSize, LittleEndianReader, LittleEndianWriter}; +use endian_writer_derive::EndianWritable; +use std::mem::size_of; +use std::path::Path; +use std::ptr::copy_nonoverlapping; + +/// Fixed v1 prelude, encoded little-endian. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq, EndianWritable)] +#[repr(C)] +struct CachePreludeV1 { + /// Length in bytes of the optional ETag block. + etag_len: u32, + /// Length in bytes of compressed payload as written to disk. + payload_len_compressed: u32, + /// Length in bytes after decompression. + payload_len_decompressed: u32, +} + +/// Input parameters for writing a cache container. +#[derive(Debug, Clone, PartialEq, Eq)] +pub(crate) struct CacheWriteInput<'a> { + /// Optional ETag bytes (e.g., HTTP ETag value). + pub(crate) etag: Option<&'a [u8]>, + /// Compressed payload bytes. + pub(crate) payload_compressed: &'a [u8], + /// Expected decompressed payload length in bytes. + pub(crate) payload_len_decompressed: usize, +} + +/// Fixed prelude size for v1. +const CACHE_HEADER_LEN: usize = ::SIZE; + +// SAFETY: All modern platforms have usize >= 32 bits. +// This lets us safely cast u32 lengths to usize without checked arithmetic. +const _: () = assert!(size_of::() >= size_of::()); + +/// Raw cache blocks extracted from disk. +#[derive(Debug, Clone, PartialEq, Eq)] +pub(crate) struct CacheFileData { + /// Prefix length of ETag bytes after the fixed prelude. + etag_len: u32, + /// Length in bytes of compressed payload from prelude. + payload_len_compressed: u32, + /// Size hint for the eventual decompressed payload allocation. + payload_len_decompressed: u32, + /// Full file bytes laid out as `prelude || etag || payload_compressed`. + file_bytes: Box<[u8]>, +} + +impl CacheFileData { + /// Returns the optional ETag as a borrowed byte slice. + #[inline] + pub(crate) fn etag_bytes(&self) -> Option<&[u8]> { + let etag_start = CACHE_HEADER_LEN; + let etag_end = CACHE_HEADER_LEN + self.etag_len as usize; + let etag = &self.file_bytes[etag_start..etag_end]; + if etag.is_empty() { + None + } else { + Some(etag) + } + } + + /// Returns compressed payload bytes as a borrowed slice. 
+ #[inline] + pub(crate) fn payload_compressed(&self) -> &[u8] { + let payload_start = CACHE_HEADER_LEN + self.etag_len as usize; + &self.file_bytes[payload_start..] + } + + /// Returns compressed payload length in bytes. + #[allow(dead_code)] // public API + #[inline] + pub(crate) fn payload_len_compressed(&self) -> u32 { + self.payload_len_compressed + } + + /// Returns expected decompressed payload length in bytes. + #[inline] + pub(crate) fn payload_len_decompressed(&self) -> u32 { + self.payload_len_decompressed + } +} + +/// Reads a cache container from disk. +/// +/// This reads the entire cache file into memory in one shot, then parses only +/// the prelude + raw blocks and does not decompress payload. +/// Compressed payload length is validated against prelude metadata to catch +/// unexpected truncation or trailing bytes before decode. +/// +/// # Performance +/// +/// This intentionally performs one whole-file read. models.dev changes +/// infrequently, so cache hits are expected to be common, and +/// [`crate::cache::payload`] documents typical compressed payload sizes of about +/// 23-32 kB. That is generally faster in practice than a streaming path while +/// remaining effectively negligible on modern NVMe-backed systems. +/// +/// # Errors +/// +/// Returns [`CatalogError::CacheFormat`] when the prelude is truncated, when +/// encoded lengths overflow platform limits, or when declared block lengths do not +/// match file contents. 
+#[maybe_async::maybe_async] +pub(crate) async fn read_cache_file(path: &Path) -> CatalogResult { + let file_bytes = fs::read(path).await?; + if file_bytes.len() < CACHE_HEADER_LEN { + return Err(CatalogError::CacheFormat("cache prelude is truncated")); + } + + let prelude = decode_prelude(&file_bytes[..CACHE_HEADER_LEN]); + let etag_len = prelude.etag_len as usize; + let payload_len_compressed = prelude.payload_len_compressed as usize; + let expected_total = CACHE_HEADER_LEN + .checked_add(etag_len) + .and_then(|v| v.checked_add(payload_len_compressed)) + .ok_or(CatalogError::CacheFormat( + "cache file size exceeds platform limits", + ))?; + + if file_bytes.len() != expected_total { + return Err(CatalogError::CacheFormat( + "cache file size mismatch (possible truncation or trailing data)", + )); + } + + Ok(CacheFileData { + etag_len: prelude.etag_len, + payload_len_compressed: prelude.payload_len_compressed, + payload_len_decompressed: prelude.payload_len_decompressed, + file_bytes, + }) +} + +/// Writes a cache container to disk atomically. +/// +/// Uses `tempfile::NamedTempFile` to ensure unique temp files for concurrent +/// writers and cross-platform atomic replacement via `persist()`. +/// +/// # Errors +/// +/// Returns [`CatalogError::CacheFormat`] if a block length exceeds v1 `u32` +/// limits, or [`CatalogError::Io`] on I/O failure. 
+#[maybe_async::maybe_async] +pub(crate) async fn write_cache_file( + path: &Path, + input: &CacheWriteInput<'_>, +) -> CatalogResult<()> { + let parent = path + .parent() + .ok_or_else(|| CatalogError::CacheFormat("cache path has no parent directory"))?; + fs::create_dir_all(parent).await?; + + let etag_bytes = input.etag.unwrap_or(&[]); + let prelude = CachePreludeV1 { + etag_len: to_u32_limit(etag_bytes.len(), "etag exceeds v1 length limits")?, + payload_len_compressed: to_u32_limit( + input.payload_compressed.len(), + "compressed payload exceeds v1 length limits", + )?, + payload_len_decompressed: to_u32_limit( + input.payload_len_decompressed, + "decompressed payload exceeds v1 length limits", + )?, + }; + + let encoded_prelude = encode_prelude(prelude); + + let encoded_len = CACHE_HEADER_LEN + .checked_add(etag_bytes.len()) + .and_then(|value| value.checked_add(input.payload_compressed.len())) + .ok_or(CatalogError::CacheFormat( + "cache file exceeds platform length limits", + ))?; + + let mut uninit = fs::alloc_uninit_u8_slice(encoded_len); + let ptr = uninit.as_mut_ptr().cast::(); + + unsafe { + copy_nonoverlapping(encoded_prelude.as_ptr(), ptr, CACHE_HEADER_LEN); + copy_nonoverlapping( + etag_bytes.as_ptr(), + ptr.add(CACHE_HEADER_LEN), + etag_bytes.len(), + ); + copy_nonoverlapping( + input.payload_compressed.as_ptr(), + ptr.add(CACHE_HEADER_LEN + etag_bytes.len()), + input.payload_compressed.len(), + ); + } + + let file_bytes = fs::assume_init_u8_slice(uninit); + + #[cfg(feature = "blocking")] + { + use std::io::Write as _; + let mut temp = tempfile::NamedTempFile::new_in(parent)?; + temp.write_all(&file_bytes)?; + temp.persist(path).map_err(|e| e.error)?; + } + + #[cfg(feature = "tokio")] + { + let file_bytes: Box<[u8]> = file_bytes; + let path = path.to_path_buf(); + let parent = parent.to_path_buf(); + tokio::task::spawn_blocking(move || { + use std::io::Write as _; + let mut temp = tempfile::NamedTempFile::new_in(&parent)?; + 
temp.write_all(&file_bytes)?; + temp.persist(&path).map_err(|e| e.error) + }) + .await??; + } + + Ok(()) +} + +#[inline] +fn to_u32_limit(value: usize, msg: &'static str) -> CatalogResult { + u32::try_from(value).map_err(|_| CatalogError::CacheFormat(msg)) +} + +/// Encodes prelude into little-endian bytes. +#[inline] +fn encode_prelude(prelude: CachePreludeV1) -> [u8; CACHE_HEADER_LEN] { + let mut bytes = [0_u8; CACHE_HEADER_LEN]; + // SAFETY: `bytes` has exactly the derived serialized size of `CachePreludeV1`. + unsafe { + let mut writer = LittleEndianWriter::new(bytes.as_mut_ptr()); + writer.write(&prelude); + } + bytes +} + +/// Decodes prelude from little-endian bytes. +#[inline] +fn decode_prelude(bytes: &[u8]) -> CachePreludeV1 { + // SAFETY: Caller guarantees `bytes` is at least `CACHE_HEADER_LEN`. + unsafe { + let mut reader = LittleEndianReader::new(bytes.as_ptr()); + reader.read() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::TempDir; + + // Verifies prelude encoding/decoding preserves all fields. + #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))] + async fn prelude_layout_round_trips() { + let prelude = CachePreludeV1 { + etag_len: 13, + payload_len_compressed: 44, + payload_len_decompressed: 333, + }; + + let round_trip = decode_prelude(&encode_prelude(prelude)); + assert_eq!(round_trip, prelude); + } + + // Verifies full round-trip with ETag included. 
    #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))]
    async fn write_then_read_round_trips_with_etag() {
        let temp = TempDir::new().expect("tempdir");
        let path = temp.path().join("models.dev.catalog.v1.cache");

        // The payload bytes are an arbitrary stand-in; the container helpers
        // only copy them and record the lengths.
        let input = CacheWriteInput {
            etag: Some(b"etag-123"),
            payload_compressed: b"payload-zstd-bytes",
            payload_len_decompressed: 2048,
        };
        write_cache_file(&path, &input).await.expect("write cache");
        let data = read_cache_file(&path).await.expect("read cache");

        assert_eq!(data.etag_bytes(), input.etag);
        assert_eq!(data.payload_compressed(), input.payload_compressed);
        assert_eq!(
            data.payload_len_compressed(),
            input.payload_compressed.len() as u32
        );
        assert_eq!(
            data.payload_len_decompressed(),
            input.payload_len_decompressed as u32
        );
    }

    // Verifies full round-trip without ETag.
    #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))]
    async fn write_then_read_round_trips_without_etag() {
        let temp = TempDir::new().expect("tempdir");
        let path = temp.path().join("models.dev.catalog.v1.cache");

        let input = CacheWriteInput {
            etag: None,
            payload_compressed: b"payload-only",
            payload_len_decompressed: 1024,
        };
        write_cache_file(&path, &input).await.expect("write cache");
        let data = read_cache_file(&path).await.expect("read cache");

        // A cache written without an ETag must read back as `None`.
        assert_eq!(data.etag_bytes(), input.etag);
        assert_eq!(data.payload_compressed(), input.payload_compressed);
        assert_eq!(
            data.payload_len_decompressed(),
            input.payload_len_decompressed as u32
        );
    }

    // Rejects files shorter than the fixed header.
    #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))]
    async fn read_rejects_truncated_prelude() {
        // The fixture is one byte shorter than the fixed header.
        let temp = TempDir::new().expect("tempdir");
        let path = temp.path().join("short-prelude.cache");

        std::fs::write(&path, [0_u8; CACHE_HEADER_LEN - 1]).expect("write fixture");
        let error = read_cache_file(&path)
            .await
            .expect_err("truncated prelude should fail");
        assert!(matches!(error, CatalogError::CacheFormat(_)));
    }

    // Rejects when file ends before etag_len bytes after header.
    #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))]
    async fn read_rejects_short_etag_length() {
        // The header claims a 12-byte ETag but only 4 bytes follow it.
        let temp = TempDir::new().expect("tempdir");
        let path = temp.path().join("short-etag.cache");

        let prelude = CachePreludeV1 {
            etag_len: 12,
            payload_len_compressed: 0,
            payload_len_decompressed: 0,
        };
        let mut bytes = encode_prelude(prelude).to_vec();
        bytes.extend_from_slice(b"tiny"); // 'tiny' etag is 4 bytes
        std::fs::write(&path, bytes).expect("write fixture");

        // The file is 8 bytes shorter than the header declares, so the size
        // check in `read_cache_file` must reject it.
        let error = read_cache_file(&path)
            .await
            .expect_err("short etag should fail");
        assert!(matches!(error, CatalogError::CacheFormat(_)));
    }

    // Accepts minimal valid file with all zero-length fields.
    #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))]
    async fn read_supports_empty_etag_and_payload() {
        let temp = TempDir::new().expect("tempdir");
        let path = temp.path().join("empty.cache");

        // The file consists of the header only; every declared length is zero.
        let prelude = CachePreludeV1 {
            etag_len: 0,
            payload_len_compressed: 0,
            payload_len_decompressed: 0,
        };
        std::fs::write(&path, encode_prelude(prelude)).expect("write fixture");
        let data = read_cache_file(&path).await.expect("read empty cache");

        assert_eq!(data.etag_bytes(), None);
        assert!(data.payload_compressed().is_empty());
        assert_eq!(data.payload_len_compressed(), 0);
        assert_eq!(data.payload_len_decompressed(), 0);
    }

    // Rejects when declared compressed payload length does not match file size.
    #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))]
    async fn read_rejects_mismatched_payload_length() {
        // The header claims a 10-byte payload but only 5 bytes are provided.
        let temp = TempDir::new().expect("tempdir");
        let path = temp.path().join("mismatched-payload-len.cache");

        let prelude = CachePreludeV1 {
            etag_len: 4,
            payload_len_compressed: 10,
            payload_len_decompressed: 0,
        };
        let mut bytes = encode_prelude(prelude).to_vec();
        bytes.extend_from_slice(b"etag");
        bytes.extend_from_slice(b"short"); // only 5 bytes, not the declared 10
        std::fs::write(&path, bytes).expect("write fixture");

        let error = read_cache_file(&path)
            .await
            .expect_err("payload length mismatch should fail");
        assert!(matches!(error, CatalogError::CacheFormat(_)));
    }

    // Verifies that a second write replaces the existing cache file content.
    #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))]
    async fn write_replaces_existing_cache_atomically() {
        let temp = TempDir::new().expect("tempdir");
        let path = temp.path().join("atomic-test.cache");

        // Write first payload
        let first_input = CacheWriteInput {
            etag: Some(b"etag-1"),
            payload_compressed: b"first-payload",
            payload_len_decompressed: 100,
        };
        write_cache_file(&path, &first_input)
            .await
            .expect("write first");

        let first_data = read_cache_file(&path).await.expect("read first");
        assert_eq!(first_data.etag_bytes(), Some(b"etag-1".as_slice()));
        assert_eq!(first_data.payload_compressed(), b"first-payload");

        // Write second payload (atomic replacement). It is longer than the
        // first one, so a successful read also shows the file was replaced as
        // a whole rather than partially overwritten.
        let second_input = CacheWriteInput {
            etag: Some(b"etag-2"),
            payload_compressed: b"second-payload-different",
            payload_len_decompressed: 200,
        };
        write_cache_file(&path, &second_input)
            .await
            .expect("write second");

        let second_data = read_cache_file(&path).await.expect("read second");
        assert_eq!(second_data.etag_bytes(), Some(b"etag-2".as_slice()));
        assert_eq!(
            second_data.payload_compressed(),
            b"second-payload-different"
        );
        assert_eq!(second_data.payload_len_decompressed(), 200);
    }
}
diff --git a/src/llm-coding-tools-models-dev/src/cache/mod.rs b/src/llm-coding-tools-models-dev/src/cache/mod.rs
new file mode 100644
index 00000000..43af19b8
--- /dev/null
+++ b/src/llm-coding-tools-models-dev/src/cache/mod.rs
@@ -0,0 +1,20 @@
//! Cache path and container utilities for models.dev catalog data.
//!
//! Responsibilities are split by concern:
//!
//! - `path` resolves the shared cache location.
//! - `format` defines the cache container layout and read/write helpers.
//! - `payload` defines the serialized catalog rows stored inside the container.
//!
//! Runtime behavior follows crate features:
//! - `tokio` (default): async file I/O APIs.
//! - `blocking`: sync file I/O APIs.
//!
//! The public API currently exposes path resolution only; container helpers are
//! crate-internal until the sync/load flow is wired.

pub(crate) mod format;
mod path;
pub(crate) mod payload;

pub use crate::error::CatalogResult;
pub use path::{shared_cache_path, CACHE_PATH_ENV_VAR};
diff --git a/src/llm-coding-tools-models-dev/src/cache/path.rs b/src/llm-coding-tools-models-dev/src/cache/path.rs
new file mode 100644
index 00000000..e43d256e
--- /dev/null
+++ b/src/llm-coding-tools-models-dev/src/cache/path.rs
@@ -0,0 +1,63 @@
//! Cross-platform cache path resolution.

use crate::{error::CatalogResult, CatalogError};
use std::path::PathBuf;

/// Environment variable name for overriding the default cache path.
pub const CACHE_PATH_ENV_VAR: &str = "LLM_CODING_TOOLS_MODELS_DEV_CACHE_PATH";

/// Directory created under the platform cache directory.
const CACHE_SUBDIR: &str = "llm-coding-tools";
/// File name of the cache container inside [`CACHE_SUBDIR`].
const CACHE_FILENAME: &str = "models.dev.catalog.v1.cache";

/// Returns the shared cache path for the models.dev catalog.
///
/// This function determines the appropriate cache location using the following
/// precedence:
///
/// 1. `LLM_CODING_TOOLS_MODELS_DEV_CACHE_PATH` environment variable (if set)
/// 2. Platform cache directory + `llm-coding-tools/models.dev.catalog.v1.cache`
///
/// # Platform Cache Locations
///
/// - **Linux**: `~/.cache/llm-coding-tools/models.dev.catalog.v1.cache`
/// - **macOS**: `~/Library/Caches/llm-coding-tools/models.dev.catalog.v1.cache`
/// - **Windows**: `%LOCALAPPDATA%\llm-coding-tools\models.dev.catalog.v1.cache`
///
/// # Returns
///
/// The full path to the cache file.
+/// +/// # Errors +/// +/// Returns [`CatalogError::CachePathNotFound`] when: +/// - The environment variable is not set AND +/// - The platform cache directory cannot be determined +/// +/// # Examples +/// +/// ``` +/// use llm_coding_tools_models_dev::shared_cache_path; +/// +/// # fn example() -> Result<(), Box> { +/// let path = shared_cache_path()?; +/// println!("Cache location: {}", path.display()); +/// # Ok(()) +/// # } +/// ``` +pub fn shared_cache_path() -> CatalogResult { + // 1. Check env var first + if let Some(os_str) = std::env::var_os(CACHE_PATH_ENV_VAR) { + if os_str.is_empty() { + return Err(CatalogError::Configuration(format!( + "{} is set but empty", + CACHE_PATH_ENV_VAR + ))); + } + return Ok(PathBuf::from(&os_str)); + } + + // 2. Fall back to dirs::cache_dir() + let cache_dir = dirs::cache_dir().ok_or(CatalogError::CachePathNotFound)?; + + Ok(cache_dir.join(CACHE_SUBDIR).join(CACHE_FILENAME)) +} diff --git a/src/llm-coding-tools-models-dev/src/cache/payload.rs b/src/llm-coding-tools-models-dev/src/cache/payload.rs new file mode 100644 index 00000000..3a1e65b0 --- /dev/null +++ b/src/llm-coding-tools-models-dev/src/cache/payload.rs @@ -0,0 +1,276 @@ +//! Cache payload serialization for models.dev catalog data. +//! +//! The payload is stored as simple owned rows so it can be encoded compactly +//! with bitcode and rebuilt into a [`ModelCatalog`] +//! without reparsing the original JSON. +//! +//! ## Compression Benchmark +//! +//! Using a 1.26 MB `api.json` snapshot (models.dev), converted to bitcode +//! then compressed with zstd at various levels: +//! +//! | Level | Size | % of JSON | Time | +//! |----------------|-----------|-----------|---------| +//! | JSON | 1260.7 KB | 100.00% | - | +//! | (raw bitcode) | 105.7 KB | 8.39% | - | +//! | 0 | 29.7 KB | 2.36% | 1.4ms | +//! | 1 | 32.1 KB | 2.55% | 1.0ms | +//! | 2 | 31.7 KB | 2.51% | 1.0ms | +//! | 3 | 29.7 KB | 2.36% | 1.1ms | +//! | 4 | 29.7 KB | 2.36% | 1.9ms | +//! 
| 5 | 27.5 KB | 2.18% | 2.9ms | +//! | 6 | 27.1 KB | 2.15% | 3.6ms | +//! | 7 | 26.6 KB | 2.11% | 4.8ms | +//! | 8 | 26.7 KB | 2.12% | 5.0ms | +//! | 9 | 26.7 KB | 2.12% | 6.3ms | +//! | 10 | 26.4 KB | 2.09% | 9.1ms | +//! | 11 | 26.1 KB | 2.07% | 8.5ms | +//! | 12 | 26.1 KB | 2.07% | 14.4ms | +//! | 13 | 26.0 KB | 2.06% | 12.0ms | +//! | 14 | 26.0 KB | 2.06% | 16.4ms | +//! | 15 | 25.9 KB | 2.06% | 21.6ms | +//! | 16 | 23.6 KB | 1.87% | 24.2ms | +//! | 17 | 23.2 KB | 1.84% | 27.6ms | +//! | 18 | 23.2 KB | 1.84% | 42.6ms | +//! | 19 | 23.1 KB | 1.83% | 81.3ms | +//! | 20 | 23.1 KB | 1.83% | 96.3ms | +//! | 21 | 23.1 KB | 1.83% | 125.4ms | +//! | 22 | 23.1 KB | 1.83% | 207.5ms | +//! +//! Levels 1-3 offer the best speed/ratio tradeoff (~1ms, ~2.4% of JSON). +//! Levels 19-22 provide maximal compression but take 80-200ms. + +use crate::error::{CatalogError, CatalogResult}; +use llm_coding_tools_core::models::{ + Modality, ModelCatalog, ModelInfo, ProviderIdx, ProviderInfo, ProviderModelSource, + ProviderSource, ProviderType, +}; + +/// Serializable cache representation of the models.dev catalog. +#[derive(Debug, Clone, PartialEq, bitcode::Encode, bitcode::Decode)] +pub(crate) struct CatalogCachePayload { + /// Provider rows in catalog order. + pub(crate) providers: Vec, + /// Model rows that reference providers by index. + pub(crate) models: Vec, +} + +/// Serializable provider row stored in the cache payload. +#[derive(Debug, Clone, PartialEq, Eq, bitcode::Encode, bitcode::Decode)] +pub(crate) struct CachedProviderRow { + /// Stable provider lookup key. + pub(crate) provider_key: String, + /// Base API URL for requests to this provider. + pub(crate) api_url: String, + /// Environment variables that can supply credentials. + pub(crate) env_vars: Vec, + /// Provider protocol or API shape. + pub(crate) api_type: ProviderType, +} + +/// Serializable model row stored in the cache payload. 
#[derive(Debug, Clone, PartialEq, bitcode::Encode, bitcode::Decode)]
pub(crate) struct CachedModelRow {
    /// Index into [`CatalogCachePayload::providers`].
    pub(crate) provider_idx: ProviderIdx,
    /// Stable model lookup key within the provider.
    pub(crate) model_key: String,
    /// Serialized [`Modality`] bitflags.
    pub(crate) modalities_bits: u8,
    /// Maximum supported input tokens.
    pub(crate) max_input: u32,
    /// Maximum supported output tokens.
    pub(crate) max_output: u32,
    /// Optional default temperature.
    pub(crate) temperature: Option,
    /// Optional default top-p value.
    pub(crate) top_p: Option,
}

/// Encodes a cache payload into bitcode bytes.
///
/// The result is the uncompressed payload; compression is left to the caller.
pub(crate) fn encode_cache_payload(payload: &CatalogCachePayload) -> Vec {
    bitcode::encode(payload)
}

/// Decodes bitcode bytes into an owned cache payload.
///
/// `bytes` must be the uncompressed output of [`encode_cache_payload`].
///
/// # Errors
///
/// Returns [`CatalogError::BitcodeDecode`] when the bytes are not a valid cache
/// payload encoding.
pub(crate) fn decode_cache_payload(bytes: &[u8]) -> CatalogResult {
    // The decode error is stored as its display string.
    bitcode::decode(bytes).map_err(|error| CatalogError::BitcodeDecode(error.to_string()))
}

/// Rebuilds a [`ModelCatalog`] from decoded cache rows.
///
/// # Errors
///
/// Returns [`CatalogError`] when any cached row data cannot be used to build a
/// valid catalog, such as when a model references an out-of-range provider.
+pub(crate) fn catalog_from_cache_payload( + payload: CatalogCachePayload, +) -> CatalogResult { + let CatalogCachePayload { providers, models } = payload; + + let mut provider_sources = Vec::with_capacity(providers.len()); + for row in providers { + provider_sources.push(ProviderSource { + provider_key: row.provider_key, + provider: ProviderInfo { + api_url: row.api_url, + env_vars: row.env_vars, + api_type: row.api_type, + }, + }); + } + + let mut model_sources = Vec::with_capacity(models.len()); + for row in &models { + model_sources.push(ProviderModelSource { + provider_idx: row.provider_idx, + model_key: row.model_key.as_str(), + model: ModelInfo { + modalities: Modality::from_bits_retain(row.modalities_bits), + max_input: row.max_input, + max_output: row.max_output, + temperature: row.temperature, + top_p: row.top_p, + }, + }); + } + + Ok(ModelCatalog::build(&provider_sources, &model_sources)?) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn sample_payload() -> CatalogCachePayload { + CatalogCachePayload { + providers: vec![CachedProviderRow { + provider_key: "openai".to_string(), + api_url: "https://api.openai.com/v1".to_string(), + env_vars: vec!["OPENAI_API_KEY".to_string()], + api_type: ProviderType::OpenAiCompletions, + }], + models: vec![CachedModelRow { + provider_idx: ProviderIdx::new(0), + model_key: "gpt-4".to_string(), + modalities_bits: Modality::TEXT.bits(), + max_input: 8192, + max_output: 4096, + temperature: Some(0.7), + top_p: Some(0.9), + }], + } + } + + #[test] + fn payload_round_trip() { + let original = sample_payload(); + let encoded = encode_cache_payload(&original); + let decoded = decode_cache_payload(&encoded).expect("decode should succeed"); + assert_eq!(original, decoded); + } + + #[test] + fn catalog_from_payload_reconstructs_provider() { + let payload = sample_payload(); + let catalog = catalog_from_cache_payload(payload).expect("catalog build should succeed"); + + let provider = catalog + .lookup_provider("openai") + 
.expect("provider should exist"); + assert_eq!(provider.api_url, "https://api.openai.com/v1"); + assert_eq!(provider.api_type, ProviderType::OpenAiCompletions); + } + + #[test] + fn catalog_from_payload_reconstructs_model() { + let payload = sample_payload(); + let catalog = catalog_from_cache_payload(payload).expect("catalog build should succeed"); + + let model = catalog + .lookup_provider_model("openai", "gpt-4") + .expect("model should exist"); + assert_eq!(model.max_input, 8192); + assert_eq!(model.max_output, 4096); + assert_eq!(model.modalities, Modality::TEXT); + } + + #[test] + fn catalog_from_payload_rejects_out_of_range_provider_idx() { + use llm_coding_tools_core::models::ModelCatalogBuildError; + + let payload = CatalogCachePayload { + providers: vec![CachedProviderRow { + provider_key: "test".to_string(), + api_url: "".to_string(), + env_vars: vec![], + api_type: ProviderType::Unknown, + }], + models: vec![CachedModelRow { + provider_idx: ProviderIdx::new(999), + model_key: "model".to_string(), + modalities_bits: Modality::TEXT.bits(), + max_input: 0, + max_output: 0, + temperature: None, + top_p: None, + }], + }; + + let result = catalog_from_cache_payload(payload); + assert!(matches!( + result, + Err(CatalogError::ModelCatalogBuild( + ModelCatalogBuildError::ProviderIdxOutOfRangeForModel { .. 
} + )) + )); + } + + #[test] + fn all_known_provider_types_round_trip() { + let types = [ + ProviderType::Unknown, + ProviderType::OpenAiCompletions, + ProviderType::OpenAiResponses, + ProviderType::Anthropic, + ProviderType::Google, + ProviderType::Groq, + ProviderType::Mistral, + ProviderType::Ollama, + ProviderType::Bedrock, + ProviderType::Azure, + ProviderType::OpenRouter, + ProviderType::HuggingFace, + ProviderType::Cohere, + ProviderType::ChatGptOAuth, + ProviderType::ClaudeCodeOAuth, + ProviderType::Antigravity, + ]; + + for provider_type in types { + let payload = CatalogCachePayload { + providers: vec![CachedProviderRow { + provider_key: "test".to_string(), + api_url: "".to_string(), + env_vars: vec![], + api_type: provider_type, + }], + models: vec![], + }; + + let catalog = catalog_from_cache_payload(payload).expect("should succeed"); + let provider = catalog + .lookup_provider("test") + .expect("provider should exist"); + assert_eq!( + provider.api_type, provider_type, + "provider type should round-trip correctly" + ); + } + } +} diff --git a/src/llm-coding-tools-models-dev/src/catalog/load_cache.rs b/src/llm-coding-tools-models-dev/src/catalog/load_cache.rs new file mode 100644 index 00000000..8e807411 --- /dev/null +++ b/src/llm-coding-tools-models-dev/src/catalog/load_cache.rs @@ -0,0 +1,101 @@ +//! Loading a model catalog from cached on-disk data. +//! +//! This module handles the offline half of catalog loading: it decompresses the +//! stored payload, decodes the serialized rows, and rebuilds a +//! [`ModelCatalog`](llm_coding_tools_core::models::ModelCatalog). + +use crate::cache::format::CacheFileData; +use crate::cache::payload::{catalog_from_cache_payload, decode_cache_payload}; +use crate::catalog::load_result::{CatalogLoadResult, CatalogLoadSource}; +use crate::error::{CatalogError, CatalogResult}; + +/// Decompresses cache file data and rebuilds a catalog from it. 
+/// +/// # Errors +/// +/// Returns [`CatalogError`] when zstd decompression fails, the decompressed +/// length does not match the cache metadata, the serialized payload cannot be +/// decoded, or catalog reconstruction fails. +pub(crate) fn load_catalog_from_cache_file_data( + cache_file: &CacheFileData, + source: CatalogLoadSource, +) -> CatalogResult { + let expected_len = cache_file.payload_len_decompressed() as usize; + let decoded = zstd::bulk::decompress(cache_file.payload_compressed(), expected_len) + .map_err(|error| CatalogError::Zstd(error.to_string()))?; + if decoded.len() != expected_len { + return Err(CatalogError::CacheFormat( + "cache payload length mismatch after decompression", + )); + } + + let payload = decode_cache_payload(&decoded)?; + let catalog = catalog_from_cache_payload(payload)?; + Ok(CatalogLoadResult { catalog, source }) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::cache::format::{write_cache_file, CacheWriteInput}; + use crate::cache::payload::{ + encode_cache_payload, CachedModelRow, CachedProviderRow, CatalogCachePayload, + }; + use llm_coding_tools_core::models::{Modality, ProviderIdx, ProviderType}; + use tempfile::TempDir; + + fn sample_payload() -> CatalogCachePayload { + CatalogCachePayload { + providers: vec![CachedProviderRow { + provider_key: "test".to_string(), + api_url: "https://test.example".to_string(), + env_vars: vec![], + api_type: ProviderType::OpenAiCompletions, + }], + models: vec![CachedModelRow { + provider_idx: ProviderIdx::new(0), + model_key: "model1".to_string(), + modalities_bits: Modality::TEXT.bits(), + max_input: 4096, + max_output: 2048, + temperature: None, + top_p: None, + }], + } + } + + #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))] + async fn round_trip_through_cache_file() { + let temp = TempDir::new().expect("tempdir"); + let path = temp.path().join("test.cache"); + + let payload = sample_payload(); + let encoded = 
encode_cache_payload(&payload); + let compressed = zstd::bulk::compress(&encoded, 1).expect("compress"); + + write_cache_file( + &path, + &CacheWriteInput { + etag: Some(b"test-etag"), + payload_compressed: &compressed, + payload_len_decompressed: encoded.len(), + }, + ) + .await + .expect("write cache"); + + let cache_file = crate::cache::format::read_cache_file(&path) + .await + .expect("read cache"); + let result = + load_catalog_from_cache_file_data(&cache_file, CatalogLoadSource::NotModifiedCache) + .expect("load from cache"); + + assert_eq!(result.source, CatalogLoadSource::NotModifiedCache); + let provider = result + .catalog + .lookup_provider("test") + .expect("provider should exist"); + assert_eq!(provider.api_type, ProviderType::OpenAiCompletions); + } +} diff --git a/src/llm-coding-tools-models-dev/src/catalog/load_result.rs b/src/llm-coding-tools-models-dev/src/catalog/load_result.rs new file mode 100644 index 00000000..aee2d46e --- /dev/null +++ b/src/llm-coding-tools-models-dev/src/catalog/load_result.rs @@ -0,0 +1,52 @@ +//! Result types for catalog load operations. + +use llm_coding_tools_core::models::ModelCatalog; + +/// Result of a successful catalog load operation. +/// +/// This struct provides both the loaded catalog and metadata about +/// how the catalog was obtained (fresh download, cached, etc.). +pub struct CatalogLoadResult { + /// The loaded model catalog ready for lookups. + pub catalog: ModelCatalog, + + /// Information about how the catalog was loaded. + pub source: CatalogLoadSource, +} + +/// Indicates how the catalog was loaded. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum CatalogLoadSource { + /// The catalog was downloaded fresh (HTTP 200 OK) and the cache was updated. + Downloaded, + + /// The cache was up to date (HTTP 304 Not Modified) and loaded from disk. + NotModifiedCache, + + /// A network failure occurred, but a valid cached copy was available + /// and loaded as a fallback. 
+ FallbackCache, +} + +impl CatalogLoadSource { + /// Returns true if the catalog was loaded from the network (fresh download). + #[inline] + pub fn is_fresh(&self) -> bool { + matches!(self, CatalogLoadSource::Downloaded) + } + + /// Returns true if the catalog was loaded from cache (either fresh cache or fallback). + #[inline] + pub fn is_cached(&self) -> bool { + matches!( + self, + CatalogLoadSource::NotModifiedCache | CatalogLoadSource::FallbackCache + ) + } + + /// Returns true if this was a fallback load due to network failure. + #[inline] + pub fn is_fallback(&self) -> bool { + matches!(self, CatalogLoadSource::FallbackCache) + } +} diff --git a/src/llm-coding-tools-models-dev/src/catalog/mod.rs b/src/llm-coding-tools-models-dev/src/catalog/mod.rs new file mode 100644 index 00000000..1764915d --- /dev/null +++ b/src/llm-coding-tools-models-dev/src/catalog/mod.rs @@ -0,0 +1,215 @@ +//! Catalog loading and synchronization with models.dev. +//! +//! Flow is simple: +//! - Try online sync first using conditional HTTP (`If-None-Match`) +//! - Reuse cache on `304 Not Modified` +//! - Fall back to cached data if the network path fails + +mod load_cache; +mod load_result; +mod sync; + +#[cfg(test)] +mod test_utils; + +pub use load_result::{CatalogLoadResult, CatalogLoadSource}; + +use crate::cache::shared_cache_path; +use crate::error::CatalogError; +use std::path::Path; + +/// Entry point for loading models.dev catalogs. +/// +/// This struct provides static methods for loading the catalog either +/// from the default shared cache location or from a custom path. +pub struct ModelsDevCatalog; + +impl ModelsDevCatalog { + /// Loads the catalog from the default shared cache location. + /// + /// This is the primary entry point for most use cases. It will: + /// 1. Check for an existing cache and extract its ETag + /// 2. Send a conditional GET request with `If-None-Match` + /// 3. 
On `200 OK`: download, map the API payload into catalog sources, + /// cache it, and return fresh data + /// 4. On `304 Not Modified`: decode and return cached data + /// 5. On network failure: fall back to cached data if available + /// + /// The cache location is determined by: + /// - `LLM_CODING_TOOLS_MODELS_DEV_CACHE_PATH` environment variable (if set) + /// - Platform cache directory + `llm-coding-tools/models.dev.catalog.v1.cache` + /// + /// # Returns + /// + /// A [`CatalogLoadResult`] containing the loaded catalog and information + /// about how it was loaded (downloaded fresh, from cache, or fallback). + /// + /// # Errors + /// + /// Returns [`CatalogError`] when: + /// - The cache path cannot be determined and no cache exists + /// - An HTTP error occurs and no cache is available for fallback + /// - The cache is corrupted and cannot be decoded + /// - Catalog construction from mapped catalog sources fails + /// + /// # Examples + /// + /// ``` + /// use llm_coding_tools_models_dev::ModelsDevCatalog; + /// + /// # #[cfg(feature = "tokio")] + /// # async fn example() -> Result<(), Box> { + /// let result = ModelsDevCatalog::load().await?; + /// + /// // Use the catalog + /// if let Some(entry) = result.catalog.lookup("openai", "gpt-4") { + /// println!("API URL: {}", entry.0.api_url); + /// } + /// # Ok(()) + /// # } + /// + /// # #[cfg(feature = "blocking")] + /// # fn example() -> Result<(), Box> { + /// # let result = ModelsDevCatalog::load()?; + /// // Use the catalog + /// # if let Some(entry) = result.catalog.lookup("openai", "gpt-4") { + /// # println!("API URL: {}", entry.0.api_url); + /// # } + /// # Ok(()) + /// # } + /// ``` + #[maybe_async::maybe_async] + pub async fn load() -> Result { + let path = shared_cache_path()?; + Self::load_at(path).await + } + + /// Loads the catalog from a specific cache file path. + /// + /// This method provides the same behavior as [`load`](Self::load), but + /// allows specifying a custom cache file path. 
This is useful for: + /// - Testing with temporary cache files + /// - Custom deployment scenarios + /// - Isolated cache locations + /// + /// # Parameters + /// + /// * `path` - The path to the cache file. Parent directories will be + /// created if they don't exist. + /// + /// # Returns + /// + /// A [`CatalogLoadResult`] containing the loaded catalog and source + /// information. + /// + /// # Errors + /// + /// Returns [`CatalogError`] under the same conditions as [`load`](Self::load), + /// plus: + /// - The parent directory cannot be created + /// - The path is not a valid file path + /// + /// # Examples + /// + /// ``` + /// use llm_coding_tools_models_dev::ModelsDevCatalog; + /// use std::path::PathBuf; + /// + /// # #[cfg(feature = "tokio")] + /// # async fn example() -> Result<(), Box> { + /// let cache_path = PathBuf::from("/tmp/my-cache.cache"); + /// let result = ModelsDevCatalog::load_at(&cache_path).await?; + /// + /// // Use the catalog + /// if let Some(entry) = result.catalog.lookup("openai", "gpt-4") { + /// println!("API URL: {}", entry.0.api_url); + /// } + /// # Ok(()) + /// # } + /// + /// # #[cfg(feature = "blocking")] + /// # fn example() -> Result<(), Box> { + /// # let cache_path = PathBuf::from("/tmp/my-cache.cache"); + /// # let result = ModelsDevCatalog::load_at(&cache_path)?; + /// # if let Some(entry) = result.catalog.lookup("openai", "gpt-4") { + /// # println!("API URL: {}", entry.0.api_url); + /// # } + /// # Ok(()) + /// # } + /// ``` + #[maybe_async::maybe_async] + pub async fn load_at(path: impl AsRef) -> Result { + sync::load_catalog_at_path(path.as_ref()).await + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::cache::CACHE_PATH_ENV_VAR; + use llm_coding_tools_core::models::ProviderType; + use tempfile::TempDir; + + /// Guard that restores environment variables on drop + struct EnvGuard { + cache_path_var: Option, + } + + impl EnvGuard { + fn new(value: Option<&str>) -> Self { + let cache_path_var = 
std::env::var(CACHE_PATH_ENV_VAR).ok(); + match value { + Some(v) => std::env::set_var(CACHE_PATH_ENV_VAR, v), + None => std::env::remove_var(CACHE_PATH_ENV_VAR), + } + Self { cache_path_var } + } + } + + impl Drop for EnvGuard { + fn drop(&mut self) { + // Clear test URL override + super::sync::set_test_models_dev_api_url(None); + + // Restore or remove cache path env var + match &self.cache_path_var { + Some(v) => std::env::set_var(CACHE_PATH_ENV_VAR, v), + None => std::env::remove_var(CACHE_PATH_ENV_VAR), + } + } + } + + use super::test_utils::{sample_api_json, start_mock_server, MockResponse}; + + #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))] + #[serial_test::serial] + async fn facade_load_uses_shared_cache_path() { + let temp = TempDir::new().expect("tempdir"); + let cache_path = temp.path().join("facade-test.cache"); + let _guard = EnvGuard::new(Some(cache_path.to_str().unwrap())); + + // Start mock server and set URL override + let body = String::from_utf8_lossy(sample_api_json()).to_string(); + let (_handle, url) = start_mock_server(MockResponse::Ok { + etag: "\"facade-test-etag\"", + body, + }); + super::sync::set_test_models_dev_api_url(Some(url)); + + // Call public facade + let result = ModelsDevCatalog::load().await.expect("load should succeed"); + + assert_eq!(result.source, CatalogLoadSource::Downloaded); + let provider = result + .catalog + .lookup_provider("openai") + .expect("openai provider should exist"); + assert_eq!(provider.api_type, ProviderType::OpenAiCompletions); + + // Verify cache was written + assert!( + cache_path.exists(), + "cache file should exist at shared path" + ); + } +} diff --git a/src/llm-coding-tools-models-dev/src/catalog/sync.rs b/src/llm-coding-tools-models-dev/src/catalog/sync.rs new file mode 100644 index 00000000..d63023d6 --- /dev/null +++ b/src/llm-coding-tools-models-dev/src/catalog/sync.rs @@ -0,0 +1,449 @@ +//! Catalog synchronization against the remote models.dev API. +//! 
+//! This module owns the online-first load path used by
+//! [`ModelsDevCatalog`](crate::catalog::ModelsDevCatalog). It reads any cached
+//! container in one shot, sends a conditional request with the cached ETag when
+//! available, refreshes the cache on `200 OK`, reuses it on `304 Not Modified`,
+//! and falls back to cached data when the request fails.
+
+use crate::api::catalog_sources::cache_payload_from_api_json_bytes;
+use crate::cache::format::{read_cache_file, write_cache_file, CacheFileData, CacheWriteInput};
+use crate::cache::payload::{catalog_from_cache_payload, encode_cache_payload};
+use crate::catalog::load_cache::load_catalog_from_cache_file_data;
+use crate::catalog::load_result::{CatalogLoadResult, CatalogLoadSource};
+use crate::error::{CatalogError, CatalogResult};
+use reqwest::header::{ETAG, IF_NONE_MATCH};
+use reqwest::StatusCode;
+use std::borrow::Cow;
+use std::io::ErrorKind;
+use std::path::Path;
+
+/// Default production endpoint for the models.dev catalog snapshot.
+const MODELS_DEV_API_URL: &str = "https://models.dev/api.json";
+
+/// Timeout for HTTP connections and requests in seconds.
+const REQUEST_TIMEOUT_SECS: u64 = 30;
+
+#[cfg(test)]
+static TEST_MODELS_DEV_API_URL: std::sync::Mutex<Option<String>> = std::sync::Mutex::new(None);
+
+#[cfg(test)]
+/// Overrides the remote catalog URL for sync tests; `None` clears the override.
+pub(crate) fn set_test_models_dev_api_url(url: Option<String>) {
+    *TEST_MODELS_DEV_API_URL.lock().unwrap() = url;
+}
+
+/// Returns the active catalog endpoint, including the test override when set.
+fn models_dev_api_url() -> Cow<'static, str> {
+    #[cfg(test)]
+    if let Some(url) = TEST_MODELS_DEV_API_URL.lock().unwrap().clone() {
+        return Cow::Owned(url);
+    }
+
+    Cow::Borrowed(MODELS_DEV_API_URL)
+}
+
+/// Resolves the result to return after a transient request failure.
+///
+/// Cached data takes precedence over surfacing the request error so callers can
+/// continue with the last known-good catalog when possible.
+fn load_after_request_failure(
+    request_error: reqwest::Error,
+    cache_file: Option<&CacheFileData>,
+    cache_error: Option<CatalogError>,
+) -> CatalogResult<CatalogLoadResult> {
+    if let Some(cache_file) = cache_file {
+        return load_catalog_from_cache_file_data(cache_file, CatalogLoadSource::FallbackCache);
+    }
+
+    if let Some(cache_error) = cache_error {
+        return Err(cache_error);
+    }
+
+    Err(CatalogError::Reqwest(request_error))
+}
+
+/// Returns `true` for `429 Too Many Requests` and every `5xx` status.
+#[inline]
+fn is_transient_status(status: StatusCode) -> bool {
+    status == StatusCode::TOO_MANY_REQUESTS || status.is_server_error()
+}
+
+#[maybe_async::maybe_async]
+/// Loads the catalog at `path` using the default models.dev endpoint.
+///
+/// # Errors
+///
+/// Returns the same errors as [`load_catalog_from_url`] while targeting the
+/// default production URL.
+pub(crate) async fn load_catalog_at_path(path: &Path) -> CatalogResult<CatalogLoadResult> {
+    let url = models_dev_api_url();
+    load_catalog_from_url(path, url.as_ref()).await
+}
+
+#[maybe_async::maybe_async]
+/// Synchronizes the cache at `path` against `url` and returns a catalog.
+///
+/// The sync flow is:
+/// - read any existing cache file in one whole-file read
+/// - send `If-None-Match` when the cache includes an ETag
+/// - on `200 OK`, decode the response and rewrite the cache
+/// - on `304 Not Modified`, load the existing cache
+/// - on client-construction, request, response-body, or transient status
+///   failure, fall back to cache when available
+///
+/// # Performance
+///
+/// Cache probing performs one up-front whole-file read through
+/// [`read_cache_file`]. models.dev changes infrequently, so cache hits are
+/// expected to be common, and [`crate::cache::payload`] documents typical
+/// compressed payload sizes of about 23-32 kB. That makes a single sequential
+/// read generally the faster hot path on modern NVMe-backed systems.
+///
+/// # Errors
+///
+/// Returns [`CatalogError`] when cache I/O fails without a usable fallback,
+/// response data cannot be decoded, the cache cannot be written, or the server
+/// responds with an unexpected status.
+pub(crate) async fn load_catalog_from_url(
+    path: &Path,
+    url: &str,
+) -> CatalogResult<CatalogLoadResult> {
+    // Probe the cache first. A missing file is the normal cold-start case; any
+    // other failure is remembered and only surfaced if the network path cannot
+    // produce a catalog either.
+    let mut cache_file = None;
+    let mut cache_error = None;
+    match read_cache_file(path).await {
+        Ok(file) => cache_file = Some(file),
+        Err(CatalogError::Io(error)) if error.kind() == ErrorKind::NotFound => {}
+        Err(error) => cache_error = Some(error),
+    }
+
+    let timeout = std::time::Duration::from_secs(REQUEST_TIMEOUT_SECS);
+    #[cfg(feature = "tokio")]
+    let client_builder = reqwest::Client::builder();
+    #[cfg(feature = "blocking")]
+    let client_builder = reqwest::blocking::Client::builder();
+    // `build()` can fail (e.g. the TLS backend cannot be initialized). Treat
+    // that like any other request failure instead of panicking in library code.
+    let client = match client_builder
+        .connect_timeout(timeout)
+        .timeout(timeout)
+        .build()
+    {
+        Ok(client) => client,
+        Err(error) => {
+            return load_after_request_failure(error, cache_file.as_ref(), cache_error);
+        }
+    };
+
+    let mut request = client.get(url);
+    if let Some(etag) = cache_file.as_ref().and_then(|file| file.etag_bytes()) {
+        request = request.header(IF_NONE_MATCH, etag);
+    }
+
+    let response = match request.send().await {
+        Ok(response) => response,
+        Err(error) => {
+            return load_after_request_failure(error, cache_file.as_ref(), cache_error);
+        }
+    };
+    match response.status() {
+        StatusCode::OK => {
+            // Copy the ETag out before `bytes()` consumes the response.
+            let response_etag: Option<Vec<u8>> = response
+                .headers()
+                .get(ETAG)
+                .map(|value| value.as_bytes().to_vec());
+            let body = match response.bytes().await {
+                Ok(body) => body,
+                Err(error) => {
+                    return load_after_request_failure(error, cache_file.as_ref(), cache_error);
+                }
+            };
+            let payload = cache_payload_from_api_json_bytes(body.as_ref())?;
+            // Encode before `catalog_from_cache_payload` takes ownership of the payload.
+            let payload_encoded = encode_cache_payload(&payload);
+            let catalog = catalog_from_cache_payload(payload)?;
+            let payload_compressed = zstd::bulk::compress(payload_encoded.as_slice(), 17)
+                .map_err(|error| CatalogError::Zstd(error.to_string()))?;
+
+            write_cache_file(
+                path,
+                &CacheWriteInput {
+                    etag: response_etag.as_deref(),
+                    payload_compressed: &payload_compressed,
+                    payload_len_decompressed: payload_encoded.len(),
+                },
+            )
+            .await?;
+
+            Ok(CatalogLoadResult {
+                catalog,
+                source: CatalogLoadSource::Downloaded,
+            })
+        }
+        StatusCode::NOT_MODIFIED => {
+            if let Some(cache_file) = cache_file.as_ref() {
+                load_catalog_from_cache_file_data(cache_file, CatalogLoadSource::NotModifiedCache)
+            } else if let Some(error) = cache_error {
+                Err(error)
+            } else {
+                Err(CatalogError::CacheFormat(
+                    "received 304 but no cached payload is available",
+                ))
+            }
+        }
+        status if is_transient_status(status) => {
+            if let Some(cache_file) = cache_file.as_ref() {
+                load_catalog_from_cache_file_data(cache_file, CatalogLoadSource::FallbackCache)
+            } else if let Some(error) = cache_error {
+                Err(error)
+            } else {
+                Err(CatalogError::Configuration(format!(
+                    "unexpected catalog sync status: {status}",
+                )))
+            }
+        }
+        status => Err(CatalogError::Configuration(format!(
+            "unexpected catalog sync status: {status}",
+        ))),
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::super::test_utils::{sample_api_json, start_mock_server, MockResponse};
+    use super::*;
+    use crate::cache::format::CacheWriteInput;
+    use crate::cache::payload::{
+        encode_cache_payload, CachedModelRow, CachedProviderRow, CatalogCachePayload,
+    };
+    use llm_coding_tools_core::models::{Modality, ProviderIdx, ProviderType};
+    use tempfile::TempDir;
+
+    #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))]
+    async fn sync_returns_downloaded_on_200() {
+        let temp = TempDir::new().expect("tempdir");
+        let cache_path = temp.path().join("test.cache");
+
+        // Start mock server returning 200 OK with fresh catalog data
+        let body = String::from_utf8_lossy(sample_api_json()).to_string();
+        let (_handle, url) = start_mock_server(MockResponse::Ok {
+            etag: "\"test-etag-123\"",
+            body,
+        });
+
+        let result = load_catalog_from_url(&cache_path, &url)
+            .await
+            .expect("sync should succeed");
+
+        // Verify source is Downloaded (not from cache)
+        assert_eq!(result.source, CatalogLoadSource::Downloaded);
+        let provider = result
+            .catalog
+            .lookup_provider("openai")
+            .expect("openai provider should exist");
+        assert_eq!(provider.api_type, ProviderType::OpenAiCompletions);
+        assert_eq!(provider.api_url, "https://api.openai.com/v1");
+
+        // Verify cache file was written with the ETag from response
+        let cache_file = read_cache_file(&cache_path)
+            .await
+            .expect("cache should exist");
+        assert_eq!(
+            cache_file.etag_bytes(),
+            Some(b"\"test-etag-123\"".as_slice())
+        );
+    }
+
+    #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))]
+    async fn sync_returns_cached_on_304_with_if_none_match() {
+        let temp = TempDir::new().expect("tempdir");
+        let cache_path = temp.path().join("test.cache");
+
+        // Pre-seed cache with a valid catalog payload and ETag
+        seed_cache(&cache_path).await;
+
+        // Server returns 304 Not Modified (ETag matches If-None-Match)
+        let (_handle, url) = start_mock_server(MockResponse::NotModified {
+            etag: "\"cached-etag-456\"",
+        });
+
+        let result = load_catalog_from_url(&cache_path, &url)
+            .await
+            .expect("sync should succeed");
+
+        // Verify source is NotModifiedCache (loaded from local file)
+        assert_eq!(result.source, CatalogLoadSource::NotModifiedCache);
+        let provider = result
+            .catalog
+            .lookup_provider("openai")
+            .expect("openai provider should exist");
+        assert_eq!(provider.api_type, ProviderType::OpenAiCompletions);
+    }
+
+    /// Returns a loopback URL whose port was bound and then released.
+    fn refused_local_url() -> String {
+        let listener = std::net::TcpListener::bind("127.0.0.1:0").expect("bind");
+        let port = listener.local_addr().expect("local addr").port();
+        drop(listener);
+        format!("http://127.0.0.1:{port}/api.json")
+    }
+
+    /// Writes a one-provider, one-model cache file with ETag `"cached-etag-456"`.
+    #[maybe_async::maybe_async]
+    async fn seed_cache(cache_path: &Path) {
+        let payload = CatalogCachePayload {
+            providers: vec![CachedProviderRow {
+                provider_key: "openai".to_string(),
+                api_url: "https://api.openai.com/v1".to_string(),
+                env_vars: vec!["OPENAI_API_KEY".to_string()],
+                api_type: ProviderType::OpenAiCompletions,
+            }],
+            models: vec![CachedModelRow {
+                provider_idx: ProviderIdx::new(0),
+                model_key: "gpt-4".to_string(),
+                modalities_bits: Modality::TEXT.bits(),
+                max_input: 8192,
+                max_output: 4096,
+                temperature: None,
+                top_p: None,
+            }],
+        };
+        let encoded = encode_cache_payload(&payload);
+        let compressed = zstd::bulk::compress(&encoded, 1).expect("compress");
+        crate::cache::format::write_cache_file(
+            cache_path,
+            &CacheWriteInput {
+                etag: Some(b"\"cached-etag-456\""),
+                payload_compressed: &compressed,
+                payload_len_decompressed: encoded.len(),
+            },
+        )
+        .await
+        .expect("seed cache");
+    }
+
+    #[test]
+    fn transient_status_detection_matches_retryable_responses() {
+        assert!(is_transient_status(StatusCode::TOO_MANY_REQUESTS));
+        assert!(is_transient_status(StatusCode::SERVICE_UNAVAILABLE));
+        assert!(!is_transient_status(StatusCode::NOT_FOUND));
+    }
+
+    #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))]
+    async fn sync_returns_fallback_cache_on_request_failure_with_valid_cache() {
+        let temp = TempDir::new().expect("tempdir");
+        let cache_path = temp.path().join("test.cache");
+
+        seed_cache(&cache_path).await;
+
+        let result = load_catalog_from_url(&cache_path, &refused_local_url())
+            .await
+            .expect("fallback should succeed");
+
+        assert_eq!(result.source, CatalogLoadSource::FallbackCache);
+        assert!(result.catalog.lookup_provider("openai").is_some());
+    }
+
+    #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))]
+    async fn sync_returns_fallback_cache_on_transient_status_with_valid_cache() {
+        let temp = TempDir::new().expect("tempdir");
+        let cache_path = temp.path().join("test.cache");
+
+        seed_cache(&cache_path).await;
+
+        let (_handle, url) = start_mock_server(MockResponse::Status {
+            code: 503,
+            reason: "Service Unavailable",
+        });
+
+        let result = load_catalog_from_url(&cache_path, &url)
+            .await
+            .expect("fallback should succeed");
+
+        assert_eq!(result.source, CatalogLoadSource::FallbackCache);
+        assert!(result.catalog.lookup_provider("openai").is_some());
+    }
+
+    #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))]
+    async fn sync_returns_cache_error_on_transient_status_with_corrupt_cache() {
+        let temp = TempDir::new().expect("tempdir");
+        let cache_path = temp.path().join("corrupt.cache");
+
+        std::fs::write(&cache_path, [0_u8; 11]).expect("write corrupt cache");
+
+        let (_handle, url) = start_mock_server(MockResponse::Status {
+            code: 429,
+            reason: "Too Many Requests",
+        });
+
+        match load_catalog_from_url(&cache_path, &url).await {
+            Err(error) => assert!(matches!(error, CatalogError::CacheFormat(_))),
+            Ok(_) => panic!("transient status with corrupt cache should error"),
+        }
+    }
+
+    #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))]
+    async fn sync_returns_fallback_cache_on_body_read_failure_with_valid_cache() {
+        let temp = TempDir::new().expect("tempdir");
+        let cache_path = temp.path().join("test.cache");
+
+        seed_cache(&cache_path).await;
+
+        // Advertise more bytes than are sent so reading the body fails.
+        let (_handle, url) = start_mock_server(MockResponse::PartialOk {
+            etag: "\"fresh-etag\"",
+            body: "{".to_string(),
+            content_length: 32,
+        });
+
+        let result = load_catalog_from_url(&cache_path, &url)
+            .await
+            .expect("fallback should succeed");
+
+        assert_eq!(result.source, CatalogLoadSource::FallbackCache);
+        assert!(result.catalog.lookup_provider("openai").is_some());
+    }
+
+    #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))]
+    async fn sync_returns_request_error_when_request_fails_without_cache() {
+        let temp = TempDir::new().expect("tempdir");
+        let cache_path = temp.path().join("missing.cache");
+
+        match load_catalog_from_url(&cache_path, &refused_local_url()).await {
+            Err(error) => assert!(matches!(error, CatalogError::Reqwest(_))),
+            Ok(_) => panic!("request failure without cache should error"),
+        }
+    }
+
+    #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))]
+    async fn sync_returns_cache_error_when_request_fails_with_corrupt_cache() {
+        let temp = TempDir::new().expect("tempdir");
+        let cache_path = temp.path().join("corrupt.cache");
+
+        std::fs::write(&cache_path, [0_u8; 11]).expect("write corrupt cache");
+
+        match load_catalog_from_url(&cache_path, &refused_local_url()).await {
+            Err(error) => assert!(matches!(error, CatalogError::CacheFormat(_))),
+            Ok(_) => panic!("request failure with corrupt cache should error"),
+        }
+    }
+}
diff --git a/src/llm-coding-tools-models-dev/src/catalog/test_utils.rs b/src/llm-coding-tools-models-dev/src/catalog/test_utils.rs
new file mode 100644
index 00000000..a7fec883
--- /dev/null
+++ b/src/llm-coding-tools-models-dev/src/catalog/test_utils.rs
@@ -0,0 +1,108 @@
+use std::io::{BufRead, Write};
+
+pub enum MockResponse {
+    Ok {
+        etag: &'static str,
+        body: String,
+    },
+    PartialOk {
+        etag: &'static str,
+        body: String,
+        content_length: usize,
+    },
+    NotModified {
+        etag: &'static str,
+    },
+    Status {
+        code: u16,
+        reason: &'static str,
+    },
+}
+
+pub fn sample_api_json() -> &'static [u8] {
+    br#"
+    {
+      "openai": {
+        "id": "openai",
+        "npm": "@ai-sdk/openai",
+        "api": "https://api.openai.com/v1",
+        "env": ["OPENAI_API_KEY"],
+        "models": {
+          "gpt-4": {
+            "modalities": {
+              "input": ["text"],
+              "output": ["text"]
+            },
+            "limit": {
+              "context": 8192,
+              "input": 8192,
+              "output": 4096
+            }
+          }
+        }
+      }
+    }
+    "#
+}
+
+pub fn start_mock_server(response: MockResponse) -> (std::thread::JoinHandle<()>, String) {
+    let listener = std::net::TcpListener::bind("127.0.0.1:0").expect("bind");
+    let port = listener.local_addr().unwrap().port();
+    let url = format!("http://127.0.0.1:{}/api.json", port);
+
+    let handle = std::thread::spawn(move || {
+        let (mut stream, _) = listener.accept().expect("accept");
+        let mut reader = std::io::BufReader::new(&stream);
+        let mut request = String::new();
+
+        loop {
+            let mut line = String::new();
+            if reader.read_line(&mut line).expect("read line") == 0 {
+                break;
+            }
+            if line == "\r\n" || line.is_empty() {
+                break;
+            }
+            request.push_str(&line);
+        }
+
+        let _has_if_none_match = request.contains("If-None-Match");
+
+        match response {
+            MockResponse::Ok { etag, body } => {
+                let response = format!(
+                    "HTTP/1.1 200 OK\r\nContent-Type: application/json\r\nETag: {}\r\nContent-Length: {}\r\n\r\n{}",
+                    etag,
+                    body.len(),
+                    body
+                );
+                stream.write_all(response.as_bytes()).expect("write");
+                stream.flush().expect("flush");
+            }
+            MockResponse::PartialOk {
+                etag,
+                body,
+                content_length,
+            } => {
+                let response = format!(
+                    "HTTP/1.1 200 OK\r\nContent-Type: application/json\r\nETag: {}\r\nContent-Length: {}\r\n\r\n{}",
+                    etag, content_length, body
+                );
+                stream.write_all(response.as_bytes()).expect("write");
+                stream.flush().expect("flush");
+            }
+            MockResponse::NotModified { etag } => {
+                let response = format!("HTTP/1.1 304 Not Modified\r\nETag: {}\r\n\r\n", etag);
+                stream.write_all(response.as_bytes()).expect("write");
+                stream.flush().expect("flush");
+            }
+            MockResponse::Status { code, reason } => {
+                let response = format!("HTTP/1.1 {code} {reason}\r\nContent-Length: 0\r\n\r\n");
+                stream.write_all(response.as_bytes()).expect("write");
+                stream.flush().expect("flush");
+            }
+        }
+    });
+
+    (handle, url)
+}
diff --git a/src/llm-coding-tools-models-dev/src/error.rs b/src/llm-coding-tools-models-dev/src/error.rs
new file mode 100644
index 00000000..3b3fdfb4
--- /dev/null
+++ b/src/llm-coding-tools-models-dev/src/error.rs
@@ -0,0 +1,52 @@
+//! Error types for models.dev catalog operations.
+
+use llm_coding_tools_core::models::ModelCatalogBuildError;
+use thiserror::Error;
+
+/// Errors that can occur during catalog loading and synchronization.
+#[derive(Debug, Error)]
+pub enum CatalogError {
+    /// The platform's cache directory could not be determined.
+    #[error("cache directory not found on this platform")]
+    CachePathNotFound,
+
+    /// A configuration error occurred (e.g., invalid environment variable).
+    #[error("configuration error: {0}")]
+    Configuration(String),
+
+    /// An I/O error occurred while reading or writing the cache.
+    #[error("I/O error: {0}")]
+    Io(#[from] std::io::Error),
+
+    /// An HTTP error occurred during the sync request.
+    #[error("HTTP error: {0}")]
+    Reqwest(#[from] reqwest::Error),
+
+    /// A JSON parse error occurred while decoding models.dev API JSON.
+    #[error("JSON parse error: {0}")]
+    Json(#[from] serde_json::Error),
+
+    /// A zstd compression or decompression error occurred.
+    #[error("zstd error: {0}")]
+    Zstd(String),
+
+    /// A bitcode deserialization error occurred.
+    #[error("decode error: {0}")]
+    BitcodeDecode(String),
+
+    /// The on-disk cache file is malformed or incompatible.
+    #[error("cache format error: {0}")]
+    CacheFormat(&'static str),
+
+    /// The catalog failed to build from source rows.
+    #[error("catalog build error: {0}")]
+    ModelCatalogBuild(#[from] ModelCatalogBuildError),
+
+    /// A spawn_blocking task failed.
+    #[cfg(feature = "tokio")]
+    #[error("blocking task failed: {0}")]
+    JoinHandle(#[from] tokio::task::JoinError),
+}
+
+/// Convenience type alias for catalog operations.
+pub type CatalogResult<T> = Result<T, CatalogError>;
diff --git a/src/llm-coding-tools-models-dev/src/fs/blocking_impl.rs b/src/llm-coding-tools-models-dev/src/fs/blocking_impl.rs
new file mode 100644
index 00000000..01252a9b
--- /dev/null
+++ b/src/llm-coding-tools-models-dev/src/fs/blocking_impl.rs
@@ -0,0 +1,36 @@
+//! Blocking/sync filesystem operations.
+
+use std::io::{ErrorKind, Read as _};
+use std::path::Path;
+
+/// Reads a file into memory in one pre-sized allocation.
+///
+/// # Concurrency
+///
+/// We snapshot file length then call `read_exact`, which would miss data appended after
+/// the metadata call if the file grew mid-read. However, within this codebase all
+/// writes go to a temp file first, then rename to target — so files are never
+/// appended to in place.
+/// Therefore this race cannot occur.
+#[inline]
+pub(crate) fn read(path: impl AsRef<Path>) -> std::io::Result<Box<[u8]>> {
+    let mut file = std::fs::File::open(path)?;
+    let file_len_u64 = file.metadata()?.len();
+    let file_len = usize::try_from(file_len_u64).map_err(|_| {
+        std::io::Error::new(ErrorKind::InvalidData, "file is too large to fit in memory")
+    })?;
+
+    let mut bytes = super::alloc_uninit_u8_slice(file_len);
+    if file_len != 0 {
+        let buf = super::uninit_u8_slice_as_mut_bytes(&mut bytes);
+        file.read_exact(buf)?;
+    }
+
+    Ok(super::assume_init_u8_slice(bytes))
+}
+
+/// Creates a directory and all parent directories.
+#[inline]
+pub(crate) fn create_dir_all(path: impl AsRef<Path>) -> std::io::Result<()> {
+    std::fs::create_dir_all(path)
+}
diff --git a/src/llm-coding-tools-models-dev/src/fs/mod.rs b/src/llm-coding-tools-models-dev/src/fs/mod.rs
new file mode 100644
index 00000000..e08a6304
--- /dev/null
+++ b/src/llm-coding-tools-models-dev/src/fs/mod.rs
@@ -0,0 +1,45 @@
+//! Filesystem abstraction layer.
+//!
+//! Provides unified APIs that work with both sync and async runtimes.
+//! Exactly one runtime feature must be enabled:
+//! - `tokio`: Async operations using the tokio runtime
+//! - `blocking`: Synchronous operations
+
+use std::mem::MaybeUninit;
+
+#[cfg(all(feature = "tokio", feature = "blocking"))]
+compile_error!("Features `tokio` and `blocking` are mutually exclusive.");
+
+#[cfg(not(any(feature = "tokio", feature = "blocking")))]
+compile_error!("Either `tokio` or `blocking` feature must be enabled for the fs module.");
+
+/// Allocates an uninitialized boxed byte slice with logical length `len`.
+#[inline]
+pub(crate) fn alloc_uninit_u8_slice(len: usize) -> Box<[MaybeUninit<u8>]> {
+    Box::<[u8]>::new_uninit_slice(len)
+}
+
+/// Zero-fills a `MaybeUninit<u8>` slice and views it as mutable initialized bytes.
+#[inline]
+pub(crate) fn uninit_u8_slice_as_mut_bytes(bytes: &mut [MaybeUninit<u8>]) -> &mut [u8] {
+    bytes.fill(MaybeUninit::new(0)); // `Read` impls must not be handed uninitialized memory.
+    // SAFETY: every element was initialized above; `MaybeUninit<u8>` has the layout of `u8`.
+    unsafe { std::slice::from_raw_parts_mut(bytes.as_mut_ptr().cast::<u8>(), bytes.len()) }
+}
+
+/// Converts a fully-initialized boxed uninitialized slice into initialized bytes.
+#[inline]
+pub(crate) fn assume_init_u8_slice(bytes: Box<[MaybeUninit<u8>]>) -> Box<[u8]> {
+    // SAFETY: caller guarantees all bytes were initialized.
+    unsafe { bytes.assume_init() }
+}
+
+#[cfg(feature = "tokio")]
+mod tokio_impl;
+#[cfg(feature = "tokio")]
+pub(crate) use tokio_impl::*;
+
+#[cfg(feature = "blocking")]
+mod blocking_impl;
+#[cfg(feature = "blocking")]
+pub(crate) use blocking_impl::*;
diff --git a/src/llm-coding-tools-models-dev/src/fs/tokio_impl.rs b/src/llm-coding-tools-models-dev/src/fs/tokio_impl.rs
new file mode 100644
index 00000000..29d04d2c
--- /dev/null
+++ b/src/llm-coding-tools-models-dev/src/fs/tokio_impl.rs
@@ -0,0 +1,37 @@
+//! Tokio-based async filesystem operations.
+
+use std::io::ErrorKind;
+use std::path::Path;
+use tokio::io::AsyncReadExt as _;
+
+/// Reads a file into memory in one pre-sized allocation.
+///
+/// # Concurrency
+///
+/// We snapshot file length then call `read_exact`, which would miss data appended after
+/// the metadata call if the file grew mid-read. However, within this codebase all
+/// writes go to a temp file first, then rename to target — so files are never
+/// appended to in place.
+/// Therefore this race cannot occur.
+#[inline]
+pub(crate) async fn read(path: impl AsRef<Path>) -> std::io::Result<Box<[u8]>> {
+    let mut file = tokio::fs::File::open(path).await?;
+    let file_len_u64 = file.metadata().await?.len();
+    let file_len = usize::try_from(file_len_u64).map_err(|_| {
+        std::io::Error::new(ErrorKind::InvalidData, "file is too large to fit in memory")
+    })?;
+
+    let mut bytes = super::alloc_uninit_u8_slice(file_len);
+    if file_len != 0 {
+        let buf = super::uninit_u8_slice_as_mut_bytes(&mut bytes);
+        file.read_exact(buf).await?;
+    }
+
+    Ok(super::assume_init_u8_slice(bytes))
+}
+
+/// Creates a directory and all parent directories.
+#[inline]
+pub(crate) async fn create_dir_all(path: impl AsRef<Path>) -> std::io::Result<()> {
+    tokio::fs::create_dir_all(path).await
+}
diff --git a/src/llm-coding-tools-models-dev/src/lib.rs b/src/llm-coding-tools-models-dev/src/lib.rs
new file mode 100644
index 00000000..60fef51e
--- /dev/null
+++ b/src/llm-coding-tools-models-dev/src/lib.rs
@@ -0,0 +1,21 @@
+#![doc = include_str!(concat!("../", env!("CARGO_PKG_README")))]
+
+// Validate feature combinations at compile time.
+#[cfg(all(feature = "async", feature = "blocking"))]
+compile_error!("Features `async` and `blocking` are mutually exclusive.");
+
+#[cfg(not(any(feature = "async", feature = "blocking")))]
+compile_error!(concat!(
+    "Either an async runtime (e.g., `tokio`) or `blocking` feature ",
+    "must be enabled."
+));
+
+mod api;
+pub mod cache;
+pub mod catalog;
+pub mod error;
+mod fs;
+
+pub use cache::shared_cache_path;
+pub use catalog::{CatalogLoadResult, CatalogLoadSource, ModelsDevCatalog};
+pub use error::{CatalogError, CatalogResult};