From 1ef109ae35c59d8b13268ec4cacc62778dfe3ac8 Mon Sep 17 00:00:00 2001
From: Sewer56 <admin@sewer56.dev>
Date: Sat, 28 Feb 2026 15:03:58 +0000
Subject: [PATCH 01/22] Added: llm-models-dev crate skeleton

- Create llm-coding-tools-models-dev crate (v0.1.0)
- Add public API: ModelsDevCatalog::load(), load_at()
- Define CatalogError, CatalogLoadResult, CatalogLoadSource
- Add cache path helpers and documentation
- Add workspace integration and CI workflow updates
- Include comprehensive documentation for all public items
---
 .github/workflows/rust.yml                    |   7 +-
 src/.cargo/verify.ps1                         |   6 +-
 src/.cargo/verify.sh                          |   6 +-
 src/Cargo.lock                                | 158 ++++++++++++++++--
 src/Cargo.toml                                |   2 +-
 src/llm-coding-tools-models-dev/Cargo.toml    |  48 ++++++
 src/llm-coding-tools-models-dev/README.md     |  64 +++++++
 .../src/cache/mod.rs                          |   8 +
 .../src/cache/path.rs                         |  48 ++++++
 .../src/catalog/load_result.rs                |  52 ++++++
 .../src/catalog/mod.rs                        | 113 +++++++++++++
 src/llm-coding-tools-models-dev/src/error.rs  |  36 ++++
 src/llm-coding-tools-models-dev/src/lib.rs    |   9 +
 13 files changed, 542 insertions(+), 15 deletions(-)
 create mode 100644 src/llm-coding-tools-models-dev/Cargo.toml
 create mode 100644 src/llm-coding-tools-models-dev/README.md
 create mode 100644 src/llm-coding-tools-models-dev/src/cache/mod.rs
 create mode 100644 src/llm-coding-tools-models-dev/src/cache/path.rs
 create mode 100644 src/llm-coding-tools-models-dev/src/catalog/load_result.rs
 create mode 100644 src/llm-coding-tools-models-dev/src/catalog/mod.rs
 create mode 100644 src/llm-coding-tools-models-dev/src/error.rs
 create mode 100644 src/llm-coding-tools-models-dev/src/lib.rs

diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index 8ebd0db2..f24cfd1f 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -52,13 +52,13 @@ jobs:
           cargo +stable binstall --no-confirm cargo-semver-checks --force
           rustup +stable target add ${{ matrix.target }}
 
-          for CRATE in "llm-coding-tools-core" "llm-coding-tools-agents" "llm-coding-tools-serdesai"; do
+          for CRATE in "llm-coding-tools-core" "llm-coding-tools-agents" "llm-coding-tools-serdesai" "llm-coding-tools-models-dev"; do
             SEARCH_RESULT=$(cargo search "^${CRATE}$" --limit 1)
             if echo "$SEARCH_RESULT" | grep -q "^${CRATE} "; then
               echo "Running semver checks for ${CRATE}..."
               # Note: llm-coding-tools-core has mutually exclusive async/blocking features,
               # so we must use --only-explicit-features to avoid enabling all features.
-              # The serdesai crate is async-only and doesn't have the tokio feature.
+              # The serdesai and models-dev crates are async-only and don't have the tokio feature.
               if [ "${CRATE}" = "llm-coding-tools-core" ]; then
                 cargo +stable semver-checks -p "${CRATE}" --target ${{ matrix.target }} --only-explicit-features --features tokio
               else
@@ -79,6 +79,7 @@ jobs:
           cargo doc -p llm-coding-tools-core --features tokio --document-private-items --no-deps --target ${{ matrix.target }}
           cargo doc -p llm-coding-tools-agents --document-private-items --no-deps --target ${{ matrix.target }}
           cargo doc -p llm-coding-tools-serdesai --document-private-items --no-deps --target ${{ matrix.target }}
+          cargo doc -p llm-coding-tools-models-dev --document-private-items --no-deps --target ${{ matrix.target }}
 
       - name: Run linter
         if: github.event_name == 'pull_request' || startsWith(github.ref, 'refs/tags/')
@@ -88,6 +89,7 @@ jobs:
           cargo clippy -p llm-coding-tools-core --features tokio --target ${{ matrix.target }} -- -D warnings
           cargo clippy -p llm-coding-tools-agents --target ${{ matrix.target }} -- -D warnings
           cargo clippy -p llm-coding-tools-serdesai --target ${{ matrix.target }} -- -D warnings
+          cargo clippy -p llm-coding-tools-models-dev --target ${{ matrix.target }} -- -D warnings
 
       - name: Run formatter check
         uses: actions-rust-lang/rustfmt@v1
@@ -165,6 +167,7 @@ jobs:
             src/llm-coding-tools-core
             src/llm-coding-tools-agents
             src/llm-coding-tools-serdesai
+            src/llm-coding-tools-models-dev
           compression-tool: 7z
           artifact-groups-file: .github/artifact-groups.yml
           changelog-enabled: "true"
diff --git a/src/.cargo/verify.ps1 b/src/.cargo/verify.ps1
index 745bf015..b90d0507 100644
--- a/src/.cargo/verify.ps1
+++ b/src/.cargo/verify.ps1
@@ -2,7 +2,7 @@
 # All steps must pass without warnings
 # Keep in sync with verify.sh
 #
-# Note: llm-coding-tools-serdesai is async-only (implements async Tool traits).
+# Note: llm-coding-tools-serdesai and llm-coding-tools-models-dev are async-only.
 # The blocking feature only applies to llm-coding-tools-core.
 
 $ErrorActionPreference = "Stop"
@@ -35,16 +35,19 @@ try {
 Invoke-LoggedCommand "cargo" @("build", "-p", "llm-coding-tools-core", "--quiet")
 Invoke-LoggedCommand "cargo" @("build", "-p", "llm-coding-tools-agents", "--quiet")
 Invoke-LoggedCommand "cargo" @("build", "-p", "llm-coding-tools-serdesai", "--quiet")
+Invoke-LoggedCommand "cargo" @("build", "-p", "llm-coding-tools-models-dev", "--quiet")
 
 Write-Host "Testing..."
 Invoke-LoggedCommand "cargo" @("test", "-p", "llm-coding-tools-core", "--quiet")
 Invoke-LoggedCommand "cargo" @("test", "-p", "llm-coding-tools-agents", "--quiet")
 Invoke-LoggedCommand "cargo" @("test", "-p", "llm-coding-tools-serdesai", "--quiet")
+Invoke-LoggedCommand "cargo" @("test", "-p", "llm-coding-tools-models-dev", "--quiet")
 
 Write-Host "Clippy..."
 Invoke-LoggedCommand "cargo" @("clippy", "-p", "llm-coding-tools-core", "--quiet", "--", "-D", "warnings")
 Invoke-LoggedCommand "cargo" @("clippy", "-p", "llm-coding-tools-agents", "--quiet", "--", "-D", "warnings")
 Invoke-LoggedCommand "cargo" @("clippy", "-p", "llm-coding-tools-serdesai", "--quiet", "--", "-D", "warnings")
+Invoke-LoggedCommand "cargo" @("clippy", "-p", "llm-coding-tools-models-dev", "--quiet", "--", "-D", "warnings")
 
 Write-Host "Testing blocking feature..."
 Invoke-LoggedCommand "cargo" @("test", "-p", "llm-coding-tools-core", "--no-default-features", "--features", "blocking", "--quiet")
@@ -65,6 +68,7 @@ Write-Host "Publish dry-run..."
 Invoke-LoggedCommand "cargo" @("publish", "--dry-run", "--allow-dirty", "-p", "llm-coding-tools-core", "--quiet")
 Invoke-LoggedCommand "cargo" @("publish", "--dry-run", "--allow-dirty", "-p", "llm-coding-tools-agents", "--quiet")
 Invoke-LoggedCommand "cargo" @("publish", "--dry-run", "--allow-dirty", "-p", "llm-coding-tools-serdesai", "--quiet")
+Invoke-LoggedCommand "cargo" @("publish", "--dry-run", "--allow-dirty", "-p", "llm-coding-tools-models-dev", "--quiet")
 
 Write-Host "All checks passed!"
 }
diff --git a/src/.cargo/verify.sh b/src/.cargo/verify.sh
index b51897cf..a6f4f416 100755
--- a/src/.cargo/verify.sh
+++ b/src/.cargo/verify.sh
@@ -3,7 +3,7 @@
 # All steps must pass without warnings
 # Keep in sync with verify.ps1
 #
-# Note: llm-coding-tools-serdesai is async-only (implements async Tool traits).
+# Note: llm-coding-tools-serdesai and llm-coding-tools-models-dev are async-only.
 # The blocking feature only applies to llm-coding-tools-core.
 
 set -e
@@ -24,16 +24,19 @@ echo "Building..."
 run_cmd cargo build -p llm-coding-tools-core --quiet
 run_cmd cargo build -p llm-coding-tools-agents --quiet
 run_cmd cargo build -p llm-coding-tools-serdesai --quiet
+run_cmd cargo build -p llm-coding-tools-models-dev --quiet
 
 echo "Testing..."
 run_cmd cargo test -p llm-coding-tools-core --quiet
 run_cmd cargo test -p llm-coding-tools-agents --quiet
 run_cmd cargo test -p llm-coding-tools-serdesai --quiet
+run_cmd cargo test -p llm-coding-tools-models-dev --quiet
 
 echo "Clippy..."
 run_cmd cargo clippy -p llm-coding-tools-core --quiet -- -D warnings
 run_cmd cargo clippy -p llm-coding-tools-agents --quiet -- -D warnings
 run_cmd cargo clippy -p llm-coding-tools-serdesai --quiet -- -D warnings
+run_cmd cargo clippy -p llm-coding-tools-models-dev --quiet -- -D warnings
 
 echo "Testing blocking feature..."
 run_cmd cargo test -p llm-coding-tools-core --no-default-features --features blocking --quiet
@@ -48,5 +51,6 @@ echo "Publish dry-run..."
 run_cmd cargo publish --dry-run --allow-dirty -p llm-coding-tools-core --quiet
 run_cmd cargo publish --dry-run --allow-dirty -p llm-coding-tools-agents --quiet
 run_cmd cargo publish --dry-run --allow-dirty -p llm-coding-tools-serdesai --quiet
+run_cmd cargo publish --dry-run --allow-dirty -p llm-coding-tools-models-dev --quiet
 
 echo "All checks passed!"
diff --git a/src/Cargo.lock b/src/Cargo.lock
index a9d43b94..08986572 100644
--- a/src/Cargo.lock
+++ b/src/Cargo.lock
@@ -74,6 +74,12 @@ version = "1.0.100"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61"
 
+[[package]]
+name = "arrayvec"
+version = "0.7.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50"
+
 [[package]]
 name = "assert-json-diff"
 version = "2.0.2"
@@ -144,6 +150,30 @@ version = "0.22.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
 
+[[package]]
+name = "bitcode"
+version = "0.6.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0a6ed1b54d8dc333e7be604d00fa9262f4635485ffea923647b6521a5fff045d"
+dependencies = [
+ "arrayvec",
+ "bitcode_derive",
+ "bytemuck",
+ "glam",
+ "serde",
+]
+
+[[package]]
+name = "bitcode_derive"
+version = "0.6.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "238b90427dfad9da4a9abd60f3ec1cdee6b80454bde49ed37f1781dd8e9dc7f9"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
 [[package]]
 name = "bitfields"
 version = "1.0.2"
@@ -197,6 +227,12 @@ version = "3.19.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "5dd9dc738b7a8311c7ade152424974d8115f2cdad61e8dab8dac9f2362298510"
 
+[[package]]
+name = "bytemuck"
+version = "1.25.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c8efb64bd706a16a1bdde310ae86b351e4d21550d98d056f22f8a7f7a2183fec"
+
 [[package]]
 name = "bytes"
 version = "1.11.0"
@@ -548,6 +584,27 @@ dependencies = [
  "crypto-common",
 ]
 
+[[package]]
+name = "dirs"
+version = "6.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c3e8aa94d75141228480295a7d0e7feb620b1a5ad9f12bc40be62411e38cce4e"
+dependencies = [
+ "dirs-sys",
+]
+
+[[package]]
+name = "dirs-sys"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e01a3366d27ee9890022452ee61b2b63a67e6f13f58900b651ff5665f0bb1fab"
+dependencies = [
+ "libc",
+ "option-ext",
+ "redox_users",
+ "windows-sys 0.61.2",
+]
+
 [[package]]
 name = "displaydoc"
 version = "0.2.5"
@@ -794,6 +851,12 @@ dependencies = [
  "wasip3",
 ]
 
+[[package]]
+name = "glam"
+version = "0.32.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "34627c5158214743a374170fed714833fdf4e4b0cbcc1ea98417866a4c5d4441"
+
 [[package]]
 name = "globset"
 version = "0.4.18"
@@ -1315,15 +1378,24 @@ checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2"
 
 [[package]]
 name = "libc"
-version = "0.2.180"
+version = "0.2.182"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bcc35a38544a891a5f7c865aca548a982ccb3b8650a5b06d0fd33a10283c56fc"
+checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112"
+
+[[package]]
+name = "libredox"
+version = "0.1.13"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "062b52cd41eb8d929e81b592a47df833c33c15684933a9329440137a6d9f134c"
+dependencies = [
+ "libc",
+]
 
 [[package]]
 name = "linux-raw-sys"
-version = "0.11.0"
+version = "0.12.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039"
+checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53"
 
 [[package]]
 name = "lite-strtab"
@@ -1390,6 +1462,21 @@ dependencies = [
  "wiremock",
 ]
 
+[[package]]
+name = "llm-coding-tools-models-dev"
+version = "0.1.0"
+dependencies = [
+ "bitcode",
+ "dirs",
+ "llm-coding-tools-core",
+ "reqwest 0.13.1",
+ "serde",
+ "tempfile",
+ "thiserror 2.0.18",
+ "tokio",
+ "zstd",
+]
+
 [[package]]
 name = "llm-coding-tools-serdesai"
 version = "0.2.0"
@@ -1500,9 +1587,9 @@ checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086"
 
 [[package]]
 name = "nix"
-version = "0.31.1"
+version = "0.31.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "225e7cfe711e0ba79a68baeddb2982723e4235247aefce1482f2f16c27865b66"
+checksum = "5d6d0705320c1e6ba1d912b5e37cf18071b6c2e9b7fa8215a1e8a7651966f5d3"
 dependencies = [
  "bitflags",
  "cfg-if",
@@ -1547,6 +1634,12 @@ version = "0.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9f50d9b3dabb09ecd771ad0aa242ca6894994c130308ca3d7684634df8037391"
 
+[[package]]
+name = "option-ext"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d"
+
 [[package]]
 name = "page_size"
 version = "0.6.0"
@@ -1637,6 +1730,12 @@ version = "0.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
 
+[[package]]
+name = "pkg-config"
+version = "0.3.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c"
+
 [[package]]
 name = "plotters"
 version = "0.3.7"
@@ -1886,6 +1985,17 @@ dependencies = [
  "bitflags",
 ]
 
+[[package]]
+name = "redox_users"
+version = "0.5.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a4e608c6638b9c18977b00b475ac1f28d14e84b27d8d42f70e0bf1e3dec127ac"
+dependencies = [
+ "getrandom 0.2.17",
+ "libredox",
+ "thiserror 2.0.18",
+]
+
 [[package]]
 name = "ref-cast"
 version = "1.0.25"
@@ -2036,9 +2146,9 @@ checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d"
 
 [[package]]
 name = "rustix"
-version = "1.1.3"
+version = "1.1.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "146c9e247ccc180c1f61615433868c99f3de3ae256a30a43b49f67c2d9171f34"
+checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190"
 dependencies = [
  "bitflags",
  "errno",
@@ -2642,9 +2752,9 @@ dependencies = [
 
 [[package]]
 name = "tempfile"
-version = "3.25.0"
+version = "3.26.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0136791f7c95b1f6dd99f9cc786b91bb81c3800b639b3478e561ddb7be95e5f1"
+checksum = "82a72c767771b47409d2345987fda8628641887d5466101319899796367354a0"
 dependencies = [
  "fastrand",
  "getrandom 0.3.4",
@@ -3773,3 +3883,31 @@ name = "zmij"
 version = "1.0.16"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "dfcd145825aace48cff44a8844de64bf75feec3080e0aa5cdbde72961ae51a65"
+
+[[package]]
+name = "zstd"
+version = "0.13.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a"
+dependencies = [
+ "zstd-safe",
+]
+
+[[package]]
+name = "zstd-safe"
+version = "7.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d"
+dependencies = [
+ "zstd-sys",
+]
+
+[[package]]
+name = "zstd-sys"
+version = "2.0.16+zstd.1.5.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "91e19ebc2adc8f83e43039e79776e3fda8ca919132d68a1fed6a5faca2683748"
+dependencies = [
+ "cc",
+ "pkg-config",
+]
diff --git a/src/Cargo.toml b/src/Cargo.toml
index 0dbd669e..7429dbb9 100644
--- a/src/Cargo.toml
+++ b/src/Cargo.toml
@@ -1,7 +1,7 @@
 
 [workspace]
 resolver = "2"
-members = ["llm-coding-tools-core", "llm-coding-tools-serdesai", "llm-coding-tools-agents"]
+members = ["llm-coding-tools-core", "llm-coding-tools-serdesai", "llm-coding-tools-agents", "llm-coding-tools-models-dev"]
 
 # Profile Build
 [profile.profile]
diff --git a/src/llm-coding-tools-models-dev/Cargo.toml b/src/llm-coding-tools-models-dev/Cargo.toml
new file mode 100644
index 00000000..5ee01d31
--- /dev/null
+++ b/src/llm-coding-tools-models-dev/Cargo.toml
@@ -0,0 +1,48 @@
+[package]
+name = "llm-coding-tools-models-dev"
+version = "0.1.0"
+edition = "2021"
+description = "models.dev catalog ingestion with online-first sync pipeline"
+repository = "https://github.com/Sewer56/llm-coding-tools"
+license = "Apache-2.0"
+include = ["src/**/*", "README.md"]
+readme = "README.md"
+
+[features]
+default = ["tokio"]
+# Async with tokio runtime
+tokio = ["dep:tokio", "dep:reqwest"]
+
+[dependencies]
+# Core library for ModelCatalog and related types
+llm-coding-tools-core = { path = "../llm-coding-tools-core", version = "0.2.0", features = [
+    "tokio",
+] }
+
+# Cross-platform cache directory detection
+dirs = "6.0.0"
+
+# HTTP client for conditional GET requests
+reqwest = { version = "0.13", default-features = false, features = [
+    "rustls",
+    "rustls-native-certs",
+], optional = true }
+
+# Fast binary serialization
+bitcode = "0.6.9"
+
+# Compression for cache payload
+zstd = "0.13.3"
+
+# JSON parsing for models.dev API responses
+serde = { version = "1.0.228", features = ["derive"] }
+
+# Ergonomic error definitions
+thiserror = "2.0.18"
+
+# Async runtime (when tokio feature enabled)
+tokio = { version = "1.49", features = ["fs", "io-util"], optional = true }
+
+[dev-dependencies]
+tokio = { version = "1.49", features = ["rt", "macros"] }
+tempfile = "3.26"
diff --git a/src/llm-coding-tools-models-dev/README.md b/src/llm-coding-tools-models-dev/README.md
new file mode 100644
index 00000000..8ea3ad47
--- /dev/null
+++ b/src/llm-coding-tools-models-dev/README.md
@@ -0,0 +1,64 @@
+# llm-coding-tools-models-dev
+
+models.dev catalog ingestion with online-first sync and local cache fallback.
+
+This crate loads provider/model data from models.dev and builds a
+`llm_coding_tools_core::models::ModelCatalog`.
+
+## Why this exists
+
+If you run coding agents against many providers, you usually want all three:
+
+- Fresh data when online.
+- A reliable fallback when offline.
+- A compact cache that is cheap to load.
+
+That is the core goal here.
+
+## What it does
+
+- **Online-first sync**: Sends conditional requests with `If-None-Match` and reuses cache on `304 Not Modified`.
+- **Implicit fallback**: If network sync fails, loads the last valid cache automatically.
+- **Compact storage**: Stores cache as prelude + ETag + `zstd(bitcode(payload))`.
+- **Minimal API**: Exposes `ModelsDevCatalog::load()` and `ModelsDevCatalog::load_at(...)`.
+
+## Usage
+
+```rust
+use llm_coding_tools_models_dev::{CatalogLoadSource, ModelsDevCatalog};
+
+async fn load_catalog() -> Result<(), Box<dyn std::error::Error>> {
+    let result = ModelsDevCatalog::load().await?;
+
+    match result.source {
+        CatalogLoadSource::Downloaded => println!("Downloaded fresh snapshot."),
+        CatalogLoadSource::NotModifiedCache => println!("Cache is already up to date."),
+        CatalogLoadSource::FallbackCache => println!("Network unavailable, using cached snapshot."),
+    }
+
+    if let Some(entry) = result.catalog.lookup("openai", "gpt-4") {
+        println!("provider api url: {}", entry.0.api_url);
+        println!("max input tokens: {}", entry.1.max_input);
+    }
+
+    Ok(())
+}
+```
+
+## Cache location
+
+By default, cache is stored in the platform cache directory:
+
+- Linux: `~/.cache/llm-coding-tools/models.dev.catalog.v1.cache`
+- macOS: `~/Library/Caches/llm-coding-tools/models.dev.catalog.v1.cache`
+- Windows: `%LOCALAPPDATA%\llm-coding-tools\models.dev.catalog.v1.cache`
+
+Set `LLM_CODING_TOOLS_MODELS_DEV_CACHE_PATH` to override this path.
+
+## Feature flags
+
+- `tokio` (default): async runtime support.
+
+## License
+
+Apache-2.0
diff --git a/src/llm-coding-tools-models-dev/src/cache/mod.rs b/src/llm-coding-tools-models-dev/src/cache/mod.rs
new file mode 100644
index 00000000..9150d79d
--- /dev/null
+++ b/src/llm-coding-tools-models-dev/src/cache/mod.rs
@@ -0,0 +1,8 @@
+//! Cache path resolution and management.
+//!
+//! This module handles cross-platform cache directory detection and
+//! the default cache file path for models.dev catalogs.
+
+mod path;
+
+pub use path::shared_cache_path;
diff --git a/src/llm-coding-tools-models-dev/src/cache/path.rs b/src/llm-coding-tools-models-dev/src/cache/path.rs
new file mode 100644
index 00000000..d8841147
--- /dev/null
+++ b/src/llm-coding-tools-models-dev/src/cache/path.rs
@@ -0,0 +1,48 @@
+//! Cross-platform cache path resolution.
+
+#![allow(dead_code)]
+
+use crate::error::CatalogError;
+use std::path::PathBuf;
+
+/// Environment variable name for overriding the default cache path.
+pub const CACHE_PATH_ENV_VAR: &str = "LLM_CODING_TOOLS_MODELS_DEV_CACHE_PATH";
+
+/// Returns the shared cache path for the models.dev catalog.
+///
+/// This function determines the appropriate cache location using the following
+/// precedence:
+///
+/// 1. `LLM_CODING_TOOLS_MODELS_DEV_CACHE_PATH` environment variable (if set)
+/// 2. Platform cache directory + `llm-coding-tools/models.dev.catalog.v1.cache`
+///
+/// # Platform Cache Locations
+///
+/// - **Linux**: `~/.cache/llm-coding-tools/models.dev.catalog.v1.cache`
+/// - **macOS**: `~/Library/Caches/llm-coding-tools/models.dev.catalog.v1.cache`
+/// - **Windows**: `%LOCALAPPDATA%\llm-coding-tools\models.dev.catalog.v1.cache`
+///
+/// # Returns
+///
+/// The full path to the cache file.
+///
+/// # Errors
+///
+/// Returns [`CatalogError::CachePathNotFound`] when:
+/// - The environment variable is not set AND
+/// - The platform cache directory cannot be determined
+///
+/// # Examples
+///
+/// ```
+/// use llm_coding_tools_models_dev::shared_cache_path;
+///
+/// # fn example() -> Result<(), Box<dyn std::error::Error>> {
+/// let path = shared_cache_path()?;
+/// println!("Cache location: {}", path.display());
+/// # Ok(())
+/// # }
+/// ```
+pub fn shared_cache_path() -> Result<PathBuf, CatalogError> {
+    todo!("shared_cache_path() not yet implemented")
+}
diff --git a/src/llm-coding-tools-models-dev/src/catalog/load_result.rs b/src/llm-coding-tools-models-dev/src/catalog/load_result.rs
new file mode 100644
index 00000000..aee2d46e
--- /dev/null
+++ b/src/llm-coding-tools-models-dev/src/catalog/load_result.rs
@@ -0,0 +1,52 @@
+//! Result types for catalog load operations.
+
+use llm_coding_tools_core::models::ModelCatalog;
+
+/// Result of a successful catalog load operation.
+///
+/// This struct provides both the loaded catalog and metadata about
+/// how the catalog was obtained (fresh download, cached, etc.).
+pub struct CatalogLoadResult {
+    /// The loaded model catalog ready for lookups.
+    pub catalog: ModelCatalog,
+
+    /// Information about how the catalog was loaded.
+    pub source: CatalogLoadSource,
+}
+
+/// Indicates how the catalog was loaded.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum CatalogLoadSource {
+    /// The catalog was downloaded fresh (HTTP 200 OK) and the cache was updated.
+    Downloaded,
+
+    /// The cache was up to date (HTTP 304 Not Modified) and loaded from disk.
+    NotModifiedCache,
+
+    /// A network failure occurred, but a valid cached copy was available
+    /// and loaded as a fallback.
+    FallbackCache,
+}
+
+impl CatalogLoadSource {
+    /// Returns true if the catalog was loaded from the network (fresh download).
+    #[inline]
+    pub fn is_fresh(&self) -> bool {
+        matches!(self, CatalogLoadSource::Downloaded)
+    }
+
+    /// Returns true if the catalog was loaded from cache (either fresh cache or fallback).
+    #[inline]
+    pub fn is_cached(&self) -> bool {
+        matches!(
+            self,
+            CatalogLoadSource::NotModifiedCache | CatalogLoadSource::FallbackCache
+        )
+    }
+
+    /// Returns true if this was a fallback load due to network failure.
+    #[inline]
+    pub fn is_fallback(&self) -> bool {
+        matches!(self, CatalogLoadSource::FallbackCache)
+    }
+}
diff --git a/src/llm-coding-tools-models-dev/src/catalog/mod.rs b/src/llm-coding-tools-models-dev/src/catalog/mod.rs
new file mode 100644
index 00000000..2a826c81
--- /dev/null
+++ b/src/llm-coding-tools-models-dev/src/catalog/mod.rs
@@ -0,0 +1,113 @@
+//! Catalog loading and synchronization with models.dev.
+//!
+//! Flow is simple:
+//! - Try online sync first using conditional HTTP (`If-None-Match`)
+//! - Reuse cache on `304 Not Modified`
+//! - Fall back to cached data if the network path fails
+
+mod load_result;
+
+pub use load_result::{CatalogLoadResult, CatalogLoadSource};
+
+use crate::error::CatalogError;
+use std::path::Path;
+
+/// Entry point for loading models.dev catalogs.
+///
+/// This struct provides static methods for loading the catalog either
+/// from the default shared cache location or from a custom path.
+pub struct ModelsDevCatalog;
+
+impl ModelsDevCatalog {
+    /// Loads the catalog from the default shared cache location.
+    ///
+    /// This is the primary entry point for most use cases. It will:
+    /// 1. Check for an existing cache and extract its ETag
+    /// 2. Send a conditional GET request with `If-None-Match`
+    /// 3. On `200 OK`: download, normalize, cache, and return fresh data
+    /// 4. On `304 Not Modified`: decode and return cached data
+    /// 5. On network failure: fall back to cached data if available
+    ///
+    /// The cache location is determined by:
+    /// - `LLM_CODING_TOOLS_MODELS_DEV_CACHE_PATH` environment variable (if set)
+    /// - Platform cache directory + `llm-coding-tools/models.dev.catalog.v1.cache`
+    ///
+    /// # Returns
+    ///
+    /// A [`CatalogLoadResult`] containing the loaded catalog and information
+    /// about how it was loaded (downloaded fresh, from cache, or fallback).
+    ///
+    /// # Errors
+    ///
+    /// Returns [`CatalogError`] when:
+    /// - The cache path cannot be determined and no cache exists
+    /// - An HTTP error occurs and no cache is available for fallback
+    /// - The cache is corrupted and cannot be decoded
+    /// - Catalog construction from normalized data fails
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use llm_coding_tools_models_dev::ModelsDevCatalog;
+    ///
+    /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
+    /// let result = ModelsDevCatalog::load().await?;
+    ///
+    /// // Use the catalog
+    /// if let Some(entry) = result.catalog.lookup("openai", "gpt-4") {
+    ///     println!("API URL: {}", entry.0.api_url);
+    /// }
+    /// # Ok(())
+    /// # }
+    /// ```
+    pub async fn load() -> Result<CatalogLoadResult, CatalogError> {
+        todo!("ModelsDevCatalog::load() not yet implemented")
+    }
+
+    /// Loads the catalog from a specific cache file path.
+    ///
+    /// This method provides the same behavior as [`load`](Self::load), but
+    /// allows specifying a custom cache file path. This is useful for:
+    /// - Testing with temporary cache files
+    /// - Custom deployment scenarios
+    /// - Isolated cache locations
+    ///
+    /// # Parameters
+    ///
+    /// * `path` - The path to the cache file. Parent directories will be
+    ///   created if they don't exist.
+    ///
+    /// # Returns
+    ///
+    /// A [`CatalogLoadResult`] containing the loaded catalog and source
+    /// information.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`CatalogError`] under the same conditions as [`load`](Self::load),
+    /// plus:
+    /// - The parent directory cannot be created
+    /// - The path is not a valid file path
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use llm_coding_tools_models_dev::ModelsDevCatalog;
+    /// use std::path::PathBuf;
+    ///
+    /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
+    /// let cache_path = PathBuf::from("/tmp/my-cache.cache");
+    /// let result = ModelsDevCatalog::load_at(&cache_path).await?;
+    ///
+    /// // Use the catalog
+    /// if let Some(entry) = result.catalog.lookup("openai", "gpt-4") {
+    ///     println!("API URL: {}", entry.0.api_url);
+    /// }
+    /// # Ok(())
+    /// # }
+    /// ```
+    pub async fn load_at(path: impl AsRef<Path>) -> Result<CatalogLoadResult, CatalogError> {
+        let _path = path.as_ref();
+        todo!("ModelsDevCatalog::load_at() not yet implemented")
+    }
+}
diff --git a/src/llm-coding-tools-models-dev/src/error.rs b/src/llm-coding-tools-models-dev/src/error.rs
new file mode 100644
index 00000000..daad6d28
--- /dev/null
+++ b/src/llm-coding-tools-models-dev/src/error.rs
@@ -0,0 +1,36 @@
+//! Error types for models.dev catalog operations.
+
+use llm_coding_tools_core::models::ModelCatalogBuildError;
+use thiserror::Error;
+
+/// Errors that can occur during catalog loading and synchronization.
+#[derive(Debug, Error)]
+pub enum CatalogError {
+    /// The platform's cache directory could not be determined.
+    #[error("cache directory not found on this platform")]
+    CachePathNotFound,
+
+    /// A configuration error occurred (e.g., invalid environment variable).
+    #[error("configuration error: {0}")]
+    Configuration(String),
+
+    /// An I/O error occurred while reading or writing the cache.
+    #[error("I/O error: {0}")]
+    Io(#[from] std::io::Error),
+
+    /// An HTTP error occurred during the sync request.
+    #[error("HTTP error: {0}")]
+    Reqwest(#[from] reqwest::Error),
+
+    /// A zstd decompression error occurred.
+    #[error("decompression error: {0}")]
+    Zstd(String),
+
+    /// A bitcode deserialization error occurred.
+    #[error("decode error: {0}")]
+    BitcodeDecode(String),
+
+    /// The catalog failed to build from source rows.
+    #[error("catalog build error: {0}")]
+    ModelCatalogBuild(#[from] ModelCatalogBuildError),
+}
diff --git a/src/llm-coding-tools-models-dev/src/lib.rs b/src/llm-coding-tools-models-dev/src/lib.rs
new file mode 100644
index 00000000..82e92d1b
--- /dev/null
+++ b/src/llm-coding-tools-models-dev/src/lib.rs
@@ -0,0 +1,9 @@
+#![doc = include_str!(concat!("../", env!("CARGO_PKG_README")))]
+
+pub mod cache;
+pub mod catalog;
+pub mod error;
+
+pub use cache::shared_cache_path;
+pub use catalog::{CatalogLoadResult, CatalogLoadSource, ModelsDevCatalog};
+pub use error::CatalogError;

From 4342fe1b8bb329f5df9ac96631d8c6864e2fc2d3 Mon Sep 17 00:00:00 2001
From: Sewer56 <admin@sewer56.dev>
Date: Sat, 28 Feb 2026 15:56:13 +0000
Subject: [PATCH 02/22] Added: cache path resolution with env override and
 CatalogResult type alias

---
 .../src/cache/mod.rs                            |  1 +
 .../src/cache/path.rs                           | 17 ++++++++++++++---
 src/llm-coding-tools-models-dev/src/error.rs    |  3 +++
 src/llm-coding-tools-models-dev/src/lib.rs      |  2 +-
 4 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/src/llm-coding-tools-models-dev/src/cache/mod.rs b/src/llm-coding-tools-models-dev/src/cache/mod.rs
index 9150d79d..e1ae9be5 100644
--- a/src/llm-coding-tools-models-dev/src/cache/mod.rs
+++ b/src/llm-coding-tools-models-dev/src/cache/mod.rs
@@ -5,4 +5,5 @@
 
 mod path;
 
+pub use crate::error::CatalogResult;
 pub use path::shared_cache_path;
diff --git a/src/llm-coding-tools-models-dev/src/cache/path.rs b/src/llm-coding-tools-models-dev/src/cache/path.rs
index d8841147..06a43a7e 100644
--- a/src/llm-coding-tools-models-dev/src/cache/path.rs
+++ b/src/llm-coding-tools-models-dev/src/cache/path.rs
@@ -2,7 +2,7 @@
 
 #![allow(dead_code)]
 
-use crate::error::CatalogError;
+use crate::{error::CatalogResult, CatalogError};
 use std::path::PathBuf;
 
 /// Environment variable name for overriding the default cache path.
@@ -43,6 +43,17 @@ pub const CACHE_PATH_ENV_VAR: &str = "LLM_CODING_TOOLS_MODELS_DEV_CACHE_PATH";
 /// # Ok(())
 /// # }
 /// ```
-pub fn shared_cache_path() -> Result<PathBuf, CatalogError> {
-    todo!("shared_cache_path() not yet implemented")
+const CACHE_SUBDIR: &str = "llm-coding-tools";
+const CACHE_FILENAME: &str = "models.dev.catalog.v1.cache";
+
+pub fn shared_cache_path() -> CatalogResult<PathBuf> {
+    // 1. Check env var first
+    if let Ok(path) = std::env::var(CACHE_PATH_ENV_VAR) {
+        return Ok(PathBuf::from(path));
+    }
+
+    // 2. Fall back to dirs::cache_dir()
+    let cache_dir = dirs::cache_dir().ok_or(CatalogError::CachePathNotFound)?;
+
+    Ok(cache_dir.join(CACHE_SUBDIR).join(CACHE_FILENAME))
 }
diff --git a/src/llm-coding-tools-models-dev/src/error.rs b/src/llm-coding-tools-models-dev/src/error.rs
index daad6d28..02f1b5c9 100644
--- a/src/llm-coding-tools-models-dev/src/error.rs
+++ b/src/llm-coding-tools-models-dev/src/error.rs
@@ -34,3 +34,6 @@ pub enum CatalogError {
     #[error("catalog build error: {0}")]
     ModelCatalogBuild(#[from] ModelCatalogBuildError),
 }
+
+/// Convenience type alias for catalog operations.
+pub type CatalogResult<T> = Result<T, CatalogError>;
diff --git a/src/llm-coding-tools-models-dev/src/lib.rs b/src/llm-coding-tools-models-dev/src/lib.rs
index 82e92d1b..a635ec71 100644
--- a/src/llm-coding-tools-models-dev/src/lib.rs
+++ b/src/llm-coding-tools-models-dev/src/lib.rs
@@ -6,4 +6,4 @@ pub mod error;
 
 pub use cache::shared_cache_path;
 pub use catalog::{CatalogLoadResult, CatalogLoadSource, ModelsDevCatalog};
-pub use error::CatalogError;
+pub use error::{CatalogError, CatalogResult};

From cd905182eb398839dc9a5ce6bd394791f1ddc6d6 Mon Sep 17 00:00:00 2001
From: Sewer56 <admin@sewer56.dev>
Date: Sat, 28 Feb 2026 20:50:47 +0000
Subject: [PATCH 03/22] Added: models-dev cache container and blocking runtime
 support

---
 .github/workflows/rust.yml                    |   3 +
 src/.cargo/verify.ps1                         |   5 +-
 src/.cargo/verify.sh                          |   5 +-
 src/Cargo.lock                                |  51 +++
 src/llm-coding-tools-models-dev/Cargo.toml    |  22 +-
 src/llm-coding-tools-models-dev/README.md     |  59 ++-
 .../src/cache/format.rs                       | 384 ++++++++++++++++++
 .../src/cache/mod.rs                          |  17 +-
 .../src/cache/path.rs                         |   8 +-
 .../src/catalog/mod.rs                        |  24 ++
 src/llm-coding-tools-models-dev/src/error.rs  |   4 +
 .../src/fs/blocking_impl.rs                   |  34 ++
 src/llm-coding-tools-models-dev/src/fs/mod.rs |  45 ++
 .../src/fs/tokio_impl.rs                      |  35 ++
 src/llm-coding-tools-models-dev/src/lib.rs    |   8 +
 15 files changed, 673 insertions(+), 31 deletions(-)
 create mode 100644 src/llm-coding-tools-models-dev/src/cache/format.rs
 create mode 100644 src/llm-coding-tools-models-dev/src/fs/blocking_impl.rs
 create mode 100644 src/llm-coding-tools-models-dev/src/fs/mod.rs
 create mode 100644 src/llm-coding-tools-models-dev/src/fs/tokio_impl.rs

diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index f24cfd1f..ee44bdc9 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -127,6 +127,7 @@ jobs:
           use-cross: ${{ matrix.use-cross }}
           packages: |
             llm-coding-tools-core
+            llm-coding-tools-models-dev
           no-default-features: true
           features: "blocking"
 
@@ -137,12 +138,14 @@ jobs:
           RUSTDOCFLAGS: "-D warnings"
         run: |
           cargo doc -p llm-coding-tools-core --no-default-features --features blocking --document-private-items --no-deps --target ${{ matrix.target }}
+          cargo doc -p llm-coding-tools-models-dev --no-default-features --features blocking --document-private-items --no-deps --target ${{ matrix.target }}
 
       - name: Run linter (Blocking)
         if: github.event_name == 'pull_request' || startsWith(github.ref, 'refs/tags/')
         working-directory: src
         run: |
           cargo clippy -p llm-coding-tools-core --no-default-features --features blocking --target ${{ matrix.target }} -- -D warnings
+          cargo clippy -p llm-coding-tools-models-dev --no-default-features --features blocking --target ${{ matrix.target }} -- -D warnings
 
       - name: Run formatter check
         uses: actions-rust-lang/rustfmt@v1
diff --git a/src/.cargo/verify.ps1 b/src/.cargo/verify.ps1
index b90d0507..912389f1 100644
--- a/src/.cargo/verify.ps1
+++ b/src/.cargo/verify.ps1
@@ -2,8 +2,8 @@
 # All steps must pass without warnings
 # Keep in sync with verify.sh
 #
-# Note: llm-coding-tools-serdesai and llm-coding-tools-models-dev are async-only.
-# The blocking feature only applies to llm-coding-tools-core.
+# Note: llm-coding-tools-serdesai is async-only.
+# Blocking mode is validated for core and models-dev.
 
 $ErrorActionPreference = "Stop"
 
@@ -51,6 +51,7 @@ Invoke-LoggedCommand "cargo" @("clippy", "-p", "llm-coding-tools-models-dev", "-
 
 Write-Host "Testing blocking feature..."
 Invoke-LoggedCommand "cargo" @("test", "-p", "llm-coding-tools-core", "--no-default-features", "--features", "blocking", "--quiet")
+Invoke-LoggedCommand "cargo" @("test", "-p", "llm-coding-tools-models-dev", "--no-default-features", "--features", "blocking", "--quiet")
 
 Write-Host "Docs..."
 $originalRustdocFlags = $env:RUSTDOCFLAGS
diff --git a/src/.cargo/verify.sh b/src/.cargo/verify.sh
index a6f4f416..eefd0e0b 100755
--- a/src/.cargo/verify.sh
+++ b/src/.cargo/verify.sh
@@ -3,8 +3,8 @@
 # All steps must pass without warnings
 # Keep in sync with verify.ps1
 #
-# Note: llm-coding-tools-serdesai and llm-coding-tools-models-dev are async-only.
-# The blocking feature only applies to llm-coding-tools-core.
+# Note: llm-coding-tools-serdesai is async-only.
+# Blocking mode is validated for core and models-dev.
 
 set -e
 
@@ -40,6 +40,7 @@ run_cmd cargo clippy -p llm-coding-tools-models-dev --quiet -- -D warnings
 
 echo "Testing blocking feature..."
 run_cmd cargo test -p llm-coding-tools-core --no-default-features --features blocking --quiet
+run_cmd cargo test -p llm-coding-tools-models-dev --no-default-features --features blocking --quiet
 
 echo "Docs..."
 run_cmd env RUSTDOCFLAGS="-D warnings" cargo doc --workspace --no-deps --quiet
diff --git a/src/Cargo.lock b/src/Cargo.lock
index 08986572..25cb087f 100644
--- a/src/Cargo.lock
+++ b/src/Cargo.lock
@@ -360,6 +360,16 @@ dependencies = [
  "memchr",
 ]
 
+[[package]]
+name = "concat-idents"
+version = "1.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f76990911f2267d837d9d0ad060aa63aaad170af40904b29461734c339030d4d"
+dependencies = [
+ "quote",
+ "syn",
+]
+
 [[package]]
 name = "const-random"
 version = "0.1.18"
@@ -652,6 +662,29 @@ dependencies = [
  "encoding_rs",
 ]
 
+[[package]]
+name = "endian-writer"
+version = "2.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b5fba6714ed232b3a46d07255c9cb2d20e9a8aee06a20d5d2e3eb4e2b48d28ae"
+dependencies = [
+ "concat-idents",
+ "paste",
+]
+
+[[package]]
+name = "endian-writer-derive"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "160e7b32d1a63d6f02993f5ce2da2b7125480ae40c45d9a0b74d158f203f7e53"
+dependencies = [
+ "endian-writer",
+ "memoffset",
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
 [[package]]
 name = "equivalent"
 version = "1.0.2"
@@ -1468,7 +1501,10 @@ version = "0.1.0"
 dependencies = [
  "bitcode",
  "dirs",
+ "endian-writer",
+ "endian-writer-derive",
  "llm-coding-tools-core",
+ "maybe-async",
  "reqwest 0.13.1",
  "serde",
  "tempfile",
@@ -1562,6 +1598,15 @@ dependencies = [
  "libc",
 ]
 
+[[package]]
+name = "memoffset"
+version = "0.9.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a"
+dependencies = [
+ "autocfg",
+]
+
 [[package]]
 name = "mime"
 version = "0.3.17"
@@ -1673,6 +1718,12 @@ dependencies = [
  "windows-link",
 ]
 
+[[package]]
+name = "paste"
+version = "1.0.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a"
+
 [[package]]
 name = "percent-encoding"
 version = "2.3.2"
diff --git a/src/llm-coding-tools-models-dev/Cargo.toml b/src/llm-coding-tools-models-dev/Cargo.toml
index 5ee01d31..4aab2702 100644
--- a/src/llm-coding-tools-models-dev/Cargo.toml
+++ b/src/llm-coding-tools-models-dev/Cargo.toml
@@ -10,14 +10,21 @@ readme = "README.md"
 
 [features]
 default = ["tokio"]
+# Base async signatures (enabled by runtime features)
+async = []
 # Async with tokio runtime
-tokio = ["dep:tokio", "dep:reqwest"]
+tokio = ["async", "dep:tokio", "dep:reqwest", "llm-coding-tools-core/tokio"]
+# Blocking/sync mode - mutually exclusive with tokio/async
+blocking = [
+    "dep:reqwest",
+    "reqwest/blocking",
+    "llm-coding-tools-core/blocking",
+    "maybe-async/is_sync",
+]
 
 [dependencies]
 # Core library for ModelCatalog and related types
-llm-coding-tools-core = { path = "../llm-coding-tools-core", version = "0.2.0", features = [
-    "tokio",
-] }
+llm-coding-tools-core = { path = "../llm-coding-tools-core", version = "0.2.0", default-features = false }
 
 # Cross-platform cache directory detection
 dirs = "6.0.0"
@@ -34,6 +41,13 @@ bitcode = "0.6.9"
 # Compression for cache payload
 zstd = "0.13.3"
 
+# Shared async/sync implementation for load/cache APIs
+maybe-async = "0.2"
+
+# Endian-aware fixed-header serialization helpers
+endian-writer = "2.2.0"
+endian-writer-derive = "0.1.0"
+
 # JSON parsing for models.dev API responses
 serde = { version = "1.0.228", features = ["derive"] }
 
diff --git a/src/llm-coding-tools-models-dev/README.md b/src/llm-coding-tools-models-dev/README.md
index 8ea3ad47..353a16e5 100644
--- a/src/llm-coding-tools-models-dev/README.md
+++ b/src/llm-coding-tools-models-dev/README.md
@@ -1,32 +1,32 @@
 # llm-coding-tools-models-dev
 
-models.dev catalog ingestion with online-first sync and local cache fallback.
-
-This crate loads provider/model data from models.dev and builds a
-`llm_coding_tools_core::models::ModelCatalog`.
+Reads the online models.dev catalog into llm-coding-tools-core; with support
+for a cached fallback and caching via ETag(s).
 
 ## Why this exists
 
-If you run coding agents against many providers, you usually want all three:
+If you run coding agents against many providers, you want to have fresh data.
+[models.dev][models.dev] is one such source of data.
 
-- Fresh data when online.
-- A reliable fallback when offline.
-- A compact cache that is cheap to load.
+This crate has the sufficient code to download from models.dev, distill down only
+the relevant data we need; and create a llm_coding_tools_core `ModelCatalog`.
 
-That is the core goal here.
+## Usage
 
-## What it does
+### Load flow (simple)
 
-- **Online-first sync**: Sends conditional requests with `If-None-Match` and reuses cache on `304 Not Modified`.
-- **Implicit fallback**: If network sync fails, loads the last valid cache automatically.
-- **Compact storage**: Stores cache as prelude + ETag + `zstd(bitcode(payload))`.
-- **Minimal API**: Exposes `ModelsDevCatalog::load()` and `ModelsDevCatalog::load_at(...)`.
+1. Read cache header (if present) and get the old ETag.
+2. Send request to models.dev with `If-None-Match` when ETag exists.
+3. If server returns `304 Not Modified`, load catalog from cache.
+4. If server returns `200 OK`, parse and normalize JSON, write fresh cache, then build catalog.
+5. If network fails, try cached data as fallback; if no valid cache exists, return an error.
 
-## Usage
+### Non-blocking (`tokio`)
 
 ```rust
 use llm_coding_tools_models_dev::{CatalogLoadSource, ModelsDevCatalog};
 
+#[cfg(feature = "tokio")]
 async fn load_catalog() -> Result<(), Box<dyn std::error::Error>> {
     let result = ModelsDevCatalog::load().await?;
 
@@ -45,6 +45,30 @@ async fn load_catalog() -> Result<(), Box<dyn std::error::Error>> {
 }
 ```
 
+### Blocking (`blocking`)
+
+```rust
+use llm_coding_tools_models_dev::{CatalogLoadSource, ModelsDevCatalog};
+
+#[cfg(feature = "blocking")]
+fn load_catalog() -> Result<(), Box<dyn std::error::Error>> {
+    let result = ModelsDevCatalog::load()?;
+
+    match result.source {
+        CatalogLoadSource::Downloaded => println!("Downloaded fresh snapshot."),
+        CatalogLoadSource::NotModifiedCache => println!("Cache is already up to date."),
+        CatalogLoadSource::FallbackCache => println!("Network unavailable, using cached snapshot."),
+    }
+
+    if let Some(entry) = result.catalog.lookup("openai", "gpt-4") {
+        println!("provider api url: {}", entry.0.api_url);
+        println!("max input tokens: {}", entry.1.max_input);
+    }
+
+    Ok(())
+}
+```
+
 ## Cache location
 
 By default, cache is stored in the platform cache directory:
@@ -58,7 +82,12 @@ Set `LLM_CODING_TOOLS_MODELS_DEV_CACHE_PATH` to override this path.
 ## Feature flags
 
 - `tokio` (default): async runtime support.
+- `blocking`: synchronous runtime support.
+
+Exactly one runtime mode must be enabled.
 
 ## License
 
 Apache-2.0
+
+[models.dev]: https://models.dev
diff --git a/src/llm-coding-tools-models-dev/src/cache/format.rs b/src/llm-coding-tools-models-dev/src/cache/format.rs
new file mode 100644
index 00000000..4f910404
--- /dev/null
+++ b/src/llm-coding-tools-models-dev/src/cache/format.rs
@@ -0,0 +1,384 @@
+//! Cache container layout and read/write helpers.
+//!
+//! The on-disk layout for `models.dev.catalog.v1.cache` is:
+//!
+//! ```text
+//! [0..12)   12-byte fixed prelude:
+//!           - [0..4)   etag_len: u32 little-endian
+//!           - [4..8)   payload_len_compressed: u32 little-endian
+//!           - [8..12)  payload_len_decompressed: u32 little-endian
+//! [12..N)   raw ETag bytes (etag_len bytes, may be 0)
+//! [N..EOF)  compressed payload (rest of file)
+//! ```
+//!
+//! Versioning is keyed by filename (`*.v1.cache`), so this prelude carries
+//! lengths only and no magic marker.
+//! `payload_len_compressed` is retained so reads can detect unexpected file
+//! truncation before decode.
+//!
+//! Read path intentionally keeps payload compressed. We read the whole file in
+//! one pre-sized allocation, then parse/slice into `prelude`, `etag`, and
+//! `payload` views without additional copying.
+//!
+//! ## Safety
+//!
+//! Not a 'safe' parser. We assume the file was created by the user.
+//! There's no validation for erroneous data; e.g. malociously crafted headers.
+//! Only validation for accidental corruption/truncation (e.g., from partial writes) is included.
+
+use crate::{
+    error::{CatalogError, CatalogResult},
+    fs,
+};
+use endian_writer::{EndianReader, EndianWriter, HasSize, LittleEndianReader, LittleEndianWriter};
+use endian_writer_derive::EndianWritable;
+use std::mem::size_of;
+use std::path::Path;
+use std::ptr::copy_nonoverlapping;
+
+/// Fixed v1 prelude, encoded little-endian.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, EndianWritable)]
+#[repr(C)]
+struct CachePreludeV1 {
+    /// Length in bytes of the optional ETag block.
+    etag_len: u32,
+    /// Length in bytes of compressed payload as written to disk.
+    payload_len_compressed: u32,
+    /// Length in bytes after decompression.
+    payload_len_decompressed: u32,
+}
+
+/// Input parameters for writing a cache container.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub(crate) struct CacheWriteInput<'a> {
+    /// Optional ETag bytes (e.g., HTTP ETag value).
+    pub(crate) etag: Option<&'a [u8]>,
+    /// Compressed payload bytes.
+    pub(crate) payload_compressed: &'a [u8],
+    /// Expected decompressed payload length in bytes.
+    pub(crate) payload_len_decompressed: usize,
+}
+
+/// Fixed prelude size for v1.
+const CACHE_HEADER_LEN: usize = <CachePreludeV1 as HasSize>::SIZE;
+
+// SAFETY: All modern platforms have usize >= 32 bits.
+// This lets us safely cast u32 lengths to usize without checked arithmetic.
+const _: () = assert!(size_of::<usize>() >= size_of::<u32>());
+
+/// Raw cache blocks extracted from disk.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub(crate) struct CacheFileData {
+    /// Prefix length of ETag bytes after the fixed prelude.
+    etag_len: u32,
+    /// Length in bytes of compressed payload from prelude.
+    payload_len_compressed: u32,
+    /// Size hint for the eventual decompressed payload allocation.
+    payload_len_decompressed: u32,
+    /// Full file bytes laid out as `prelude || etag || payload_compressed`.
+    file_bytes: Box<[u8]>,
+}
+
+impl CacheFileData {
+    /// Returns the optional ETag as a borrowed byte slice.
+    #[inline]
+    pub(crate) fn etag_bytes(&self) -> Option<&[u8]> {
+        let etag_start = CACHE_HEADER_LEN;
+        let etag_end = CACHE_HEADER_LEN + self.etag_len as usize;
+        let etag = &self.file_bytes[etag_start..etag_end];
+        if etag.is_empty() {
+            None
+        } else {
+            Some(etag)
+        }
+    }
+
+    /// Returns compressed payload bytes as a borrowed slice.
+    #[inline]
+    pub(crate) fn payload_compressed(&self) -> &[u8] {
+        let payload_start = CACHE_HEADER_LEN + self.etag_len as usize;
+        &self.file_bytes[payload_start..]
+    }
+
+    /// Returns compressed payload length in bytes.
+    #[inline]
+    pub(crate) fn payload_len_compressed(&self) -> u32 {
+        self.payload_len_compressed
+    }
+
+    /// Returns expected decompressed payload length in bytes.
+    #[inline]
+    pub(crate) fn payload_len_decompressed(&self) -> u32 {
+        self.payload_len_decompressed
+    }
+}
+
+/// Reads a cache container from disk.
+///
+/// This reads only the prelude + raw blocks and does not decompress payload.
+/// Compressed payload length is validated against prelude metadata to catch
+/// unexpected truncation or trailing bytes before decode.
+///
+/// # Errors
+///
+/// Returns [`CatalogError::CacheFormat`] when the prelude is truncated, when
+/// encoded lengths overflow platform limits, or when declared block lengths do not
+/// match file contents.
+#[maybe_async::maybe_async]
+pub(crate) async fn read_cache_file(path: &Path) -> CatalogResult<CacheFileData> {
+    let file_bytes = fs::read(path).await?;
+    if file_bytes.len() < CACHE_HEADER_LEN {
+        return Err(CatalogError::CacheFormat("cache prelude is truncated"));
+    }
+
+    let prelude = decode_prelude(&file_bytes[..CACHE_HEADER_LEN]);
+    let etag_len = prelude.etag_len as usize;
+    let payload_len_compressed = prelude.payload_len_compressed as usize;
+    let expected_total = CACHE_HEADER_LEN + etag_len + payload_len_compressed;
+
+    if file_bytes.len() != expected_total {
+        return Err(CatalogError::CacheFormat(
+            "cache file size mismatch (possible truncation or trailing data)",
+        ));
+    }
+
+    Ok(CacheFileData {
+        etag_len: prelude.etag_len,
+        payload_len_compressed: prelude.payload_len_compressed,
+        payload_len_decompressed: prelude.payload_len_decompressed,
+        file_bytes,
+    })
+}
+
+/// Writes a cache container to disk.
+///
+/// # Errors
+///
+/// Returns [`CatalogError::CacheFormat`] if a block length exceeds v1 `u32`
+/// limits.
+#[maybe_async::maybe_async]
+pub(crate) async fn write_cache_file(
+    path: &Path,
+    input: &CacheWriteInput<'_>,
+) -> CatalogResult<()> {
+    if let Some(parent) = path.parent() {
+        fs::create_dir_all(parent).await?;
+    }
+
+    let etag_bytes = input.etag.unwrap_or(&[]);
+    let prelude = CachePreludeV1 {
+        etag_len: to_u32_limit(etag_bytes.len(), "etag exceeds v1 length limits")?,
+        payload_len_compressed: to_u32_limit(
+            input.payload_compressed.len(),
+            "compressed payload exceeds v1 length limits",
+        )?,
+        payload_len_decompressed: to_u32_limit(
+            input.payload_len_decompressed,
+            "decompressed payload exceeds v1 length limits",
+        )?,
+    };
+
+    let encoded_prelude = encode_prelude(prelude);
+
+    let encoded_len = CACHE_HEADER_LEN
+        .checked_add(etag_bytes.len())
+        .and_then(|value| value.checked_add(input.payload_compressed.len()))
+        .ok_or(CatalogError::CacheFormat(
+            "cache file exceeds platform length limits",
+        ))?;
+
+    let mut uninit = fs::alloc_uninit_u8_slice(encoded_len);
+    let ptr = uninit.as_mut_ptr().cast::<u8>();
+
+    unsafe {
+        copy_nonoverlapping(encoded_prelude.as_ptr(), ptr, CACHE_HEADER_LEN);
+        copy_nonoverlapping(
+            etag_bytes.as_ptr(),
+            ptr.add(CACHE_HEADER_LEN),
+            etag_bytes.len(),
+        );
+        copy_nonoverlapping(
+            input.payload_compressed.as_ptr(),
+            ptr.add(CACHE_HEADER_LEN + etag_bytes.len()),
+            input.payload_compressed.len(),
+        );
+    }
+
+    let file_bytes = fs::assume_init_u8_slice(uninit);
+    fs::write(path, &file_bytes).await?;
+    Ok(())
+}
+
+#[inline]
+fn to_u32_limit(value: usize, msg: &'static str) -> CatalogResult<u32> {
+    u32::try_from(value).map_err(|_| CatalogError::CacheFormat(msg))
+}
+
+/// Encodes prelude into little-endian bytes.
+#[inline]
+fn encode_prelude(prelude: CachePreludeV1) -> [u8; CACHE_HEADER_LEN] {
+    let mut bytes = [0_u8; CACHE_HEADER_LEN];
+    // SAFETY: `bytes` has exactly the derived serialized size of `CachePreludeV1`.
+    unsafe {
+        let mut writer = LittleEndianWriter::new(bytes.as_mut_ptr());
+        writer.write(&prelude);
+    }
+    bytes
+}
+
+/// Decodes prelude from little-endian bytes.
+#[inline]
+fn decode_prelude(bytes: &[u8]) -> CachePreludeV1 {
+    // SAFETY: Caller guarantees `bytes` is at least `CACHE_PRELUDE_LEN`.
+    unsafe {
+        let mut reader = LittleEndianReader::new(bytes.as_ptr());
+        reader.read()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use tempfile::TempDir;
+
+    // Verifies prelude encoding/decoding preserves all fields.
+    #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))]
+    async fn prelude_layout_round_trips() {
+        let prelude = CachePreludeV1 {
+            etag_len: 13,
+            payload_len_compressed: 44,
+            payload_len_decompressed: 333,
+        };
+
+        let round_trip = decode_prelude(&encode_prelude(prelude));
+        assert_eq!(round_trip, prelude);
+    }
+
+    // Verifies full round-trip with ETag included.
+    #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))]
+    async fn write_then_read_round_trips_with_etag() {
+        let temp = TempDir::new().expect("tempdir");
+        let path = temp.path().join("models.dev.catalog.v1.cache");
+
+        let input = CacheWriteInput {
+            etag: Some(b"etag-123"),
+            payload_compressed: b"payload-zstd-bytes",
+            payload_len_decompressed: 2048,
+        };
+        write_cache_file(&path, &input).await.expect("write cache");
+        let data = read_cache_file(&path).await.expect("read cache");
+
+        assert_eq!(data.etag_bytes(), input.etag);
+        assert_eq!(data.payload_compressed(), input.payload_compressed);
+        assert_eq!(
+            data.payload_len_compressed(),
+            input.payload_compressed.len() as u32
+        );
+        assert_eq!(
+            data.payload_len_decompressed(),
+            input.payload_len_decompressed as u32
+        );
+    }
+
+    // Verifies full round-trip without ETag.
+    #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))]
+    async fn write_then_read_round_trips_without_etag() {
+        let temp = TempDir::new().expect("tempdir");
+        let path = temp.path().join("models.dev.catalog.v1.cache");
+
+        let input = CacheWriteInput {
+            etag: None,
+            payload_compressed: b"payload-only",
+            payload_len_decompressed: 1024,
+        };
+        write_cache_file(&path, &input).await.expect("write cache");
+        let data = read_cache_file(&path).await.expect("read cache");
+
+        assert_eq!(data.etag_bytes(), input.etag);
+        assert_eq!(data.payload_compressed(), input.payload_compressed);
+        assert_eq!(
+            data.payload_len_decompressed(),
+            input.payload_len_decompressed as u32
+        );
+    }
+
+    // Rejects files shorter than the fixed header.
+    #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))]
+    async fn read_rejects_truncated_prelude() {
+        // File is 1 byte shorter than required header
+        let temp = TempDir::new().expect("tempdir");
+        let path = temp.path().join("short-prelude.cache");
+
+        std::fs::write(&path, [0_u8; CACHE_HEADER_LEN - 1]).expect("write fixture");
+        let error = read_cache_file(&path)
+            .await
+            .expect_err("truncated prelude should fail");
+        assert!(matches!(error, CatalogError::CacheFormat(_)));
+    }
+
+    // Rejects when file ends before etag_len bytes after header.
+    #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))]
+    async fn read_rejects_short_etag_length() {
+        // Header claims 12 bytes of etag but only 4 provided
+        let temp = TempDir::new().expect("tempdir");
+        let path = temp.path().join("short-etag.cache");
+
+        let prelude = CachePreludeV1 {
+            etag_len: 12,
+            payload_len_compressed: 0,
+            payload_len_decompressed: 0,
+        };
+        let mut bytes = encode_prelude(prelude).to_vec();
+        bytes.extend_from_slice(b"tiny"); // 'tiny' etag is 4 bytes
+        std::fs::write(&path, bytes).expect("write fixture");
+
+        // Header claims 12 bytes of etag but only 4 'tiny' provided, so 8 bytes short.
+        let error = read_cache_file(&path)
+            .await
+            .expect_err("short etag should fail");
+        assert!(matches!(error, CatalogError::CacheFormat(_)));
+    }
+
+    // Accepts minimal valid file with all zero-length fields.
+    #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))]
+    async fn read_supports_empty_etag_and_payload() {
+        let temp = TempDir::new().expect("tempdir");
+        let path = temp.path().join("empty.cache");
+
+        let prelude = CachePreludeV1 {
+            etag_len: 0,
+            payload_len_compressed: 0,
+            payload_len_decompressed: 0,
+        };
+        std::fs::write(&path, encode_prelude(prelude)).expect("write fixture");
+        let data = read_cache_file(&path).await.expect("read empty cache");
+
+        assert_eq!(data.etag_bytes(), None);
+        assert!(data.payload_compressed().is_empty());
+        assert_eq!(data.payload_len_compressed(), 0);
+        assert_eq!(data.payload_len_decompressed(), 0);
+    }
+
+    // Rejects when declared compressed payload length does not match file size.
+    #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))]
+    async fn read_rejects_mismatched_payload_length() {
+        // Header claims 10 bytes payload but only 5 provided
+        let temp = TempDir::new().expect("tempdir");
+        let path = temp.path().join("mismatched-payload-len.cache");
+
+        let prelude = CachePreludeV1 {
+            etag_len: 4,
+            payload_len_compressed: 10,
+            payload_len_decompressed: 0,
+        };
+        let mut bytes = encode_prelude(prelude).to_vec();
+        bytes.extend_from_slice(b"etag");
+        bytes.extend_from_slice(b"short"); // only 5 bytes, not 10 here.
+        std::fs::write(&path, bytes).expect("write fixture");
+
+        let error = read_cache_file(&path)
+            .await
+            .expect_err("payload length mismatch should fail");
+        assert!(matches!(error, CatalogError::CacheFormat(_)));
+    }
+}
diff --git a/src/llm-coding-tools-models-dev/src/cache/mod.rs b/src/llm-coding-tools-models-dev/src/cache/mod.rs
index e1ae9be5..5043b9a5 100644
--- a/src/llm-coding-tools-models-dev/src/cache/mod.rs
+++ b/src/llm-coding-tools-models-dev/src/cache/mod.rs
@@ -1,8 +1,19 @@
-//! Cache path resolution and management.
+//! Cache path and container utilities for models.dev snapshots.
 //!
-//! This module handles cross-platform cache directory detection and
-//! the default cache file path for models.dev catalogs.
+//! Responsibilities are split by concern:
+//!
+//! - `path` resolves the shared cache location.
+//! - `format` defines the cache container layout and read/write helpers.
+//!
+//! Runtime behavior follows crate features:
+//! - `tokio` (default): async file I/O APIs.
+//! - `blocking`: sync file I/O APIs.
+//!
+//! The public API currently exposes path resolution only; container helpers are
+//! crate-internal until the sync/load flow is wired.
 
+#[allow(dead_code)] // Wired into the load/sync path down the road
+pub(crate) mod format;
 mod path;
 
 pub use crate::error::CatalogResult;
diff --git a/src/llm-coding-tools-models-dev/src/cache/path.rs b/src/llm-coding-tools-models-dev/src/cache/path.rs
index 06a43a7e..9aa5bf58 100644
--- a/src/llm-coding-tools-models-dev/src/cache/path.rs
+++ b/src/llm-coding-tools-models-dev/src/cache/path.rs
@@ -1,13 +1,14 @@
 //! Cross-platform cache path resolution.
 
-#![allow(dead_code)]
-
 use crate::{error::CatalogResult, CatalogError};
 use std::path::PathBuf;
 
 /// Environment variable name for overriding the default cache path.
 pub const CACHE_PATH_ENV_VAR: &str = "LLM_CODING_TOOLS_MODELS_DEV_CACHE_PATH";
 
+const CACHE_SUBDIR: &str = "llm-coding-tools";
+const CACHE_FILENAME: &str = "models.dev.catalog.v1.cache";
+
 /// Returns the shared cache path for the models.dev catalog.
 ///
 /// This function determines the appropriate cache location using the following
@@ -43,9 +44,6 @@ pub const CACHE_PATH_ENV_VAR: &str = "LLM_CODING_TOOLS_MODELS_DEV_CACHE_PATH";
 /// # Ok(())
 /// # }
 /// ```
-const CACHE_SUBDIR: &str = "llm-coding-tools";
-const CACHE_FILENAME: &str = "models.dev.catalog.v1.cache";
-
 pub fn shared_cache_path() -> CatalogResult<PathBuf> {
     // 1. Check env var first
     if let Ok(path) = std::env::var(CACHE_PATH_ENV_VAR) {
diff --git a/src/llm-coding-tools-models-dev/src/catalog/mod.rs b/src/llm-coding-tools-models-dev/src/catalog/mod.rs
index 2a826c81..af2a864e 100644
--- a/src/llm-coding-tools-models-dev/src/catalog/mod.rs
+++ b/src/llm-coding-tools-models-dev/src/catalog/mod.rs
@@ -50,6 +50,7 @@ impl ModelsDevCatalog {
     /// ```
     /// use llm_coding_tools_models_dev::ModelsDevCatalog;
     ///
+    /// # #[cfg(feature = "tokio")]
     /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
     /// let result = ModelsDevCatalog::load().await?;
     ///
@@ -59,7 +60,18 @@ impl ModelsDevCatalog {
     /// }
     /// # Ok(())
     /// # }
+    ///
+    /// # #[cfg(feature = "blocking")]
+    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
+    /// # let result = ModelsDevCatalog::load()?;
+    /// // Use the catalog
+    /// # if let Some(entry) = result.catalog.lookup("openai", "gpt-4") {
+    /// #     println!("API URL: {}", entry.0.api_url);
+    /// # }
+    /// # Ok(())
+    /// # }
     /// ```
+    #[maybe_async::maybe_async]
     pub async fn load() -> Result<CatalogLoadResult, CatalogError> {
         todo!("ModelsDevCatalog::load() not yet implemented")
     }
@@ -95,6 +107,7 @@ impl ModelsDevCatalog {
     /// use llm_coding_tools_models_dev::ModelsDevCatalog;
     /// use std::path::PathBuf;
     ///
+    /// # #[cfg(feature = "tokio")]
     /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
     /// let cache_path = PathBuf::from("/tmp/my-cache.cache");
     /// let result = ModelsDevCatalog::load_at(&cache_path).await?;
@@ -105,7 +118,18 @@ impl ModelsDevCatalog {
     /// }
     /// # Ok(())
     /// # }
+    ///
+    /// # #[cfg(feature = "blocking")]
+    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
+    /// # let cache_path = PathBuf::from("/tmp/my-cache.cache");
+    /// # let result = ModelsDevCatalog::load_at(&cache_path)?;
+    /// # if let Some(entry) = result.catalog.lookup("openai", "gpt-4") {
+    /// #     println!("API URL: {}", entry.0.api_url);
+    /// # }
+    /// # Ok(())
+    /// # }
     /// ```
+    #[maybe_async::maybe_async]
     pub async fn load_at(path: impl AsRef<Path>) -> Result<CatalogLoadResult, CatalogError> {
         let _path = path.as_ref();
         todo!("ModelsDevCatalog::load_at() not yet implemented")
diff --git a/src/llm-coding-tools-models-dev/src/error.rs b/src/llm-coding-tools-models-dev/src/error.rs
index 02f1b5c9..c4da4a20 100644
--- a/src/llm-coding-tools-models-dev/src/error.rs
+++ b/src/llm-coding-tools-models-dev/src/error.rs
@@ -30,6 +30,10 @@ pub enum CatalogError {
     #[error("decode error: {0}")]
     BitcodeDecode(String),
 
+    /// The on-disk cache file is malformed or incompatible.
+    #[error("cache format error: {0}")]
+    CacheFormat(&'static str),
+
     /// The catalog failed to build from source rows.
     #[error("catalog build error: {0}")]
     ModelCatalogBuild(#[from] ModelCatalogBuildError),
diff --git a/src/llm-coding-tools-models-dev/src/fs/blocking_impl.rs b/src/llm-coding-tools-models-dev/src/fs/blocking_impl.rs
new file mode 100644
index 00000000..5bfbc808
--- /dev/null
+++ b/src/llm-coding-tools-models-dev/src/fs/blocking_impl.rs
@@ -0,0 +1,34 @@
+//! Blocking/sync filesystem operations.
+
+use std::io::{ErrorKind, Read as _};
+use std::path::Path;
+
+/// Reads a file into memory in one pre-sized allocation.
+#[inline]
+pub(crate) fn read(path: impl AsRef<Path>) -> std::io::Result<Box<[u8]>> {
+    let mut file = std::fs::File::open(path)?;
+    let file_len_u64 = file.metadata()?.len();
+    let file_len = usize::try_from(file_len_u64).map_err(|_| {
+        std::io::Error::new(ErrorKind::InvalidData, "file is too large to fit in memory")
+    })?;
+
+    let mut bytes = super::alloc_uninit_u8_slice(file_len);
+    if file_len != 0 {
+        let buf = super::uninit_u8_slice_as_mut_bytes(&mut bytes);
+        file.read_exact(buf)?;
+    }
+
+    Ok(super::assume_init_u8_slice(bytes))
+}
+
+/// Writes all bytes to a file, creating or truncating it.
+#[inline]
+pub(crate) fn write(path: impl AsRef<Path>, bytes: &[u8]) -> std::io::Result<()> {
+    std::fs::write(path, bytes)
+}
+
+/// Creates a directory and all parent directories.
+#[inline]
+pub(crate) fn create_dir_all(path: impl AsRef<Path>) -> std::io::Result<()> {
+    std::fs::create_dir_all(path)
+}
diff --git a/src/llm-coding-tools-models-dev/src/fs/mod.rs b/src/llm-coding-tools-models-dev/src/fs/mod.rs
new file mode 100644
index 00000000..e08a6304
--- /dev/null
+++ b/src/llm-coding-tools-models-dev/src/fs/mod.rs
@@ -0,0 +1,45 @@
+//! Filesystem abstraction layer.
+//!
+//! Provides unified APIs that work with both sync and async runtimes.
+//! Exactly one runtime feature must be enabled:
+//! - `tokio`: Async operations using the tokio runtime
+//! - `blocking`: Synchronous operations
+
+use std::mem::MaybeUninit;
+
+#[cfg(all(feature = "tokio", feature = "blocking"))]
+compile_error!("Features `tokio` and `blocking` are mutually exclusive.");
+
+#[cfg(not(any(feature = "tokio", feature = "blocking")))]
+compile_error!("Either `tokio` or `blocking` feature must be enabled for the fs module.");
+
+/// Allocates an uninitialized boxed byte slice with logical length `len`.
+#[inline]
+pub(crate) fn alloc_uninit_u8_slice(len: usize) -> Box<[MaybeUninit<u8>]> {
+    Box::<[u8]>::new_uninit_slice(len)
+}
+
+/// Views an uninitialized `u8` slice as mutable bytes for initialization.
+#[inline]
+pub(crate) fn uninit_u8_slice_as_mut_bytes(bytes: &mut [MaybeUninit<u8>]) -> &mut [u8] {
+    // SAFETY: `MaybeUninit<u8>` has identical layout to `u8`; caller only uses
+    // returned slice for writes before reading.
+    unsafe { std::slice::from_raw_parts_mut(bytes.as_mut_ptr().cast::<u8>(), bytes.len()) }
+}
+
+/// Converts a fully-initialized boxed uninitialized slice into initialized bytes.
+#[inline]
+pub(crate) fn assume_init_u8_slice(bytes: Box<[MaybeUninit<u8>]>) -> Box<[u8]> {
+    // SAFETY: caller guarantees all bytes were initialized.
+    unsafe { bytes.assume_init() }
+}
+
+#[cfg(feature = "tokio")]
+mod tokio_impl;
+#[cfg(feature = "tokio")]
+pub(crate) use tokio_impl::*;
+
+#[cfg(feature = "blocking")]
+mod blocking_impl;
+#[cfg(feature = "blocking")]
+pub(crate) use blocking_impl::*;
diff --git a/src/llm-coding-tools-models-dev/src/fs/tokio_impl.rs b/src/llm-coding-tools-models-dev/src/fs/tokio_impl.rs
new file mode 100644
index 00000000..53474ce1
--- /dev/null
+++ b/src/llm-coding-tools-models-dev/src/fs/tokio_impl.rs
@@ -0,0 +1,35 @@
+//! Tokio-based async filesystem operations.
+
+use std::io::ErrorKind;
+use std::path::Path;
+use tokio::io::AsyncReadExt as _;
+
+/// Reads a file into memory in one pre-sized allocation.
+#[inline]
+pub(crate) async fn read(path: impl AsRef<Path>) -> std::io::Result<Box<[u8]>> {
+    let mut file = tokio::fs::File::open(path).await?;
+    let file_len_u64 = file.metadata().await?.len();
+    let file_len = usize::try_from(file_len_u64).map_err(|_| {
+        std::io::Error::new(ErrorKind::InvalidData, "file is too large to fit in memory")
+    })?;
+
+    let mut bytes = super::alloc_uninit_u8_slice(file_len);
+    if file_len != 0 {
+        let buf = super::uninit_u8_slice_as_mut_bytes(&mut bytes);
+        file.read_exact(buf).await?;
+    }
+
+    Ok(super::assume_init_u8_slice(bytes))
+}
+
+/// Writes all bytes to a file, creating or truncating it.
+#[inline]
+pub(crate) async fn write(path: impl AsRef<Path>, bytes: &[u8]) -> std::io::Result<()> {
+    tokio::fs::write(path, bytes).await
+}
+
+/// Creates a directory and all parent directories.
+#[inline]
+pub(crate) async fn create_dir_all(path: impl AsRef<Path>) -> std::io::Result<()> {
+    tokio::fs::create_dir_all(path).await
+}
diff --git a/src/llm-coding-tools-models-dev/src/lib.rs b/src/llm-coding-tools-models-dev/src/lib.rs
index a635ec71..0cdd10f5 100644
--- a/src/llm-coding-tools-models-dev/src/lib.rs
+++ b/src/llm-coding-tools-models-dev/src/lib.rs
@@ -1,8 +1,16 @@
 #![doc = include_str!(concat!("../", env!("CARGO_PKG_README")))]
 
+// Validate feature combinations at compile time.
+#[cfg(all(feature = "async", feature = "blocking"))]
+compile_error!("Features `async` and `blocking` are mutually exclusive.");
+
+#[cfg(not(any(feature = "async", feature = "blocking")))]
+compile_error!("Either an async runtime (e.g., `tokio`) or `blocking` feature must be enabled.");
+
 pub mod cache;
 pub mod catalog;
 pub mod error;
+mod fs;
 
 pub use cache::shared_cache_path;
 pub use catalog::{CatalogLoadResult, CatalogLoadSource, ModelsDevCatalog};

From a4eeca37db971813cce4323b1ae875ab718a5e9d Mon Sep 17 00:00:00 2001
From: Sewer56 <admin@sewer56.dev>
Date: Fri, 6 Mar 2026 14:23:27 +0000
Subject: [PATCH 04/22] Refactor: Use vec![] instead of with_capacity + push in
 test

---
 .../src/models/catalog/internal/builder.rs                   | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/llm-coding-tools-core/src/models/catalog/internal/builder.rs b/src/llm-coding-tools-core/src/models/catalog/internal/builder.rs
index eb6b1fe5..3605bede 100644
--- a/src/llm-coding-tools-core/src/models/catalog/internal/builder.rs
+++ b/src/llm-coding-tools-core/src/models/catalog/internal/builder.rs
@@ -730,12 +730,11 @@ mod tests {
             ));
         }
         let last_provider_key = format!("provider_{}", 5461usize);
-        let mut provider_models = Vec::with_capacity(1);
-        provider_models.push(provider_model_source(
+        let provider_models = vec![ProviderModelSource::new(
             &last_provider_key,
             "m1",
             info(4096, 512),
-        ));
+        )];
 
         let catalog =
             build_from_source(&providers, &provider_models).expect("boundary case should pass");

From d44a4d2e4699803e1b5eee3319dc8ebe66d26d0c Mon Sep 17 00:00:00 2001
From: Sewer56 <admin@sewer56.dev>
Date: Fri, 6 Mar 2026 18:19:41 +0000
Subject: [PATCH 05/22] Changed: Use borrowed strings in ProviderModelSource
 and add models.dev API mapping

Optimize catalog construction by borrowing string keys instead of allocating, and add direct API parsing for models.dev data.

Changes:
- ProviderModelSource now borrows `&'a str` keys instead of owning `String`s
- Added api module with schema parsing and catalog source mapping
- Added serde_json dependency for API parsing
- Updated benchmarks to work with borrowed strings
- Refined documentation to reflect mapping approach

Benefits:
- Reduces allocations during catalog construction
- Enables direct mapping from models.dev API to catalog
- Keeps parse memory bounded by ignoring unused fields
---
 src/Cargo.lock                                |   1 +
 .../benches/model_catalog_builder.rs          |  48 +-
 .../src/models/catalog/internal/builder.rs    |  31 +-
 .../src/models/catalog/mod.rs                 |   4 +-
 .../models/catalog/public/builder_types.rs    |  31 +-
 src/llm-coding-tools-models-dev/Cargo.toml    |   1 +
 src/llm-coding-tools-models-dev/README.md     |  10 +-
 .../src/api/catalog_sources.rs                | 526 ++++++++++++++++++
 .../src/api/mod.rs                            |  12 +
 .../src/api/schema.rs                         | 153 +++++
 .../src/cache/mod.rs                          |   2 +-
 .../src/catalog/mod.rs                        |   5 +-
 src/llm-coding-tools-models-dev/src/error.rs  |   4 +
 src/llm-coding-tools-models-dev/src/lib.rs    |   2 +
 14 files changed, 778 insertions(+), 52 deletions(-)
 create mode 100644 src/llm-coding-tools-models-dev/src/api/catalog_sources.rs
 create mode 100644 src/llm-coding-tools-models-dev/src/api/mod.rs
 create mode 100644 src/llm-coding-tools-models-dev/src/api/schema.rs

diff --git a/src/Cargo.lock b/src/Cargo.lock
index 25cb087f..7dee5401 100644
--- a/src/Cargo.lock
+++ b/src/Cargo.lock
@@ -1507,6 +1507,7 @@ dependencies = [
  "maybe-async",
  "reqwest 0.13.1",
  "serde",
+ "serde_json",
  "tempfile",
  "thiserror 2.0.18",
  "tokio",
diff --git a/src/llm-coding-tools-core/benches/model_catalog_builder.rs b/src/llm-coding-tools-core/benches/model_catalog_builder.rs
index 4aedbac3..b9fcdd7d 100644
--- a/src/llm-coding-tools-core/benches/model_catalog_builder.rs
+++ b/src/llm-coding-tools-core/benches/model_catalog_builder.rs
@@ -7,9 +7,32 @@ use llm_coding_tools_core::models::{
     ProviderType,
 };
 
+struct ProviderModelSpec {
+    provider_idx: usize,
+    model_key: String,
+    model: ModelInfo,
+}
+
 struct Dataset {
     providers: Vec<ProviderSource>,
-    provider_models: Vec<ProviderModelSource>,
+    provider_models: Vec<ProviderModelSpec>,
+}
+
+impl Dataset {
+    fn provider_model_sources(&self) -> Vec<ProviderModelSource<'_>> {
+        let mut sources = Vec::with_capacity(self.provider_models.len());
+        for provider_model in &self.provider_models {
+            let provider_key = self.providers[provider_model.provider_idx]
+                .provider_key
+                .as_str();
+            sources.push(ProviderModelSource::new(
+                provider_key,
+                provider_model.model_key.as_str(),
+                provider_model.model,
+            ));
+        }
+        sources
+    }
 }
 
 fn make_dataset(provider_count: usize, model_count: usize) -> Dataset {
@@ -47,17 +70,17 @@ fn make_dataset(provider_count: usize, model_count: usize) -> Dataset {
             None
         };
 
-        provider_models.push(ProviderModelSource::new(
-            format!("provider-{provider_idx}"),
-            format!("org-{}/model-{i}", i % 17),
-            ModelInfo {
+        provider_models.push(ProviderModelSpec {
+            provider_idx,
+            model_key: format!("org-{}/model-{i}", i % 17),
+            model: ModelInfo {
                 modalities: Modality::TEXT,
                 max_input: 4096 + ((cfg as u32) * 32),
                 max_output: 512 + ((cfg as u32) * 8),
                 temperature,
                 top_p,
             },
-        ));
+        });
     }
 
     Dataset {
@@ -66,9 +89,8 @@ fn make_dataset(provider_count: usize, model_count: usize) -> Dataset {
     }
 }
 
-fn construct_batch(dataset: &Dataset) {
-    let catalog =
-        ModelCatalog::build(&dataset.providers, &dataset.provider_models).expect("batch build");
+fn construct_batch(providers: &[ProviderSource], provider_models: &[ProviderModelSource<'_>]) {
+    let catalog = ModelCatalog::build(providers, provider_models).expect("batch build");
 
     black_box((
         catalog.provider_count(),
@@ -85,12 +107,18 @@ fn benchmark_builder_construction(c: &mut Criterion) {
         ("max", 16384usize, 65535usize),
     ] {
         let dataset = make_dataset(provider_count, model_count);
+        let provider_model_sources = dataset.provider_model_sources();
         group.throughput(Throughput::Elements(
             (provider_count + dataset.provider_models.len()) as u64,
         ));
 
         group.bench_with_input(BenchmarkId::new("batch", name), &dataset, |b, input| {
-            b.iter(|| construct_batch(black_box(input)))
+            b.iter(|| {
+                construct_batch(
+                    black_box(&input.providers),
+                    black_box(&provider_model_sources),
+                )
+            })
         });
     }
 
diff --git a/src/llm-coding-tools-core/src/models/catalog/internal/builder.rs b/src/llm-coding-tools-core/src/models/catalog/internal/builder.rs
index 3605bede..02a412d4 100644
--- a/src/llm-coding-tools-core/src/models/catalog/internal/builder.rs
+++ b/src/llm-coding-tools-core/src/models/catalog/internal/builder.rs
@@ -68,7 +68,7 @@ fn build_state_with_capacity(
 #[inline]
 pub(crate) fn build_from_source(
     providers: &[ProviderSource],
-    provider_models: &[ProviderModelSource],
+    provider_models: &[ProviderModelSource<'_>],
 ) -> Result<ModelCatalog, ModelCatalogBuildError> {
     let provider_stats = analyze_provider_sources(providers)?;
     let mut state = build_state_with_capacity(provider_stats.provider_count, provider_models.len());
@@ -90,7 +90,7 @@ pub(crate) fn build_from_source(
 fn populate_tables_once(
     state: &mut BuildState,
     providers: &[ProviderSource],
-    provider_models: &[ProviderModelSource],
+    provider_models: &[ProviderModelSource<'_>],
 ) -> Result<(), ModelCatalogBuildError> {
     let mut env_start: u16 = 0;
     let mut provider_idx_by_key: AHashMap<&str, ProviderIdx> =
@@ -127,18 +127,15 @@ fn populate_tables_once(
 
     for provider_model in provider_models {
         // Validate provider exists before inserting model.
-        if !provider_idx_by_key.contains_key(provider_model.provider_key.as_str()) {
+        if !provider_idx_by_key.contains_key(provider_model.provider_key) {
             return Err(ModelCatalogBuildError::ProviderKeyNotFoundForModel {
-                provider_key: provider_model.provider_key.clone(),
-                model_key: provider_model.model_key.clone(),
+                provider_key: provider_model.provider_key.to_owned(),
+                model_key: provider_model.model_key.to_owned(),
             });
         }
 
         // Check for duplicate (provider_key, model_key) pair.
-        let key = (
-            provider_model.provider_key.as_str(),
-            provider_model.model_key.as_str(),
-        );
+        let key = (provider_model.provider_key, provider_model.model_key);
         if !seen_provider_models.insert(key) {
             return Err(ModelCatalogBuildError::DuplicateKey {
                 table: LookupTableKind::ProviderModel,
@@ -197,7 +194,7 @@ fn insert_provider(
 #[inline]
 fn insert_provider_model(
     state: &mut BuildState,
-    provider_model: &ProviderModelSource,
+    provider_model: &ProviderModelSource<'_>,
 ) -> Result<(), ModelCatalogBuildError> {
     let info = provider_model.model;
 
@@ -240,8 +237,8 @@ fn insert_provider_model(
 
     let key = hash_provider_model_key(
         &state.hash_state,
-        &provider_model.provider_key,
-        &provider_model.model_key,
+        provider_model.provider_key,
+        provider_model.model_key,
     );
     let hash48 = PackedProviderModelTableEntry::truncate_hash48(key.as_u64());
 
@@ -454,15 +451,15 @@ mod tests {
         ProviderSource::new(provider_key, provider)
     }
 
-    fn provider_model_source(
-        provider_key: &str,
-        model_key: &str,
+    fn provider_model_source<'a>(
+        provider_key: &'a str,
+        model_key: &'a str,
         model: ModelInfo,
-    ) -> ProviderModelSource {
+    ) -> ProviderModelSource<'a> {
         ProviderModelSource::new(provider_key, model_key, model)
     }
 
-    fn test_sources() -> (Vec<ProviderSource>, Vec<ProviderModelSource>) {
+    fn test_sources() -> (Vec<ProviderSource>, Vec<ProviderModelSource<'static>>) {
         (
             vec![provider_source(
                 "alpha",
diff --git a/src/llm-coding-tools-core/src/models/catalog/mod.rs b/src/llm-coding-tools-core/src/models/catalog/mod.rs
index 9d99f090..4d7095c6 100644
--- a/src/llm-coding-tools-core/src/models/catalog/mod.rs
+++ b/src/llm-coding-tools-core/src/models/catalog/mod.rs
@@ -298,7 +298,7 @@ impl ModelCatalog {
     #[inline]
     pub fn build(
         providers: &[ProviderSource],
-        provider_models: &[ProviderModelSource],
+        provider_models: &[ProviderModelSource<'_>],
     ) -> Result<Self, ModelCatalogBuildError> {
         build_from_source(providers, provider_models)
     }
@@ -562,7 +562,7 @@ mod tests {
             .into_iter()
             .map(|(key, info)| ProviderSource::new(key, info))
             .collect();
-        let provider_model_sources: Vec<ProviderModelSource> = provider_models
+        let provider_model_sources: Vec<ProviderModelSource<'_>> = provider_models
             .into_iter()
             .map(|(provider_key, model_key, info)| {
                 ProviderModelSource::new(provider_key, model_key, info)
diff --git a/src/llm-coding-tools-core/src/models/catalog/public/builder_types.rs b/src/llm-coding-tools-core/src/models/catalog/public/builder_types.rs
index 7167c3cc..28b45227 100644
--- a/src/llm-coding-tools-core/src/models/catalog/public/builder_types.rs
+++ b/src/llm-coding-tools-core/src/models/catalog/public/builder_types.rs
@@ -80,17 +80,22 @@ impl From<(String, ProviderInfo)> for ProviderSource {
 ///
 /// This wrapper keeps builder input self-documenting and avoids tuple-position
 /// ambiguity at call sites.
+///
+/// The keys are borrowed because the catalog builder hashes them during
+/// construction and does not retain them afterward. Callers must therefore keep
+/// the referenced strings alive until [`crate::models::catalog::ModelCatalog::build`]
+/// returns.
 #[derive(Debug, Clone, PartialEq)]
-pub struct ProviderModelSource {
-    /// Provider identifier used by lookups (for example, `"openai"`).
-    pub provider_key: String,
-    /// Model identifier used by lookups (for example, `"gpt-4"`).
-    pub model_key: String,
+pub struct ProviderModelSource<'a> {
+    /// Borrowed provider identifier used by lookups (for example, `"openai"`).
+    pub provider_key: &'a str,
+    /// Borrowed model identifier used by lookups (for example, `"gpt-4"`).
+    pub model_key: &'a str,
     /// Model metadata associated with [`Self::model_key`].
     pub model: ModelInfo,
 }
 
-impl ProviderModelSource {
+impl<'a> ProviderModelSource<'a> {
     /// Creates a provider model source.
     ///
     /// # Parameters
@@ -103,22 +108,18 @@ impl ProviderModelSource {
     ///
     /// A new [`ProviderModelSource`].
     #[inline]
-    pub fn new(
-        provider_key: impl Into<String>,
-        model_key: impl Into<String>,
-        model: ModelInfo,
-    ) -> Self {
+    pub fn new(provider_key: &'a str, model_key: &'a str, model: ModelInfo) -> Self {
         Self {
-            provider_key: provider_key.into(),
-            model_key: model_key.into(),
+            provider_key,
+            model_key,
             model,
         }
     }
 }
 
-impl From<(String, String, ModelInfo)> for ProviderModelSource {
+impl<'a> From<(&'a str, &'a str, ModelInfo)> for ProviderModelSource<'a> {
     #[inline]
-    fn from((provider_key, model_key, model): (String, String, ModelInfo)) -> Self {
+    fn from((provider_key, model_key, model): (&'a str, &'a str, ModelInfo)) -> Self {
         Self {
             provider_key,
             model_key,
diff --git a/src/llm-coding-tools-models-dev/Cargo.toml b/src/llm-coding-tools-models-dev/Cargo.toml
index 4aab2702..934588c7 100644
--- a/src/llm-coding-tools-models-dev/Cargo.toml
+++ b/src/llm-coding-tools-models-dev/Cargo.toml
@@ -50,6 +50,7 @@ endian-writer-derive = "0.1.0"
 
 # JSON parsing for models.dev API responses
 serde = { version = "1.0.228", features = ["derive"] }
+serde_json = "1.0.145"
 
 # Ergonomic error definitions
 thiserror = "2.0.18"
diff --git a/src/llm-coding-tools-models-dev/README.md b/src/llm-coding-tools-models-dev/README.md
index 353a16e5..a0818b1e 100644
--- a/src/llm-coding-tools-models-dev/README.md
+++ b/src/llm-coding-tools-models-dev/README.md
@@ -18,7 +18,7 @@ the relevant data we need; and create a llm_coding_tools_core `ModelCatalog`.
 1. Read cache header (if present) and get the old ETag.
 2. Send request to models.dev with `If-None-Match` when ETag exists.
 3. If server returns `304 Not Modified`, load catalog from cache.
-4. If server returns `200 OK`, parse and normalize JSON, write fresh cache, then build catalog.
+4. If server returns `200 OK`, parse JSON, map it into catalog sources, write fresh cache, then build catalog.
 5. If network fails, try cached data as fallback; if no valid cache exists, return an error.
 
 ### Non-blocking (`tokio`)
@@ -31,9 +31,9 @@ async fn load_catalog() -> Result<(), Box<dyn std::error::Error>> {
     let result = ModelsDevCatalog::load().await?;
 
     match result.source {
-        CatalogLoadSource::Downloaded => println!("Downloaded fresh snapshot."),
+        CatalogLoadSource::Downloaded => println!("Downloaded fresh catalog data."),
         CatalogLoadSource::NotModifiedCache => println!("Cache is already up to date."),
-        CatalogLoadSource::FallbackCache => println!("Network unavailable, using cached snapshot."),
+        CatalogLoadSource::FallbackCache => println!("Network unavailable, using cached catalog data."),
     }
 
     if let Some(entry) = result.catalog.lookup("openai", "gpt-4") {
@@ -55,9 +55,9 @@ fn load_catalog() -> Result<(), Box<dyn std::error::Error>> {
     let result = ModelsDevCatalog::load()?;
 
     match result.source {
-        CatalogLoadSource::Downloaded => println!("Downloaded fresh snapshot."),
+        CatalogLoadSource::Downloaded => println!("Downloaded fresh catalog data."),
         CatalogLoadSource::NotModifiedCache => println!("Cache is already up to date."),
-        CatalogLoadSource::FallbackCache => println!("Network unavailable, using cached snapshot."),
+        CatalogLoadSource::FallbackCache => println!("Network unavailable, using cached catalog data."),
     }
 
     if let Some(entry) = result.catalog.lookup("openai", "gpt-4") {
diff --git a/src/llm-coding-tools-models-dev/src/api/catalog_sources.rs b/src/llm-coding-tools-models-dev/src/api/catalog_sources.rs
new file mode 100644
index 00000000..3593c33f
--- /dev/null
+++ b/src/llm-coding-tools-models-dev/src/api/catalog_sources.rs
@@ -0,0 +1,526 @@
+//! models.dev API -> `ModelCatalog` mapping.
+//!
+//! This module parses models.dev `api.json`, maps provider/model metadata into
+//! transient core builder inputs, and immediately constructs a [`ModelCatalog`].
+//!
+//! Mapping policy:
+//! - missing limits default to `0`;
+//! - model modalities are mapped from `modalities.input[]`/`modalities.output[]`
+//!   into directional [`Modality`] flags;
+//! - unknown npm package identifiers map to [`ProviderType::Unknown`];
+//! - unknown modality labels are ignored; if nothing maps, modalities default to
+//!   [`Modality::TEXT`];
+//! - model rows remain provider-scoped; shared configurations are deduplicated by
+//!   core during catalog build.
+
+use super::schema::{parse_api_json, ApiModelEntry, ApiModelLimit, ApiModelModalities};
+use crate::error::CatalogResult;
+use llm_coding_tools_core::models::{
+    Modality, ModelCatalog, ModelInfo, ProviderInfo, ProviderModelSource, ProviderSource,
+    ProviderType,
+};
+
+/// Parses models.dev `api.json` bytes and builds a [`ModelCatalog`].
+pub(crate) fn catalog_from_api_json_bytes(json_bytes: &[u8]) -> CatalogResult<ModelCatalog> {
+    let provider_entries = parse_api_json(json_bytes)?;
+    let mut provider_model_count = 0usize;
+    for provider in provider_entries.values() {
+        provider_model_count = provider_model_count.saturating_add(provider.models.len());
+    }
+
+    let mut provider_rows = Vec::with_capacity(provider_entries.len());
+    let mut model_rows = Vec::with_capacity(provider_model_count);
+
+    for (provider_key, provider) in &provider_entries {
+        debug_assert!(provider.id.is_empty() || provider.id == *provider_key);
+
+        let api_type = provider_type_from_models_dev_npm(provider.npm.as_deref());
+        for (model_key, model_entry) in &provider.models {
+            model_rows.push(ProviderModelSource::new(
+                provider_key.as_str(),
+                model_key.as_str(),
+                model_info_from_entry(model_entry),
+            ));
+        }
+
+        provider_rows.push(ProviderSource::new(
+            provider_key.as_str(),
+            ProviderInfo {
+                api_url: provider.api.clone().unwrap_or_default(),
+                env_vars: provider.env.clone(),
+                api_type,
+            },
+        ));
+    }
+
+    Ok(ModelCatalog::build(&provider_rows, &model_rows)?)
+}
+
+#[inline]
+fn model_info_from_entry(model_entry: &ApiModelEntry) -> ModelInfo {
+    let (max_input, max_output) = match model_entry.limit.as_ref() {
+        Some(limit) => (model_max_input(limit), limit.output),
+        None => (0, 0),
+    };
+    let modalities = model_modalities(model_entry.modalities.as_ref());
+
+    ModelInfo {
+        modalities,
+        max_input,
+        max_output,
+        temperature: None,
+        top_p: None,
+    }
+}
+
+#[inline]
+fn model_modalities(raw: Option<&ApiModelModalities>) -> Modality {
+    let Some(raw) = raw else {
+        return Modality::TEXT;
+    };
+
+    let mut modalities = Modality::empty();
+    for label in &raw.input {
+        modalities |= input_modality_flag(label.as_str());
+    }
+    for label in &raw.output {
+        modalities |= output_modality_flag(label.as_str());
+    }
+
+    if modalities.is_empty() {
+        Modality::TEXT
+    } else {
+        modalities
+    }
+}
+
+#[inline]
+fn input_modality_flag(label: &str) -> Modality {
+    match label {
+        "text" => Modality::TEXT_INPUT,
+        "image" => Modality::IMAGE_INPUT,
+        "audio" => Modality::AUDIO_INPUT,
+        "video" => Modality::VIDEO_INPUT,
+        // `pdf` appears in models.dev input modalities. Core has no PDF bit yet,
+        // so map it to text-input capability as closest supported fallback.
+        "pdf" => Modality::TEXT_INPUT,
+        _ => Modality::empty(),
+    }
+}
+
+#[inline]
+fn output_modality_flag(label: &str) -> Modality {
+    match label {
+        "text" => Modality::TEXT_OUTPUT,
+        "image" => Modality::IMAGE_OUTPUT,
+        "audio" => Modality::AUDIO_OUTPUT,
+        "video" => Modality::VIDEO_OUTPUT,
+        _ => Modality::empty(),
+    }
+}
+
+#[inline]
+fn model_max_input(limit: &ApiModelLimit) -> u32 {
+    if limit.input == 0 {
+        limit.context
+    } else {
+        limit.input
+    }
+}
+
+#[inline]
+fn provider_type_from_models_dev_npm(npm_package: Option<&str>) -> ProviderType {
+    match npm_package {
+        Some("@ai-sdk/openai") => ProviderType::OpenAiCompletions,
+        Some("@ai-sdk/openai-responses") => ProviderType::OpenAiResponses,
+        Some("@ai-sdk/anthropic") => ProviderType::Anthropic,
+        Some("@ai-sdk/google") => ProviderType::Google,
+        Some("@ai-sdk/groq") => ProviderType::Groq,
+        Some("@ai-sdk/mistral") => ProviderType::Mistral,
+        Some("@ai-sdk/ollama") => ProviderType::Ollama,
+        Some("@ai-sdk/amazon-bedrock") => ProviderType::Bedrock,
+        Some("@ai-sdk/azure") => ProviderType::Azure,
+        Some("@openrouter/ai-sdk-provider") => ProviderType::OpenRouter,
+        Some("@ai-sdk/huggingface") => ProviderType::HuggingFace,
+        Some("@ai-sdk/cohere") => ProviderType::Cohere,
+        Some("@ai-sdk/chatgpt-oauth") => ProviderType::ChatGptOAuth,
+        Some("@ai-sdk/claude-code-oauth") => ProviderType::ClaudeCodeOAuth,
+        Some("@ai-sdk/antigravity") => ProviderType::Antigravity,
+        Some(_) | None => ProviderType::Unknown,
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::{catalog_from_api_json_bytes, provider_type_from_models_dev_npm};
+    use llm_coding_tools_core::models::{Modality, ModelCatalog, ProviderType};
+
+    fn catalog(json: &[u8]) -> ModelCatalog {
+        catalog_from_api_json_bytes(json).expect("API payload should map")
+    }
+
+    fn provider_snapshot(
+        catalog: &ModelCatalog,
+        provider_key: &str,
+    ) -> (String, Vec<String>, ProviderType) {
+        let provider = catalog
+            .lookup_provider(provider_key)
+            .expect("provider should exist");
+        (
+            provider.api_url.to_string(),
+            provider
+                .env_vars()
+                .iter()
+                .map(|env_var| (*env_var).to_string())
+                .collect(),
+            provider.api_type,
+        )
+    }
+
+    fn model_snapshot(
+        catalog: &ModelCatalog,
+        provider_key: &str,
+        model_key: &str,
+    ) -> (Modality, u32, u32, Option<f32>, Option<f32>) {
+        let model = catalog
+            .lookup_provider_model(provider_key, model_key)
+            .expect("provider model should exist");
+        (
+            model.modalities,
+            model.max_input,
+            model.max_output,
+            model.temperature(),
+            model.top_p(),
+        )
+    }
+
+    #[test]
+    fn catalog_source_mapping_maps_provider_rows() {
+        let api_json = br#"
+        {
+            "alpha": {
+                "id": "alpha",
+                "npm": "@ai-sdk/openai-responses",
+                "api": "https://alpha.example/v1",
+                "env": ["ALPHA_KEY"],
+                "models": {}
+            }
+        }
+        "#;
+        let catalog = catalog(api_json);
+
+        assert_eq!(catalog.provider_count(), 1);
+        let provider = catalog
+            .lookup_provider("alpha")
+            .expect("alpha provider should exist");
+        assert_eq!(provider.api_url, "https://alpha.example/v1");
+        assert_eq!(provider.env_vars(), ["ALPHA_KEY"]);
+        assert_eq!(provider.api_type, ProviderType::OpenAiResponses);
+    }
+
+    #[test]
+    fn catalog_source_mapping_defaults_missing_limits_to_zero() {
+        let api_json = br#"
+        {
+            "alpha": {
+                "id": "alpha",
+                "npm": null,
+                "api": null,
+                "env": [],
+                "models": {
+                    "m1": {}
+                }
+            }
+        }
+        "#;
+        let catalog = catalog(api_json);
+
+        assert_eq!(catalog.provider_model_count(), 1);
+        let model = catalog
+            .lookup_provider_model("alpha", "m1")
+            .expect("alpha/m1 should exist");
+        assert_eq!(model.modalities, Modality::TEXT);
+        assert_eq!(model.max_input, 0);
+        assert_eq!(model.max_output, 0);
+    }
+
+    #[test]
+    fn catalog_source_mapping_uses_limit_input_when_present() {
+        let api_json = br#"
+        {
+            "alpha": {
+                "id": "alpha",
+                "npm": null,
+                "api": null,
+                "env": [],
+                "models": {
+                    "m1": {
+                        "limit": {
+                            "context": 128000,
+                            "input": 124000,
+                            "output": 4096
+                        }
+                    }
+                }
+            }
+        }
+        "#;
+        let catalog = catalog(api_json);
+
+        let model = catalog
+            .lookup_provider_model("alpha", "m1")
+            .expect("alpha/m1 should exist");
+        assert_eq!(model.max_input, 124000);
+        assert_eq!(model.max_output, 4096);
+    }
+
+    #[test]
+    fn catalog_source_mapping_maps_directional_modalities() {
+        let api_json = br#"
+        {
+            "alpha": {
+                "id": "alpha",
+                "npm": null,
+                "api": null,
+                "env": [],
+                "models": {
+                    "m1": {
+                        "modalities": {
+                            "input": ["text", "image", "pdf"],
+                            "output": ["text", "audio"]
+                        },
+                        "limit": { "context": 4096, "output": 512 }
+                    }
+                }
+            }
+        }
+        "#;
+
+        let catalog = catalog(api_json);
+        let model = catalog
+            .lookup_provider_model("alpha", "m1")
+            .expect("alpha/m1 should exist");
+        assert_eq!(
+            model.modalities,
+            Modality::TEXT_INPUT
+                | Modality::TEXT_OUTPUT
+                | Modality::IMAGE_INPUT
+                | Modality::AUDIO_OUTPUT
+        );
+    }
+
+    #[test]
+    fn catalog_source_mapping_maps_pdf_input_to_text_input() {
+        let api_json = br#"
+        {
+            "alpha": {
+                "id": "alpha",
+                "npm": null,
+                "api": null,
+                "env": [],
+                "models": {
+                    "m1": {
+                        "modalities": {
+                            "input": ["pdf"],
+                            "output": []
+                        }
+                    }
+                }
+            }
+        }
+        "#;
+
+        let catalog = catalog(api_json);
+        let model = catalog
+            .lookup_provider_model("alpha", "m1")
+            .expect("alpha/m1 should exist");
+        assert_eq!(model.modalities, Modality::TEXT_INPUT);
+    }
+
+    #[test]
+    fn catalog_source_mapping_falls_back_to_text_for_unknown_modalities() {
+        let api_json = br#"
+        {
+            "alpha": {
+                "id": "alpha",
+                "npm": null,
+                "api": null,
+                "env": [],
+                "models": {
+                    "m1": {
+                        "modalities": {
+                            "input": ["binary"],
+                            "output": ["embedding"]
+                        }
+                    }
+                }
+            }
+        }
+        "#;
+
+        let catalog = catalog(api_json);
+        let model = catalog
+            .lookup_provider_model("alpha", "m1")
+            .expect("alpha/m1 should exist");
+        assert_eq!(model.modalities, Modality::TEXT);
+    }
+
+    #[test]
+    fn catalog_source_mapping_keeps_duplicate_model_ids_per_provider() {
+        let api_json = br#"
+        {
+            "alpha": {
+                "id": "alpha",
+                "npm": "@ai-sdk/openai",
+                "api": null,
+                "env": [],
+                "models": {
+                    "m1": {
+                        "modalities": {
+                            "input": ["image"],
+                            "output": ["text"]
+                        },
+                        "limit": { "context": 4096, "output": 512 }
+                    }
+                }
+            },
+            "beta": {
+                "id": "beta",
+                "npm": "@ai-sdk/anthropic",
+                "api": null,
+                "env": [],
+                "models": {
+                    "m1": {
+                        "modalities": {
+                            "input": ["audio"],
+                            "output": ["video"]
+                        },
+                        "limit": { "context": 8192, "output": 256 }
+                    }
+                }
+            }
+        }
+        "#;
+        let catalog = catalog(api_json);
+
+        assert_eq!(catalog.provider_model_count(), 2);
+
+        let alpha_model = catalog
+            .lookup_provider_model("alpha", "m1")
+            .expect("alpha/m1 should exist");
+        assert_eq!(alpha_model.max_input, 4096);
+        assert_eq!(alpha_model.max_output, 512);
+        assert_eq!(
+            alpha_model.modalities,
+            Modality::IMAGE_INPUT | Modality::TEXT_OUTPUT
+        );
+
+        let beta_model = catalog
+            .lookup_provider_model("beta", "m1")
+            .expect("beta/m1 should exist");
+        assert_eq!(beta_model.max_input, 8192);
+        assert_eq!(beta_model.max_output, 256);
+        assert_eq!(
+            beta_model.modalities,
+            Modality::AUDIO_INPUT | Modality::VIDEO_OUTPUT
+        );
+    }
+
+    #[test]
+    fn catalog_source_mapping_keeps_same_data_for_different_input_key_order() {
+        let api_json_a = br#"
+        {
+            "beta": {
+                "id": "beta",
+                "npm": "@ai-sdk/anthropic",
+                "api": null,
+                "env": [],
+                "models": {
+                    "m2": { "limit": { "context": 2048, "output": 512 } }
+                }
+            },
+            "alpha": {
+                "id": "alpha",
+                "npm": "@ai-sdk/openai",
+                "api": null,
+                "env": [],
+                "models": {
+                    "m1": { "limit": { "context": 1024, "output": 256 } }
+                }
+            }
+        }
+        "#;
+
+        let api_json_b = br#"
+        {
+            "alpha": {
+                "id": "alpha",
+                "npm": "@ai-sdk/openai",
+                "api": null,
+                "env": [],
+                "models": {
+                    "m1": { "limit": { "context": 1024, "output": 256 } }
+                }
+            },
+            "beta": {
+                "id": "beta",
+                "npm": "@ai-sdk/anthropic",
+                "api": null,
+                "env": [],
+                "models": {
+                    "m2": { "limit": { "context": 2048, "output": 512 } }
+                }
+            }
+        }
+        "#;
+
+        let catalog_a = catalog(api_json_a);
+        let catalog_b = catalog(api_json_b);
+
+        assert_eq!(catalog_a.provider_count(), catalog_b.provider_count());
+        assert_eq!(
+            catalog_a.provider_model_count(),
+            catalog_b.provider_model_count()
+        );
+        assert_eq!(
+            catalog_a.model_config_count(),
+            catalog_b.model_config_count()
+        );
+        assert_eq!(
+            provider_snapshot(&catalog_a, "alpha"),
+            provider_snapshot(&catalog_b, "alpha")
+        );
+        assert_eq!(
+            provider_snapshot(&catalog_a, "beta"),
+            provider_snapshot(&catalog_b, "beta")
+        );
+        assert_eq!(
+            model_snapshot(&catalog_a, "alpha", "m1"),
+            model_snapshot(&catalog_b, "alpha", "m1")
+        );
+        assert_eq!(
+            model_snapshot(&catalog_a, "beta", "m2"),
+            model_snapshot(&catalog_b, "beta", "m2")
+        );
+    }
+
+    #[test]
+    fn provider_type_mapping_handles_known_and_unknown_packages() {
+        assert_eq!(
+            provider_type_from_models_dev_npm(Some("@ai-sdk/openai")),
+            ProviderType::OpenAiCompletions
+        );
+        assert_eq!(
+            provider_type_from_models_dev_npm(Some("@ai-sdk/google")),
+            ProviderType::Google
+        );
+        assert_eq!(
+            provider_type_from_models_dev_npm(Some("@ai-sdk/openai-compatible")),
+            ProviderType::Unknown
+        );
+        assert_eq!(
+            provider_type_from_models_dev_npm(None),
+            ProviderType::Unknown
+        );
+    }
+}
diff --git a/src/llm-coding-tools-models-dev/src/api/mod.rs b/src/llm-coding-tools-models-dev/src/api/mod.rs
new file mode 100644
index 00000000..730624ee
--- /dev/null
+++ b/src/llm-coding-tools-models-dev/src/api/mod.rs
@@ -0,0 +1,12 @@
+//! models.dev API parsing and catalog-source mapping.
+//!
+//! - [`schema`] parses upstream `https://models.dev/api.json` into a minimal
+//!   serde representation.
+//! - [`catalog_sources`] maps parsed data into a
+//!   [`llm_coding_tools_core::models::ModelCatalog`].
+//!
+//! Both modules intentionally keep only fields required by core catalog
+//! construction so ingest stays fast and memory-bounded.
+
+pub(crate) mod catalog_sources;
+pub(crate) mod schema;
diff --git a/src/llm-coding-tools-models-dev/src/api/schema.rs b/src/llm-coding-tools-models-dev/src/api/schema.rs
new file mode 100644
index 00000000..add3092e
--- /dev/null
+++ b/src/llm-coding-tools-models-dev/src/api/schema.rs
@@ -0,0 +1,153 @@
+//! Minimal models.dev API schema used by this crate.
+//!
+//! We deserialize only fields needed for catalog-source mapping:
+//! provider metadata (`id`, `npm`, `api`, `env`) and model token limits
+//! (`limit.context`, `limit.input`, `limit.output`) plus directional modalities
+//! (`modalities.input[]`, `modalities.output[]`).
+//!
+//! Representative payload shape from `https://models.dev/api.json`:
+//!
+//! ```json
+//! {
+//!   "openai": {
+//!     "id": "openai",
+//!     "npm": "@ai-sdk/openai",
+//!     "api": null,
+//!     "env": ["OPENAI_API_KEY"],
+//!     "models": {
+//!       "gpt-4o": {
+//!         "id": "gpt-4o",
+//!         "modalities": {
+//!           "input": ["text", "image"],
+//!           "output": ["text"]
+//!         },
+//!         "limit": {
+//!           "context": 128000,
+//!           "output": 16384
+//!         }
+//!       }
+//!     }
+//!   }
+//! }
+//! ```
+//!
+//! Mapping into local structs:
+//! - top-level provider map entry -> [`ApiProviderEntry`]
+//! - `models.<model_id>` object -> [`ApiModelEntry`]
+//! - `models.<model_id>.modalities` object -> [`ApiModelModalities`]
+//! - `models.<model_id>.limit` object -> [`ApiModelLimit`]
+//!
+//! Unknown fields are intentionally ignored so we can drop large unused sections
+//! early and keep parse memory bounded.
+
+use crate::error::CatalogResult;
+use serde::Deserialize;
+use std::collections::HashMap;
+
+#[derive(Debug, Deserialize)]
+pub(crate) struct ApiProviderEntry {
+    pub(crate) id: String,
+    #[serde(default)]
+    pub(crate) npm: Option<String>,
+    #[serde(default)]
+    pub(crate) api: Option<String>,
+    #[serde(default)]
+    pub(crate) env: Vec<String>,
+    #[serde(default)]
+    pub(crate) models: HashMap<String, ApiModelEntry>,
+}
+
+#[derive(Debug, Deserialize)]
+pub(crate) struct ApiModelEntry {
+    #[serde(default)]
+    pub(crate) limit: Option<ApiModelLimit>,
+    #[serde(default)]
+    pub(crate) modalities: Option<ApiModelModalities>,
+}
+
+#[derive(Debug, Deserialize)]
+pub(crate) struct ApiModelModalities {
+    #[serde(default)]
+    pub(crate) input: Vec<String>,
+    #[serde(default)]
+    pub(crate) output: Vec<String>,
+}
+
+#[derive(Debug, Deserialize)]
+pub(crate) struct ApiModelLimit {
+    #[serde(default)]
+    pub(crate) context: u32,
+    #[serde(default)]
+    pub(crate) input: u32,
+    #[serde(default)]
+    pub(crate) output: u32,
+}
+
+/// Parses upstream `api.json` bytes into a provider map.
+///
+/// Input must match the current models.dev shape: a flat top-level object where
+/// each key is a provider id and each value is a provider entry.
+#[inline]
+pub(crate) fn parse_api_json(
+    json_bytes: &[u8],
+) -> CatalogResult<HashMap<String, ApiProviderEntry>> {
+    Ok(serde_json::from_slice(json_bytes)?)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::parse_api_json;
+
+    #[test]
+    fn parse_api_json_supports_flat_provider_map() {
+        let api_json = br#"{"alpha":{"id":"alpha","npm":"@ai-sdk/openai","api":null,"env":["ALPHA_KEY"],"models":{"m1":{"modalities":{"input":["text","image"],"output":["text"]},"limit":{"context":4096,"output":512}}}}}"#;
+        let providers = parse_api_json(api_json).expect("API payload should parse");
+        let provider = providers.get("alpha").expect("provider should exist");
+
+        assert_eq!(provider.id, "alpha");
+        assert_eq!(provider.npm.as_deref(), Some("@ai-sdk/openai"));
+        assert_eq!(provider.env.as_slice(), ["ALPHA_KEY"]);
+
+        let model = provider.models.get("m1").expect("model should exist");
+        let modalities = model.modalities.as_ref().expect("modalities should exist");
+        let limit = model.limit.as_ref().expect("limit should exist");
+        assert_eq!(modalities.input.as_slice(), ["text", "image"]);
+        assert_eq!(modalities.output.as_slice(), ["text"]);
+        assert_eq!(limit.context, 4096);
+        assert_eq!(limit.output, 512);
+    }
+
+    #[test]
+    fn parse_api_json_ignores_unknown_fields() {
+        let api_json = br#"
+        {
+            "alpha": {
+                "id": "alpha",
+                "name": "Alpha",
+                "npm": "@ai-sdk/openai",
+                "api": "https://alpha.example/v1",
+                "env": ["ALPHA_KEY"],
+                "models": {
+                    "m1": {
+                        "description": "ignored",
+                        "limit": {
+                            "context": 128000,
+                            "input": 124000,
+                            "output": 4096
+                        }
+                    }
+                }
+            }
+        }
+        "#;
+
+        let providers = parse_api_json(api_json).expect("API payload should parse");
+        let provider = providers.get("alpha").expect("provider should exist");
+        let model = provider.models.get("m1").expect("model should exist");
+        let limit = model.limit.as_ref().expect("limit should exist");
+
+        assert_eq!(limit.context, 128000);
+        assert_eq!(limit.input, 124000);
+        assert_eq!(limit.output, 4096);
+    }
+}
diff --git a/src/llm-coding-tools-models-dev/src/cache/mod.rs b/src/llm-coding-tools-models-dev/src/cache/mod.rs
index 5043b9a5..966419c9 100644
--- a/src/llm-coding-tools-models-dev/src/cache/mod.rs
+++ b/src/llm-coding-tools-models-dev/src/cache/mod.rs
@@ -1,4 +1,4 @@
-//! Cache path and container utilities for models.dev snapshots.
+//! Cache path and container utilities for models.dev catalog data.
 //!
 //! Responsibilities are split by concern:
 //!
diff --git a/src/llm-coding-tools-models-dev/src/catalog/mod.rs b/src/llm-coding-tools-models-dev/src/catalog/mod.rs
index af2a864e..dca3b904 100644
--- a/src/llm-coding-tools-models-dev/src/catalog/mod.rs
+++ b/src/llm-coding-tools-models-dev/src/catalog/mod.rs
@@ -24,7 +24,8 @@ impl ModelsDevCatalog {
     /// This is the primary entry point for most use cases. It will:
     /// 1. Check for an existing cache and extract its ETag
     /// 2. Send a conditional GET request with `If-None-Match`
-    /// 3. On `200 OK`: download, normalize, cache, and return fresh data
+    /// 3. On `200 OK`: download, map the API payload into catalog sources,
+    ///    cache it, and return fresh data
     /// 4. On `304 Not Modified`: decode and return cached data
     /// 5. On network failure: fall back to cached data if available
     ///
@@ -43,7 +44,7 @@ impl ModelsDevCatalog {
     /// - The cache path cannot be determined and no cache exists
     /// - An HTTP error occurs and no cache is available for fallback
     /// - The cache is corrupted and cannot be decoded
-    /// - Catalog construction from normalized data fails
+    /// - Catalog construction from mapped catalog sources fails
     ///
     /// # Examples
     ///
diff --git a/src/llm-coding-tools-models-dev/src/error.rs b/src/llm-coding-tools-models-dev/src/error.rs
index c4da4a20..ba0b2b21 100644
--- a/src/llm-coding-tools-models-dev/src/error.rs
+++ b/src/llm-coding-tools-models-dev/src/error.rs
@@ -22,6 +22,10 @@ pub enum CatalogError {
     #[error("HTTP error: {0}")]
     Reqwest(#[from] reqwest::Error),
 
+    /// A JSON parse error occurred while decoding models.dev API JSON.
+    #[error("JSON parse error: {0}")]
+    Json(#[from] serde_json::Error),
+
     /// A zstd decompression error occurred.
     #[error("decompression error: {0}")]
     Zstd(String),
diff --git a/src/llm-coding-tools-models-dev/src/lib.rs b/src/llm-coding-tools-models-dev/src/lib.rs
index 0cdd10f5..1b4eedae 100644
--- a/src/llm-coding-tools-models-dev/src/lib.rs
+++ b/src/llm-coding-tools-models-dev/src/lib.rs
@@ -7,6 +7,8 @@ compile_error!("Features `async` and `blocking` are mutually exclusive.");
 #[cfg(not(any(feature = "async", feature = "blocking")))]
 compile_error!("Either an async runtime (e.g., `tokio`) or `blocking` feature must be enabled.");
 
+#[allow(dead_code)] // Wired into catalog build/load slices
+mod api;
 pub mod cache;
 pub mod catalog;
 pub mod error;

From 388c4a7a26b09f90267654d005c16e22318c23c4 Mon Sep 17 00:00:00 2001
From: Sewer56 <admin@sewer56.dev>
Date: Fri, 6 Mar 2026 20:38:42 +0000
Subject: [PATCH 06/22] Added: ETag-based catalog caching with conditional GET
 and schema updates

Adds HTTP caching with ETags, plus API schema cleanup and modality handling fixes.

Changes:
- Added `CatalogCachePayload` schema in `cache/payload.rs` for efficient bitcode serialization
- Added `cache_payload_from_api_json_bytes()` for JSON-to-payload mapping
- Added `load_catalog_from_cache_file_data()` for 304 Not Modified fast path
- Added atomic cache writes via temp-file-plus-rename in `fs` module
- Added `sync.rs` with conditional GET using `If-None-Match` header
- Wired `ModelsDevCatalog::load()` and `load_at()` to delegate to sync layer
- Added crate-private test URL override and mock server for unit testing
- Removed unused `id` field from `ApiProviderEntry` (not in models.dev API)
- Changed PDF input modality to return empty instead of mapping to TEXT_INPUT
- Changed empty modalities to return empty instead of defaulting to TEXT
- Added `bitcode` dependency for fast binary serialization

Benefits:
- Keeps public API minimal while enabling efficient caching
- Aligns schema with actual models.dev API response structure
---
 src/Cargo.lock                                |  45 +++-
 src/llm-coding-tools-core/Cargo.toml          |   3 +
 .../src/models/catalog/public/provider_idx.rs |   2 +-
 src/llm-coding-tools-core/src/models/mod.rs   |   2 +-
 .../src/models/provider_type.rs               |   2 +-
 src/llm-coding-tools-models-dev/Cargo.toml    |   1 +
 src/llm-coding-tools-models-dev/README.md     |   2 +-
 .../src/api/catalog_sources.rs                | 154 ++++++++-----
 .../src/api/schema.rs                         |   4 +-
 .../src/cache/format.rs                       |  58 ++++-
 .../src/cache/mod.rs                          |   3 +-
 .../src/cache/payload.rs                      | 205 +++++++++++++++++
 .../src/catalog/load_cache.rs                 |  89 ++++++++
 .../src/catalog/mod.rs                        |  83 ++++++-
 .../src/catalog/sync.rs                       | 210 ++++++++++++++++++
 .../src/catalog/test_utils.rs                 |  77 +++++++
 .../src/fs/blocking_impl.rs                   |   6 +
 .../src/fs/tokio_impl.rs                      |   6 +
 18 files changed, 879 insertions(+), 73 deletions(-)
 create mode 100644 src/llm-coding-tools-models-dev/src/cache/payload.rs
 create mode 100644 src/llm-coding-tools-models-dev/src/catalog/load_cache.rs
 create mode 100644 src/llm-coding-tools-models-dev/src/catalog/sync.rs
 create mode 100644 src/llm-coding-tools-models-dev/src/catalog/test_utils.rs

diff --git a/src/Cargo.lock b/src/Cargo.lock
index 7dee5401..68757b47 100644
--- a/src/Cargo.lock
+++ b/src/Cargo.lock
@@ -1469,6 +1469,7 @@ name = "llm-coding-tools-core"
 version = "0.2.0"
 dependencies = [
  "ahash",
+ "bitcode",
  "bitfields",
  "bitflags",
  "criterion",
@@ -1508,6 +1509,7 @@ dependencies = [
  "reqwest 0.13.1",
  "serde",
  "serde_json",
+ "serial_test",
  "tempfile",
  "thiserror 2.0.18",
  "tokio",
@@ -2306,6 +2308,15 @@ dependencies = [
  "winapi-util",
 ]
 
+[[package]]
+name = "scc"
+version = "2.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "46e6f046b7fef48e2660c57ed794263155d713de679057f2d0c169bfc6e756cc"
+dependencies = [
+ "sdd",
+]
+
 [[package]]
 name = "schannel"
 version = "0.1.28"
@@ -2346,6 +2357,12 @@ version = "1.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
 
+[[package]]
+name = "sdd"
+version = "3.0.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "490dcfcbfef26be6800d11870ff2df8774fa6e86d047e3e8c8a76b25655e41ca"
+
 [[package]]
 name = "security-framework"
 version = "3.5.1"
@@ -2674,6 +2691,32 @@ dependencies = [
  "tracing",
 ]
 
+[[package]]
+name = "serial_test"
+version = "3.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "911bd979bf1070a3f3aa7b691a3b3e9968f339ceeec89e08c280a8a22207a32f"
+dependencies = [
+ "futures-executor",
+ "futures-util",
+ "log",
+ "once_cell",
+ "parking_lot",
+ "scc",
+ "serial_test_derive",
+]
+
+[[package]]
+name = "serial_test_derive"
+version = "3.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0a7d91949b85b0d2fb687445e448b40d322b6b3e4af6b44a29b21d9a5f33e6d9"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
 [[package]]
 name = "sha2"
 version = "0.10.9"
@@ -2809,7 +2852,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "82a72c767771b47409d2345987fda8628641887d5466101319899796367354a0"
 dependencies = [
  "fastrand",
- "getrandom 0.3.4",
+ "getrandom 0.4.2",
  "once_cell",
  "rustix",
  "windows-sys 0.61.2",
diff --git a/src/llm-coding-tools-core/Cargo.toml b/src/llm-coding-tools-core/Cargo.toml
index 3238bfa4..b88ce699 100644
--- a/src/llm-coding-tools-core/Cargo.toml
+++ b/src/llm-coding-tools-core/Cargo.toml
@@ -40,6 +40,9 @@ serde_json = "1.0"
 # Zero overhead compile time bitflag generation
 bitflags = "2.11.0"
 
+# Fast binary serialization for catalog cache types
+bitcode = "0.6.9"
+
 # Compile-time generated packed bitfield structs for model metadata
 bitfields = "1.0.2"
 
diff --git a/src/llm-coding-tools-core/src/models/catalog/public/provider_idx.rs b/src/llm-coding-tools-core/src/models/catalog/public/provider_idx.rs
index ba3951e7..d82121a9 100644
--- a/src/llm-coding-tools-core/src/models/catalog/public/provider_idx.rs
+++ b/src/llm-coding-tools-core/src/models/catalog/public/provider_idx.rs
@@ -4,7 +4,7 @@
 ///
 /// Used to reference a specific provider in the catalog's
 /// packed provider entry tables and string tables.
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, bitcode::Encode, bitcode::Decode)]
 #[repr(transparent)]
 pub struct ProviderIdx(pub(crate) u16);
 
diff --git a/src/llm-coding-tools-core/src/models/mod.rs b/src/llm-coding-tools-core/src/models/mod.rs
index 17137b62..495de325 100644
--- a/src/llm-coding-tools-core/src/models/mod.rs
+++ b/src/llm-coding-tools-core/src/models/mod.rs
@@ -5,6 +5,6 @@ mod provider_type;
 
 pub use catalog::{
     LookupTableKind, Modality, Model, ModelCatalog, ModelCatalogBuildError, ModelInfo, Provider,
-    ProviderInfo, ProviderModelSource, ProviderSource,
+    ProviderIdx, ProviderInfo, ProviderModelSource, ProviderSource,
 };
 pub use provider_type::ProviderType;
diff --git a/src/llm-coding-tools-core/src/models/provider_type.rs b/src/llm-coding-tools-core/src/models/provider_type.rs
index af7c5d24..1b18e262 100644
--- a/src/llm-coding-tools-core/src/models/provider_type.rs
+++ b/src/llm-coding-tools-core/src/models/provider_type.rs
@@ -1,5 +1,5 @@
 /// Provider behavior profile used by model resolver logic.
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default, bitcode::Encode, bitcode::Decode)]
 #[repr(u8)]
 pub enum ProviderType {
     /// Unknown or unsupported provider package.
diff --git a/src/llm-coding-tools-models-dev/Cargo.toml b/src/llm-coding-tools-models-dev/Cargo.toml
index 934588c7..26ef2550 100644
--- a/src/llm-coding-tools-models-dev/Cargo.toml
+++ b/src/llm-coding-tools-models-dev/Cargo.toml
@@ -61,3 +61,4 @@ tokio = { version = "1.49", features = ["fs", "io-util"], optional = true }
 [dev-dependencies]
 tokio = { version = "1.49", features = ["rt", "macros"] }
 tempfile = "3.26"
+serial_test = "3"
diff --git a/src/llm-coding-tools-models-dev/README.md b/src/llm-coding-tools-models-dev/README.md
index a0818b1e..9a31b444 100644
--- a/src/llm-coding-tools-models-dev/README.md
+++ b/src/llm-coding-tools-models-dev/README.md
@@ -8,7 +8,7 @@ for a cached fallback and caching via ETag(s).
 If you run coding agents against many providers, you want to have fresh data.
 [models.dev][models.dev] is one such source of data.
 
-This crate has the sufficient code to download from models.dev, distill down only
+This crate has sufficient code to download from models.dev, distill down only
 the relevant data we need; and create a llm_coding_tools_core `ModelCatalog`.
 
 ## Usage
diff --git a/src/llm-coding-tools-models-dev/src/api/catalog_sources.rs b/src/llm-coding-tools-models-dev/src/api/catalog_sources.rs
index 3593c33f..2a3c1ee7 100644
--- a/src/llm-coding-tools-models-dev/src/api/catalog_sources.rs
+++ b/src/llm-coding-tools-models-dev/src/api/catalog_sources.rs
@@ -14,46 +14,69 @@
 //!   core during catalog build.
 
 use super::schema::{parse_api_json, ApiModelEntry, ApiModelLimit, ApiModelModalities};
-use crate::error::CatalogResult;
+use crate::cache::payload::{
+    catalog_from_cache_payload, CachedModelRow, CachedProviderRow, CatalogCachePayload,
+};
+use crate::error::{CatalogError, CatalogResult};
 use llm_coding_tools_core::models::{
-    Modality, ModelCatalog, ModelInfo, ProviderInfo, ProviderModelSource, ProviderSource,
-    ProviderType,
+    Modality, ModelCatalog, ModelCatalogBuildError, ModelInfo, ProviderIdx, ProviderType,
 };
 
-/// Parses models.dev `api.json` bytes and builds a [`ModelCatalog`].
-pub(crate) fn catalog_from_api_json_bytes(json_bytes: &[u8]) -> CatalogResult<ModelCatalog> {
+pub(crate) fn cache_payload_from_api_json_bytes(
+    json_bytes: &[u8],
+) -> CatalogResult<CatalogCachePayload> {
     let provider_entries = parse_api_json(json_bytes)?;
-    let mut provider_model_count = 0usize;
-    for provider in provider_entries.values() {
-        provider_model_count = provider_model_count.saturating_add(provider.models.len());
-    }
 
-    let mut provider_rows = Vec::with_capacity(provider_entries.len());
-    let mut model_rows = Vec::with_capacity(provider_model_count);
+    let provider_count = provider_entries.len();
+    if provider_count > (u16::MAX as usize) + 1 {
+        return Err(CatalogError::ModelCatalogBuild(
+            ModelCatalogBuildError::TooManyProviders {
+                count: provider_count,
+                max: (u16::MAX as usize) + 1,
+            },
+        ));
+    }
 
-    for (provider_key, provider) in &provider_entries {
-        debug_assert!(provider.id.is_empty() || provider.id == *provider_key);
+    let mut providers = Vec::with_capacity(provider_count);
+    let mut models = Vec::with_capacity(
+        provider_entries
+            .values()
+            .map(|provider| provider.models.len())
+            .sum(),
+    );
 
+    for (provider_key, provider) in provider_entries {
+        let provider_idx = ProviderIdx::new(providers.len() as u16);
         let api_type = provider_type_from_models_dev_npm(provider.npm.as_deref());
-        for (model_key, model_entry) in &provider.models {
-            model_rows.push(ProviderModelSource::new(
-                provider_key.as_str(),
-                model_key.as_str(),
-                model_info_from_entry(model_entry),
-            ));
-        }
 
-        provider_rows.push(ProviderSource::new(
-            provider_key.as_str(),
-            ProviderInfo {
-                api_url: provider.api.clone().unwrap_or_default(),
-                env_vars: provider.env.clone(),
-                api_type,
-            },
-        ));
+        providers.push(CachedProviderRow {
+            provider_key,
+            api_url: provider.api.unwrap_or_default(),
+            env_vars: provider.env,
+            api_type,
+        });
+
+        for (model_key, model_entry) in provider.models {
+            let model = model_info_from_entry(&model_entry);
+            models.push(CachedModelRow {
+                provider_idx,
+                model_key,
+                modalities_bits: model.modalities.bits(),
+                max_input: model.max_input,
+                max_output: model.max_output,
+                temperature: model.temperature,
+                top_p: model.top_p,
+            });
+        }
     }
 
-    Ok(ModelCatalog::build(&provider_rows, &model_rows)?)
+    Ok(CatalogCachePayload { providers, models })
+}
+
+/// Parses models.dev `api.json` bytes and builds a [`ModelCatalog`].
+pub(crate) fn catalog_from_api_json_bytes(json_bytes: &[u8]) -> CatalogResult<ModelCatalog> {
+    let payload = cache_payload_from_api_json_bytes(json_bytes)?;
+    catalog_from_cache_payload(payload)
 }
 
 #[inline]
@@ -87,11 +110,7 @@ fn model_modalities(raw: Option<&ApiModelModalities>) -> Modality {
         modalities |= output_modality_flag(label.as_str());
     }
 
-    if modalities.is_empty() {
-        Modality::TEXT
-    } else {
-        modalities
-    }
+    modalities
 }
 
 #[inline]
@@ -101,10 +120,7 @@ fn input_modality_flag(label: &str) -> Modality {
         "image" => Modality::IMAGE_INPUT,
         "audio" => Modality::AUDIO_INPUT,
         "video" => Modality::VIDEO_INPUT,
-        // `pdf` appears in models.dev input modalities. Core has no PDF bit yet,
-        // so map it to text-input capability as closest supported fallback.
-        "pdf" => Modality::TEXT_INPUT,
-        _ => Modality::empty(),
+        _ => Modality::empty(), // pdf not supported
     }
 }
 
@@ -152,8 +168,11 @@ fn provider_type_from_models_dev_npm(npm_package: Option<&str>) -> ProviderType
 
 #[cfg(test)]
 mod tests {
-    use super::{catalog_from_api_json_bytes, provider_type_from_models_dev_npm};
-    use llm_coding_tools_core::models::{Modality, ModelCatalog, ProviderType};
+    use super::{
+        cache_payload_from_api_json_bytes, catalog_from_api_json_bytes,
+        provider_type_from_models_dev_npm,
+    };
+    use llm_coding_tools_core::models::{Modality, ModelCatalog, ProviderIdx, ProviderType};
 
     fn catalog(json: &[u8]) -> ModelCatalog {
         catalog_from_api_json_bytes(json).expect("API payload should map")
@@ -194,12 +213,46 @@ mod tests {
         )
     }
 
+    #[test]
+    fn cache_payload_maps_single_provider_with_models() {
+        let api_json = br#"
+        {
+            "openai": {
+                "npm": "@ai-sdk/openai",
+                "api": "https://api.openai.com/v1",
+                "env": ["OPENAI_API_KEY"],
+                "models": {
+                    "gpt-4": {
+                        "modalities": { "input": ["text"], "output": ["text"] },
+                        "limit": { "context": 8192, "output": 4096 }
+                    }
+                }
+            }
+        }
+        "#;
+
+        let payload = cache_payload_from_api_json_bytes(api_json).expect("payload should build");
+        assert_eq!(payload.providers.len(), 1);
+        assert_eq!(payload.models.len(), 1);
+
+        assert_eq!(payload.providers[0].provider_key, "openai");
+        assert_eq!(
+            payload.providers[0].api_type,
+            ProviderType::OpenAiCompletions
+        );
+
+        assert_eq!(payload.models[0].provider_idx, ProviderIdx::new(0));
+        assert_eq!(payload.models[0].model_key, "gpt-4");
+        assert_eq!(payload.models[0].modalities_bits, Modality::TEXT.bits());
+        assert_eq!(payload.models[0].max_input, 8192);
+        assert_eq!(payload.models[0].max_output, 4096);
+    }
+
     #[test]
     fn catalog_source_mapping_maps_provider_rows() {
         let api_json = br#"
         {
             "alpha": {
-                "id": "alpha",
                 "npm": "@ai-sdk/openai-responses",
                 "api": "https://alpha.example/v1",
                 "env": ["ALPHA_KEY"],
@@ -223,7 +276,6 @@ mod tests {
         let api_json = br#"
         {
             "alpha": {
-                "id": "alpha",
                 "npm": null,
                 "api": null,
                 "env": [],
@@ -249,7 +301,6 @@ mod tests {
         let api_json = br#"
         {
             "alpha": {
-                "id": "alpha",
                 "npm": null,
                 "api": null,
                 "env": [],
@@ -279,7 +330,6 @@ mod tests {
         let api_json = br#"
         {
             "alpha": {
-                "id": "alpha",
                 "npm": null,
                 "api": null,
                 "env": [],
@@ -310,11 +360,10 @@ mod tests {
     }
 
     #[test]
-    fn catalog_source_mapping_maps_pdf_input_to_text_input() {
+    fn catalog_source_mapping_maps_pdf_input_to_empty() {
         let api_json = br#"
         {
             "alpha": {
-                "id": "alpha",
                 "npm": null,
                 "api": null,
                 "env": [],
@@ -334,15 +383,14 @@ mod tests {
         let model = catalog
             .lookup_provider_model("alpha", "m1")
             .expect("alpha/m1 should exist");
-        assert_eq!(model.modalities, Modality::TEXT_INPUT);
+        assert_eq!(model.modalities, Modality::empty());
     }
 
     #[test]
-    fn catalog_source_mapping_falls_back_to_text_for_unknown_modalities() {
+    fn catalog_source_mapping_falls_back_to_empty_for_unknown_modalities() {
         let api_json = br#"
         {
             "alpha": {
-                "id": "alpha",
                 "npm": null,
                 "api": null,
                 "env": [],
@@ -362,7 +410,7 @@ mod tests {
         let model = catalog
             .lookup_provider_model("alpha", "m1")
             .expect("alpha/m1 should exist");
-        assert_eq!(model.modalities, Modality::TEXT);
+        assert_eq!(model.modalities, Modality::empty());
     }
 
     #[test]
@@ -370,7 +418,6 @@ mod tests {
         let api_json = br#"
         {
             "alpha": {
-                "id": "alpha",
                 "npm": "@ai-sdk/openai",
                 "api": null,
                 "env": [],
@@ -385,7 +432,6 @@ mod tests {
                 }
             },
             "beta": {
-                "id": "beta",
                 "npm": "@ai-sdk/anthropic",
                 "api": null,
                 "env": [],
@@ -431,7 +477,6 @@ mod tests {
         let api_json_a = br#"
         {
             "beta": {
-                "id": "beta",
                 "npm": "@ai-sdk/anthropic",
                 "api": null,
                 "env": [],
@@ -440,7 +485,6 @@ mod tests {
                 }
             },
             "alpha": {
-                "id": "alpha",
                 "npm": "@ai-sdk/openai",
                 "api": null,
                 "env": [],
@@ -454,7 +498,6 @@ mod tests {
         let api_json_b = br#"
         {
             "alpha": {
-                "id": "alpha",
                 "npm": "@ai-sdk/openai",
                 "api": null,
                 "env": [],
@@ -463,7 +506,6 @@ mod tests {
                 }
             },
             "beta": {
-                "id": "beta",
                 "npm": "@ai-sdk/anthropic",
                 "api": null,
                 "env": [],
diff --git a/src/llm-coding-tools-models-dev/src/api/schema.rs b/src/llm-coding-tools-models-dev/src/api/schema.rs
index add3092e..3e0f4c12 100644
--- a/src/llm-coding-tools-models-dev/src/api/schema.rs
+++ b/src/llm-coding-tools-models-dev/src/api/schema.rs
@@ -1,7 +1,7 @@
 //! Minimal models.dev API schema used by this crate.
 //!
 //! We deserialize only fields needed for catalog-source mapping:
-//! provider metadata (`id`, `npm`, `api`, `env`) and model token limits
+//! provider metadata (`npm`, `api`, `env`) and model token limits
 //! (`limit.context`, `limit.input`, `limit.output`) plus directional modalities
 //! (`modalities.input[]`, `modalities.output[]`).
 //!
@@ -46,7 +46,6 @@ use std::collections::HashMap;
 
 #[derive(Debug, Deserialize)]
 pub(crate) struct ApiProviderEntry {
-    pub(crate) id: String,
     #[serde(default)]
     pub(crate) npm: Option<String>,
     #[serde(default)]
@@ -104,7 +103,6 @@ mod tests {
         let providers = parse_api_json(api_json).expect("API payload should parse");
         let provider = providers.get("alpha").expect("provider should exist");
 
-        assert_eq!(provider.id, "alpha");
         assert_eq!(provider.npm.as_deref(), Some("@ai-sdk/openai"));
         assert_eq!(provider.env.as_slice(), ["ALPHA_KEY"]);
 
diff --git a/src/llm-coding-tools-models-dev/src/cache/format.rs b/src/llm-coding-tools-models-dev/src/cache/format.rs
index 4f910404..fbfad751 100644
--- a/src/llm-coding-tools-models-dev/src/cache/format.rs
+++ b/src/llm-coding-tools-models-dev/src/cache/format.rs
@@ -23,7 +23,7 @@
 //! ## Safety
 //!
 //! Not a 'safe' parser. We assume the file was created by the user.
-//! There's no validation for erroneous data; e.g. malociously crafted headers.
+//! There's no validation for erroneous data; e.g. maliciously crafted headers.
 //! Only validation for accidental corruption/truncation (e.g., from partial writes) is included.
 
 use crate::{
@@ -33,7 +33,7 @@ use crate::{
 use endian_writer::{EndianReader, EndianWriter, HasSize, LittleEndianReader, LittleEndianWriter};
 use endian_writer_derive::EndianWritable;
 use std::mem::size_of;
-use std::path::Path;
+use std::path::{Path, PathBuf};
 use std::ptr::copy_nonoverlapping;
 
 /// Fixed v1 prelude, encoded little-endian.
@@ -79,6 +79,13 @@ pub(crate) struct CacheFileData {
     file_bytes: Box<[u8]>,
 }
 
+/// Returns a temporary path for atomic cache writes.
+fn temp_cache_path(path: &Path) -> PathBuf {
+    let mut temp = path.as_os_str().to_os_string();
+    temp.push(".tmp");
+    PathBuf::from(temp)
+}
+
 impl CacheFileData {
     /// Returns the optional ETag as a borrowed byte slice.
     #[inline]
@@ -134,7 +141,7 @@ pub(crate) async fn read_cache_file(path: &Path) -> CatalogResult<CacheFileData>
     let prelude = decode_prelude(&file_bytes[..CACHE_HEADER_LEN]);
     let etag_len = prelude.etag_len as usize;
     let payload_len_compressed = prelude.payload_len_compressed as usize;
-    let expected_total = CACHE_HEADER_LEN + etag_len + payload_len_compressed;
+    let expected_total = CACHE_HEADER_LEN + etag_len + payload_len_compressed; // unlikely to overflow. file is trusted.
 
     if file_bytes.len() != expected_total {
         return Err(CatalogError::CacheFormat(
@@ -205,7 +212,9 @@ pub(crate) async fn write_cache_file(
     }
 
     let file_bytes = fs::assume_init_u8_slice(uninit);
-    fs::write(path, &file_bytes).await?;
+    let temp_path = temp_cache_path(path);
+    fs::write(&temp_path, &file_bytes).await?;
+    fs::rename(&temp_path, path).await?;
     Ok(())
 }
 
@@ -229,7 +238,7 @@ fn encode_prelude(prelude: CachePreludeV1) -> [u8; CACHE_HEADER_LEN] {
 /// Decodes prelude from little-endian bytes.
 #[inline]
 fn decode_prelude(bytes: &[u8]) -> CachePreludeV1 {
-    // SAFETY: Caller guarantees `bytes` is at least `CACHE_PRELUDE_LEN`.
+    // SAFETY: Caller guarantees `bytes` is at least `CACHE_HEADER_LEN`.
     unsafe {
         let mut reader = LittleEndianReader::new(bytes.as_ptr());
         reader.read()
@@ -381,4 +390,43 @@ mod tests {
             .expect_err("payload length mismatch should fail");
         assert!(matches!(error, CatalogError::CacheFormat(_)));
     }
+
+    // Verifies atomic replacement replaces existing cache file content.
+    #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))]
+    async fn write_replaces_existing_cache_atomically() {
+        let temp = TempDir::new().expect("tempdir");
+        let path = temp.path().join("atomic-test.cache");
+
+        // Write first payload
+        let first_input = CacheWriteInput {
+            etag: Some(b"etag-1"),
+            payload_compressed: b"first-payload",
+            payload_len_decompressed: 100,
+        };
+        write_cache_file(&path, &first_input)
+            .await
+            .expect("write first");
+
+        let first_data = read_cache_file(&path).await.expect("read first");
+        assert_eq!(first_data.etag_bytes(), Some(b"etag-1".as_slice()));
+        assert_eq!(first_data.payload_compressed(), b"first-payload");
+
+        // Write second payload (atomic replacement)
+        let second_input = CacheWriteInput {
+            etag: Some(b"etag-2"),
+            payload_compressed: b"second-payload-different",
+            payload_len_decompressed: 200,
+        };
+        write_cache_file(&path, &second_input)
+            .await
+            .expect("write second");
+
+        let second_data = read_cache_file(&path).await.expect("read second");
+        assert_eq!(second_data.etag_bytes(), Some(b"etag-2".as_slice()));
+        assert_eq!(
+            second_data.payload_compressed(),
+            b"second-payload-different"
+        );
+        assert_eq!(second_data.payload_len_decompressed(), 200);
+    }
 }
diff --git a/src/llm-coding-tools-models-dev/src/cache/mod.rs b/src/llm-coding-tools-models-dev/src/cache/mod.rs
index 966419c9..695f7660 100644
--- a/src/llm-coding-tools-models-dev/src/cache/mod.rs
+++ b/src/llm-coding-tools-models-dev/src/cache/mod.rs
@@ -15,6 +15,7 @@
 #[allow(dead_code)] // Wired into the load/sync path down the road
 pub(crate) mod format;
 mod path;
+pub(crate) mod payload;
 
 pub use crate::error::CatalogResult;
-pub use path::shared_cache_path;
+pub use path::{shared_cache_path, CACHE_PATH_ENV_VAR};
diff --git a/src/llm-coding-tools-models-dev/src/cache/payload.rs b/src/llm-coding-tools-models-dev/src/cache/payload.rs
new file mode 100644
index 00000000..bf5bf25a
--- /dev/null
+++ b/src/llm-coding-tools-models-dev/src/cache/payload.rs
@@ -0,0 +1,205 @@
+use crate::error::{CatalogError, CatalogResult};
+use llm_coding_tools_core::models::{
+    Modality, ModelCatalog, ModelInfo, ProviderIdx, ProviderInfo, ProviderModelSource,
+    ProviderSource, ProviderType,
+};
+
+#[derive(Debug, Clone, PartialEq, bitcode::Encode, bitcode::Decode)]
+pub(crate) struct CatalogCachePayload {
+    pub(crate) providers: Vec<CachedProviderRow>,
+    pub(crate) models: Vec<CachedModelRow>,
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, bitcode::Encode, bitcode::Decode)]
+pub(crate) struct CachedProviderRow {
+    pub(crate) provider_key: String,
+    pub(crate) api_url: String,
+    pub(crate) env_vars: Vec<String>,
+    pub(crate) api_type: ProviderType,
+}
+
+#[derive(Debug, Clone, PartialEq, bitcode::Encode, bitcode::Decode)]
+pub(crate) struct CachedModelRow {
+    pub(crate) provider_idx: ProviderIdx,
+    pub(crate) model_key: String,
+    pub(crate) modalities_bits: u8,
+    pub(crate) max_input: u32,
+    pub(crate) max_output: u32,
+    pub(crate) temperature: Option<f32>,
+    pub(crate) top_p: Option<f32>,
+}
+
+pub(crate) fn encode_cache_payload(payload: &CatalogCachePayload) -> Vec<u8> {
+    bitcode::encode(payload)
+}
+
+pub(crate) fn decode_cache_payload(bytes: &[u8]) -> CatalogResult<CatalogCachePayload> {
+    bitcode::decode(bytes).map_err(|error| CatalogError::BitcodeDecode(error.to_string()))
+}
+
+pub(crate) fn catalog_from_cache_payload(
+    payload: CatalogCachePayload,
+) -> CatalogResult<ModelCatalog> {
+    let CatalogCachePayload { providers, models } = payload;
+
+    let mut provider_sources = Vec::with_capacity(providers.len());
+    for row in providers {
+        provider_sources.push(ProviderSource {
+            provider_key: row.provider_key,
+            provider: ProviderInfo {
+                api_url: row.api_url,
+                env_vars: row.env_vars,
+                api_type: row.api_type,
+            },
+        });
+    }
+
+    let mut model_sources = Vec::with_capacity(models.len());
+    for row in &models {
+        let provider_source =
+            provider_sources
+                .get(row.provider_idx.as_usize())
+                .ok_or(CatalogError::CacheFormat(
+                    "provider index out of range in cache payload",
+                ))?;
+
+        model_sources.push(ProviderModelSource {
+            provider_key: provider_source.provider_key.as_str(),
+            model_key: row.model_key.as_str(),
+            model: ModelInfo {
+                modalities: Modality::from_bits_retain(row.modalities_bits),
+                max_input: row.max_input,
+                max_output: row.max_output,
+                temperature: row.temperature,
+                top_p: row.top_p,
+            },
+        });
+    }
+
+    Ok(ModelCatalog::build(&provider_sources, &model_sources)?)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn sample_payload() -> CatalogCachePayload {
+        CatalogCachePayload {
+            providers: vec![CachedProviderRow {
+                provider_key: "openai".to_string(),
+                api_url: "https://api.openai.com/v1".to_string(),
+                env_vars: vec!["OPENAI_API_KEY".to_string()],
+                api_type: ProviderType::OpenAiCompletions,
+            }],
+            models: vec![CachedModelRow {
+                provider_idx: ProviderIdx::new(0),
+                model_key: "gpt-4".to_string(),
+                modalities_bits: Modality::TEXT.bits(),
+                max_input: 8192,
+                max_output: 4096,
+                temperature: Some(0.7),
+                top_p: Some(0.9),
+            }],
+        }
+    }
+
+    #[test]
+    fn payload_round_trip() {
+        let original = sample_payload();
+        let encoded = encode_cache_payload(&original);
+        let decoded = decode_cache_payload(&encoded).expect("decode should succeed");
+        assert_eq!(original, decoded);
+    }
+
+    #[test]
+    fn catalog_from_payload_reconstructs_provider() {
+        let payload = sample_payload();
+        let catalog = catalog_from_cache_payload(payload).expect("catalog build should succeed");
+
+        let provider = catalog
+            .lookup_provider("openai")
+            .expect("provider should exist");
+        assert_eq!(provider.api_url, "https://api.openai.com/v1");
+        assert_eq!(provider.api_type, ProviderType::OpenAiCompletions);
+    }
+
+    #[test]
+    fn catalog_from_payload_reconstructs_model() {
+        let payload = sample_payload();
+        let catalog = catalog_from_cache_payload(payload).expect("catalog build should succeed");
+
+        let model = catalog
+            .lookup_provider_model("openai", "gpt-4")
+            .expect("model should exist");
+        assert_eq!(model.max_input, 8192);
+        assert_eq!(model.max_output, 4096);
+        assert_eq!(model.modalities, Modality::TEXT);
+    }
+
+    #[test]
+    fn catalog_from_payload_rejects_out_of_range_provider_idx() {
+        let payload = CatalogCachePayload {
+            providers: vec![CachedProviderRow {
+                provider_key: "test".to_string(),
+                api_url: "".to_string(),
+                env_vars: vec![],
+                api_type: ProviderType::Unknown,
+            }],
+            models: vec![CachedModelRow {
+                provider_idx: ProviderIdx::new(999),
+                model_key: "model".to_string(),
+                modalities_bits: Modality::TEXT.bits(),
+                max_input: 0,
+                max_output: 0,
+                temperature: None,
+                top_p: None,
+            }],
+        };
+
+        let result = catalog_from_cache_payload(payload);
+        assert!(matches!(result, Err(CatalogError::CacheFormat(_))));
+    }
+
+    #[test]
+    fn all_known_provider_types_round_trip() {
+        let types = [
+            ProviderType::Unknown,
+            ProviderType::OpenAiCompletions,
+            ProviderType::OpenAiResponses,
+            ProviderType::Anthropic,
+            ProviderType::Google,
+            ProviderType::Groq,
+            ProviderType::Mistral,
+            ProviderType::Ollama,
+            ProviderType::Bedrock,
+            ProviderType::Azure,
+            ProviderType::OpenRouter,
+            ProviderType::HuggingFace,
+            ProviderType::Cohere,
+            ProviderType::ChatGptOAuth,
+            ProviderType::ClaudeCodeOAuth,
+            ProviderType::Antigravity,
+        ];
+
+        for provider_type in types {
+            let payload = CatalogCachePayload {
+                providers: vec![CachedProviderRow {
+                    provider_key: "test".to_string(),
+                    api_url: "".to_string(),
+                    env_vars: vec![],
+                    api_type: provider_type,
+                }],
+                models: vec![],
+            };
+
+            let catalog = catalog_from_cache_payload(payload).expect("should succeed");
+            let provider = catalog
+                .lookup_provider("test")
+                .expect("provider should exist");
+            assert_eq!(
+                provider.api_type, provider_type,
+                "provider type should round-trip correctly"
+            );
+        }
+    }
+}
diff --git a/src/llm-coding-tools-models-dev/src/catalog/load_cache.rs b/src/llm-coding-tools-models-dev/src/catalog/load_cache.rs
new file mode 100644
index 00000000..bc4f79c3
--- /dev/null
+++ b/src/llm-coding-tools-models-dev/src/catalog/load_cache.rs
@@ -0,0 +1,89 @@
+use crate::cache::format::CacheFileData;
+use crate::cache::payload::{catalog_from_cache_payload, decode_cache_payload};
+use crate::catalog::load_result::{CatalogLoadResult, CatalogLoadSource};
+use crate::error::{CatalogError, CatalogResult};
+
+pub(crate) fn load_catalog_from_cache_file_data(
+    cache_file: &CacheFileData,
+    source: CatalogLoadSource,
+) -> CatalogResult<CatalogLoadResult> {
+    let expected_len = cache_file.payload_len_decompressed() as usize;
+    let decoded = zstd::bulk::decompress(cache_file.payload_compressed(), expected_len)
+        .map_err(|error| CatalogError::Zstd(error.to_string()))?;
+    if decoded.len() != expected_len {
+        return Err(CatalogError::CacheFormat(
+            "cache payload length mismatch after decompression",
+        ));
+    }
+
+    let payload = decode_cache_payload(&decoded)?;
+    let catalog = catalog_from_cache_payload(payload)?;
+    Ok(CatalogLoadResult { catalog, source })
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::cache::format::{write_cache_file, CacheWriteInput};
+    use crate::cache::payload::{
+        encode_cache_payload, CachedModelRow, CachedProviderRow, CatalogCachePayload,
+    };
+    use llm_coding_tools_core::models::{Modality, ProviderIdx, ProviderType};
+    use tempfile::TempDir;
+
+    fn sample_payload() -> CatalogCachePayload {
+        CatalogCachePayload {
+            providers: vec![CachedProviderRow {
+                provider_key: "test".to_string(),
+                api_url: "https://test.example".to_string(),
+                env_vars: vec![],
+                api_type: ProviderType::OpenAiCompletions,
+            }],
+            models: vec![CachedModelRow {
+                provider_idx: ProviderIdx::new(0),
+                model_key: "model1".to_string(),
+                modalities_bits: Modality::TEXT.bits(),
+                max_input: 4096,
+                max_output: 2048,
+                temperature: None,
+                top_p: None,
+            }],
+        }
+    }
+
+    #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))]
+    async fn round_trip_through_cache_file() {
+        let temp = TempDir::new().expect("tempdir");
+        let path = temp.path().join("test.cache");
+
+        let payload = sample_payload();
+        let encoded = encode_cache_payload(&payload);
+        let compressed =
+            zstd::bulk::compress(&encoded, zstd::DEFAULT_COMPRESSION_LEVEL).expect("compress");
+
+        write_cache_file(
+            &path,
+            &CacheWriteInput {
+                etag: Some(b"test-etag"),
+                payload_compressed: &compressed,
+                payload_len_decompressed: encoded.len(),
+            },
+        )
+        .await
+        .expect("write cache");
+
+        let cache_file = crate::cache::format::read_cache_file(&path)
+            .await
+            .expect("read cache");
+        let result =
+            load_catalog_from_cache_file_data(&cache_file, CatalogLoadSource::NotModifiedCache)
+                .expect("load from cache");
+
+        assert_eq!(result.source, CatalogLoadSource::NotModifiedCache);
+        let provider = result
+            .catalog
+            .lookup_provider("test")
+            .expect("provider should exist");
+        assert_eq!(provider.api_type, ProviderType::OpenAiCompletions);
+    }
+}
diff --git a/src/llm-coding-tools-models-dev/src/catalog/mod.rs b/src/llm-coding-tools-models-dev/src/catalog/mod.rs
index dca3b904..1764915d 100644
--- a/src/llm-coding-tools-models-dev/src/catalog/mod.rs
+++ b/src/llm-coding-tools-models-dev/src/catalog/mod.rs
@@ -5,10 +5,16 @@
 //! - Reuse cache on `304 Not Modified`
 //! - Fall back to cached data if the network path fails
 
+mod load_cache;
 mod load_result;
+mod sync;
+
+#[cfg(test)]
+mod test_utils;
 
 pub use load_result::{CatalogLoadResult, CatalogLoadSource};
 
+use crate::cache::shared_cache_path;
 use crate::error::CatalogError;
 use std::path::Path;
 
@@ -74,7 +80,8 @@ impl ModelsDevCatalog {
     /// ```
     #[maybe_async::maybe_async]
     pub async fn load() -> Result<CatalogLoadResult, CatalogError> {
-        todo!("ModelsDevCatalog::load() not yet implemented")
+        let path = shared_cache_path()?;
+        Self::load_at(path).await
     }
 
     /// Loads the catalog from a specific cache file path.
@@ -132,7 +139,77 @@ impl ModelsDevCatalog {
     /// ```
     #[maybe_async::maybe_async]
     pub async fn load_at(path: impl AsRef<Path>) -> Result<CatalogLoadResult, CatalogError> {
-        let _path = path.as_ref();
-        todo!("ModelsDevCatalog::load_at() not yet implemented")
+        sync::load_catalog_at_path(path.as_ref()).await
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::cache::CACHE_PATH_ENV_VAR;
+    use llm_coding_tools_core::models::ProviderType;
+    use tempfile::TempDir;
+
+    /// Guard that restores environment variables on drop
+    struct EnvGuard {
+        cache_path_var: Option<String>,
+    }
+
+    impl EnvGuard {
+        fn new(value: Option<&str>) -> Self {
+            let cache_path_var = std::env::var(CACHE_PATH_ENV_VAR).ok();
+            match value {
+                Some(v) => std::env::set_var(CACHE_PATH_ENV_VAR, v),
+                None => std::env::remove_var(CACHE_PATH_ENV_VAR),
+            }
+            Self { cache_path_var }
+        }
+    }
+
+    impl Drop for EnvGuard {
+        fn drop(&mut self) {
+            // Clear test URL override
+            super::sync::set_test_models_dev_api_url(None);
+
+            // Restore or remove cache path env var
+            match &self.cache_path_var {
+                Some(v) => std::env::set_var(CACHE_PATH_ENV_VAR, v),
+                None => std::env::remove_var(CACHE_PATH_ENV_VAR),
+            }
+        }
+    }
+
+    use super::test_utils::{sample_api_json, start_mock_server, MockResponse};
+
+    #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))]
+    #[serial_test::serial]
+    async fn facade_load_uses_shared_cache_path() {
+        let temp = TempDir::new().expect("tempdir");
+        let cache_path = temp.path().join("facade-test.cache");
+        let _guard = EnvGuard::new(Some(cache_path.to_str().unwrap()));
+
+        // Start mock server and set URL override
+        let body = String::from_utf8_lossy(sample_api_json()).to_string();
+        let (_handle, url) = start_mock_server(MockResponse::Ok {
+            etag: "\"facade-test-etag\"",
+            body,
+        });
+        super::sync::set_test_models_dev_api_url(Some(url));
+
+        // Call public facade
+        let result = ModelsDevCatalog::load().await.expect("load should succeed");
+
+        assert_eq!(result.source, CatalogLoadSource::Downloaded);
+        let provider = result
+            .catalog
+            .lookup_provider("openai")
+            .expect("openai provider should exist");
+        assert_eq!(provider.api_type, ProviderType::OpenAiCompletions);
+
+        // Verify cache was written
+        assert!(
+            cache_path.exists(),
+            "cache file should exist at shared path"
+        );
     }
 }
diff --git a/src/llm-coding-tools-models-dev/src/catalog/sync.rs b/src/llm-coding-tools-models-dev/src/catalog/sync.rs
new file mode 100644
index 00000000..8d2b7cc5
--- /dev/null
+++ b/src/llm-coding-tools-models-dev/src/catalog/sync.rs
@@ -0,0 +1,210 @@
+use crate::api::catalog_sources::cache_payload_from_api_json_bytes;
+use crate::cache::format::{read_cache_file, write_cache_file, CacheWriteInput};
+use crate::cache::payload::{catalog_from_cache_payload, encode_cache_payload};
+use crate::catalog::load_cache::load_catalog_from_cache_file_data;
+use crate::catalog::load_result::{CatalogLoadResult, CatalogLoadSource};
+use crate::error::{CatalogError, CatalogResult};
+use reqwest::header::{ETAG, IF_NONE_MATCH};
+use reqwest::StatusCode;
+use std::borrow::Cow;
+use std::io::ErrorKind;
+use std::path::Path;
+
+const MODELS_DEV_API_URL: &str = "https://models.dev/api.json";
+
+#[cfg(test)]
+static TEST_MODELS_DEV_API_URL: std::sync::Mutex<Option<String>> = std::sync::Mutex::new(None);
+
+#[cfg(test)]
+pub(crate) fn set_test_models_dev_api_url(url: Option<String>) {
+    *TEST_MODELS_DEV_API_URL.lock().unwrap() = url;
+}
+
+fn models_dev_api_url() -> Cow<'static, str> {
+    #[cfg(test)]
+    if let Some(url) = TEST_MODELS_DEV_API_URL.lock().unwrap().clone() {
+        return Cow::Owned(url);
+    }
+
+    Cow::Borrowed(MODELS_DEV_API_URL)
+}
+
+#[maybe_async::maybe_async]
+pub(crate) async fn load_catalog_at_path(path: &Path) -> CatalogResult<CatalogLoadResult> {
+    let url = models_dev_api_url();
+    load_catalog_from_url(path, url.as_ref()).await
+}
+
+#[maybe_async::maybe_async]
+pub(crate) async fn load_catalog_from_url(
+    path: &Path,
+    url: &str,
+) -> CatalogResult<CatalogLoadResult> {
+    let mut cache_file = None;
+    let mut cache_error = None;
+    match read_cache_file(path).await {
+        Ok(file) => cache_file = Some(file),
+        Err(CatalogError::Io(error)) if error.kind() == ErrorKind::NotFound => {}
+        Err(error) => cache_error = Some(error),
+    }
+
+    #[cfg(feature = "tokio")]
+    let client = reqwest::Client::new();
+    #[cfg(feature = "blocking")]
+    let client = reqwest::blocking::Client::new();
+
+    let mut request = client.get(url);
+    if let Some(etag) = cache_file.as_ref().and_then(|file| file.etag_bytes()) {
+        request = request.header(IF_NONE_MATCH, etag);
+    }
+
+    let response = request.send().await?;
+    match response.status() {
+        StatusCode::OK => {
+            let response_etag: Option<Vec<u8>> = response
+                .headers()
+                .get(ETAG)
+                .map(|value| value.as_bytes().to_vec());
+            let body = response.bytes().await?;
+            let payload = cache_payload_from_api_json_bytes(body.as_ref())?;
+            let payload_encoded = encode_cache_payload(&payload);
+            let catalog = catalog_from_cache_payload(payload)?;
+            let payload_compressed =
+                zstd::bulk::compress(payload_encoded.as_slice(), zstd::DEFAULT_COMPRESSION_LEVEL)
+                    .map_err(|error| CatalogError::Zstd(error.to_string()))?;
+
+            write_cache_file(
+                path,
+                &CacheWriteInput {
+                    etag: response_etag.as_deref(),
+                    payload_compressed: &payload_compressed,
+                    payload_len_decompressed: payload_encoded.len(),
+                },
+            )
+            .await?;
+
+            Ok(CatalogLoadResult {
+                catalog,
+                source: CatalogLoadSource::Downloaded,
+            })
+        }
+        StatusCode::NOT_MODIFIED => {
+            if let Some(cache_file) = cache_file.as_ref() {
+                load_catalog_from_cache_file_data(cache_file, CatalogLoadSource::NotModifiedCache)
+            } else if let Some(error) = cache_error {
+                Err(error)
+            } else {
+                Err(CatalogError::CacheFormat(
+                    "received 304 but no cached payload is available",
+                ))
+            }
+        }
+        status => Err(CatalogError::Configuration(format!(
+            "unexpected catalog sync status: {status}",
+        ))),
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::super::test_utils::{sample_api_json, start_mock_server, MockResponse};
+    use super::*;
+    use crate::cache::format::CacheWriteInput;
+    use crate::cache::payload::{
+        encode_cache_payload, CachedModelRow, CachedProviderRow, CatalogCachePayload,
+    };
+    use llm_coding_tools_core::models::{Modality, ProviderIdx, ProviderType};
+    use tempfile::TempDir;
+
+    #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))]
+    async fn sync_returns_downloaded_on_200() {
+        let temp = TempDir::new().expect("tempdir");
+        let cache_path = temp.path().join("test.cache");
+
+        // Start mock server returning 200 OK with fresh catalog data
+        let body = String::from_utf8_lossy(sample_api_json()).to_string();
+        let (_handle, url) = start_mock_server(MockResponse::Ok {
+            etag: "\"test-etag-123\"",
+            body,
+        });
+
+        let result = load_catalog_from_url(&cache_path, &url)
+            .await
+            .expect("sync should succeed");
+
+        // Verify source is Downloaded (not from cache)
+        assert_eq!(result.source, CatalogLoadSource::Downloaded);
+        let provider = result
+            .catalog
+            .lookup_provider("openai")
+            .expect("openai provider should exist");
+        assert_eq!(provider.api_type, ProviderType::OpenAiCompletions);
+        assert_eq!(provider.api_url, "https://api.openai.com/v1");
+
+        // Verify cache file was written with the ETag from response
+        let cache_file = read_cache_file(&cache_path)
+            .await
+            .expect("cache should exist");
+        assert_eq!(
+            cache_file.etag_bytes(),
+            Some(b"\"test-etag-123\"".as_slice())
+        );
+    }
+
+    #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))]
+    async fn sync_returns_cached_on_304_with_if_none_match() {
+        let temp = TempDir::new().expect("tempdir");
+        let cache_path = temp.path().join("test.cache");
+
+        // Pre-seed cache with a valid catalog payload
+        let payload = CatalogCachePayload {
+            providers: vec![CachedProviderRow {
+                provider_key: "openai".to_string(),
+                api_url: "https://api.openai.com/v1".to_string(),
+                env_vars: vec!["OPENAI_API_KEY".to_string()],
+                api_type: ProviderType::OpenAiCompletions,
+            }],
+            models: vec![CachedModelRow {
+                provider_idx: ProviderIdx::new(0),
+                model_key: "gpt-4".to_string(),
+                modalities_bits: Modality::TEXT.bits(),
+                max_input: 8192,
+                max_output: 4096,
+                temperature: None,
+                top_p: None,
+            }],
+        };
+        let encoded = encode_cache_payload(&payload);
+        let compressed =
+            zstd::bulk::compress(&encoded, zstd::DEFAULT_COMPRESSION_LEVEL).expect("compress");
+
+        // Write the seeded cache file with ETag
+        crate::cache::format::write_cache_file(
+            &cache_path,
+            &CacheWriteInput {
+                etag: Some(b"\"cached-etag-456\""),
+                payload_compressed: &compressed,
+                payload_len_decompressed: encoded.len(),
+            },
+        )
+        .await
+        .expect("seed cache");
+
+        // Server returns 304 Not Modified (ETag matches If-None-Match)
+        let (_handle, url) = start_mock_server(MockResponse::NotModified {
+            etag: "\"cached-etag-456\"",
+        });
+
+        let result = load_catalog_from_url(&cache_path, &url)
+            .await
+            .expect("sync should succeed");
+
+        // Verify source is NotModifiedCache (loaded from local file)
+        assert_eq!(result.source, CatalogLoadSource::NotModifiedCache);
+        let provider = result
+            .catalog
+            .lookup_provider("openai")
+            .expect("openai provider should exist");
+        assert_eq!(provider.api_type, ProviderType::OpenAiCompletions);
+    }
+}
diff --git a/src/llm-coding-tools-models-dev/src/catalog/test_utils.rs b/src/llm-coding-tools-models-dev/src/catalog/test_utils.rs
new file mode 100644
index 00000000..c6bec11b
--- /dev/null
+++ b/src/llm-coding-tools-models-dev/src/catalog/test_utils.rs
@@ -0,0 +1,77 @@
+use std::io::{BufRead, Write};
+
+pub enum MockResponse {
+    Ok { etag: &'static str, body: String },
+    NotModified { etag: &'static str },
+}
+
+pub fn sample_api_json() -> &'static [u8] {
+    br#"
+        {
+            "openai": {
+                "id": "openai",
+                "npm": "@ai-sdk/openai",
+                "api": "https://api.openai.com/v1",
+                "env": ["OPENAI_API_KEY"],
+                "models": {
+                    "gpt-4": {
+                        "modalities": {
+                            "input": ["text"],
+                            "output": ["text"]
+                        },
+                        "limit": {
+                            "context": 8192,
+                            "input": 8192,
+                            "output": 4096
+                        }
+                    }
+                }
+            }
+        }
+        "#
+}
+
+pub fn start_mock_server(response: MockResponse) -> (std::thread::JoinHandle<()>, String) {
+    let listener = std::net::TcpListener::bind("127.0.0.1:0").expect("bind");
+    let port = listener.local_addr().unwrap().port();
+    let url = format!("http://127.0.0.1:{}/api.json", port);
+
+    let handle = std::thread::spawn(move || {
+        let (mut stream, _) = listener.accept().expect("accept");
+        let mut reader = std::io::BufReader::new(&stream);
+        let mut request = String::new();
+
+        loop {
+            let mut line = String::new();
+            if reader.read_line(&mut line).expect("read line") == 0 {
+                break;
+            }
+            if line == "\r\n" || line.is_empty() {
+                break;
+            }
+            request.push_str(&line);
+        }
+
+        let _has_if_none_match = request.contains("If-None-Match");
+
+        match response {
+            MockResponse::Ok { etag, body } => {
+                let response = format!(
+                    "HTTP/1.1 200 OK\r\nContent-Type: application/json\r\nETag: {}\r\nContent-Length: {}\r\n\r\n{}",
+                    etag,
+                    body.len(),
+                    body
+                );
+                stream.write_all(response.as_bytes()).expect("write");
+                stream.flush().expect("flush");
+            }
+            MockResponse::NotModified { etag } => {
+                let response = format!("HTTP/1.1 304 Not Modified\r\nETag: {}\r\n\r\n", etag);
+                stream.write_all(response.as_bytes()).expect("write");
+                stream.flush().expect("flush");
+            }
+        }
+    });
+
+    (handle, url)
+}
diff --git a/src/llm-coding-tools-models-dev/src/fs/blocking_impl.rs b/src/llm-coding-tools-models-dev/src/fs/blocking_impl.rs
index 5bfbc808..d35ffbec 100644
--- a/src/llm-coding-tools-models-dev/src/fs/blocking_impl.rs
+++ b/src/llm-coding-tools-models-dev/src/fs/blocking_impl.rs
@@ -32,3 +32,9 @@ pub(crate) fn write(path: impl AsRef<Path>, bytes: &[u8]) -> std::io::Result<()>
 pub(crate) fn create_dir_all(path: impl AsRef<Path>) -> std::io::Result<()> {
     std::fs::create_dir_all(path)
 }
+
+/// Renames a file, replacing the destination if it exists.
+#[inline]
+pub(crate) fn rename(from: impl AsRef<Path>, to: impl AsRef<Path>) -> std::io::Result<()> {
+    std::fs::rename(from, to)
+}
diff --git a/src/llm-coding-tools-models-dev/src/fs/tokio_impl.rs b/src/llm-coding-tools-models-dev/src/fs/tokio_impl.rs
index 53474ce1..830d29e9 100644
--- a/src/llm-coding-tools-models-dev/src/fs/tokio_impl.rs
+++ b/src/llm-coding-tools-models-dev/src/fs/tokio_impl.rs
@@ -33,3 +33,9 @@ pub(crate) async fn write(path: impl AsRef<Path>, bytes: &[u8]) -> std::io::Resu
 pub(crate) async fn create_dir_all(path: impl AsRef<Path>) -> std::io::Result<()> {
     tokio::fs::create_dir_all(path).await
 }
+
+/// Renames a file, replacing the destination if it exists.
+#[inline]
+pub(crate) async fn rename(from: impl AsRef<Path>, to: impl AsRef<Path>) -> std::io::Result<()> {
+    tokio::fs::rename(from, to).await
+}

From 2929b8ee193b937226dedc6943c21fc0556162f6 Mon Sep 17 00:00:00 2001
From: Sewer56 <admin@sewer56.dev>
Date: Sat, 7 Mar 2026 00:01:17 +0000
Subject: [PATCH 07/22] Changed: Replace ProviderModelSource.provider_key with
 ProviderIdx

Replace string-based provider lookup with index-based lookup in the
catalog builder hot path, moving the lookup cost out of the builder.

Changes:
- ProviderModelSource now stores provider_idx: ProviderIdx instead of provider_key: &'a str
- Builder validates provider existence via index bounds check instead of HashMap lookup
- Duplicate detection uses (ProviderIdx, &str) instead of (&str, &str)
- Renamed ProviderKeyNotFoundForModel error to ProviderIdxOutOfRangeForModel
- Updated benchmark to pass ProviderIdx directly
- Updated models-dev cache payload to use stored provider_idx directly

Benefits:
- Eliminates per-model HashMap lookup in builder hot path
- ~16-20% faster catalog construction (measured via benchmark)
- Simpler validation logic using slice bounds checking
---
 .../benches/model_catalog_builder.rs          |  13 +-
 .../src/models/catalog/internal/builder.rs    | 121 +++++++++---------
 .../src/models/catalog/mod.rs                 |  17 ++-
 .../models/catalog/public/builder_types.rs    |  42 +++---
 .../src/cache/payload.rs                      |  18 +--
 5 files changed, 116 insertions(+), 95 deletions(-)

diff --git a/src/llm-coding-tools-core/benches/model_catalog_builder.rs b/src/llm-coding-tools-core/benches/model_catalog_builder.rs
index b9fcdd7d..d772ec34 100644
--- a/src/llm-coding-tools-core/benches/model_catalog_builder.rs
+++ b/src/llm-coding-tools-core/benches/model_catalog_builder.rs
@@ -3,12 +3,12 @@
 use core::hint::black_box;
 use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
 use llm_coding_tools_core::models::{
-    Modality, ModelCatalog, ModelInfo, ProviderInfo, ProviderModelSource, ProviderSource,
-    ProviderType,
+    Modality, ModelCatalog, ModelInfo, ProviderIdx, ProviderInfo, ProviderModelSource,
+    ProviderSource, ProviderType,
 };
 
 struct ProviderModelSpec {
-    provider_idx: usize,
+    provider_idx: ProviderIdx,
     model_key: String,
     model: ModelInfo,
 }
@@ -22,11 +22,8 @@ impl Dataset {
     fn provider_model_sources(&self) -> Vec<ProviderModelSource<'_>> {
         let mut sources = Vec::with_capacity(self.provider_models.len());
         for provider_model in &self.provider_models {
-            let provider_key = self.providers[provider_model.provider_idx]
-                .provider_key
-                .as_str();
             sources.push(ProviderModelSource::new(
-                provider_key,
+                provider_model.provider_idx,
                 provider_model.model_key.as_str(),
                 provider_model.model,
             ));
@@ -57,7 +54,7 @@ fn make_dataset(provider_count: usize, model_count: usize) -> Dataset {
     let mut provider_models = Vec::with_capacity(model_count);
     let unique_cfg_count = (model_count / 5).max(1);
     for i in 0..model_count {
-        let provider_idx = i % provider_count;
+        let provider_idx = ProviderIdx::new((i % provider_count) as u16);
         let cfg = i % unique_cfg_count;
         let temperature = if (cfg & 1) == 0 {
             Some(1.0 + ((cfg % 5000) as f32 * 0.001))
diff --git a/src/llm-coding-tools-core/src/models/catalog/internal/builder.rs b/src/llm-coding-tools-core/src/models/catalog/internal/builder.rs
index 02a412d4..d00b333c 100644
--- a/src/llm-coding-tools-core/src/models/catalog/internal/builder.rs
+++ b/src/llm-coding-tools-core/src/models/catalog/internal/builder.rs
@@ -93,59 +93,50 @@ fn populate_tables_once(
     provider_models: &[ProviderModelSource<'_>],
 ) -> Result<(), ModelCatalogBuildError> {
     let mut env_start: u16 = 0;
-    let mut provider_idx_by_key: AHashMap<&str, ProviderIdx> =
-        AHashMap::with_capacity(providers.len());
-    let mut seen_provider_models: AHashSet<(&str, &str)> =
+    let mut seen_provider_keys: AHashSet<&str> = AHashSet::with_capacity(providers.len());
+    let mut seen_provider_models: AHashSet<(ProviderIdx, &str)> =
         AHashSet::with_capacity(provider_models.len());
 
     for provider in providers {
         let provider_info = &provider.provider;
         let env_count = provider_info.env_vars.len() as u8;
 
-        match provider_idx_by_key.entry(provider.provider_key.as_str()) {
-            MapEntry::Occupied(_) => {
-                return Err(ModelCatalogBuildError::DuplicateKey {
-                    table: LookupTableKind::Provider,
-                    key: provider.provider_key.clone(),
-                });
-            }
-            MapEntry::Vacant(e) => {
-                let provider_idx = insert_provider(
-                    state,
-                    &provider.provider_key,
-                    env_start,
-                    env_count,
-                    provider_info.api_type,
-                )?;
-                e.insert(provider_idx);
-            }
+        if !seen_provider_keys.insert(provider.provider_key.as_str()) {
+            return Err(ModelCatalogBuildError::DuplicateKey {
+                table: LookupTableKind::Provider,
+                key: provider.provider_key.clone(),
+            });
         }
 
+        insert_provider(
+            state,
+            &provider.provider_key,
+            env_start,
+            env_count,
+            provider_info.api_type,
+        )?;
+
         // SAFETY: analyze_provider_sources bounds env_start and env_count (<= 3).
         env_start += u16::from(env_count);
     }
 
     for provider_model in provider_models {
-        // Validate provider exists before inserting model.
-        if !provider_idx_by_key.contains_key(provider_model.provider_key) {
-            return Err(ModelCatalogBuildError::ProviderKeyNotFoundForModel {
-                provider_key: provider_model.provider_key.to_owned(),
+        let provider = providers
+            .get(provider_model.provider_idx.as_usize())
+            .ok_or(ModelCatalogBuildError::ProviderIdxOutOfRangeForModel {
+                provider_idx: provider_model.provider_idx,
                 model_key: provider_model.model_key.to_owned(),
-            });
-        }
+            })?;
 
-        // Check for duplicate (provider_key, model_key) pair.
-        let key = (provider_model.provider_key, provider_model.model_key);
+        // Check for duplicate (provider_idx, model_key) pair.
+        let key = (provider_model.provider_idx, provider_model.model_key);
         if !seen_provider_models.insert(key) {
             return Err(ModelCatalogBuildError::DuplicateKey {
                 table: LookupTableKind::ProviderModel,
-                key: format!(
-                    "{}/{}",
-                    provider_model.provider_key, provider_model.model_key
-                ),
+                key: format!("{}/{}", provider.provider_key, provider_model.model_key),
             });
         }
-        insert_provider_model(state, provider_model)?;
+        insert_provider_model(state, provider.provider_key.as_str(), provider_model)?;
     }
 
     Ok(())
@@ -194,6 +185,7 @@ fn insert_provider(
 #[inline]
 fn insert_provider_model(
     state: &mut BuildState,
+    provider_key: &str,
     provider_model: &ProviderModelSource<'_>,
 ) -> Result<(), ModelCatalogBuildError> {
     let info = provider_model.model;
@@ -235,11 +227,7 @@ fn insert_provider_model(
         }
     };
 
-    let key = hash_provider_model_key(
-        &state.hash_state,
-        provider_model.provider_key,
-        provider_model.model_key,
-    );
+    let key = hash_provider_model_key(&state.hash_state, provider_key, provider_model.model_key);
     let hash48 = PackedProviderModelTableEntry::truncate_hash48(key.as_u64());
 
     // Insert provider-model entry.
@@ -424,7 +412,7 @@ fn build_provider_env_key_table(
 mod tests {
     use super::build_from_source;
     use crate::models::catalog::{
-        LookupTableKind, Modality, ModelCatalogBuildError, ModelInfo, ProviderInfo,
+        LookupTableKind, Modality, ModelCatalogBuildError, ModelInfo, ProviderIdx, ProviderInfo,
         ProviderModelSource, ProviderSource,
     };
     use crate::models::ProviderType;
@@ -452,11 +440,11 @@ mod tests {
     }
 
     fn provider_model_source<'a>(
-        provider_key: &'a str,
+        provider_idx: ProviderIdx,
         model_key: &'a str,
         model: ModelInfo,
     ) -> ProviderModelSource<'a> {
-        ProviderModelSource::new(provider_key, model_key, model)
+        ProviderModelSource::new(provider_idx, model_key, model)
     }
 
     fn test_sources() -> (Vec<ProviderSource>, Vec<ProviderModelSource<'static>>) {
@@ -469,7 +457,11 @@ mod tests {
                     ProviderType::OpenAiCompletions,
                 ),
             )],
-            vec![provider_model_source("alpha", "m1", info(4096, 512))],
+            vec![provider_model_source(
+                ProviderIdx::new(0),
+                "m1",
+                info(4096, 512),
+            )],
         )
     }
 
@@ -496,7 +488,11 @@ mod tests {
                 provider("https://beta.example", &["BETA_KEY"], ProviderType::Azure),
             ),
         ];
-        let provider_models = vec![provider_model_source("alpha", "m1", info(4096, 512))];
+        let provider_models = vec![provider_model_source(
+            ProviderIdx::new(0),
+            "m1",
+            info(4096, 512),
+        )];
 
         match build_from_source(&providers, &provider_models) {
             Err(err) => {
@@ -519,8 +515,8 @@ mod tests {
             provider("https://alpha.example", &["ALPHA_KEY"], ProviderType::Azure),
         )];
         let provider_models = vec![
-            provider_model_source("alpha", "m1", info(4096, 512)),
-            provider_model_source("alpha", "m1", info(4096, 512)),
+            provider_model_source(ProviderIdx::new(0), "m1", info(4096, 512)),
+            provider_model_source(ProviderIdx::new(0), "m1", info(4096, 512)),
         ];
 
         match build_from_source(&providers, &provider_models) {
@@ -551,7 +547,7 @@ mod tests {
         ];
         let provider_models = vec![
             provider_model_source(
-                "alpha",
+                ProviderIdx::new(0),
                 "m1",
                 ModelInfo {
                     modalities: Modality::TEXT,
@@ -562,7 +558,7 @@ mod tests {
                 },
             ),
             provider_model_source(
-                "beta",
+                ProviderIdx::new(1),
                 "m1",
                 ModelInfo {
                     modalities: Modality::TEXT,
@@ -589,14 +585,18 @@ mod tests {
             "alpha",
             provider("https://alpha.example", &["ALPHA_KEY"], ProviderType::Azure),
         )];
-        let provider_models = vec![provider_model_source("beta", "m1", info(4096, 512))];
+        let provider_models = vec![provider_model_source(
+            ProviderIdx::new(1),
+            "m1",
+            info(4096, 512),
+        )];
 
         match build_from_source(&providers, &provider_models) {
             Err(err) => {
                 assert_eq!(
                     err,
-                    ModelCatalogBuildError::ProviderKeyNotFoundForModel {
-                        provider_key: "beta".to_string(),
+                    ModelCatalogBuildError::ProviderIdxOutOfRangeForModel {
+                        provider_idx: ProviderIdx::new(1),
                         model_key: "m1".to_string(),
                     }
                 );
@@ -615,7 +615,11 @@ mod tests {
                 ProviderType::Azure,
             ),
         )];
-        let provider_models = vec![provider_model_source("alpha", "m1", info(4096, 512))];
+        let provider_models = vec![provider_model_source(
+            ProviderIdx::new(0),
+            "m1",
+            info(4096, 512),
+        )];
 
         match build_from_source(&providers, &provider_models) {
             Err(err) => {
@@ -636,7 +640,7 @@ mod tests {
         let (providers, _) = test_sources();
         let max_output = super::MAX_OUTPUT_TOKENS;
         let provider_models = vec![provider_model_source(
-            "alpha",
+            ProviderIdx::new(0),
             "m1",
             info(4096, max_output.saturating_add(1)),
         )];
@@ -660,7 +664,7 @@ mod tests {
         let (providers, _) = test_sources();
         let max_input = super::MAX_INPUT_TOKENS;
         let provider_models = vec![provider_model_source(
-            "alpha",
+            ProviderIdx::new(0),
             "m1",
             info(max_input.saturating_add(1), 512),
         )];
@@ -695,7 +699,11 @@ mod tests {
             ));
         }
         let mut provider_models = Vec::with_capacity(1);
-        provider_models.push(provider_model_source("provider_0", "m1", info(4096, 512)));
+        provider_models.push(provider_model_source(
+            ProviderIdx::new(0),
+            "m1",
+            info(4096, 512),
+        ));
 
         match build_from_source(&providers, &provider_models) {
             Err(err) => {
@@ -726,9 +734,8 @@ mod tests {
                 ),
             ));
         }
-        let last_provider_key = format!("provider_{}", 5461usize);
         let provider_models = vec![ProviderModelSource::new(
-            &last_provider_key,
+            ProviderIdx::new(5461),
             "m1",
             info(4096, 512),
         )];
@@ -736,7 +743,7 @@ mod tests {
         let catalog =
             build_from_source(&providers, &provider_models).expect("boundary case should pass");
         let (provider, _) = catalog
-            .lookup(&last_provider_key, "m1")
+            .lookup("provider_5461", "m1")
             .expect("last provider should be addressable");
 
         assert_eq!(provider.env_vars(), &["VAR1", "VAR2", "VAR3"]);
diff --git a/src/llm-coding-tools-core/src/models/catalog/mod.rs b/src/llm-coding-tools-core/src/models/catalog/mod.rs
index 4d7095c6..d369d843 100644
--- a/src/llm-coding-tools-core/src/models/catalog/mod.rs
+++ b/src/llm-coding-tools-core/src/models/catalog/mod.rs
@@ -27,7 +27,7 @@
 //!
 //! - [`ModelCatalog::build`] - Batch builder entry point
 //! - [`ProviderSource`] - Provider key + metadata input
-//! - [`ProviderModelSource`] - Model key + metadata input for a provider
+//! - [`ProviderModelSource`] - Model key + metadata input keyed by [`ProviderIdx`] and model key
 //! - [`ModelInfo`] - Model metadata input (modalities, token limits, sampling)
 //! - [`ProviderInfo`] - Provider metadata input (API URL, env vars, type)
 //! - [`Modality`] - Content modality flags (text, image, audio, video)
@@ -286,14 +286,15 @@ impl ModelCatalog {
     /// # Parameters
     ///
     /// * `providers` - [`ProviderSource`] values keyed by provider identifier.
-    /// * `provider_models` - [`ProviderModelSource`] values keyed by provider and model.
+    /// * `provider_models` - [`ProviderModelSource`] values keyed by [`ProviderIdx`] and model key.
+    ///   The `provider_idx` must point at an element in the `providers` slice.
     ///
     /// # Errors
     ///
     /// Returns [`ModelCatalogBuildError`] when:
     /// - input exceeds supported numeric limits,
     /// - token limits cannot be represented in packed model entries,
-    /// - provider model sources reference unknown providers,
+    /// - provider model sources reference out-of-range provider indices,
     /// - or all seed-retry attempts still result in collisions.
     #[inline]
     pub fn build(
@@ -518,7 +519,7 @@ impl ModelCatalog {
 mod tests {
     use super::*;
     use crate::models::catalog::{
-        Modality, ModelInfo, ProviderInfo, ProviderModelSource, ProviderSource,
+        Modality, ModelInfo, ProviderIdx, ProviderInfo, ProviderModelSource, ProviderSource,
     };
 
     fn provider(api_url: &str, env_vars: &[&str], api_type: ProviderType) -> ProviderInfo {
@@ -565,7 +566,13 @@ mod tests {
         let provider_model_sources: Vec<ProviderModelSource<'_>> = provider_models
             .into_iter()
             .map(|(provider_key, model_key, info)| {
-                ProviderModelSource::new(provider_key, model_key, info)
+                let provider_idx = ProviderIdx::new(
+                    provider_sources
+                        .iter()
+                        .position(|provider| provider.provider_key == provider_key)
+                        .expect("provider key should exist") as u16,
+                );
+                ProviderModelSource::new(provider_idx, model_key, info)
             })
             .collect();
         ModelCatalog::build(&provider_sources, &provider_model_sources)
diff --git a/src/llm-coding-tools-core/src/models/catalog/public/builder_types.rs b/src/llm-coding-tools-core/src/models/catalog/public/builder_types.rs
index 28b45227..014b17b6 100644
--- a/src/llm-coding-tools-core/src/models/catalog/public/builder_types.rs
+++ b/src/llm-coding-tools-core/src/models/catalog/public/builder_types.rs
@@ -3,6 +3,7 @@
 //! [`ModelCatalog`]: crate::models::catalog::ModelCatalog
 
 use super::Modality;
+use super::ProviderIdx;
 use crate::models::ProviderType;
 use thiserror::Error;
 
@@ -81,14 +82,21 @@ impl From<(String, ProviderInfo)> for ProviderSource {
 /// This wrapper keeps builder input self-documenting and avoids tuple-position
 /// ambiguity at call sites.
 ///
-/// The keys are borrowed because the catalog builder hashes them during
-/// construction and does not retain them afterward. Callers must therefore keep
-/// the referenced strings alive until [`crate::models::catalog::ModelCatalog::build`]
+/// The `model_key` is borrowed because the catalog builder hashes it during
+/// construction and does not retain it afterward. Callers must therefore keep
+/// the referenced string alive until [`crate::models::catalog::ModelCatalog::build`]
 /// returns.
+///
+/// The `provider_idx` must correspond to an entry in the `providers` slice passed
+/// to [`ModelCatalog::build`].
+///
+/// [`ModelCatalog::build`]: crate::models::catalog::ModelCatalog::build
 #[derive(Debug, Clone, PartialEq)]
 pub struct ProviderModelSource<'a> {
-    /// Borrowed provider identifier used by lookups (for example, `"openai"`).
-    pub provider_key: &'a str,
+    /// Index into the `providers` slice passed to [`ModelCatalog::build`].
+    ///
+    /// [`ModelCatalog::build`]: crate::models::catalog::ModelCatalog::build
+    pub provider_idx: ProviderIdx,
     /// Borrowed model identifier used by lookups (for example, `"gpt-4"`).
     pub model_key: &'a str,
     /// Model metadata associated with [`Self::model_key`].
@@ -100,28 +108,30 @@ impl<'a> ProviderModelSource<'a> {
     ///
     /// # Parameters
     ///
-    /// * `provider_key` - Provider identifier used during provider lookup.
+    /// * `provider_idx` - Index into the `providers` slice passed to [`ModelCatalog::build`].
     /// * `model_key` - Model identifier used during model lookup for this provider.
     /// * `model` - Model metadata for this provider model.
     ///
     /// # Returns
     ///
     /// A new [`ProviderModelSource`].
+    ///
+    /// [`ModelCatalog::build`]: crate::models::catalog::ModelCatalog::build
     #[inline]
-    pub fn new(provider_key: &'a str, model_key: &'a str, model: ModelInfo) -> Self {
+    pub fn new(provider_idx: ProviderIdx, model_key: &'a str, model: ModelInfo) -> Self {
         Self {
-            provider_key,
+            provider_idx,
             model_key,
             model,
         }
     }
 }
 
-impl<'a> From<(&'a str, &'a str, ModelInfo)> for ProviderModelSource<'a> {
+impl<'a> From<(ProviderIdx, &'a str, ModelInfo)> for ProviderModelSource<'a> {
     #[inline]
-    fn from((provider_key, model_key, model): (&'a str, &'a str, ModelInfo)) -> Self {
+    fn from((provider_idx, model_key, model): (ProviderIdx, &'a str, ModelInfo)) -> Self {
         Self {
-            provider_key,
+            provider_idx,
             model_key,
             model,
         }
@@ -173,11 +183,11 @@ pub enum ModelCatalogBuildError {
         /// Maximum supported env vars for one provider.
         max: usize,
     },
-    /// A provider model source references a provider key that does not exist.
-    #[error("provider model source references unknown provider_key={provider_key:?} for model_key={model_key:?}")]
-    ProviderKeyNotFoundForModel {
-        /// Provider key from the provider model source.
-        provider_key: String,
+    /// A provider model source references a provider index that does not exist.
+    #[error("provider model source references out-of-range provider_idx={} for model_key={model_key:?}", provider_idx.as_usize())]
+    ProviderIdxOutOfRangeForModel {
+        /// Provider index from the provider model source.
+        provider_idx: ProviderIdx,
         /// Model key from the provider model source.
         model_key: String,
     },
diff --git a/src/llm-coding-tools-models-dev/src/cache/payload.rs b/src/llm-coding-tools-models-dev/src/cache/payload.rs
index bf5bf25a..d86d7caf 100644
--- a/src/llm-coding-tools-models-dev/src/cache/payload.rs
+++ b/src/llm-coding-tools-models-dev/src/cache/payload.rs
@@ -56,15 +56,8 @@ pub(crate) fn catalog_from_cache_payload(
 
     let mut model_sources = Vec::with_capacity(models.len());
     for row in &models {
-        let provider_source =
-            provider_sources
-                .get(row.provider_idx.as_usize())
-                .ok_or(CatalogError::CacheFormat(
-                    "provider index out of range in cache payload",
-                ))?;
-
         model_sources.push(ProviderModelSource {
-            provider_key: provider_source.provider_key.as_str(),
+            provider_idx: row.provider_idx,
             model_key: row.model_key.as_str(),
             model: ModelInfo {
                 modalities: Modality::from_bits_retain(row.modalities_bits),
@@ -138,6 +131,8 @@ mod tests {
 
     #[test]
     fn catalog_from_payload_rejects_out_of_range_provider_idx() {
+        use llm_coding_tools_core::models::ModelCatalogBuildError;
+
         let payload = CatalogCachePayload {
             providers: vec![CachedProviderRow {
                 provider_key: "test".to_string(),
@@ -157,7 +152,12 @@ mod tests {
         };
 
         let result = catalog_from_cache_payload(payload);
-        assert!(matches!(result, Err(CatalogError::CacheFormat(_))));
+        assert!(matches!(
+            result,
+            Err(CatalogError::ModelCatalogBuild(
+                ModelCatalogBuildError::ProviderIdxOutOfRangeForModel { .. }
+            ))
+        ));
     }
 
     #[test]

From 8b84247a662e38bd29bf05a2dfa4b7c6121a3a75 Mon Sep 17 00:00:00 2001
From: Sewer56 <admin@sewer56.dev>
Date: Fri, 6 Mar 2026 21:10:55 +0000
Subject: [PATCH 08/22] Added: Implicit fallback behavior for models.dev
 catalog loading

Implements cache-based fallback when network requests fail, improving
catalog loading reliability for offline or intermittent connectivity.

Changes:
- Added `load_after_request_failure` helper to handle network failures
- Modified `load_catalog_from_url` to catch send() failures and attempt cache fallback
- Added test: sync_returns_fallback_cache_on_request_failure_with_valid_cache
- Added test: sync_returns_request_error_when_request_fails_without_cache
- Added test: sync_returns_cache_error_when_request_fails_with_corrupt_cache

Benefits:
- On transient network/contact failure, load cache if available
- CatalogLoadSource populated accurately (FallbackCache)
- Automated tests for fallback success and no-cache failure
- All 31 tests pass in both tokio and blocking modes
---
 .../src/catalog/sync.rs                       | 100 +++++++++++++++++-
 1 file changed, 98 insertions(+), 2 deletions(-)

diff --git a/src/llm-coding-tools-models-dev/src/catalog/sync.rs b/src/llm-coding-tools-models-dev/src/catalog/sync.rs
index 8d2b7cc5..49b32098 100644
--- a/src/llm-coding-tools-models-dev/src/catalog/sync.rs
+++ b/src/llm-coding-tools-models-dev/src/catalog/sync.rs
@@ -1,5 +1,5 @@
 use crate::api::catalog_sources::cache_payload_from_api_json_bytes;
-use crate::cache::format::{read_cache_file, write_cache_file, CacheWriteInput};
+use crate::cache::format::{read_cache_file, write_cache_file, CacheFileData, CacheWriteInput};
 use crate::cache::payload::{catalog_from_cache_payload, encode_cache_payload};
 use crate::catalog::load_cache::load_catalog_from_cache_file_data;
 use crate::catalog::load_result::{CatalogLoadResult, CatalogLoadSource};
@@ -29,6 +29,22 @@ fn models_dev_api_url() -> Cow<'static, str> {
     Cow::Borrowed(MODELS_DEV_API_URL)
 }
 
+fn load_after_request_failure(
+    request_error: reqwest::Error,
+    cache_file: Option<&CacheFileData>,
+    cache_error: Option<CatalogError>,
+) -> CatalogResult<CatalogLoadResult> {
+    if let Some(cache_file) = cache_file {
+        return load_catalog_from_cache_file_data(cache_file, CatalogLoadSource::FallbackCache);
+    }
+
+    if let Some(cache_error) = cache_error {
+        return Err(cache_error);
+    }
+
+    Err(CatalogError::Reqwest(request_error))
+}
+
 #[maybe_async::maybe_async]
 pub(crate) async fn load_catalog_at_path(path: &Path) -> CatalogResult<CatalogLoadResult> {
     let url = models_dev_api_url();
@@ -58,7 +74,12 @@ pub(crate) async fn load_catalog_from_url(
         request = request.header(IF_NONE_MATCH, etag);
     }
 
-    let response = request.send().await?;
+    let response = match request.send().await {
+        Ok(response) => response,
+        Err(error) => {
+            return load_after_request_failure(error, cache_file.as_ref(), cache_error);
+        }
+    };
     match response.status() {
         StatusCode::OK => {
             let response_etag: Option<Vec<u8>> = response
@@ -207,4 +228,79 @@ mod tests {
             .expect("openai provider should exist");
         assert_eq!(provider.api_type, ProviderType::OpenAiCompletions);
     }
+
+    fn refused_local_url() -> String {
+        let listener = std::net::TcpListener::bind("127.0.0.1:0").expect("bind");
+        let port = listener.local_addr().expect("local addr").port();
+        drop(listener);
+        format!("http://127.0.0.1:{port}/api.json")
+    }
+
+    #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))]
+    async fn sync_returns_fallback_cache_on_request_failure_with_valid_cache() {
+        let temp = TempDir::new().expect("tempdir");
+        let cache_path = temp.path().join("test.cache");
+
+        let payload = CatalogCachePayload {
+            providers: vec![CachedProviderRow {
+                provider_key: "openai".to_string(),
+                api_url: "https://api.openai.com/v1".to_string(),
+                env_vars: vec!["OPENAI_API_KEY".to_string()],
+                api_type: ProviderType::OpenAiCompletions,
+            }],
+            models: vec![CachedModelRow {
+                provider_idx: ProviderIdx::new(0),
+                model_key: "gpt-4".to_string(),
+                modalities_bits: Modality::TEXT.bits(),
+                max_input: 8192,
+                max_output: 4096,
+                temperature: None,
+                top_p: None,
+            }],
+        };
+        let encoded = encode_cache_payload(&payload);
+        let compressed =
+            zstd::bulk::compress(&encoded, zstd::DEFAULT_COMPRESSION_LEVEL).expect("compress");
+        crate::cache::format::write_cache_file(
+            &cache_path,
+            &CacheWriteInput {
+                etag: Some(b"\"cached-etag-456\""),
+                payload_compressed: &compressed,
+                payload_len_decompressed: encoded.len(),
+            },
+        )
+        .await
+        .expect("seed cache");
+
+        let result = load_catalog_from_url(&cache_path, &refused_local_url())
+            .await
+            .expect("fallback should succeed");
+
+        assert_eq!(result.source, CatalogLoadSource::FallbackCache);
+        assert!(result.catalog.lookup_provider("openai").is_some());
+    }
+
+    #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))]
+    async fn sync_returns_request_error_when_request_fails_without_cache() {
+        let temp = TempDir::new().expect("tempdir");
+        let cache_path = temp.path().join("missing.cache");
+
+        match load_catalog_from_url(&cache_path, &refused_local_url()).await {
+            Err(error) => assert!(matches!(error, CatalogError::Reqwest(_))),
+            Ok(_) => panic!("request failure without cache should error"),
+        }
+    }
+
+    #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))]
+    async fn sync_returns_cache_error_when_request_fails_with_corrupt_cache() {
+        let temp = TempDir::new().expect("tempdir");
+        let cache_path = temp.path().join("corrupt.cache");
+
+        std::fs::write(&cache_path, [0_u8; 11]).expect("write corrupt cache");
+
+        match load_catalog_from_url(&cache_path, &refused_local_url()).await {
+            Err(error) => assert!(matches!(error, CatalogError::CacheFormat(_))),
+            Ok(_) => panic!("request failure with corrupt cache should error"),
+        }
+    }
 }

From 1e5497afc2c57aad1e058a00d54dc5efad8f5d88 Mon Sep 17 00:00:00 2001
From: Sewer56 <admin@sewer56.dev>
Date: Sat, 7 Mar 2026 00:23:25 +0000
Subject: [PATCH 09/22] Changed: Optimize zstd compression level to 17 for
 catalog caching

Benchmarked compression ratios and times across all zstd levels (0-22)
using a models.dev api.json snapshot. Level 17 provides a good balance
of compression ratio (1.84% of JSON, ~27ms) vs higher levels with
diminishing returns (level 22 takes 207ms for only marginally better
compression).

Changes:
- Use zstd level 17 for production cache compression (was default ~3)
- Use zstd level 1 in tests for faster execution
- Add compression benchmark table to payload.rs module docs

Benefits:
- Smaller cache files (~22% smaller than level 3)
- Reasonable compression time (~27ms for 1.26 MB JSON)
- Documented tradeoffs for future reference
---
 .../src/cache/payload.rs                      | 38 +++++++++++++++++++
 .../src/catalog/load_cache.rs                 |  2 +-
 .../src/catalog/sync.rs                       |  6 +--
 3 files changed, 42 insertions(+), 4 deletions(-)

diff --git a/src/llm-coding-tools-models-dev/src/cache/payload.rs b/src/llm-coding-tools-models-dev/src/cache/payload.rs
index d86d7caf..05644c50 100644
--- a/src/llm-coding-tools-models-dev/src/cache/payload.rs
+++ b/src/llm-coding-tools-models-dev/src/cache/payload.rs
@@ -1,3 +1,41 @@
+//! Cache payload serialization for models.dev catalog data.
+//!
+//! ## Compression Benchmark
+//!
+//! Using a 1.26 MB `api.json` snapshot (models.dev), converted to bitcode
+//! then compressed with zstd at various levels:
+//!
+//! | Level          | Size      | % of JSON | Time    |
+//! |----------------|-----------|-----------|---------|
+//! | JSON           | 1260.7 KB | 100.00%   | -       |
+//! | (raw bitcode)  | 105.7 KB  | 8.39%     | -       |
+//! | 0              | 29.7 KB   | 2.36%     | 1.4ms   |
+//! | 1              | 32.1 KB   | 2.55%     | 1.0ms   |
+//! | 2              | 31.7 KB   | 2.51%     | 1.0ms   |
+//! | 3              | 29.7 KB   | 2.36%     | 1.1ms   |
+//! | 4              | 29.7 KB   | 2.36%     | 1.9ms   |
+//! | 5              | 27.5 KB   | 2.18%     | 2.9ms   |
+//! | 6              | 27.1 KB   | 2.15%     | 3.6ms   |
+//! | 7              | 26.6 KB   | 2.11%     | 4.8ms   |
+//! | 8              | 26.7 KB   | 2.12%     | 5.0ms   |
+//! | 9              | 26.7 KB   | 2.12%     | 6.3ms   |
+//! | 10             | 26.4 KB   | 2.09%     | 9.1ms   |
+//! | 11             | 26.1 KB   | 2.07%     | 8.5ms   |
+//! | 12             | 26.1 KB   | 2.07%     | 14.4ms  |
+//! | 13             | 26.0 KB   | 2.06%     | 12.0ms  |
+//! | 14             | 26.0 KB   | 2.06%     | 16.4ms  |
+//! | 15             | 25.9 KB   | 2.06%     | 21.6ms  |
+//! | 16             | 23.6 KB   | 1.87%     | 24.2ms  |
+//! | 17             | 23.2 KB   | 1.84%     | 27.6ms  |
+//! | 18             | 23.2 KB   | 1.84%     | 42.6ms  |
+//! | 19             | 23.1 KB   | 1.83%     | 81.3ms  |
+//! | 20             | 23.1 KB   | 1.83%     | 96.3ms  |
+//! | 21             | 23.1 KB   | 1.83%     | 125.4ms |
+//! | 22             | 23.1 KB   | 1.83%     | 207.5ms |
+//!
+//! Levels 1-3 offer the best speed/ratio tradeoff (~1ms, ~2.4% of JSON).
+//! Levels 19-22 provide maximal compression but take 80-200ms.
+
 use crate::error::{CatalogError, CatalogResult};
 use llm_coding_tools_core::models::{
     Modality, ModelCatalog, ModelInfo, ProviderIdx, ProviderInfo, ProviderModelSource,
diff --git a/src/llm-coding-tools-models-dev/src/catalog/load_cache.rs b/src/llm-coding-tools-models-dev/src/catalog/load_cache.rs
index bc4f79c3..23869d30 100644
--- a/src/llm-coding-tools-models-dev/src/catalog/load_cache.rs
+++ b/src/llm-coding-tools-models-dev/src/catalog/load_cache.rs
@@ -59,7 +59,7 @@ mod tests {
         let payload = sample_payload();
         let encoded = encode_cache_payload(&payload);
         let compressed =
-            zstd::bulk::compress(&encoded, zstd::DEFAULT_COMPRESSION_LEVEL).expect("compress");
+            zstd::bulk::compress(&encoded, 1).expect("compress");
 
         write_cache_file(
             &path,
diff --git a/src/llm-coding-tools-models-dev/src/catalog/sync.rs b/src/llm-coding-tools-models-dev/src/catalog/sync.rs
index 49b32098..199f1cce 100644
--- a/src/llm-coding-tools-models-dev/src/catalog/sync.rs
+++ b/src/llm-coding-tools-models-dev/src/catalog/sync.rs
@@ -91,7 +91,7 @@ pub(crate) async fn load_catalog_from_url(
             let payload_encoded = encode_cache_payload(&payload);
             let catalog = catalog_from_cache_payload(payload)?;
             let payload_compressed =
-                zstd::bulk::compress(payload_encoded.as_slice(), zstd::DEFAULT_COMPRESSION_LEVEL)
+                zstd::bulk::compress(payload_encoded.as_slice(), 17)
                     .map_err(|error| CatalogError::Zstd(error.to_string()))?;
 
             write_cache_file(
@@ -197,7 +197,7 @@ mod tests {
         };
         let encoded = encode_cache_payload(&payload);
         let compressed =
-            zstd::bulk::compress(&encoded, zstd::DEFAULT_COMPRESSION_LEVEL).expect("compress");
+            zstd::bulk::compress(&encoded, 1).expect("compress");
 
         // Write the seeded cache file with ETag
         crate::cache::format::write_cache_file(
@@ -260,7 +260,7 @@ mod tests {
         };
         let encoded = encode_cache_payload(&payload);
         let compressed =
-            zstd::bulk::compress(&encoded, zstd::DEFAULT_COMPRESSION_LEVEL).expect("compress");
+            zstd::bulk::compress(&encoded, 1).expect("compress");
         crate::cache::format::write_cache_file(
             &cache_path,
             &CacheWriteInput {

From e7d0eb8b0abec95077add7e79f8aedfcf02e804b Mon Sep 17 00:00:00 2001
From: Sewer56 <admin@sewer56.dev>
Date: Sat, 7 Mar 2026 00:31:23 +0000
Subject: [PATCH 10/22] Changed: Document models.dev cache internals and cache
 read behavior

Clarify the internal catalog and cache docs so the sync flow and cache format explain the whole-file read path and why it is the right tradeoff here.

Changes:
- Added module and helper docs across sync, cache loading, and payload modules
- Documented that cache reads load the whole file and referenced typical compressed payload sizes
- Noted that models.dev changes infrequently, so cache hits are common and one sequential read is generally faster

Benefits:
- Makes the models.dev internals easier to follow and maintain
- Preserves the performance rationale for the current cache I/O strategy where it is used
---
 .../src/cache/format.rs                       | 19 +++++++-
 .../src/cache/payload.rs                      | 33 ++++++++++++++
 .../src/catalog/load_cache.rs                 | 13 ++++++
 .../src/catalog/sync.rs                       | 43 +++++++++++++++++++
 4 files changed, 107 insertions(+), 1 deletion(-)

diff --git a/src/llm-coding-tools-models-dev/src/cache/format.rs b/src/llm-coding-tools-models-dev/src/cache/format.rs
index fbfad751..e131810b 100644
--- a/src/llm-coding-tools-models-dev/src/cache/format.rs
+++ b/src/llm-coding-tools-models-dev/src/cache/format.rs
@@ -20,6 +20,14 @@
 //! one pre-sized allocation, then parse/slice into `prelude`, `etag`, and
 //! `payload` views without additional copying.
 //!
+//! ## Performance
+//!
+//! models.dev changes infrequently, so cache hits are expected to be common.
+//! [`crate::cache::payload`] documents typical compressed payload sizes of about
+//! 23-32 kB, which keeps the whole container small enough that a single
+//! sequential read is generally the faster, simpler hot path on modern
+//! NVMe-backed systems.
+//!
 //! ## Safety
 //!
 //! Not a 'safe' parser. We assume the file was created by the user.
@@ -122,10 +130,19 @@ impl CacheFileData {
 
 /// Reads a cache container from disk.
 ///
-/// This reads only the prelude + raw blocks and does not decompress payload.
+/// This reads the entire cache file into memory in one shot, then parses only
+/// the prelude + raw blocks and does not decompress payload.
 /// Compressed payload length is validated against prelude metadata to catch
 /// unexpected truncation or trailing bytes before decode.
 ///
+/// # Performance
+///
+/// This intentionally performs one whole-file read. models.dev changes
+/// infrequently, so cache hits are expected to be common, and
+/// [`crate::cache::payload`] documents typical compressed payload sizes of about
+/// 23-32 kB. That is generally faster in practice than a streaming path while
+/// remaining effectively negligible on modern NVMe-backed systems.
+///
 /// # Errors
 ///
 /// Returns [`CatalogError::CacheFormat`] when the prelude is truncated, when
diff --git a/src/llm-coding-tools-models-dev/src/cache/payload.rs b/src/llm-coding-tools-models-dev/src/cache/payload.rs
index 05644c50..37bc5fd7 100644
--- a/src/llm-coding-tools-models-dev/src/cache/payload.rs
+++ b/src/llm-coding-tools-models-dev/src/cache/payload.rs
@@ -1,5 +1,9 @@
 //! Cache payload serialization for models.dev catalog data.
 //!
+//! The payload is stored as simple owned rows so it can be encoded compactly
+//! with bitcode and rebuilt into a [`ModelCatalog`](llm_coding_tools_core::models::ModelCatalog)
+//! without reparsing the original JSON.
+//!
 //! ## Compression Benchmark
 //!
 //! Using a 1.26 MB `api.json` snapshot (models.dev), converted to bitcode
@@ -42,39 +46,68 @@ use llm_coding_tools_core::models::{
     ProviderSource, ProviderType,
 };
 
+/// Serializable cache representation of the models.dev catalog.
 #[derive(Debug, Clone, PartialEq, bitcode::Encode, bitcode::Decode)]
 pub(crate) struct CatalogCachePayload {
+    /// Provider rows in catalog order.
     pub(crate) providers: Vec<CachedProviderRow>,
+    /// Model rows that reference providers by index.
     pub(crate) models: Vec<CachedModelRow>,
 }
 
+/// Serializable provider row stored in the cache payload.
 #[derive(Debug, Clone, PartialEq, Eq, bitcode::Encode, bitcode::Decode)]
 pub(crate) struct CachedProviderRow {
+    /// Stable provider lookup key.
     pub(crate) provider_key: String,
+    /// Base API URL for requests to this provider.
     pub(crate) api_url: String,
+    /// Environment variables that can supply credentials.
     pub(crate) env_vars: Vec<String>,
+    /// Provider protocol or API shape.
     pub(crate) api_type: ProviderType,
 }
 
+/// Serializable model row stored in the cache payload.
 #[derive(Debug, Clone, PartialEq, bitcode::Encode, bitcode::Decode)]
 pub(crate) struct CachedModelRow {
+    /// Index into [`CatalogCachePayload::providers`].
     pub(crate) provider_idx: ProviderIdx,
+    /// Stable model lookup key within the provider.
     pub(crate) model_key: String,
+    /// Serialized [`Modality`] bitflags.
     pub(crate) modalities_bits: u8,
+    /// Maximum supported input tokens.
     pub(crate) max_input: u32,
+    /// Maximum supported output tokens.
     pub(crate) max_output: u32,
+    /// Optional default temperature.
     pub(crate) temperature: Option<f32>,
+    /// Optional default top-p value.
     pub(crate) top_p: Option<f32>,
 }
 
+/// Encodes a cache payload into bitcode bytes.
 pub(crate) fn encode_cache_payload(payload: &CatalogCachePayload) -> Vec<u8> {
     bitcode::encode(payload)
 }
 
+/// Decodes bitcode bytes into an owned cache payload.
+///
+/// # Errors
+///
+/// Returns [`CatalogError::BitcodeDecode`] when the bytes are not a valid cache
+/// payload encoding.
 pub(crate) fn decode_cache_payload(bytes: &[u8]) -> CatalogResult<CatalogCachePayload> {
     bitcode::decode(bytes).map_err(|error| CatalogError::BitcodeDecode(error.to_string()))
 }
 
+/// Rebuilds a [`ModelCatalog`] from decoded cache rows.
+///
+/// # Errors
+///
+/// Returns [`CatalogError`] when any cached row data cannot be used to build a
+/// valid catalog, such as when a model references an out-of-range provider.
 pub(crate) fn catalog_from_cache_payload(
     payload: CatalogCachePayload,
 ) -> CatalogResult<ModelCatalog> {
diff --git a/src/llm-coding-tools-models-dev/src/catalog/load_cache.rs b/src/llm-coding-tools-models-dev/src/catalog/load_cache.rs
index 23869d30..338f6ee5 100644
--- a/src/llm-coding-tools-models-dev/src/catalog/load_cache.rs
+++ b/src/llm-coding-tools-models-dev/src/catalog/load_cache.rs
@@ -1,8 +1,21 @@
+//! Loading a model catalog from cached on-disk data.
+//!
+//! This module handles the offline half of catalog loading: it decompresses the
+//! stored payload, decodes the serialized rows, and rebuilds a
+//! [`ModelCatalog`](llm_coding_tools_core::models::ModelCatalog).
+
 use crate::cache::format::CacheFileData;
 use crate::cache::payload::{catalog_from_cache_payload, decode_cache_payload};
 use crate::catalog::load_result::{CatalogLoadResult, CatalogLoadSource};
 use crate::error::{CatalogError, CatalogResult};
 
+/// Decompresses cache file data and rebuilds a catalog from it.
+///
+/// # Errors
+///
+/// Returns [`CatalogError`] when zstd decompression fails, the decompressed
+/// length does not match the cache metadata, the serialized payload cannot be
+/// decoded, or catalog reconstruction fails.
 pub(crate) fn load_catalog_from_cache_file_data(
     cache_file: &CacheFileData,
     source: CatalogLoadSource,
diff --git a/src/llm-coding-tools-models-dev/src/catalog/sync.rs b/src/llm-coding-tools-models-dev/src/catalog/sync.rs
index 199f1cce..bacc8a57 100644
--- a/src/llm-coding-tools-models-dev/src/catalog/sync.rs
+++ b/src/llm-coding-tools-models-dev/src/catalog/sync.rs
@@ -1,3 +1,11 @@
+//! Catalog synchronization against the remote models.dev API.
+//!
+//! This module owns the online-first load path used by
+//! [`ModelsDevCatalog`](crate::catalog::ModelsDevCatalog). It reads any cached
+//! container in one shot, sends a conditional request with the cached ETag when
+//! available, refreshes the cache on `200 OK`, reuses it on `304 Not Modified`,
+//! and falls back to cached data when the request fails.
+
 use crate::api::catalog_sources::cache_payload_from_api_json_bytes;
 use crate::cache::format::{read_cache_file, write_cache_file, CacheFileData, CacheWriteInput};
 use crate::cache::payload::{catalog_from_cache_payload, encode_cache_payload};
@@ -10,16 +18,19 @@ use std::borrow::Cow;
 use std::io::ErrorKind;
 use std::path::Path;
 
+/// Default production endpoint for the models.dev catalog snapshot.
 const MODELS_DEV_API_URL: &str = "https://models.dev/api.json";
 
 #[cfg(test)]
 static TEST_MODELS_DEV_API_URL: std::sync::Mutex<Option<String>> = std::sync::Mutex::new(None);
 
 #[cfg(test)]
+/// Overrides the remote catalog URL for sync tests.
 pub(crate) fn set_test_models_dev_api_url(url: Option<String>) {
     *TEST_MODELS_DEV_API_URL.lock().unwrap() = url;
 }
 
+/// Returns the active catalog endpoint, including the test override when set.
 fn models_dev_api_url() -> Cow<'static, str> {
     #[cfg(test)]
     if let Some(url) = TEST_MODELS_DEV_API_URL.lock().unwrap().clone() {
@@ -29,6 +40,10 @@ fn models_dev_api_url() -> Cow<'static, str> {
     Cow::Borrowed(MODELS_DEV_API_URL)
 }
 
+/// Resolves the result to return after a request failure.
+///
+/// Cached data takes precedence over surfacing the request error so callers can
+/// continue with the last known-good catalog when possible.
 fn load_after_request_failure(
     request_error: reqwest::Error,
     cache_file: Option<&CacheFileData>,
@@ -46,12 +61,40 @@ fn load_after_request_failure(
 }
 
 #[maybe_async::maybe_async]
+/// Loads the catalog at `path` using the default models.dev endpoint.
+///
+/// # Errors
+///
+/// Returns the same errors as [`load_catalog_from_url`] while targeting the
+/// default production URL.
 pub(crate) async fn load_catalog_at_path(path: &Path) -> CatalogResult<CatalogLoadResult> {
     let url = models_dev_api_url();
     load_catalog_from_url(path, url.as_ref()).await
 }
 
 #[maybe_async::maybe_async]
+/// Synchronizes the cache at `path` against `url` and returns a catalog.
+///
+/// The sync flow is:
+/// - read any existing cache file in one whole-file read
+/// - send `If-None-Match` when the cache includes an ETag
+/// - on `200 OK`, decode the response and rewrite the cache
+/// - on `304 Not Modified`, load the existing cache
+/// - on request failure, fall back to cache when available
+///
+/// # Performance
+///
+/// Cache probing performs one up-front whole-file read through
+/// [`read_cache_file`]. models.dev changes infrequently, so cache hits are
+/// expected to be common, and [`crate::cache::payload`] documents typical
+/// compressed payload sizes of about 23-32 kB. That makes a single sequential
+/// read generally the faster hot path on modern NVMe-backed systems.
+///
+/// # Errors
+///
+/// Returns [`CatalogError`] when cache I/O fails without a usable fallback,
+/// response data cannot be decoded, the cache cannot be written, or the server
+/// responds with an unexpected status.
 pub(crate) async fn load_catalog_from_url(
     path: &Path,
     url: &str,

From 82e7695474da7d9e21b146854f3694e2a82a1052 Mon Sep 17 00:00:00 2001
From: Sewer56 <admin@sewer56.dev>
Date: Sun, 8 Mar 2026 23:26:10 +0000
Subject: [PATCH 11/22] Changed: Finalize Slice 10 docs and agent model
 integration

Refresh the workspace docs and restore agent-side model parsing so the
public guidance matches the current API surface and workflow behavior.

Changes:
- updated the root README with all workspace crates and current setup guidance
- expanded the agents and models-dev READMEs around catalog integration
- restored `AgentConfig::model_parts()` with coverage for valid and invalid input
- corrected the workflow note for models-dev runtime support
- kept rustfmt-only cleanup in models-dev cache loading tests

Benefits:
- makes the workspace easier to adopt without stale examples or missing docs
- keeps `provider/model` parsing close to the agent config that owns it
---
 .github/workflows/rust.yml                    |  2 +-
 README.MD                                     | 70 ++++++++++---------
 src/llm-coding-tools-agents/README.md         | 36 +++++++---
 .../src/types/config.rs                       | 60 ++++++++++++++++
 src/llm-coding-tools-models-dev/README.md     | 67 +++++++++++++++---
 .../src/catalog/load_cache.rs                 |  3 +-
 .../src/catalog/sync.rs                       | 11 ++-
 src/llm-coding-tools-models-dev/src/lib.rs    |  5 +-
 8 files changed, 192 insertions(+), 62 deletions(-)

diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index ee44bdc9..37f6b714 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -58,7 +58,7 @@ jobs:
               echo "Running semver checks for ${CRATE}..."
               # Note: llm-coding-tools-core has mutually exclusive async/blocking features,
               # so we must use --only-explicit-features to avoid enabling all features.
-              # The serdesai and models-dev crates are async-only and don't have the tokio feature.
+              # llm-coding-tools-serdesai is async-only. models-dev supports both tokio and blocking.
               if [ "${CRATE}" = "llm-coding-tools-core" ]; then
                 cargo +stable semver-checks -p "${CRATE}" --target ${{ matrix.target }} --only-explicit-features --features tokio
               else
diff --git a/README.MD b/README.MD
index 2d728aac..303acd67 100644
--- a/README.MD
+++ b/README.MD
@@ -1,18 +1,30 @@
 # llm-coding-tools
 
 [![Crates.io - llm-coding-tools-core](https://img.shields.io/crates/v/llm-coding-tools-core.svg)](https://crates.io/crates/llm-coding-tools-core)
+[![Crates.io - llm-coding-tools-agents](https://img.shields.io/crates/v/llm-coding-tools-agents.svg)](https://crates.io/crates/llm-coding-tools-agents)
 [![Crates.io - llm-coding-tools-serdesai](https://img.shields.io/crates/v/llm-coding-tools-serdesai.svg)](https://crates.io/crates/llm-coding-tools-serdesai)
+[![Crates.io - llm-coding-tools-models-dev](https://img.shields.io/crates/v/llm-coding-tools-models-dev.svg)](https://crates.io/crates/llm-coding-tools-models-dev)
 [![Docs.rs](https://docs.rs/llm-coding-tools-serdesai/badge.svg)](https://docs.rs/llm-coding-tools-serdesai)
 [![CI](https://github.com/Sewer56/llm-coding-tools/actions/workflows/rust.yml/badge.svg)](https://github.com/Sewer56/llm-coding-tools/actions)
 
-Lightweight, high-performance coding tool implementations for LLM-powered development agents. Plug and play into your favourite frameworks.
+Lightweight, heavily optimized coding tool implementations for LLM-powered
+development agents.
+
+Suitable for server use (<3 MiB), or as building blocks for your own TUI coding agent.
 
 ## About This Workspace
 
-This workspace contains multiple Rust crates for integrating coding tools with LLM agents:
+This workspace contains multiple Rust crates for integrating coding tools with
+LLM agents:
 
-- **[llm-coding-tools-core](./src/llm-coding-tools-core/)**: Framework-agnostic core operations and utilities
-- **[llm-coding-tools-serdesai](./src/llm-coding-tools-serdesai/)**: serdesAI framework-specific Tool implementations
+- **[llm-coding-tools-core](./src/llm-coding-tools-core/)**:
+  Framework-agnostic core operations and utilities
+- **[llm-coding-tools-agents](./src/llm-coding-tools-agents/)**:
+  OpenCode agent markdown loader and typed catalogue
+- **[llm-coding-tools-serdesai](./src/llm-coding-tools-serdesai/)**:
+  serdesAI framework-specific Tool implementations
+- **[llm-coding-tools-models-dev](./src/llm-coding-tools-models-dev/)**:
+  models.dev catalog sync with cached fallback and ETag refresh
 
 ## Features
 
@@ -22,6 +34,9 @@ This workspace contains multiple Rust crates for integrating coding tools with L
 - **Web**: URL fetching with HTML-to-markdown conversion
 - **Path Security**: Choose between unrestricted or sandboxed file access
 - **Context Strings**: Embedded LLM guidance for tool usage
+- **Agent Loading**: Parse OpenCode-compatible agent markdown into typed configs
+- **Model Catalog Sync**: Download and cache the models.dev catalog for
+  provider/model lookups
 
 ## Feature Flags (llm-coding-tools-core)
 
@@ -30,41 +45,24 @@ This workspace contains multiple Rust crates for integrating coding tools with L
 
 ## Quick Start
 
-Add to your `Cargo.toml`:
+Pick the crate that matches your use case:
 
 ```toml
 [dependencies]
-llm-coding-tools-serdesai = "0.1"
+llm-coding-tools-core = "0.2"         # Framework-agnostic tool implementations
+llm-coding-tools-agents = "0.1"       # OpenCode agent markdown loader
+llm-coding-tools-models-dev = "0.1"   # models.dev catalog sync and cache
+llm-coding-tools-serdesai = "0.2"     # serdesAI integration
 ```
 
-```rust,no_run
-use llm_coding_tools_serdesai::{AgentBuilder, BashTool, TodoTools};
-use llm_coding_tools_serdesai::absolute::{ReadTool, WriteTool, EditTool, GlobTool, GrepTool};
-
-let mut builder = AgentBuilder::new();
-let todos = TodoTools::new();
-
-builder
-    .track(ReadTool::<true>::new())
-    .track(WriteTool::new())
-    .track(EditTool::<true>::new())
-    .track(GlobTool::new())
-    .track(GrepTool::<true>::new())
-    .track(BashTool::new())
-    .track(&todos.read)
-    .track(&todos.write);
-
-let mut agent = builder.build();
-
-// Use the agent
-// let response = agent.invoke("List all files").await?;
-```
+For a runnable agent setup, start with `llm-coding-tools-serdesai` and the
+examples below.
 
 ## Examples
 
 ```bash
 # serdesAI framework - Basic agent setup
-cargo run --example serdesai-agents -p llm-coding-tools-serdesai
+cargo run --example serdesai-basic -p llm-coding-tools-serdesai
 
 # serdesAI framework - Sandboxed file access
 cargo run --example serdesai-sandboxed -p llm-coding-tools-serdesai
@@ -73,18 +71,26 @@ cargo run --example serdesai-sandboxed -p llm-coding-tools-serdesai
 ## Documentation
 
 - [llm-coding-tools-core README](./src/llm-coding-tools-core/README.md)
+- [llm-coding-tools-agents README](./src/llm-coding-tools-agents/README.md)
 - [llm-coding-tools-serdesai README](./src/llm-coding-tools-serdesai/README.md)
+- [llm-coding-tools-models-dev README](./src/llm-coding-tools-models-dev/README.md)
 - [Developer Guidelines](./src/AGENTS.md)
 
 ## Contributing
 
-Contributions are welcome! Please ensure all tests pass and the code follows our guidelines.
+Contributions are welcome! Please ensure all tests pass and the code follows
+our guidelines.
 
 ## Deprecation Notice
 
-**Rig framework support (`llm-coding-tools-rig`) has been removed** (commit 17158db) due to library bugs that prevented examples from running reliably.
+**Rig framework support (`llm-coding-tools-rig`) has been removed**
+(commit 17158db) due to library bugs that prevented examples from running
+reliably.
 
-You're welcome to submit a PR re-adding rig support if you're willing to maintain it. Since I don't use rig personally, I'm not able to actively maintain that integration. Alternatively, you can create your own crate building on `llm-coding-tools-core` directly.
+You're welcome to submit a PR re-adding rig support if you're willing to
+maintain it. Since I don't use rig personally, I'm not able to actively
+maintain that integration. Alternatively, you can create your own crate
+building on `llm-coding-tools-core` directly.
 
 ## License
 
diff --git a/src/llm-coding-tools-agents/README.md b/src/llm-coding-tools-agents/README.md
index b17ce11d..e6d6bb85 100644
--- a/src/llm-coding-tools-agents/README.md
+++ b/src/llm-coding-tools-agents/README.md
@@ -4,13 +4,16 @@ Load OpenCode agent markdown files into a typed Rust catalogue.
 
 This crate is a loader for the [OpenCode agent schema](https://opencode.ai/docs/agents/).
 
-It is a drop-in replacement for OpenCode agent files: agents you create for OpenCode should load here unchanged.
+It is a drop-in replacement for OpenCode agent files: agents you create for
+OpenCode should load here unchanged.
 
 ## What it provides
 
-- [`AgentLoader`] for loading agent configs from directories, files, or in-memory markdown.
+- [`AgentLoader`] for loading agent configs from directories, files, or
+  in-memory markdown.
 - [`AgentCatalog`] for storing and looking up loaded [`AgentConfig`] entries.
-- [`RulesetExt`] for converting frontmatter `permission` data into runtime [`Ruleset`]s.
+- [`RulesetExt`] for converting frontmatter `permission` data into runtime
+  [`Ruleset`]s.
 
 ## Quick start
 
@@ -43,19 +46,34 @@ permission:
 Prompt body here...
 ```
 
-For field behaviour, see OpenCode docs for [`mode`](https://opencode.ai/docs/agents#mode), [`model`](https://opencode.ai/docs/agents#model), and [`permissions`](https://opencode.ai/docs/agents#permissions).
+For field behaviour, see OpenCode docs for
+[`mode`](https://opencode.ai/docs/agents#mode),
+[`model`](https://opencode.ai/docs/agents#model), and
+[`permissions`](https://opencode.ai/docs/agents#permissions).
 
 ## Compatibility notes
 
-This library does not provide interactive UX extensions (for example, TUI approval flows).
-To avoid false expectations, settings that require interaction are rejected, while settings with no runtime effect are accepted and ignored:
+This library does not provide interactive UX extensions (for example, TUI
+approval flows).
 
-- [`permission.task`](https://opencode.ai/docs/agents#task-permissions): `ask` is rejected with a schema validation error (`allow`/`deny` only), because `ask` is an interactive approval mode in OpenCode ([docs](https://opencode.ai/docs/permissions#what-ask-does)).
-- [`hidden`](https://opencode.ai/docs/agents#hidden) is accepted for compatibility, but ignored at runtime.
+To avoid false expectations, settings that require interaction are rejected,
+while settings with no runtime effect are accepted and ignored:
+
+- [`permission.task`](https://opencode.ai/docs/agents#task-permissions):
+  `ask` is rejected with a schema validation error (`allow`/`deny` only),
+  because `ask` is an interactive approval mode in OpenCode
+  ([docs](https://opencode.ai/docs/permissions#what-ask-does)).
+- [`hidden`](https://opencode.ai/docs/agents#hidden) is accepted for
+  compatibility, but ignored at runtime.
 
 ## Integration
 
 This crate only loads and validates agent configs.
-Pass [`AgentCatalog`] to your runtime adapter (for example, `llm-coding-tools-serdesai`) to build registries and Task tooling.
+Pass [`AgentCatalog`] to your runtime adapter (for example,
+`llm-coding-tools-serdesai`) to build registries and Task tooling.
+
+If you want to validate `model` strings against a catalog, call
+[`AgentConfig::model_parts`] and pass the returned `(provider, model)` into
+your lookup layer.
 
 [`Ruleset`]: llm_coding_tools_core::permissions::Ruleset
diff --git a/src/llm-coding-tools-agents/src/types/config.rs b/src/llm-coding-tools-agents/src/types/config.rs
index 78010529..1d4a74ab 100644
--- a/src/llm-coding-tools-agents/src/types/config.rs
+++ b/src/llm-coding-tools-agents/src/types/config.rs
@@ -108,6 +108,8 @@ pub struct AgentConfig {
     #[serde(default)]
     pub description: String,
     /// Optional model override (format: "provider/model-id").
+    ///
+    /// Use [`AgentConfig::model_parts`] before catalog lookup.
     #[serde(default)]
     pub model: Option<String>,
     /// Legacy visibility flag accepted for compatibility only.
@@ -136,6 +138,18 @@ pub struct AgentConfig {
 }
 
 impl AgentConfig {
+    /// Returns the configured model split into `(provider, model)` parts.
+    #[inline]
+    pub fn model_parts(&self) -> Option<(&str, &str)> {
+        let value = self.model.as_deref()?;
+        let (provider, model) = value.split_once('/')?;
+        if provider.is_empty() || model.is_empty() {
+            return None;
+        }
+
+        Some((provider, model))
+    }
+
     /// Creates an [`AgentConfig`] from raw frontmatter and parsed prompt body.
     pub(crate) fn from_raw(default_name: String, raw: RawFrontmatter, prompt: String) -> Self {
         Self {
@@ -152,3 +166,49 @@ impl AgentConfig {
         }
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::{AgentConfig, AgentMode};
+    use ahash::AHashMap;
+    use indexmap::IndexMap;
+
+    fn config_with_model(model: Option<&str>) -> AgentConfig {
+        AgentConfig {
+            name: "example".to_string(),
+            mode: AgentMode::All,
+            description: String::new(),
+            model: model.map(str::to_string),
+            hidden: false,
+            temperature: None,
+            top_p: None,
+            permission: IndexMap::new(),
+            options: AHashMap::new(),
+            prompt: String::new(),
+        }
+    }
+
+    #[test]
+    fn model_parts_returns_provider_and_model() {
+        let config = config_with_model(Some("synthetic/hf:moonshotai/Kimi-K2.5"));
+
+        assert_eq!(
+            config.model_parts(),
+            Some(("synthetic", "hf:moonshotai/Kimi-K2.5"))
+        );
+    }
+
+    #[test]
+    fn model_parts_rejects_missing_separator() {
+        let config = config_with_model(Some("synthetic-only"));
+
+        assert_eq!(config.model_parts(), None);
+    }
+
+    #[test]
+    fn model_parts_handles_absent_model() {
+        let config = config_with_model(None);
+
+        assert_eq!(config.model_parts(), None);
+    }
+}
diff --git a/src/llm-coding-tools-models-dev/README.md b/src/llm-coding-tools-models-dev/README.md
index 9a31b444..cf2e0d7a 100644
--- a/src/llm-coding-tools-models-dev/README.md
+++ b/src/llm-coding-tools-models-dev/README.md
@@ -8,8 +8,8 @@ for a cached fallback and caching via ETag(s).
 If you run coding agents against many providers, you want to have fresh data.
 [models.dev][models.dev] is one such source of data.
 
-This crate has sufficient code to download from models.dev, distill down only
-the relevant data we need; and create a llm_coding_tools_core `ModelCatalog`.
+This crate downloads from models.dev, keeps only the fields we need, and
+builds a `llm_coding_tools_core::models::ModelCatalog`.
 
 ## Usage
 
@@ -18,8 +18,10 @@ the relevant data we need; and create a llm_coding_tools_core `ModelCatalog`.
 1. Read cache header (if present) and get the old ETag.
 2. Send request to models.dev with `If-None-Match` when ETag exists.
 3. If server returns `304 Not Modified`, load catalog from cache.
-4. If server returns `200 OK`, parse JSON, map it into catalog sources, write fresh cache, then build catalog.
-5. If network fails, try cached data as fallback; if no valid cache exists, return an error.
+4. If server returns `200 OK`, parse JSON, map it into catalog sources,
+   write fresh cache, then build catalog.
+5. If network fails, try cached data as fallback; if no valid cache exists,
+   return an error.
 
 ### Non-blocking (`tokio`)
 
@@ -31,9 +33,15 @@ async fn load_catalog() -> Result<(), Box<dyn std::error::Error>> {
     let result = ModelsDevCatalog::load().await?;
 
     match result.source {
-        CatalogLoadSource::Downloaded => println!("Downloaded fresh catalog data."),
-        CatalogLoadSource::NotModifiedCache => println!("Cache is already up to date."),
-        CatalogLoadSource::FallbackCache => println!("Network unavailable, using cached catalog data."),
+        CatalogLoadSource::Downloaded => {
+            println!("Downloaded fresh catalog data.")
+        }
+        CatalogLoadSource::NotModifiedCache => {
+            println!("Cache is already up to date.")
+        }
+        CatalogLoadSource::FallbackCache => {
+            println!("Network unavailable, using cached catalog data.")
+        }
     }
 
     if let Some(entry) = result.catalog.lookup("openai", "gpt-4") {
@@ -55,9 +63,15 @@ fn load_catalog() -> Result<(), Box<dyn std::error::Error>> {
     let result = ModelsDevCatalog::load()?;
 
     match result.source {
-        CatalogLoadSource::Downloaded => println!("Downloaded fresh catalog data."),
-        CatalogLoadSource::NotModifiedCache => println!("Cache is already up to date."),
-        CatalogLoadSource::FallbackCache => println!("Network unavailable, using cached catalog data."),
+        CatalogLoadSource::Downloaded => {
+            println!("Downloaded fresh catalog data.")
+        }
+        CatalogLoadSource::NotModifiedCache => {
+            println!("Cache is already up to date.")
+        }
+        CatalogLoadSource::FallbackCache => {
+            println!("Network unavailable, using cached catalog data.")
+        }
     }
 
     if let Some(entry) = result.catalog.lookup("openai", "gpt-4") {
@@ -69,6 +83,39 @@ fn load_catalog() -> Result<(), Box<dyn std::error::Error>> {
 }
 ```
 
+### Load from a custom cache path
+
+```rust
+use llm_coding_tools_models_dev::ModelsDevCatalog;
+use std::path::PathBuf;
+
+#[cfg(feature = "tokio")]
+async fn load_catalog() -> Result<(), Box<dyn std::error::Error>> {
+    let cache_path = PathBuf::from("/tmp/models-dev.cache");
+    let _result = ModelsDevCatalog::load_at(&cache_path).await?;
+    Ok(())
+}
+
+#[cfg(feature = "blocking")]
+fn load_catalog() -> Result<(), Box<dyn std::error::Error>> {
+    let cache_path = PathBuf::from("/tmp/models-dev.cache");
+    let _result = ModelsDevCatalog::load_at(&cache_path)?;
+    Ok(())
+}
+```
+
+### Resolve the shared cache path
+
+```rust
+use llm_coding_tools_models_dev::shared_cache_path;
+
+fn print_cache_path() -> Result<(), Box<dyn std::error::Error>> {
+    let path = shared_cache_path()?;
+    println!("{}", path.display());
+    Ok(())
+}
+```
+
 ## Cache location
 
 By default, cache is stored in the platform cache directory:
diff --git a/src/llm-coding-tools-models-dev/src/catalog/load_cache.rs b/src/llm-coding-tools-models-dev/src/catalog/load_cache.rs
index 338f6ee5..8e807411 100644
--- a/src/llm-coding-tools-models-dev/src/catalog/load_cache.rs
+++ b/src/llm-coding-tools-models-dev/src/catalog/load_cache.rs
@@ -71,8 +71,7 @@ mod tests {
 
         let payload = sample_payload();
         let encoded = encode_cache_payload(&payload);
-        let compressed =
-            zstd::bulk::compress(&encoded, 1).expect("compress");
+        let compressed = zstd::bulk::compress(&encoded, 1).expect("compress");
 
         write_cache_file(
             &path,
diff --git a/src/llm-coding-tools-models-dev/src/catalog/sync.rs b/src/llm-coding-tools-models-dev/src/catalog/sync.rs
index bacc8a57..26d9005e 100644
--- a/src/llm-coding-tools-models-dev/src/catalog/sync.rs
+++ b/src/llm-coding-tools-models-dev/src/catalog/sync.rs
@@ -133,9 +133,8 @@ pub(crate) async fn load_catalog_from_url(
             let payload = cache_payload_from_api_json_bytes(body.as_ref())?;
             let payload_encoded = encode_cache_payload(&payload);
             let catalog = catalog_from_cache_payload(payload)?;
-            let payload_compressed =
-                zstd::bulk::compress(payload_encoded.as_slice(), 17)
-                    .map_err(|error| CatalogError::Zstd(error.to_string()))?;
+            let payload_compressed = zstd::bulk::compress(payload_encoded.as_slice(), 17)
+                .map_err(|error| CatalogError::Zstd(error.to_string()))?;
 
             write_cache_file(
                 path,
@@ -239,8 +238,7 @@ mod tests {
             }],
         };
         let encoded = encode_cache_payload(&payload);
-        let compressed =
-            zstd::bulk::compress(&encoded, 1).expect("compress");
+        let compressed = zstd::bulk::compress(&encoded, 1).expect("compress");
 
         // Write the seeded cache file with ETag
         crate::cache::format::write_cache_file(
@@ -302,8 +300,7 @@ mod tests {
             }],
         };
         let encoded = encode_cache_payload(&payload);
-        let compressed =
-            zstd::bulk::compress(&encoded, 1).expect("compress");
+        let compressed = zstd::bulk::compress(&encoded, 1).expect("compress");
         crate::cache::format::write_cache_file(
             &cache_path,
             &CacheWriteInput {
diff --git a/src/llm-coding-tools-models-dev/src/lib.rs b/src/llm-coding-tools-models-dev/src/lib.rs
index 1b4eedae..29e3bb49 100644
--- a/src/llm-coding-tools-models-dev/src/lib.rs
+++ b/src/llm-coding-tools-models-dev/src/lib.rs
@@ -5,7 +5,10 @@
 compile_error!("Features `async` and `blocking` are mutually exclusive.");
 
 #[cfg(not(any(feature = "async", feature = "blocking")))]
-compile_error!("Either an async runtime (e.g., `tokio`) or `blocking` feature must be enabled.");
+compile_error!(concat!(
+    "Either an async runtime (e.g., `tokio`) or `blocking` feature ",
+    "must be enabled."
+));
 
 #[allow(dead_code)] // Wired into catalog build/load slices
 mod api;

From 5aa4f5e097570acf8632b01a7e67b22be05eda91 Mon Sep 17 00:00:00 2001
From: Sewer56 <admin@sewer56.dev>
Date: Sun, 8 Mar 2026 23:38:15 +0000
Subject: [PATCH 12/22] Changed: Clean up dead code directives and organize
 test-only code

Remove unnecessary dead_code annotations and move test-only code into test modules.

Changes:
- Removed `#[allow(dead_code)]` from actually-used hash64/hash63 methods
- Removed `#[allow(dead_code)]` from used api and format modules
- Moved test-only `catalog_from_api_json_bytes` into tests module
- Moved test-only imports into tests module
- Added `#[allow(dead_code)]` with "public API" comment for `payload_len_compressed`

Benefits:
- Cleaner codebase without misleading dead_code suppressions
- Better code organization with test code properly isolated
- Accurate documentation of intentional public API surface
---
 .../src/internal/hash63.rs                    |  2 --
 .../src/internal/hash64.rs                    |  4 ----
 .../src/api/catalog_sources.rs                | 23 ++++++++-----------
 .../src/cache/format.rs                       |  1 +
 .../src/cache/mod.rs                          |  1 -
 src/llm-coding-tools-models-dev/src/lib.rs    |  1 -
 6 files changed, 10 insertions(+), 22 deletions(-)

diff --git a/src/llm-coding-tools-core/src/internal/hash63.rs b/src/llm-coding-tools-core/src/internal/hash63.rs
index 56df018f..a81934bc 100644
--- a/src/llm-coding-tools-core/src/internal/hash63.rs
+++ b/src/llm-coding-tools-core/src/internal/hash63.rs
@@ -18,14 +18,12 @@ impl Hash63 {
     ///
     /// The caller is responsible for ensuring bit 63 is 0.
     #[inline]
-    #[allow(dead_code)] // internal public API
     pub(crate) const fn from_u64(value: u64) -> Self {
         Self(value)
     }
 
     /// Returns the underlying u64 value.
     #[inline]
-    #[allow(dead_code)] // internal public API
     pub(crate) const fn as_u64(&self) -> u64 {
         self.0
     }
diff --git a/src/llm-coding-tools-core/src/internal/hash64.rs b/src/llm-coding-tools-core/src/internal/hash64.rs
index e880c044..6c96ea78 100644
--- a/src/llm-coding-tools-core/src/internal/hash64.rs
+++ b/src/llm-coding-tools-core/src/internal/hash64.rs
@@ -11,14 +11,12 @@ pub(crate) struct Hash64(u64);
 impl Hash64 {
     /// Creates a new Hash64 from a raw u64 value.
     #[inline]
-    #[allow(dead_code)] // internal public API
     pub(crate) fn from_u64(value: u64) -> Self {
         Self(value)
     }
 
     /// Returns the underlying u64 value.
     #[inline]
-    #[allow(dead_code)] // internal public API
     pub(crate) fn as_u64(&self) -> u64 {
         self.0
     }
@@ -26,14 +24,12 @@ impl Hash64 {
 
 /// Hashes a string to Hash64 using ahash64.
 #[inline(always)]
-#[allow(dead_code)] // internal public API
 pub(crate) fn hash_u64(s: &str) -> Hash64 {
     hash_u64_bytes(s.as_bytes())
 }
 
 /// Hashes raw bytes to Hash64 using ahash64.
 #[inline(always)]
-#[allow(dead_code)] // internal public API
 pub(crate) fn hash_u64_bytes(bytes: &[u8]) -> Hash64 {
     Hash64(ahash::RandomState::with_seed(0xDEAD_CAFE).hash_one(bytes))
 }
diff --git a/src/llm-coding-tools-models-dev/src/api/catalog_sources.rs b/src/llm-coding-tools-models-dev/src/api/catalog_sources.rs
index 2a3c1ee7..e8b53ccf 100644
--- a/src/llm-coding-tools-models-dev/src/api/catalog_sources.rs
+++ b/src/llm-coding-tools-models-dev/src/api/catalog_sources.rs
@@ -14,12 +14,10 @@
 //!   core during catalog build.
 
 use super::schema::{parse_api_json, ApiModelEntry, ApiModelLimit, ApiModelModalities};
-use crate::cache::payload::{
-    catalog_from_cache_payload, CachedModelRow, CachedProviderRow, CatalogCachePayload,
-};
+use crate::cache::payload::{CachedModelRow, CachedProviderRow, CatalogCachePayload};
 use crate::error::{CatalogError, CatalogResult};
 use llm_coding_tools_core::models::{
-    Modality, ModelCatalog, ModelCatalogBuildError, ModelInfo, ProviderIdx, ProviderType,
+    Modality, ModelCatalogBuildError, ModelInfo, ProviderIdx, ProviderType,
 };
 
 pub(crate) fn cache_payload_from_api_json_bytes(
@@ -73,12 +71,6 @@ pub(crate) fn cache_payload_from_api_json_bytes(
     Ok(CatalogCachePayload { providers, models })
 }
 
-/// Parses models.dev `api.json` bytes and builds a [`ModelCatalog`].
-pub(crate) fn catalog_from_api_json_bytes(json_bytes: &[u8]) -> CatalogResult<ModelCatalog> {
-    let payload = cache_payload_from_api_json_bytes(json_bytes)?;
-    catalog_from_cache_payload(payload)
-}
-
 #[inline]
 fn model_info_from_entry(model_entry: &ApiModelEntry) -> ModelInfo {
     let (max_input, max_output) = match model_entry.limit.as_ref() {
@@ -168,12 +160,15 @@ fn provider_type_from_models_dev_npm(npm_package: Option<&str>) -> ProviderType
 
 #[cfg(test)]
 mod tests {
-    use super::{
-        cache_payload_from_api_json_bytes, catalog_from_api_json_bytes,
-        provider_type_from_models_dev_npm,
-    };
+    use super::{cache_payload_from_api_json_bytes, provider_type_from_models_dev_npm};
+    use crate::cache::payload::catalog_from_cache_payload;
     use llm_coding_tools_core::models::{Modality, ModelCatalog, ProviderIdx, ProviderType};
 
+    fn catalog_from_api_json_bytes(json_bytes: &[u8]) -> crate::error::CatalogResult<ModelCatalog> {
+        let payload = cache_payload_from_api_json_bytes(json_bytes)?;
+        catalog_from_cache_payload(payload)
+    }
+
     fn catalog(json: &[u8]) -> ModelCatalog {
         catalog_from_api_json_bytes(json).expect("API payload should map")
     }
diff --git a/src/llm-coding-tools-models-dev/src/cache/format.rs b/src/llm-coding-tools-models-dev/src/cache/format.rs
index e131810b..1e1db3f1 100644
--- a/src/llm-coding-tools-models-dev/src/cache/format.rs
+++ b/src/llm-coding-tools-models-dev/src/cache/format.rs
@@ -116,6 +116,7 @@ impl CacheFileData {
     }
 
     /// Returns compressed payload length in bytes.
+    #[allow(dead_code)] // public API
     #[inline]
     pub(crate) fn payload_len_compressed(&self) -> u32 {
         self.payload_len_compressed
diff --git a/src/llm-coding-tools-models-dev/src/cache/mod.rs b/src/llm-coding-tools-models-dev/src/cache/mod.rs
index 695f7660..43af19b8 100644
--- a/src/llm-coding-tools-models-dev/src/cache/mod.rs
+++ b/src/llm-coding-tools-models-dev/src/cache/mod.rs
@@ -12,7 +12,6 @@
 //! The public API currently exposes path resolution only; container helpers are
 //! crate-internal until the sync/load flow is wired.
 
-#[allow(dead_code)] // Wired into the load/sync path down the road
 pub(crate) mod format;
 mod path;
 pub(crate) mod payload;
diff --git a/src/llm-coding-tools-models-dev/src/lib.rs b/src/llm-coding-tools-models-dev/src/lib.rs
index 29e3bb49..60fef51e 100644
--- a/src/llm-coding-tools-models-dev/src/lib.rs
+++ b/src/llm-coding-tools-models-dev/src/lib.rs
@@ -10,7 +10,6 @@ compile_error!(concat!(
     "must be enabled."
 ));
 
-#[allow(dead_code)] // Wired into catalog build/load slices
 mod api;
 pub mod cache;
 pub mod catalog;

From 7e42c9c56aa41a242f5c70370a7db386c396eaa2 Mon Sep 17 00:00:00 2001
From: Sewer56 <admin@sewer56.dev>
Date: Mon, 9 Mar 2026 00:06:11 +0000
Subject: [PATCH 13/22] Fixed: Resolve intra-doc link errors in models-dev
 crate

CI was failing due to rustdoc treating warnings as errors with
RUSTDOCFLAGS="-D warnings". The local verify scripts did not catch
these because they weren't documenting private modules.

Changes:
- Fixed unresolved link to ModelCatalog in catalog_sources.rs using full path
- Fixed redundant explicit link target in payload.rs (ModelCatalog is in scope)
- Added --document-private-items to verify.sh and verify.ps1

Benefits:
- Local verify scripts now match CI doc validation behavior
- Catches intra-doc link errors before pushing
---
 src/.cargo/verify.ps1                                      | 2 +-
 src/.cargo/verify.sh                                       | 2 +-
 src/llm-coding-tools-models-dev/src/api/catalog_sources.rs | 2 +-
 src/llm-coding-tools-models-dev/src/cache/payload.rs       | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/.cargo/verify.ps1 b/src/.cargo/verify.ps1
index 912389f1..481ad21b 100644
--- a/src/.cargo/verify.ps1
+++ b/src/.cargo/verify.ps1
@@ -57,7 +57,7 @@ Write-Host "Docs..."
 $originalRustdocFlags = $env:RUSTDOCFLAGS
 $env:RUSTDOCFLAGS = "-D warnings"
 try {
-    Invoke-LoggedCommand "cargo" @("doc", "--workspace", "--no-deps", "--quiet")
+    Invoke-LoggedCommand "cargo" @("doc", "--workspace", "--document-private-items", "--no-deps", "--quiet")
 } finally {
     $env:RUSTDOCFLAGS = $originalRustdocFlags
 }
diff --git a/src/.cargo/verify.sh b/src/.cargo/verify.sh
index eefd0e0b..36ff4728 100755
--- a/src/.cargo/verify.sh
+++ b/src/.cargo/verify.sh
@@ -43,7 +43,7 @@ run_cmd cargo test -p llm-coding-tools-core --no-default-features --features blo
 run_cmd cargo test -p llm-coding-tools-models-dev --no-default-features --features blocking --quiet
 
 echo "Docs..."
-run_cmd env RUSTDOCFLAGS="-D warnings" cargo doc --workspace --no-deps --quiet
+run_cmd env RUSTDOCFLAGS="-D warnings" cargo doc --workspace --document-private-items --no-deps --quiet
 
 echo "Formatting..."
 run_cmd cargo fmt --all --check --quiet
diff --git a/src/llm-coding-tools-models-dev/src/api/catalog_sources.rs b/src/llm-coding-tools-models-dev/src/api/catalog_sources.rs
index e8b53ccf..0f012ef3 100644
--- a/src/llm-coding-tools-models-dev/src/api/catalog_sources.rs
+++ b/src/llm-coding-tools-models-dev/src/api/catalog_sources.rs
@@ -1,7 +1,7 @@
 //! models.dev API -> `ModelCatalog` mapping.
 //!
 //! This module parses models.dev `api.json`, maps provider/model metadata into
-//! transient core builder inputs, and immediately constructs a [`ModelCatalog`].
+//! transient core builder inputs, and immediately constructs a [`ModelCatalog`](llm_coding_tools_core::models::ModelCatalog).
 //!
 //! Mapping policy:
 //! - missing limits default to `0`;
diff --git a/src/llm-coding-tools-models-dev/src/cache/payload.rs b/src/llm-coding-tools-models-dev/src/cache/payload.rs
index 37bc5fd7..3a1e65b0 100644
--- a/src/llm-coding-tools-models-dev/src/cache/payload.rs
+++ b/src/llm-coding-tools-models-dev/src/cache/payload.rs
@@ -1,7 +1,7 @@
 //! Cache payload serialization for models.dev catalog data.
 //!
 //! The payload is stored as simple owned rows so it can be encoded compactly
-//! with bitcode and rebuilt into a [`ModelCatalog`](llm_coding_tools_core::models::ModelCatalog)
+//! with bitcode and rebuilt into a [`ModelCatalog`]
 //! without reparsing the original JSON.
 //!
 //! ## Compression Benchmark

From 3dccb9b10dd6a5610975bcade31925da8fd40faa Mon Sep 17 00:00:00 2001
From: Sewer56 <admin@sewer56.dev>
Date: Mon, 9 Mar 2026 00:16:59 +0000
Subject: [PATCH 14/22] Changed: Document models.dev cache size and load
 characteristics

Add concise README guidance for the cache footprint and hot-path timings so users can estimate runtime costs without reading the implementation.

Changes:
- Added cache size guidance based on a recent models.dev snapshot
- Documented compression, decompression, and ModelCatalog load timings
- Noted the single-core Ryzen 9950X3D measurement context

Benefits:
- Makes cache tradeoffs easier to understand at a glance
- Helps consumers estimate disk and load overhead more confidently
---
 src/llm-coding-tools-models-dev/README.md | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/src/llm-coding-tools-models-dev/README.md b/src/llm-coding-tools-models-dev/README.md
index cf2e0d7a..45698787 100644
--- a/src/llm-coding-tools-models-dev/README.md
+++ b/src/llm-coding-tools-models-dev/README.md
@@ -126,6 +126,17 @@ By default, cache is stored in the platform cache directory:
 
 Set `LLM_CODING_TOOLS_MODELS_DEV_CACHE_PATH` to override this path.
 
+## Cache size and performance
+
+Current ballpark from a recent `models.dev/api.json` snapshot:
+
+- Size: about `1.31 MiB` JSON -> `109 KiB` serialized payload -> `23.7 KiB` compressed cache
+- Compression: about `10.1 ms` with current `zstd` level `17`
+- Decompression: about `0.057 ms` (`57 us`) in `--release`
+- Cache load into `ModelCatalog`: about `0.31 ms` (`read + decompress + decode + build`)
+
+Measured on a single core of a Ryzen `9950X3D`; these are rough guidance numbers and will drift as the upstream catalog changes.
+
 ## Feature flags
 
 - `tokio` (default): async runtime support.

From 3d27fa27c2b62c4242d12992bc0e4c99320e872c Mon Sep 17 00:00:00 2001
From: Sewer56 <admin@sewer56.dev>
Date: Mon, 9 Mar 2026 09:19:26 +0000
Subject: [PATCH 15/22] Changed: Expand provider env-var capacity to support
 models.dev

Support providers with up to 7 env vars (e.g., amazon-bedrock with 4)
while maintaining 2-byte packed storage and memory efficiency.

Changes:
- Repacked PackedEnvRange from 14+2 bits to 13+3 bits (start+count)
- Max env vars per provider: 3 -> 7
- Max global env-var pool: 16384 -> 8192
- Provider lookup env vars: fixed array -> TinyVec (2 inline slots)
- Updated boundary tests for new limits and simplified to provider-only
- Benchmarks: max-provider test skips env vars to avoid pool limit

Benefits:
- Loads current models.dev payload (amazon-bedrock has 4 env vars)
- Keeps packed range at 2 bytes with minimal trade-offs
- Maintains memory-conscious lookup for common 1-2 env var providers
---
 src/Cargo.lock                                |  1 +
 src/llm-coding-tools-core/Cargo.toml          |  1 +
 .../benches/model_catalog_builder.rs          | 16 +++--
 .../src/models/catalog/internal/builder.rs    | 69 ++++++++++---------
 .../catalog/internal/packed_env_range.rs      | 20 +++---
 .../src/models/catalog/mod.rs                 | 24 +++----
 .../models/catalog/public/builder_types.rs    |  2 +-
 .../src/models/catalog/public/entry.rs        | 15 ++--
 .../src/models/catalog/public/mod.rs          |  1 +
 9 files changed, 80 insertions(+), 69 deletions(-)

diff --git a/src/Cargo.lock b/src/Cargo.lock
index 68757b47..2b708bc7 100644
--- a/src/Cargo.lock
+++ b/src/Cargo.lock
@@ -1491,6 +1491,7 @@ dependencies = [
  "serde_json",
  "tempfile",
  "thiserror 2.0.18",
+ "tinyvec",
  "tinyvec_string",
  "tokio",
  "wiremock",
diff --git a/src/llm-coding-tools-core/Cargo.toml b/src/llm-coding-tools-core/Cargo.toml
index b88ce699..3f7ec206 100644
--- a/src/llm-coding-tools-core/Cargo.toml
+++ b/src/llm-coding-tools-core/Cargo.toml
@@ -54,6 +54,7 @@ hashbrown = "0.16"
 
 # Inline string storage for patterns
 tinyvec_string = { version = "0.3", features = ["alloc"] }
+tinyvec = { version = "1.10", features = ["alloc"] }
 
 # Efficient immutable string table for provider URLs and env vars
 lite-strtab = "0.2"
diff --git a/src/llm-coding-tools-core/benches/model_catalog_builder.rs b/src/llm-coding-tools-core/benches/model_catalog_builder.rs
index d772ec34..cc83be5e 100644
--- a/src/llm-coding-tools-core/benches/model_catalog_builder.rs
+++ b/src/llm-coding-tools-core/benches/model_catalog_builder.rs
@@ -32,7 +32,7 @@ impl Dataset {
     }
 }
 
-fn make_dataset(provider_count: usize, model_count: usize) -> Dataset {
+fn make_dataset(provider_count: usize, model_count: usize, with_env_vars: bool) -> Dataset {
     debug_assert!(provider_count > 0);
 
     let mut providers = Vec::with_capacity(provider_count);
@@ -41,7 +41,11 @@ fn make_dataset(provider_count: usize, model_count: usize) -> Dataset {
             format!("provider-{i}"),
             ProviderInfo {
                 api_url: format!("https://provider-{i}.example/v1"),
-                env_vars: vec![format!("PROVIDER_{i}_API_KEY")],
+                env_vars: if with_env_vars {
+                    vec![format!("PROVIDER_{i}_API_KEY")]
+                } else {
+                    Vec::new()
+                },
                 api_type: if (i & 1) == 0 {
                     ProviderType::OpenAiCompletions
                 } else {
@@ -99,11 +103,11 @@ fn construct_batch(providers: &[ProviderSource], provider_models: &[ProviderMode
 fn benchmark_builder_construction(c: &mut Criterion) {
     let mut group = c.benchmark_group("model_catalog_builder_construct");
 
-    for (name, provider_count, model_count) in [
-        ("models_dev_snapshot", 96usize, 3031usize),
-        ("max", 16384usize, 65535usize),
+    for (name, provider_count, model_count, with_env_vars) in [
+        ("models_dev_snapshot", 96usize, 3031usize, true),
+        ("max", 16384usize, 65535usize, false),
     ] {
-        let dataset = make_dataset(provider_count, model_count);
+        let dataset = make_dataset(provider_count, model_count, with_env_vars);
         let provider_model_sources = dataset.provider_model_sources();
         group.throughput(Throughput::Elements(
             (provider_count + dataset.provider_models.len()) as u64,
diff --git a/src/llm-coding-tools-core/src/models/catalog/internal/builder.rs b/src/llm-coding-tools-core/src/models/catalog/internal/builder.rs
index d00b333c..db1f59cf 100644
--- a/src/llm-coding-tools-core/src/models/catalog/internal/builder.rs
+++ b/src/llm-coding-tools-core/src/models/catalog/internal/builder.rs
@@ -116,7 +116,7 @@ fn populate_tables_once(
             provider_info.api_type,
         )?;
 
-        // SAFETY: analyze_provider_sources bounds env_start and env_count (<= 3).
+        // SAFETY: analyze_provider_sources bounds env_start and env_count (<= 7).
         env_start += u16::from(env_count);
     }
 
@@ -331,7 +331,7 @@ fn analyze_provider_sources(
 
     for provider in providers {
         // SAFETY: total_env_keys is the start index for this provider.
-        // It must fit the 14-bit PackedEnvRange start field.
+        // It must fit the 13-bit PackedEnvRange start field.
         if total_env_keys > max_env_start {
             return Err(ModelCatalogBuildError::TooManyEnvVarKeys {
                 count: total_env_keys,
@@ -341,7 +341,7 @@ fn analyze_provider_sources(
 
         let provider_info = &provider.provider;
         let env_count = provider_info.env_vars.len();
-        // SAFETY: per-provider count must fit the 2-bit count field.
+        // SAFETY: per-provider count must fit the 3-bit count field.
         if env_count > max_env_count {
             return Err(
                 ModelCatalogBuildError::TooManyProviderEnvVarsForOneProvider {
@@ -611,7 +611,7 @@ mod tests {
             "alpha",
             provider(
                 "https://alpha.example",
-                &["A", "B", "C", "D"],
+                &["A", "B", "C", "D", "E", "F", "G", "H"],
                 ProviderType::Azure,
             ),
         )];
@@ -626,8 +626,8 @@ mod tests {
                 assert_eq!(
                     err,
                     ModelCatalogBuildError::TooManyProviderEnvVarsForOneProvider {
-                        count: 4,
-                        max: 3,
+                        count: 8,
+                        max: 7,
                     }
                 );
             }
@@ -685,17 +685,13 @@ mod tests {
 
     #[test]
     fn too_many_total_env_vars_returns_error() {
-        // 5462 providers * 3 env vars = 16386, so the 5463rd provider would have
-        // a start index of 16386, which exceeds MAX_ENV_START (16383).
-        let mut providers = Vec::with_capacity(5463);
-        for i in 0..5463usize {
+        // 8192 providers * 1 env var = 8192, so the 8193rd provider would have
+        // a start index of 8192, which exceeds MAX_ENV_START (8191).
+        let mut providers = Vec::with_capacity(8193);
+        for i in 0..8193usize {
             providers.push(provider_source(
                 &format!("provider_{}", i),
-                provider(
-                    "https://example.com",
-                    &["VAR1", "VAR2", "VAR3"],
-                    ProviderType::Azure,
-                ),
+                provider("https://example.com", &["VAR1"], ProviderType::Azure),
             ));
         }
         let mut provider_models = Vec::with_capacity(1);
@@ -710,8 +706,8 @@ mod tests {
                 assert_eq!(
                     err,
                     ModelCatalogBuildError::TooManyEnvVarKeys {
-                        count: 16_386,
-                        max: 16_383,
+                        count: 8_192,
+                        max: 8_191,
                     }
                 );
             }
@@ -720,32 +716,43 @@ mod tests {
     }
 
     #[test]
-    fn max_14bit_start_with_tail_entries_succeeds() {
-        // The last provider's start index can be 16383 and still be valid when it
-        // contributes 3 keys at indices 16383, 16384, and 16385.
-        let mut providers = Vec::with_capacity(5462);
-        for i in 0..5462usize {
+    fn max_13bit_start_with_tail_entries_succeeds() {
+        // The last provider's start index can be 8191 and still be valid when it
+        // contributes keys at indices 8191 through 8197.
+        let mut providers = Vec::with_capacity(1172);
+        for i in 0..1170usize {
             providers.push(provider_source(
                 &format!("provider_{}", i),
                 provider(
                     "https://example.com",
-                    &["VAR1", "VAR2", "VAR3"],
+                    &["VAR1", "VAR2", "VAR3", "VAR4", "VAR5", "VAR6", "VAR7"],
                     ProviderType::Azure,
                 ),
             ));
         }
-        let provider_models = vec![ProviderModelSource::new(
-            ProviderIdx::new(5461),
-            "m1",
-            info(4096, 512),
-        )];
+        providers.push(provider_source(
+            "provider_1170",
+            provider("https://example.com", &["VAR1"], ProviderType::Azure),
+        ));
+        providers.push(provider_source(
+            "provider_1171",
+            provider(
+                "https://example.com",
+                &["VAR1", "VAR2", "VAR3", "VAR4", "VAR5", "VAR6", "VAR7"],
+                ProviderType::Azure,
+            ),
+        ));
+        let provider_models = Vec::new();
 
         let catalog =
             build_from_source(&providers, &provider_models).expect("boundary case should pass");
-        let (provider, _) = catalog
-            .lookup("provider_5461", "m1")
+        let provider = catalog
+            .provider_from_index(ProviderIdx::new(1171))
             .expect("last provider should be addressable");
 
-        assert_eq!(provider.env_vars(), &["VAR1", "VAR2", "VAR3"]);
+        assert_eq!(
+            provider.env_vars(),
+            &["VAR1", "VAR2", "VAR3", "VAR4", "VAR5", "VAR6", "VAR7"]
+        );
     }
 }
diff --git a/src/llm-coding-tools-core/src/models/catalog/internal/packed_env_range.rs b/src/llm-coding-tools-core/src/models/catalog/internal/packed_env_range.rs
index bf7b982b..6122010f 100644
--- a/src/llm-coding-tools-core/src/models/catalog/internal/packed_env_range.rs
+++ b/src/llm-coding-tools-core/src/models/catalog/internal/packed_env_range.rs
@@ -1,15 +1,15 @@
 //! Packed env-var range entry for provider-to-env-key mapping.
 //!
 //! Layout (`u16`):
-//! - `14` bits: start index into provider_env_keys StringTable
-//! - `2` bits: count of env keys for this provider (0..=3)
+//! - `13` bits: start index into provider_env_keys StringTable
+//! - `3` bits: count of env keys for this provider (0..=7)
 
 use bitfields::bitfield;
 
 /// Maximum env-var count per provider representable by PackedEnvRange.
-pub const MAX_ENV_RANGE_COUNT: u8 = 3;
-/// Maximum start index representable by PackedEnvRange (14 bits).
-pub const MAX_ENV_START: u16 = (1u16 << 14) - 1; // 16383
+pub const MAX_ENV_RANGE_COUNT: u8 = 7;
+/// Maximum start index representable by PackedEnvRange (13 bits).
+pub const MAX_ENV_START: u16 = (1u16 << 13) - 1; // 8191
 
 /// Packed env-var range entry.
 ///
@@ -17,9 +17,9 @@ pub const MAX_ENV_START: u16 = (1u16 << 14) - 1; // 16383
 #[bitfield(u16)]
 #[derive(Clone, Copy, PartialEq, Eq, Hash)]
 pub struct PackedEnvRange {
-    #[bits(14)]
+    #[bits(13)]
     start: u16,
-    #[bits(2)]
+    #[bits(3)]
     count: u8,
 }
 
@@ -27,7 +27,7 @@ impl PackedEnvRange {
     /// Creates one packed env-var range entry.
     ///
     /// SAFETY: The `start` parameter is not validated here. The caller must ensure
-    /// `start` fits within 14 bits (max 16383). This invariant is enforced in
+    /// `start` fits within 13 bits (max 8191). This invariant is enforced in
     /// `analyze_provider_sources` before `populate_tables_once` calls this function.
     #[inline]
     pub fn from_parts(start: u16, count: u8) -> Self {
@@ -56,7 +56,7 @@ mod tests {
 
     #[test]
     fn count_capped_at_max() {
-        let packed = PackedEnvRange::from_parts(0, 5);
-        assert_eq!(packed.count(), 3); // capped to MAX_ENV_RANGE_COUNT
+        let packed = PackedEnvRange::from_parts(0, 8);
+        assert_eq!(packed.count(), 7); // capped to MAX_ENV_RANGE_COUNT
     }
 }
diff --git a/src/llm-coding-tools-core/src/models/catalog/mod.rs b/src/llm-coding-tools-core/src/models/catalog/mod.rs
index d369d843..79ae717f 100644
--- a/src/llm-coding-tools-core/src/models/catalog/mod.rs
+++ b/src/llm-coding-tools-core/src/models/catalog/mod.rs
@@ -151,8 +151,8 @@
 //! | ------------------------- | ----------: | ------------------------------------------------ |
 //! | Max providers             |      65,536 | Addressable by 16-bit provider index             |
 //! | Max model configs         |      65,536 | Addressable by 16-bit model configuration index  |
-//! | Max provider env vars     |      16,384 | Global env-var pool offset (14-bit)              |
-//! | Max env vars per provider |           3 | Count field in provider range entry (2-bit)      |
+//! | Max provider env vars     |       8,192 | Global env-var pool offset (13-bit)              |
+//! | Max env vars per provider |           7 | Count field in provider range entry (3-bit)      |
 //! | Max input tokens          | 536,870,911 | 29-bit packed field (≈536M)                      |
 //! | Max output tokens         | 134,217,727 | 27-bit packed field (≈134M)                      |
 //! | Hash bits retained        |          48 | Truncated from 64-bit hash output                |
@@ -231,6 +231,7 @@ use internal::{
     PackedEnvRange, PackedModelEntry, PackedProviderModelTableEntry, PackedProviderTableEntry,
 };
 use lite_strtab::{StringId, StringTable};
+use public::{ProviderEnvVars, INLINE_PROVIDER_ENV_VARS};
 
 pub use public::builder_types::{ModelCatalogBuildError, ProviderModelSource, ProviderSource};
 pub use public::*;
@@ -472,21 +473,16 @@ impl ModelCatalog {
         let start = range.start();
         let count = range.count() as usize;
 
-        let mut env_vars = ["", "", ""];
-        #[allow(clippy::needless_range_loop)]
+        let mut env_vars: ProviderEnvVars<'_> =
+            ProviderEnvVars::with_capacity(count.max(INLINE_PROVIDER_ENV_VARS));
         for x in 0..count {
-            env_vars[x] = self
-                .provider_env_keys
-                .get(StringId::new(ProviderIdx::new(start + x as u16)))?;
+            env_vars.push(
+                self.provider_env_keys
+                    .get(StringId::new(ProviderIdx::new(start + x as u16)))?,
+            );
         }
 
-        Some(Provider::new(
-            provider_idx,
-            api_url,
-            env_vars,
-            count as u8,
-            api_type,
-        ))
+        Some(Provider::new(provider_idx, api_url, env_vars, api_type))
     }
 
     /// Looks up a model by its configuration index.
diff --git a/src/llm-coding-tools-core/src/models/catalog/public/builder_types.rs b/src/llm-coding-tools-core/src/models/catalog/public/builder_types.rs
index 014b17b6..b1749660 100644
--- a/src/llm-coding-tools-core/src/models/catalog/public/builder_types.rs
+++ b/src/llm-coding-tools-core/src/models/catalog/public/builder_types.rs
@@ -175,7 +175,7 @@ pub enum ModelCatalogBuildError {
         /// Maximum supported unique model configuration count.
         max: usize,
     },
-    /// One provider has too many env vars for the packed count field (max 3).
+    /// One provider has too many env vars for the packed count field (max 7).
     #[error("provider env-var count {count} exceeds supported maximum {max}")]
     TooManyProviderEnvVarsForOneProvider {
         /// Number of env vars supplied for one provider.
diff --git a/src/llm-coding-tools-core/src/models/catalog/public/entry.rs b/src/llm-coding-tools-core/src/models/catalog/public/entry.rs
index fc4f4060..e747381d 100644
--- a/src/llm-coding-tools-core/src/models/catalog/public/entry.rs
+++ b/src/llm-coding-tools-core/src/models/catalog/public/entry.rs
@@ -14,6 +14,11 @@
 use super::{Modality, ModelIdx, ProviderIdx};
 use crate::models::catalog::internal::Fixed4;
 use crate::models::ProviderType;
+use tinyvec::TinyVec;
+
+pub(crate) const INLINE_PROVIDER_ENV_VARS: usize = 2;
+
+pub(crate) type ProviderEnvVars<'a> = TinyVec<[&'a str; INLINE_PROVIDER_ENV_VARS]>;
 
 /// Provider lookup result.
 #[derive(Debug, Clone, PartialEq, Eq)]
@@ -23,9 +28,7 @@ pub struct Provider<'a> {
     /// Provider base URL.
     pub api_url: &'a str,
     /// Candidate environment variables used to resolve API keys.
-    env_vars: [&'a str; 3],
-    /// Number of valid entries in `env_vars`.
-    env_vars_count: u8,
+    env_vars: ProviderEnvVars<'a>,
     /// Type of API used by the provider.
     pub api_type: ProviderType,
 }
@@ -36,15 +39,13 @@ impl<'a> Provider<'a> {
     pub(crate) fn new(
         provider_idx: ProviderIdx,
         api_url: &'a str,
-        env_vars: [&'a str; 3],
-        env_vars_count: u8,
+        env_vars: ProviderEnvVars<'a>,
         api_type: ProviderType,
     ) -> Self {
         Self {
             provider_idx,
             api_url,
             env_vars,
-            env_vars_count,
             api_type,
         }
     }
@@ -52,7 +53,7 @@ impl<'a> Provider<'a> {
     /// Returns the candidate environment variables used to resolve API keys.
     #[inline]
     pub fn env_vars(&self) -> &[&'a str] {
-        &self.env_vars[..self.env_vars_count as usize]
+        self.env_vars.as_slice()
     }
 }
 
diff --git a/src/llm-coding-tools-core/src/models/catalog/public/mod.rs b/src/llm-coding-tools-core/src/models/catalog/public/mod.rs
index c6b0c044..adf55b2e 100644
--- a/src/llm-coding-tools-core/src/models/catalog/public/mod.rs
+++ b/src/llm-coding-tools-core/src/models/catalog/public/mod.rs
@@ -7,6 +7,7 @@
 
 pub use builder_types::{LookupTableKind, ModelInfo, ProviderInfo};
 pub use entry::{Model, Provider};
+pub(crate) use entry::{ProviderEnvVars, INLINE_PROVIDER_ENV_VARS};
 pub use modality::Modality;
 pub use model_idx::ModelIdx;
 pub use provider_idx::ProviderIdx;

From cb327f9eb5292ed555a2cf730a620434cf59e081 Mon Sep 17 00:00:00 2001
From: Sewer56 <admin@sewer56.dev>
Date: Mon, 9 Mar 2026 10:30:36 +0000
Subject: [PATCH 16/22] Fixed: Correct modality fallback documentation in
 catalog_sources.rs

The documentation incorrectly stated that unmapped modality labels default
to Modality::TEXT. The actual behavior returns Modality::empty(), which
correctly indicates a data error that should be fixed at the source.

Changes:
- Updated doc comment to reflect actual Modality::empty() fallback behavior
- Aligned documentation with existing test assertions

Benefits:
- Documentation now matches implementation
- Clarifies that empty modalities indicate data issues, not defaults
---
 src/llm-coding-tools-models-dev/src/api/catalog_sources.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/llm-coding-tools-models-dev/src/api/catalog_sources.rs b/src/llm-coding-tools-models-dev/src/api/catalog_sources.rs
index 0f012ef3..a0968293 100644
--- a/src/llm-coding-tools-models-dev/src/api/catalog_sources.rs
+++ b/src/llm-coding-tools-models-dev/src/api/catalog_sources.rs
@@ -8,8 +8,8 @@
 //! - model modalities are mapped from `modalities.input[]`/`modalities.output[]`
 //!   into directional [`Modality`] flags;
 //! - unknown npm package identifiers map to [`ProviderType::Unknown`];
-//! - unknown modality labels are ignored; if nothing maps, modalities default to
-//!   [`Modality::TEXT`];
+//! - unknown modality labels are ignored; if nothing maps, modalities remain
+//!   [`Modality::empty()`];
 //! - model rows remain provider-scoped; shared configurations are deduplicated by
 //!   core during catalog build.
 

From 709489f191f3e0df4620e4ea2bad27c9fe85f475 Mon Sep 17 00:00:00 2001
From: Sewer56 <admin@sewer56.dev>
Date: Mon, 9 Mar 2026 10:45:47 +0000
Subject: [PATCH 17/22] Fixed: Concurrent cache writers collision and Windows
 rename failure

Use tempfile::NamedTempFile with persist() for atomic cache writes instead of
fixed ".tmp" suffix that caused collisions and cross-platform issues.

Changes:
- Use NamedTempFile::new_in() for unique temp file names per write
- Use persist() for cross-platform atomic replacement (handles Windows)
- Move tempfile from dev-dependencies to production dependencies
- Add JoinHandle error variant for tokio spawn_blocking errors
- Remove unused write/rename fs helpers

Benefits:
- Eliminates race condition between concurrent cache writers
- Fixes atomic replace on Windows (delete+rename fallback)
- Guarantees each write uses its own temp file
---
 src/llm-coding-tools-models-dev/Cargo.toml    |  4 +-
 .../src/cache/format.rs                       | 49 +++++++++++++------
 src/llm-coding-tools-models-dev/src/error.rs  |  5 ++
 .../src/fs/blocking_impl.rs                   | 12 -----
 .../src/fs/tokio_impl.rs                      | 12 -----
 5 files changed, 42 insertions(+), 40 deletions(-)

diff --git a/src/llm-coding-tools-models-dev/Cargo.toml b/src/llm-coding-tools-models-dev/Cargo.toml
index 26ef2550..85c65cd7 100644
--- a/src/llm-coding-tools-models-dev/Cargo.toml
+++ b/src/llm-coding-tools-models-dev/Cargo.toml
@@ -55,10 +55,12 @@ serde_json = "1.0.145"
 # Ergonomic error definitions
 thiserror = "2.0.18"
 
+# Temp file with atomic rename support
+tempfile = "3.26"
+
 # Async runtime (when tokio feature enabled)
 tokio = { version = "1.49", features = ["fs", "io-util"], optional = true }
 
 [dev-dependencies]
 tokio = { version = "1.49", features = ["rt", "macros"] }
-tempfile = "3.26"
 serial_test = "3"
diff --git a/src/llm-coding-tools-models-dev/src/cache/format.rs b/src/llm-coding-tools-models-dev/src/cache/format.rs
index 1e1db3f1..ca1ac88e 100644
--- a/src/llm-coding-tools-models-dev/src/cache/format.rs
+++ b/src/llm-coding-tools-models-dev/src/cache/format.rs
@@ -41,7 +41,7 @@ use crate::{
 use endian_writer::{EndianReader, EndianWriter, HasSize, LittleEndianReader, LittleEndianWriter};
 use endian_writer_derive::EndianWritable;
 use std::mem::size_of;
-use std::path::{Path, PathBuf};
+use std::path::Path;
 use std::ptr::copy_nonoverlapping;
 
 /// Fixed v1 prelude, encoded little-endian.
@@ -87,12 +87,7 @@ pub(crate) struct CacheFileData {
     file_bytes: Box<[u8]>,
 }
 
-/// Returns a temporary path for atomic cache writes.
-fn temp_cache_path(path: &Path) -> PathBuf {
-    let mut temp = path.as_os_str().to_os_string();
-    temp.push(".tmp");
-    PathBuf::from(temp)
-}
+
 
 impl CacheFileData {
     /// Returns the optional ETag as a borrowed byte slice.
@@ -175,20 +170,24 @@ pub(crate) async fn read_cache_file(path: &Path) -> CatalogResult<CacheFileData>
     })
 }
 
-/// Writes a cache container to disk.
+/// Writes a cache container to disk atomically.
+///
+/// Uses `tempfile::NamedTempFile` to ensure unique temp files for concurrent
+/// writers and cross-platform atomic replacement via `persist()`.
 ///
 /// # Errors
 ///
 /// Returns [`CatalogError::CacheFormat`] if a block length exceeds v1 `u32`
-/// limits.
+/// limits, or [`CatalogError::Io`] on I/O failure.
 #[maybe_async::maybe_async]
 pub(crate) async fn write_cache_file(
     path: &Path,
     input: &CacheWriteInput<'_>,
 ) -> CatalogResult<()> {
-    if let Some(parent) = path.parent() {
-        fs::create_dir_all(parent).await?;
-    }
+    let parent = path.parent().ok_or_else(|| {
+        CatalogError::CacheFormat("cache path has no parent directory")
+    })?;
+    fs::create_dir_all(parent).await?;
 
     let etag_bytes = input.etag.unwrap_or(&[]);
     let prelude = CachePreludeV1 {
@@ -230,9 +229,29 @@ pub(crate) async fn write_cache_file(
     }
 
     let file_bytes = fs::assume_init_u8_slice(uninit);
-    let temp_path = temp_cache_path(path);
-    fs::write(&temp_path, &file_bytes).await?;
-    fs::rename(&temp_path, path).await?;
+
+    #[cfg(feature = "blocking")]
+    {
+        use std::io::Write as _;
+        let mut temp = tempfile::NamedTempFile::new_in(parent)?;
+        temp.write_all(&file_bytes)?;
+        temp.persist(path).map_err(|e| e.error)?;
+    }
+
+    #[cfg(feature = "tokio")]
+    {
+        let file_bytes: Box<[u8]> = file_bytes;
+        let path = path.to_path_buf();
+        let parent = parent.to_path_buf();
+        tokio::task::spawn_blocking(move || {
+            use std::io::Write as _;
+            let mut temp = tempfile::NamedTempFile::new_in(&parent)?;
+            temp.write_all(&file_bytes)?;
+            temp.persist(&path).map_err(|e| e.error)
+        })
+        .await??;
+    }
+
     Ok(())
 }
 
diff --git a/src/llm-coding-tools-models-dev/src/error.rs b/src/llm-coding-tools-models-dev/src/error.rs
index ba0b2b21..3b3fdfb4 100644
--- a/src/llm-coding-tools-models-dev/src/error.rs
+++ b/src/llm-coding-tools-models-dev/src/error.rs
@@ -41,6 +41,11 @@ pub enum CatalogError {
     /// The catalog failed to build from source rows.
     #[error("catalog build error: {0}")]
     ModelCatalogBuild(#[from] ModelCatalogBuildError),
+
+    /// A spawn_blocking task failed.
+    #[cfg(feature = "tokio")]
+    #[error("blocking task failed: {0}")]
+    JoinHandle(#[from] tokio::task::JoinError),
 }
 
 /// Convenience type alias for catalog operations.
diff --git a/src/llm-coding-tools-models-dev/src/fs/blocking_impl.rs b/src/llm-coding-tools-models-dev/src/fs/blocking_impl.rs
index d35ffbec..b087b7b1 100644
--- a/src/llm-coding-tools-models-dev/src/fs/blocking_impl.rs
+++ b/src/llm-coding-tools-models-dev/src/fs/blocking_impl.rs
@@ -21,20 +21,8 @@ pub(crate) fn read(path: impl AsRef<Path>) -> std::io::Result<Box<[u8]>> {
     Ok(super::assume_init_u8_slice(bytes))
 }
 
-/// Writes all bytes to a file, creating or truncating it.
-#[inline]
-pub(crate) fn write(path: impl AsRef<Path>, bytes: &[u8]) -> std::io::Result<()> {
-    std::fs::write(path, bytes)
-}
-
 /// Creates a directory and all parent directories.
 #[inline]
 pub(crate) fn create_dir_all(path: impl AsRef<Path>) -> std::io::Result<()> {
     std::fs::create_dir_all(path)
 }
-
-/// Renames a file, replacing the destination if it exists.
-#[inline]
-pub(crate) fn rename(from: impl AsRef<Path>, to: impl AsRef<Path>) -> std::io::Result<()> {
-    std::fs::rename(from, to)
-}
diff --git a/src/llm-coding-tools-models-dev/src/fs/tokio_impl.rs b/src/llm-coding-tools-models-dev/src/fs/tokio_impl.rs
index 830d29e9..77f304dd 100644
--- a/src/llm-coding-tools-models-dev/src/fs/tokio_impl.rs
+++ b/src/llm-coding-tools-models-dev/src/fs/tokio_impl.rs
@@ -22,20 +22,8 @@ pub(crate) async fn read(path: impl AsRef<Path>) -> std::io::Result<Box<[u8]>> {
     Ok(super::assume_init_u8_slice(bytes))
 }
 
-/// Writes all bytes to a file, creating or truncating it.
-#[inline]
-pub(crate) async fn write(path: impl AsRef<Path>, bytes: &[u8]) -> std::io::Result<()> {
-    tokio::fs::write(path, bytes).await
-}
-
 /// Creates a directory and all parent directories.
 #[inline]
 pub(crate) async fn create_dir_all(path: impl AsRef<Path>) -> std::io::Result<()> {
     tokio::fs::create_dir_all(path).await
 }
-
-/// Renames a file, replacing the destination if it exists.
-#[inline]
-pub(crate) async fn rename(from: impl AsRef<Path>, to: impl AsRef<Path>) -> std::io::Result<()> {
-    tokio::fs::rename(from, to).await
-}

From 4814497c13fece7d2bef551440f643bf7fafcafd Mon Sep 17 00:00:00 2001
From: Sewer56 <admin@sewer56.dev>
Date: Wed, 11 Mar 2026 21:55:31 +0000
Subject: [PATCH 18/22] Fixed: Apply rustfmt formatting to cache module

Correct formatting issues that caused CI failure.

Changes:
- Removed extra blank lines after struct definition
- Reformatted method chain for parent directory extraction

Benefits:
- CI passes consistently
- Code follows project style guidelines
---
 src/llm-coding-tools-models-dev/src/cache/format.rs | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/src/llm-coding-tools-models-dev/src/cache/format.rs b/src/llm-coding-tools-models-dev/src/cache/format.rs
index ca1ac88e..55e62dd6 100644
--- a/src/llm-coding-tools-models-dev/src/cache/format.rs
+++ b/src/llm-coding-tools-models-dev/src/cache/format.rs
@@ -87,8 +87,6 @@ pub(crate) struct CacheFileData {
     file_bytes: Box<[u8]>,
 }
 
-
-
 impl CacheFileData {
     /// Returns the optional ETag as a borrowed byte slice.
     #[inline]
@@ -184,9 +182,9 @@ pub(crate) async fn write_cache_file(
     path: &Path,
     input: &CacheWriteInput<'_>,
 ) -> CatalogResult<()> {
-    let parent = path.parent().ok_or_else(|| {
-        CatalogError::CacheFormat("cache path has no parent directory")
-    })?;
+    let parent = path
+        .parent()
+        .ok_or_else(|| CatalogError::CacheFormat("cache path has no parent directory"))?;
     fs::create_dir_all(parent).await?;
 
     let etag_bytes = input.etag.unwrap_or(&[]);

From 7da6d4d8c30b7029b9e1db598159e7d0cf05dfb3 Mon Sep 17 00:00:00 2001
From: Sewer56 <admin@sewer56.dev>
Date: Wed, 11 Mar 2026 22:10:58 +0000
Subject: [PATCH 19/22] Fixed: Improve robustness of cache path handling and
 format parsing

Use var_os for environment variable to correctly handle non-UTF8 OS paths
and reject empty values explicitly. Add checked arithmetic for cache file
size calculations to prevent overflow.

Changes:
- path.rs: Replace var with var_os for non-UTF8 path support
- path.rs: Return Configuration error for empty env var values
- format.rs: Use checked_add to prevent integer overflow in size calc

Benefits:
- Correctly handles paths with arbitrary bytes on Unix systems
- Provides clear error for misconfigured empty cache path
- Prevents potential panic from integer overflow on malformed cache
---
 src/llm-coding-tools-models-dev/src/cache/format.rs |  7 ++++++-
 src/llm-coding-tools-models-dev/src/cache/path.rs   | 10 ++++++++--
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/src/llm-coding-tools-models-dev/src/cache/format.rs b/src/llm-coding-tools-models-dev/src/cache/format.rs
index 55e62dd6..8059ac1b 100644
--- a/src/llm-coding-tools-models-dev/src/cache/format.rs
+++ b/src/llm-coding-tools-models-dev/src/cache/format.rs
@@ -152,7 +152,12 @@ pub(crate) async fn read_cache_file(path: &Path) -> CatalogResult<CacheFileData>
     let prelude = decode_prelude(&file_bytes[..CACHE_HEADER_LEN]);
     let etag_len = prelude.etag_len as usize;
     let payload_len_compressed = prelude.payload_len_compressed as usize;
-    let expected_total = CACHE_HEADER_LEN + etag_len + payload_len_compressed; // unlikely to overflow. file is trusted.
+    let expected_total = CACHE_HEADER_LEN
+        .checked_add(etag_len)
+        .and_then(|v| v.checked_add(payload_len_compressed))
+        .ok_or(CatalogError::CacheFormat(
+            "cache file size exceeds platform limits",
+        ))?;
 
     if file_bytes.len() != expected_total {
         return Err(CatalogError::CacheFormat(
diff --git a/src/llm-coding-tools-models-dev/src/cache/path.rs b/src/llm-coding-tools-models-dev/src/cache/path.rs
index 9aa5bf58..e43d256e 100644
--- a/src/llm-coding-tools-models-dev/src/cache/path.rs
+++ b/src/llm-coding-tools-models-dev/src/cache/path.rs
@@ -46,8 +46,14 @@ const CACHE_FILENAME: &str = "models.dev.catalog.v1.cache";
 /// ```
 pub fn shared_cache_path() -> CatalogResult<PathBuf> {
     // 1. Check env var first
-    if let Ok(path) = std::env::var(CACHE_PATH_ENV_VAR) {
-        return Ok(PathBuf::from(path));
+    if let Some(os_str) = std::env::var_os(CACHE_PATH_ENV_VAR) {
+        if os_str.is_empty() {
+            return Err(CatalogError::Configuration(format!(
+                "{} is set but empty",
+                CACHE_PATH_ENV_VAR
+            )));
+        }
+        return Ok(PathBuf::from(&os_str));
     }
 
     // 2. Fall back to dirs::cache_dir()

From b07462ed20ffb14495d16ac54636f85fc0ddcf90 Mon Sep 17 00:00:00 2001
From: Sewer56 <admin@sewer56.dev>
Date: Wed, 11 Mar 2026 22:15:56 +0000
Subject: [PATCH 20/22] Security: Add request timeouts to catalog sync HTTP
 clients

Prevent indefinite hangs on network issues by setting connect and
overall request timeouts on both async (tokio) and blocking reqwest
clients.

Changes:
- Added 30-second timeout constant for HTTP operations
- Configured connect_timeout and timeout on async client builder
- Configured connect_timeout and timeout on blocking client builder

Benefits:
- Prevents indefinite hangs on slow/unresponsive servers
- Provides predictable failure behavior for network issues
- Improves reliability of catalog synchronization
---
 .../src/catalog/sync.rs                           | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/src/llm-coding-tools-models-dev/src/catalog/sync.rs b/src/llm-coding-tools-models-dev/src/catalog/sync.rs
index 26d9005e..629c9bf7 100644
--- a/src/llm-coding-tools-models-dev/src/catalog/sync.rs
+++ b/src/llm-coding-tools-models-dev/src/catalog/sync.rs
@@ -21,6 +21,9 @@ use std::path::Path;
 /// Default production endpoint for the models.dev catalog snapshot.
 const MODELS_DEV_API_URL: &str = "https://models.dev/api.json";
 
+/// Timeout for HTTP connections and requests in seconds.
+const REQUEST_TIMEOUT_SECS: u64 = 30;
+
 #[cfg(test)]
 static TEST_MODELS_DEV_API_URL: std::sync::Mutex<Option<String>> = std::sync::Mutex::new(None);
 
@@ -108,9 +111,17 @@ pub(crate) async fn load_catalog_from_url(
     }
 
     #[cfg(feature = "tokio")]
-    let client = reqwest::Client::new();
+    let client = reqwest::Client::builder()
+        .connect_timeout(std::time::Duration::from_secs(REQUEST_TIMEOUT_SECS))
+        .timeout(std::time::Duration::from_secs(REQUEST_TIMEOUT_SECS))
+        .build()
+        .expect("client builder should not fail with valid config");
     #[cfg(feature = "blocking")]
-    let client = reqwest::blocking::Client::new();
+    let client = reqwest::blocking::Client::builder()
+        .connect_timeout(std::time::Duration::from_secs(REQUEST_TIMEOUT_SECS))
+        .timeout(std::time::Duration::from_secs(REQUEST_TIMEOUT_SECS))
+        .build()
+        .expect("client builder should not fail with valid config");
 
     let mut request = client.get(url);
     if let Some(etag) = cache_file.as_ref().and_then(|file| file.etag_bytes()) {

From 28434f52c96a2701b357b7e9bb71fea57a735938 Mon Sep 17 00:00:00 2001
From: Sewer56 <admin@sewer56.dev>
Date: Wed, 11 Mar 2026 22:26:22 +0000
Subject: [PATCH 21/22] Fixed: Reuse cached catalog on transient sync failures

Catalog sync now keeps the last known-good payload when reading the response body fails or the server returns retryable statuses.

Changes:
- Fallback to cached catalog when reading a 200 response body fails
- Treat 429 and 5xx responses as retryable when cache data is available
- Add sync tests and mock server responses for transient status and truncated body cases

Benefits:
- Avoids dropping usable cached catalog data during temporary upstream issues
- Keeps catalog loading stable across transient HTTP and transport failures
---
 .../src/catalog/sync.rs                       | 110 ++++++++++++++++--
 .../src/catalog/test_utils.rs                 |  35 +++++-
 2 files changed, 134 insertions(+), 11 deletions(-)

diff --git a/src/llm-coding-tools-models-dev/src/catalog/sync.rs b/src/llm-coding-tools-models-dev/src/catalog/sync.rs
index 629c9bf7..d63023d6 100644
--- a/src/llm-coding-tools-models-dev/src/catalog/sync.rs
+++ b/src/llm-coding-tools-models-dev/src/catalog/sync.rs
@@ -43,7 +43,7 @@ fn models_dev_api_url() -> Cow<'static, str> {
     Cow::Borrowed(MODELS_DEV_API_URL)
 }
 
-/// Resolves the result to return after a request failure.
+/// Resolves the result to return after a transient request failure.
 ///
 /// Cached data takes precedence over surfacing the request error so callers can
 /// continue with the last known-good catalog when possible.
@@ -63,6 +63,11 @@ fn load_after_request_failure(
     Err(CatalogError::Reqwest(request_error))
 }
 
+#[inline]
+fn is_transient_status(status: StatusCode) -> bool {
+    status == StatusCode::TOO_MANY_REQUESTS || status.is_server_error()
+}
+
 #[maybe_async::maybe_async]
 /// Loads the catalog at `path` using the default models.dev endpoint.
 ///
@@ -83,7 +88,7 @@ pub(crate) async fn load_catalog_at_path(path: &Path) -> CatalogResult<CatalogLo
 /// - send `If-None-Match` when the cache includes an ETag
 /// - on `200 OK`, decode the response and rewrite the cache
 /// - on `304 Not Modified`, load the existing cache
-/// - on request failure, fall back to cache when available
+/// - on request, response-body, or transient status failure, fall back to cache when available
 ///
 /// # Performance
 ///
@@ -140,7 +145,12 @@ pub(crate) async fn load_catalog_from_url(
                 .headers()
                 .get(ETAG)
                 .map(|value| value.as_bytes().to_vec());
-            let body = response.bytes().await?;
+            let body = match response.bytes().await {
+                Ok(body) => body,
+                Err(error) => {
+                    return load_after_request_failure(error, cache_file.as_ref(), cache_error);
+                }
+            };
             let payload = cache_payload_from_api_json_bytes(body.as_ref())?;
             let payload_encoded = encode_cache_payload(&payload);
             let catalog = catalog_from_cache_payload(payload)?;
@@ -173,6 +183,17 @@ pub(crate) async fn load_catalog_from_url(
                 ))
             }
         }
+        status if is_transient_status(status) => {
+            if let Some(cache_file) = cache_file.as_ref() {
+                load_catalog_from_cache_file_data(cache_file, CatalogLoadSource::FallbackCache)
+            } else if let Some(error) = cache_error {
+                Err(error)
+            } else {
+                Err(CatalogError::Configuration(format!(
+                    "unexpected catalog sync status: {status}",
+                )))
+            }
+        }
         status => Err(CatalogError::Configuration(format!(
             "unexpected catalog sync status: {status}",
         ))),
@@ -288,11 +309,8 @@ mod tests {
         format!("http://127.0.0.1:{port}/api.json")
     }
 
-    #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))]
-    async fn sync_returns_fallback_cache_on_request_failure_with_valid_cache() {
-        let temp = TempDir::new().expect("tempdir");
-        let cache_path = temp.path().join("test.cache");
-
+    #[maybe_async::maybe_async]
+    async fn seed_cache(cache_path: &Path) {
         let payload = CatalogCachePayload {
             providers: vec![CachedProviderRow {
                 provider_key: "openai".to_string(),
@@ -313,7 +331,7 @@ mod tests {
         let encoded = encode_cache_payload(&payload);
         let compressed = zstd::bulk::compress(&encoded, 1).expect("compress");
         crate::cache::format::write_cache_file(
-            &cache_path,
+            cache_path,
             &CacheWriteInput {
                 etag: Some(b"\"cached-etag-456\""),
                 payload_compressed: &compressed,
@@ -322,6 +340,21 @@ mod tests {
         )
         .await
         .expect("seed cache");
+    }
+
+    #[test]
+    fn transient_status_detection_matches_retryable_responses() {
+        assert!(is_transient_status(StatusCode::TOO_MANY_REQUESTS));
+        assert!(is_transient_status(StatusCode::SERVICE_UNAVAILABLE));
+        assert!(!is_transient_status(StatusCode::NOT_FOUND));
+    }
+
+    #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))]
+    async fn sync_returns_fallback_cache_on_request_failure_with_valid_cache() {
+        let temp = TempDir::new().expect("tempdir");
+        let cache_path = temp.path().join("test.cache");
+
+        seed_cache(&cache_path).await;
 
         let result = load_catalog_from_url(&cache_path, &refused_local_url())
             .await
@@ -331,6 +364,65 @@ mod tests {
         assert!(result.catalog.lookup_provider("openai").is_some());
     }
 
+    #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))]
+    async fn sync_returns_fallback_cache_on_transient_status_with_valid_cache() {
+        let temp = TempDir::new().expect("tempdir");
+        let cache_path = temp.path().join("test.cache");
+
+        seed_cache(&cache_path).await;
+
+        let (_handle, url) = start_mock_server(MockResponse::Status {
+            code: 503,
+            reason: "Service Unavailable",
+        });
+
+        let result = load_catalog_from_url(&cache_path, &url)
+            .await
+            .expect("fallback should succeed");
+
+        assert_eq!(result.source, CatalogLoadSource::FallbackCache);
+        assert!(result.catalog.lookup_provider("openai").is_some());
+    }
+
+    #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))]
+    async fn sync_returns_cache_error_on_transient_status_with_corrupt_cache() {
+        let temp = TempDir::new().expect("tempdir");
+        let cache_path = temp.path().join("corrupt.cache");
+
+        std::fs::write(&cache_path, [0_u8; 11]).expect("write corrupt cache");
+
+        let (_handle, url) = start_mock_server(MockResponse::Status {
+            code: 429,
+            reason: "Too Many Requests",
+        });
+
+        match load_catalog_from_url(&cache_path, &url).await {
+            Err(error) => assert!(matches!(error, CatalogError::CacheFormat(_))),
+            Ok(_) => panic!("transient status with corrupt cache should error"),
+        }
+    }
+
+    #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))]
+    async fn sync_returns_fallback_cache_on_body_read_failure_with_valid_cache() {
+        let temp = TempDir::new().expect("tempdir");
+        let cache_path = temp.path().join("test.cache");
+
+        seed_cache(&cache_path).await;
+
+        let (_handle, url) = start_mock_server(MockResponse::PartialOk {
+            etag: "\"fresh-etag\"",
+            body: "{".to_string(),
+            content_length: 32,
+        });
+
+        let result = load_catalog_from_url(&cache_path, &url)
+            .await
+            .expect("fallback should succeed");
+
+        assert_eq!(result.source, CatalogLoadSource::FallbackCache);
+        assert!(result.catalog.lookup_provider("openai").is_some());
+    }
+
     #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))]
     async fn sync_returns_request_error_when_request_fails_without_cache() {
         let temp = TempDir::new().expect("tempdir");
diff --git a/src/llm-coding-tools-models-dev/src/catalog/test_utils.rs b/src/llm-coding-tools-models-dev/src/catalog/test_utils.rs
index c6bec11b..a7fec883 100644
--- a/src/llm-coding-tools-models-dev/src/catalog/test_utils.rs
+++ b/src/llm-coding-tools-models-dev/src/catalog/test_utils.rs
@@ -1,8 +1,22 @@
 use std::io::{BufRead, Write};
 
 pub enum MockResponse {
-    Ok { etag: &'static str, body: String },
-    NotModified { etag: &'static str },
+    Ok {
+        etag: &'static str,
+        body: String,
+    },
+    PartialOk {
+        etag: &'static str,
+        body: String,
+        content_length: usize,
+    },
+    NotModified {
+        etag: &'static str,
+    },
+    Status {
+        code: u16,
+        reason: &'static str,
+    },
 }
 
 pub fn sample_api_json() -> &'static [u8] {
@@ -65,11 +79,28 @@ pub fn start_mock_server(response: MockResponse) -> (std::thread::JoinHandle<()>
                 stream.write_all(response.as_bytes()).expect("write");
                 stream.flush().expect("flush");
             }
+            MockResponse::PartialOk {
+                etag,
+                body,
+                content_length,
+            } => {
+                let response = format!(
+                    "HTTP/1.1 200 OK\r\nContent-Type: application/json\r\nETag: {}\r\nContent-Length: {}\r\n\r\n{}",
+                    etag, content_length, body
+                );
+                stream.write_all(response.as_bytes()).expect("write");
+                stream.flush().expect("flush");
+            }
             MockResponse::NotModified { etag } => {
                 let response = format!("HTTP/1.1 304 Not Modified\r\nETag: {}\r\n\r\n", etag);
                 stream.write_all(response.as_bytes()).expect("write");
                 stream.flush().expect("flush");
             }
+            MockResponse::Status { code, reason } => {
+                let response = format!("HTTP/1.1 {code} {reason}\r\nContent-Length: 0\r\n\r\n");
+                stream.write_all(response.as_bytes()).expect("write");
+                stream.flush().expect("flush");
+            }
         }
     });
 

From df24a978e8a9da77886bf20180464c73978672b6 Mon Sep 17 00:00:00 2001
From: Sewer56 <admin@sewer56.dev>
Date: Wed, 11 Mar 2026 22:43:24 +0000
Subject: [PATCH 22/22] Changed: Document safety of read_exact with snapshot
 length

Add # Safety section to read functions explaining why the pattern of
snapshotting file length then calling read_exact is safe in this codebase.

Changes:
- Added safety documentation to blocking_impl.rs read function
- Added safety documentation to tokio_impl.rs read function

Benefits:
- Clarifies for future reviewers why this pattern cannot cause data loss
- Documents the atomic write pattern used throughout the codebase
---
 src/llm-coding-tools-models-dev/src/fs/blocking_impl.rs | 8 ++++++++
 src/llm-coding-tools-models-dev/src/fs/tokio_impl.rs    | 8 ++++++++
 2 files changed, 16 insertions(+)

diff --git a/src/llm-coding-tools-models-dev/src/fs/blocking_impl.rs b/src/llm-coding-tools-models-dev/src/fs/blocking_impl.rs
index b087b7b1..01252a9b 100644
--- a/src/llm-coding-tools-models-dev/src/fs/blocking_impl.rs
+++ b/src/llm-coding-tools-models-dev/src/fs/blocking_impl.rs
@@ -4,6 +4,14 @@ use std::io::{ErrorKind, Read as _};
 use std::path::Path;
 
 /// Reads a file into memory in one pre-sized allocation.
+///
+/// # Safety
+///
+/// We snapshot file length then call `read_exact`, which would miss data appended after
+/// the metadata call if the file grew mid-read. However, within this codebase all
+/// writes go to a temp file first, then rename to target — so files are never
+/// appended to in place.
+/// Therefore this race cannot occur.
 #[inline]
 pub(crate) fn read(path: impl AsRef<Path>) -> std::io::Result<Box<[u8]>> {
     let mut file = std::fs::File::open(path)?;
diff --git a/src/llm-coding-tools-models-dev/src/fs/tokio_impl.rs b/src/llm-coding-tools-models-dev/src/fs/tokio_impl.rs
index 77f304dd..29d04d2c 100644
--- a/src/llm-coding-tools-models-dev/src/fs/tokio_impl.rs
+++ b/src/llm-coding-tools-models-dev/src/fs/tokio_impl.rs
@@ -5,6 +5,14 @@ use std::path::Path;
 use tokio::io::AsyncReadExt as _;
 
 /// Reads a file into memory in one pre-sized allocation.
+///
+/// # Safety
+///
+/// We snapshot file length then call `read_exact`, which would miss data appended after
+/// the metadata call if the file grew mid-read. However, within this codebase all
+/// writes go to a temp file first, then rename to target — so files are never
+/// appended to in place.
+/// Therefore this race cannot occur.
 #[inline]
 pub(crate) async fn read(path: impl AsRef<Path>) -> std::io::Result<Box<[u8]>> {
     let mut file = tokio::fs::File::open(path).await?;