diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index 8ebd0db2..37f6b714 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -52,13 +52,13 @@ jobs:
           cargo +stable binstall --no-confirm cargo-semver-checks --force
           rustup +stable target add ${{ matrix.target }}
 
-          for CRATE in "llm-coding-tools-core" "llm-coding-tools-agents" "llm-coding-tools-serdesai"; do
+          for CRATE in "llm-coding-tools-core" "llm-coding-tools-agents" "llm-coding-tools-serdesai" "llm-coding-tools-models-dev"; do
             SEARCH_RESULT=$(cargo search "^${CRATE}$" --limit 1)
             if echo "$SEARCH_RESULT" | grep -q "^${CRATE} "; then
               echo "Running semver checks for ${CRATE}..."
               # Note: llm-coding-tools-core has mutually exclusive async/blocking features,
               # so we must use --only-explicit-features to avoid enabling all features.
-              # The serdesai crate is async-only and doesn't have the tokio feature.
+              # llm-coding-tools-serdesai is async-only. models-dev supports both tokio and blocking.
               if [ "${CRATE}" = "llm-coding-tools-core" ]; then
                 cargo +stable semver-checks -p "${CRATE}" --target ${{ matrix.target }} --only-explicit-features --features tokio
               else
@@ -79,6 +79,7 @@ jobs:
           cargo doc -p llm-coding-tools-core --features tokio --document-private-items --no-deps --target ${{ matrix.target }}
           cargo doc -p llm-coding-tools-agents --document-private-items --no-deps --target ${{ matrix.target }}
           cargo doc -p llm-coding-tools-serdesai --document-private-items --no-deps --target ${{ matrix.target }}
+          cargo doc -p llm-coding-tools-models-dev --document-private-items --no-deps --target ${{ matrix.target }}
 
       - name: Run linter
         if: github.event_name == 'pull_request' || startsWith(github.ref, 'refs/tags/')
@@ -88,6 +89,7 @@ jobs:
           cargo clippy -p llm-coding-tools-core --features tokio --target ${{ matrix.target }} -- -D warnings
           cargo clippy -p llm-coding-tools-agents --target ${{ matrix.target }} -- -D warnings
           cargo clippy -p llm-coding-tools-serdesai --target ${{ matrix.target }} -- -D warnings
+          cargo clippy -p llm-coding-tools-models-dev --target ${{ matrix.target }} -- -D warnings
 
       - name: Run formatter check
         uses: actions-rust-lang/rustfmt@v1
@@ -125,6 +127,7 @@ jobs:
           use-cross: ${{ matrix.use-cross }}
           packages: |
             llm-coding-tools-core
+            llm-coding-tools-models-dev
           no-default-features: true
           features: "blocking"
 
@@ -135,12 +138,14 @@ jobs:
           RUSTDOCFLAGS: "-D warnings"
         run: |
           cargo doc -p llm-coding-tools-core --no-default-features --features blocking --document-private-items --no-deps --target ${{ matrix.target }}
+          cargo doc -p llm-coding-tools-models-dev --no-default-features --features blocking --document-private-items --no-deps --target ${{ matrix.target }}
 
       - name: Run linter (Blocking)
         if: github.event_name == 'pull_request' || startsWith(github.ref, 'refs/tags/')
         working-directory: src
         run: |
           cargo clippy -p llm-coding-tools-core --no-default-features --features blocking --target ${{ matrix.target }} -- -D warnings
+          cargo clippy -p llm-coding-tools-models-dev --no-default-features --features blocking --target ${{ matrix.target }} -- -D warnings
 
       - name: Run formatter check
         uses: actions-rust-lang/rustfmt@v1
@@ -165,6 +170,7 @@ jobs:
             src/llm-coding-tools-core
             src/llm-coding-tools-agents
             src/llm-coding-tools-serdesai
+            src/llm-coding-tools-models-dev
           compression-tool: 7z
           artifact-groups-file: .github/artifact-groups.yml
           changelog-enabled: "true"
diff --git a/README.MD b/README.MD
index 2d728aac..303acd67 100644
--- a/README.MD
+++ b/README.MD
@@ -1,18 +1,30 @@
 # llm-coding-tools
 
 [![Crates.io - llm-coding-tools-core](https://img.shields.io/crates/v/llm-coding-tools-core.svg)](https://crates.io/crates/llm-coding-tools-core)
+[![Crates.io - llm-coding-tools-agents](https://img.shields.io/crates/v/llm-coding-tools-agents.svg)](https://crates.io/crates/llm-coding-tools-agents)
 [![Crates.io - llm-coding-tools-serdesai](https://img.shields.io/crates/v/llm-coding-tools-serdesai.svg)](https://crates.io/crates/llm-coding-tools-serdesai)
+[![Crates.io - llm-coding-tools-models-dev](https://img.shields.io/crates/v/llm-coding-tools-models-dev.svg)](https://crates.io/crates/llm-coding-tools-models-dev)
 [![Docs.rs](https://docs.rs/llm-coding-tools-serdesai/badge.svg)](https://docs.rs/llm-coding-tools-serdesai)
 [![CI](https://github.com/Sewer56/llm-coding-tools/actions/workflows/rust.yml/badge.svg)](https://github.com/Sewer56/llm-coding-tools/actions)
 
-Lightweight, high-performance coding tool implementations for LLM-powered development agents. Plug and play into your favourite frameworks.
+Lightweight, heavily optimized coding tool implementations for LLM-powered
+development agents.
+
+Suitable for server use (<3 MiB), or as building blocks for your own TUI coding agent.
 
 ## About This Workspace
 
-This workspace contains multiple Rust crates for integrating coding tools with LLM agents:
+This workspace contains multiple Rust crates for integrating coding tools with
+LLM agents:
 
-- **[llm-coding-tools-core](./src/llm-coding-tools-core/)**: Framework-agnostic core operations and utilities
-- **[llm-coding-tools-serdesai](./src/llm-coding-tools-serdesai/)**: serdesAI framework-specific Tool implementations
+- **[llm-coding-tools-core](./src/llm-coding-tools-core/)**:
+  Framework-agnostic core operations and utilities
+- **[llm-coding-tools-agents](./src/llm-coding-tools-agents/)**:
+  OpenCode agent markdown loader and typed catalogue
+- **[llm-coding-tools-serdesai](./src/llm-coding-tools-serdesai/)**:
+  serdesAI framework-specific Tool implementations
+- **[llm-coding-tools-models-dev](./src/llm-coding-tools-models-dev/)**:
+  models.dev catalog sync with cached fallback and ETag refresh
 
 ## Features
 
@@ -22,6 +34,9 @@ This workspace contains multiple Rust crates for integrating coding tools with L
 - **Web**: URL fetching with HTML-to-markdown conversion
 - **Path Security**: Choose between unrestricted or sandboxed file access
 - **Context Strings**: Embedded LLM guidance for tool usage
+- **Agent Loading**: Parse OpenCode-compatible agent markdown into typed configs
+- **Model Catalog Sync**: Download and cache the models.dev catalog for
+  provider/model lookups
 
 ## Feature Flags (llm-coding-tools-core)
 
@@ -30,41 +45,24 @@ This workspace contains multiple Rust crates for integrating coding tools with L
 
 ## Quick Start
 
-Add to your `Cargo.toml`:
+Pick the crate that matches your use case:
 
 ```toml
 [dependencies]
-llm-coding-tools-serdesai = "0.1"
+llm-coding-tools-core = "0.2"         # Framework-agnostic tool implementations
+llm-coding-tools-agents = "0.1"       # OpenCode agent markdown loader
+llm-coding-tools-models-dev = "0.1"   # models.dev catalog sync and cache
+llm-coding-tools-serdesai = "0.2"     # serdesAI integration
 ```
 
-```rust,no_run
-use llm_coding_tools_serdesai::{AgentBuilder, BashTool, TodoTools};
-use llm_coding_tools_serdesai::absolute::{ReadTool, WriteTool, EditTool, GlobTool, GrepTool};
-
-let mut builder = AgentBuilder::new();
-let todos = TodoTools::new();
-
-builder
-    .track(ReadTool::<true>::new())
-    .track(WriteTool::new())
-    .track(EditTool::<true>::new())
-    .track(GlobTool::new())
-    .track(GrepTool::<true>::new())
-    .track(BashTool::new())
-    .track(&todos.read)
-    .track(&todos.write);
-
-let mut agent = builder.build();
-
-// Use the agent
-// let response = agent.invoke("List all files").await?;
-```
+For a runnable agent setup, start with `llm-coding-tools-serdesai` and the
+examples below.
 
 ## Examples
 
 ```bash
 # serdesAI framework - Basic agent setup
-cargo run --example serdesai-agents -p llm-coding-tools-serdesai
+cargo run --example serdesai-basic -p llm-coding-tools-serdesai
 
 # serdesAI framework - Sandboxed file access
 cargo run --example serdesai-sandboxed -p llm-coding-tools-serdesai
@@ -73,18 +71,26 @@ cargo run --example serdesai-sandboxed -p llm-coding-tools-serdesai
 ## Documentation
 
 - [llm-coding-tools-core README](./src/llm-coding-tools-core/README.md)
+- [llm-coding-tools-agents README](./src/llm-coding-tools-agents/README.md)
 - [llm-coding-tools-serdesai README](./src/llm-coding-tools-serdesai/README.md)
+- [llm-coding-tools-models-dev README](./src/llm-coding-tools-models-dev/README.md)
 - [Developer Guidelines](./src/AGENTS.md)
 
 ## Contributing
 
-Contributions are welcome! Please ensure all tests pass and the code follows our guidelines.
+Contributions are welcome! Please ensure all tests pass and the code follows
+our guidelines.
 
 ## Deprecation Notice
 
-**Rig framework support (`llm-coding-tools-rig`) has been removed** (commit 17158db) due to library bugs that prevented examples from running reliably.
+**Rig framework support (`llm-coding-tools-rig`) has been removed**
+(commit 17158db) due to library bugs that prevented examples from running
+reliably.
 
-You're welcome to submit a PR re-adding rig support if you're willing to maintain it. Since I don't use rig personally, I'm not able to actively maintain that integration. Alternatively, you can create your own crate building on `llm-coding-tools-core` directly.
+You're welcome to submit a PR re-adding rig support if you're willing to
+maintain it. Since I don't use rig personally, I'm not able to actively
+maintain that integration. Alternatively, you can create your own crate
+building on `llm-coding-tools-core` directly.
 
 ## License
 
diff --git a/src/.cargo/verify.ps1 b/src/.cargo/verify.ps1
index 745bf015..481ad21b 100644
--- a/src/.cargo/verify.ps1
+++ b/src/.cargo/verify.ps1
@@ -2,8 +2,8 @@
 # All steps must pass without warnings
 # Keep in sync with verify.sh
 #
-# Note: llm-coding-tools-serdesai is async-only (implements async Tool traits).
-# The blocking feature only applies to llm-coding-tools-core.
+# Note: llm-coding-tools-serdesai is async-only.
+# Blocking mode is validated for core and models-dev.
 
 $ErrorActionPreference = "Stop"
 
@@ -35,25 +35,29 @@ try {
 Invoke-LoggedCommand "cargo" @("build", "-p", "llm-coding-tools-core", "--quiet")
 Invoke-LoggedCommand "cargo" @("build", "-p", "llm-coding-tools-agents", "--quiet")
 Invoke-LoggedCommand "cargo" @("build", "-p", "llm-coding-tools-serdesai", "--quiet")
+Invoke-LoggedCommand "cargo" @("build", "-p", "llm-coding-tools-models-dev", "--quiet")
 
 Write-Host "Testing..."
 Invoke-LoggedCommand "cargo" @("test", "-p", "llm-coding-tools-core", "--quiet")
 Invoke-LoggedCommand "cargo" @("test", "-p", "llm-coding-tools-agents", "--quiet")
 Invoke-LoggedCommand "cargo" @("test", "-p", "llm-coding-tools-serdesai", "--quiet")
+Invoke-LoggedCommand "cargo" @("test", "-p", "llm-coding-tools-models-dev", "--quiet")
 
 Write-Host "Clippy..."
 Invoke-LoggedCommand "cargo" @("clippy", "-p", "llm-coding-tools-core", "--quiet", "--", "-D", "warnings")
 Invoke-LoggedCommand "cargo" @("clippy", "-p", "llm-coding-tools-agents", "--quiet", "--", "-D", "warnings")
 Invoke-LoggedCommand "cargo" @("clippy", "-p", "llm-coding-tools-serdesai", "--quiet", "--", "-D", "warnings")
+Invoke-LoggedCommand "cargo" @("clippy", "-p", "llm-coding-tools-models-dev", "--quiet", "--", "-D", "warnings")
 
 Write-Host "Testing blocking feature..."
 Invoke-LoggedCommand "cargo" @("test", "-p", "llm-coding-tools-core", "--no-default-features", "--features", "blocking", "--quiet")
+Invoke-LoggedCommand "cargo" @("test", "-p", "llm-coding-tools-models-dev", "--no-default-features", "--features", "blocking", "--quiet")
 
 Write-Host "Docs..."
 $originalRustdocFlags = $env:RUSTDOCFLAGS
 $env:RUSTDOCFLAGS = "-D warnings"
 try {
-    Invoke-LoggedCommand "cargo" @("doc", "--workspace", "--no-deps", "--quiet")
+    Invoke-LoggedCommand "cargo" @("doc", "--workspace", "--document-private-items", "--no-deps", "--quiet")
 } finally {
     $env:RUSTDOCFLAGS = $originalRustdocFlags
 }
@@ -65,6 +69,7 @@ Write-Host "Publish dry-run..."
 Invoke-LoggedCommand "cargo" @("publish", "--dry-run", "--allow-dirty", "-p", "llm-coding-tools-core", "--quiet")
 Invoke-LoggedCommand "cargo" @("publish", "--dry-run", "--allow-dirty", "-p", "llm-coding-tools-agents", "--quiet")
 Invoke-LoggedCommand "cargo" @("publish", "--dry-run", "--allow-dirty", "-p", "llm-coding-tools-serdesai", "--quiet")
+Invoke-LoggedCommand "cargo" @("publish", "--dry-run", "--allow-dirty", "-p", "llm-coding-tools-models-dev", "--quiet")
 
 Write-Host "All checks passed!"
 }
diff --git a/src/.cargo/verify.sh b/src/.cargo/verify.sh
index b51897cf..36ff4728 100755
--- a/src/.cargo/verify.sh
+++ b/src/.cargo/verify.sh
@@ -3,8 +3,8 @@
 # All steps must pass without warnings
 # Keep in sync with verify.ps1
 #
-# Note: llm-coding-tools-serdesai is async-only (implements async Tool traits).
-# The blocking feature only applies to llm-coding-tools-core.
+# Note: llm-coding-tools-serdesai is async-only.
+# Blocking mode is validated for core and models-dev.
 
 set -e
 
@@ -24,22 +24,26 @@ echo "Building..."
 run_cmd cargo build -p llm-coding-tools-core --quiet
 run_cmd cargo build -p llm-coding-tools-agents --quiet
 run_cmd cargo build -p llm-coding-tools-serdesai --quiet
+run_cmd cargo build -p llm-coding-tools-models-dev --quiet
 
 echo "Testing..."
 run_cmd cargo test -p llm-coding-tools-core --quiet
 run_cmd cargo test -p llm-coding-tools-agents --quiet
 run_cmd cargo test -p llm-coding-tools-serdesai --quiet
+run_cmd cargo test -p llm-coding-tools-models-dev --quiet
 
 echo "Clippy..."
 run_cmd cargo clippy -p llm-coding-tools-core --quiet -- -D warnings
 run_cmd cargo clippy -p llm-coding-tools-agents --quiet -- -D warnings
 run_cmd cargo clippy -p llm-coding-tools-serdesai --quiet -- -D warnings
+run_cmd cargo clippy -p llm-coding-tools-models-dev --quiet -- -D warnings
 
 echo "Testing blocking feature..."
 run_cmd cargo test -p llm-coding-tools-core --no-default-features --features blocking --quiet
+run_cmd cargo test -p llm-coding-tools-models-dev --no-default-features --features blocking --quiet
 
 echo "Docs..."
-run_cmd env RUSTDOCFLAGS="-D warnings" cargo doc --workspace --no-deps --quiet
+run_cmd env RUSTDOCFLAGS="-D warnings" cargo doc --workspace --document-private-items --no-deps --quiet
 
 echo "Formatting..."
 run_cmd cargo fmt --all --check --quiet
@@ -48,5 +52,6 @@ echo "Publish dry-run..."
 run_cmd cargo publish --dry-run --allow-dirty -p llm-coding-tools-core --quiet
 run_cmd cargo publish --dry-run --allow-dirty -p llm-coding-tools-agents --quiet
 run_cmd cargo publish --dry-run --allow-dirty -p llm-coding-tools-serdesai --quiet
+run_cmd cargo publish --dry-run --allow-dirty -p llm-coding-tools-models-dev --quiet
 
 echo "All checks passed!"
diff --git a/src/Cargo.lock b/src/Cargo.lock
index a9d43b94..2b708bc7 100644
--- a/src/Cargo.lock
+++ b/src/Cargo.lock
@@ -74,6 +74,12 @@ version = "1.0.100"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61"
 
+[[package]]
+name = "arrayvec"
+version = "0.7.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50"
+
 [[package]]
 name = "assert-json-diff"
 version = "2.0.2"
@@ -144,6 +150,30 @@ version = "0.22.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
 
+[[package]]
+name = "bitcode"
+version = "0.6.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0a6ed1b54d8dc333e7be604d00fa9262f4635485ffea923647b6521a5fff045d"
+dependencies = [
+ "arrayvec",
+ "bitcode_derive",
+ "bytemuck",
+ "glam",
+ "serde",
+]
+
+[[package]]
+name = "bitcode_derive"
+version = "0.6.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "238b90427dfad9da4a9abd60f3ec1cdee6b80454bde49ed37f1781dd8e9dc7f9"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
 [[package]]
 name = "bitfields"
 version = "1.0.2"
@@ -197,6 +227,12 @@ version = "3.19.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "5dd9dc738b7a8311c7ade152424974d8115f2cdad61e8dab8dac9f2362298510"
 
+[[package]]
+name = "bytemuck"
+version = "1.25.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c8efb64bd706a16a1bdde310ae86b351e4d21550d98d056f22f8a7f7a2183fec"
+
 [[package]]
 name = "bytes"
 version = "1.11.0"
@@ -324,6 +360,16 @@ dependencies = [
  "memchr",
 ]
 
+[[package]]
+name = "concat-idents"
+version = "1.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f76990911f2267d837d9d0ad060aa63aaad170af40904b29461734c339030d4d"
+dependencies = [
+ "quote",
+ "syn",
+]
+
 [[package]]
 name = "const-random"
 version = "0.1.18"
@@ -548,6 +594,27 @@ dependencies = [
  "crypto-common",
 ]
 
+[[package]]
+name = "dirs"
+version = "6.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c3e8aa94d75141228480295a7d0e7feb620b1a5ad9f12bc40be62411e38cce4e"
+dependencies = [
+ "dirs-sys",
+]
+
+[[package]]
+name = "dirs-sys"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e01a3366d27ee9890022452ee61b2b63a67e6f13f58900b651ff5665f0bb1fab"
+dependencies = [
+ "libc",
+ "option-ext",
+ "redox_users",
+ "windows-sys 0.61.2",
+]
+
 [[package]]
 name = "displaydoc"
 version = "0.2.5"
@@ -595,6 +662,29 @@ dependencies = [
  "encoding_rs",
 ]
 
+[[package]]
+name = "endian-writer"
+version = "2.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b5fba6714ed232b3a46d07255c9cb2d20e9a8aee06a20d5d2e3eb4e2b48d28ae"
+dependencies = [
+ "concat-idents",
+ "paste",
+]
+
+[[package]]
+name = "endian-writer-derive"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "160e7b32d1a63d6f02993f5ce2da2b7125480ae40c45d9a0b74d158f203f7e53"
+dependencies = [
+ "endian-writer",
+ "memoffset",
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
 [[package]]
 name = "equivalent"
 version = "1.0.2"
@@ -794,6 +884,12 @@ dependencies = [
  "wasip3",
 ]
 
+[[package]]
+name = "glam"
+version = "0.32.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "34627c5158214743a374170fed714833fdf4e4b0cbcc1ea98417866a4c5d4441"
+
 [[package]]
 name = "globset"
 version = "0.4.18"
@@ -1315,15 +1411,24 @@ checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2"
 
 [[package]]
 name = "libc"
-version = "0.2.180"
+version = "0.2.182"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bcc35a38544a891a5f7c865aca548a982ccb3b8650a5b06d0fd33a10283c56fc"
+checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112"
+
+[[package]]
+name = "libredox"
+version = "0.1.13"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "062b52cd41eb8d929e81b592a47df833c33c15684933a9329440137a6d9f134c"
+dependencies = [
+ "libc",
+]
 
 [[package]]
 name = "linux-raw-sys"
-version = "0.11.0"
+version = "0.12.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039"
+checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53"
 
 [[package]]
 name = "lite-strtab"
@@ -1364,6 +1469,7 @@ name = "llm-coding-tools-core"
 version = "0.2.0"
 dependencies = [
  "ahash",
+ "bitcode",
  "bitfields",
  "bitflags",
  "criterion",
@@ -1385,11 +1491,32 @@ dependencies = [
  "serde_json",
  "tempfile",
  "thiserror 2.0.18",
+ "tinyvec",
  "tinyvec_string",
  "tokio",
  "wiremock",
 ]
 
+[[package]]
+name = "llm-coding-tools-models-dev"
+version = "0.1.0"
+dependencies = [
+ "bitcode",
+ "dirs",
+ "endian-writer",
+ "endian-writer-derive",
+ "llm-coding-tools-core",
+ "maybe-async",
+ "reqwest 0.13.1",
+ "serde",
+ "serde_json",
+ "serial_test",
+ "tempfile",
+ "thiserror 2.0.18",
+ "tokio",
+ "zstd",
+]
+
 [[package]]
 name = "llm-coding-tools-serdesai"
 version = "0.2.0"
@@ -1475,6 +1602,15 @@ dependencies = [
  "libc",
 ]
 
+[[package]]
+name = "memoffset"
+version = "0.9.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a"
+dependencies = [
+ "autocfg",
+]
+
 [[package]]
 name = "mime"
 version = "0.3.17"
@@ -1500,9 +1636,9 @@ checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086"
 
 [[package]]
 name = "nix"
-version = "0.31.1"
+version = "0.31.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "225e7cfe711e0ba79a68baeddb2982723e4235247aefce1482f2f16c27865b66"
+checksum = "5d6d0705320c1e6ba1d912b5e37cf18071b6c2e9b7fa8215a1e8a7651966f5d3"
 dependencies = [
  "bitflags",
  "cfg-if",
@@ -1547,6 +1683,12 @@ version = "0.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9f50d9b3dabb09ecd771ad0aa242ca6894994c130308ca3d7684634df8037391"
 
+[[package]]
+name = "option-ext"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d"
+
 [[package]]
 name = "page_size"
 version = "0.6.0"
@@ -1580,6 +1722,12 @@ dependencies = [
  "windows-link",
 ]
 
+[[package]]
+name = "paste"
+version = "1.0.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a"
+
 [[package]]
 name = "percent-encoding"
 version = "2.3.2"
@@ -1637,6 +1785,12 @@ version = "0.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
 
+[[package]]
+name = "pkg-config"
+version = "0.3.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c"
+
 [[package]]
 name = "plotters"
 version = "0.3.7"
@@ -1886,6 +2040,17 @@ dependencies = [
  "bitflags",
 ]
 
+[[package]]
+name = "redox_users"
+version = "0.5.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a4e608c6638b9c18977b00b475ac1f28d14e84b27d8d42f70e0bf1e3dec127ac"
+dependencies = [
+ "getrandom 0.2.17",
+ "libredox",
+ "thiserror 2.0.18",
+]
+
 [[package]]
 name = "ref-cast"
 version = "1.0.25"
@@ -2036,9 +2201,9 @@ checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d"
 
 [[package]]
 name = "rustix"
-version = "1.1.3"
+version = "1.1.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "146c9e247ccc180c1f61615433868c99f3de3ae256a30a43b49f67c2d9171f34"
+checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190"
 dependencies = [
  "bitflags",
  "errno",
@@ -2144,6 +2309,15 @@ dependencies = [
  "winapi-util",
 ]
 
+[[package]]
+name = "scc"
+version = "2.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "46e6f046b7fef48e2660c57ed794263155d713de679057f2d0c169bfc6e756cc"
+dependencies = [
+ "sdd",
+]
+
 [[package]]
 name = "schannel"
 version = "0.1.28"
@@ -2184,6 +2358,12 @@ version = "1.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
 
+[[package]]
+name = "sdd"
+version = "3.0.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "490dcfcbfef26be6800d11870ff2df8774fa6e86d047e3e8c8a76b25655e41ca"
+
 [[package]]
 name = "security-framework"
 version = "3.5.1"
@@ -2512,6 +2692,32 @@ dependencies = [
  "tracing",
 ]
 
+[[package]]
+name = "serial_test"
+version = "3.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "911bd979bf1070a3f3aa7b691a3b3e9968f339ceeec89e08c280a8a22207a32f"
+dependencies = [
+ "futures-executor",
+ "futures-util",
+ "log",
+ "once_cell",
+ "parking_lot",
+ "scc",
+ "serial_test_derive",
+]
+
+[[package]]
+name = "serial_test_derive"
+version = "3.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0a7d91949b85b0d2fb687445e448b40d322b6b3e4af6b44a29b21d9a5f33e6d9"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
 [[package]]
 name = "sha2"
 version = "0.10.9"
@@ -2642,12 +2848,12 @@ dependencies = [
 
 [[package]]
 name = "tempfile"
-version = "3.25.0"
+version = "3.26.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0136791f7c95b1f6dd99f9cc786b91bb81c3800b639b3478e561ddb7be95e5f1"
+checksum = "82a72c767771b47409d2345987fda8628641887d5466101319899796367354a0"
 dependencies = [
  "fastrand",
- "getrandom 0.3.4",
+ "getrandom 0.4.2",
  "once_cell",
  "rustix",
  "windows-sys 0.61.2",
@@ -3773,3 +3979,31 @@ name = "zmij"
 version = "1.0.16"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "dfcd145825aace48cff44a8844de64bf75feec3080e0aa5cdbde72961ae51a65"
+
+[[package]]
+name = "zstd"
+version = "0.13.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a"
+dependencies = [
+ "zstd-safe",
+]
+
+[[package]]
+name = "zstd-safe"
+version = "7.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d"
+dependencies = [
+ "zstd-sys",
+]
+
+[[package]]
+name = "zstd-sys"
+version = "2.0.16+zstd.1.5.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "91e19ebc2adc8f83e43039e79776e3fda8ca919132d68a1fed6a5faca2683748"
+dependencies = [
+ "cc",
+ "pkg-config",
+]
diff --git a/src/Cargo.toml b/src/Cargo.toml
index 0dbd669e..7429dbb9 100644
--- a/src/Cargo.toml
+++ b/src/Cargo.toml
@@ -1,7 +1,7 @@
 
 [workspace]
 resolver = "2"
-members = ["llm-coding-tools-core", "llm-coding-tools-serdesai", "llm-coding-tools-agents"]
+members = ["llm-coding-tools-core", "llm-coding-tools-serdesai", "llm-coding-tools-agents", "llm-coding-tools-models-dev"]
 
 # Profile Build
 [profile.profile]
diff --git a/src/llm-coding-tools-agents/README.md b/src/llm-coding-tools-agents/README.md
index b17ce11d..e6d6bb85 100644
--- a/src/llm-coding-tools-agents/README.md
+++ b/src/llm-coding-tools-agents/README.md
@@ -4,13 +4,16 @@ Load OpenCode agent markdown files into a typed Rust catalogue.
 
 This crate is a loader for the [OpenCode agent schema](https://opencode.ai/docs/agents/).
 
-It is a drop-in replacement for OpenCode agent files: agents you create for OpenCode should load here unchanged.
+It is drop-in compatible with OpenCode agent files: agents you create for
+OpenCode should load here unchanged.
 
 ## What it provides
 
-- [`AgentLoader`] for loading agent configs from directories, files, or in-memory markdown.
+- [`AgentLoader`] for loading agent configs from directories, files, or
+  in-memory markdown.
 - [`AgentCatalog`] for storing and looking up loaded [`AgentConfig`] entries.
-- [`RulesetExt`] for converting frontmatter `permission` data into runtime [`Ruleset`]s.
+- [`RulesetExt`] for converting frontmatter `permission` data into runtime
+  [`Ruleset`]s.
 
 ## Quick start
 
@@ -43,19 +46,34 @@ permission:
 Prompt body here...
 ```
 
-For field behaviour, see OpenCode docs for [`mode`](https://opencode.ai/docs/agents#mode), [`model`](https://opencode.ai/docs/agents#model), and [`permissions`](https://opencode.ai/docs/agents#permissions).
+For field behaviour, see OpenCode docs for
+[`mode`](https://opencode.ai/docs/agents#mode),
+[`model`](https://opencode.ai/docs/agents#model), and
+[`permissions`](https://opencode.ai/docs/agents#permissions).
 
 ## Compatibility notes
 
-This library does not provide interactive UX extensions (for example, TUI approval flows).
-To avoid false expectations, settings that require interaction are rejected, while settings with no runtime effect are accepted and ignored:
+This library does not provide interactive UX extensions (for example, TUI
+approval flows).
 
-- [`permission.task`](https://opencode.ai/docs/agents#task-permissions): `ask` is rejected with a schema validation error (`allow`/`deny` only), because `ask` is an interactive approval mode in OpenCode ([docs](https://opencode.ai/docs/permissions#what-ask-does)).
-- [`hidden`](https://opencode.ai/docs/agents#hidden) is accepted for compatibility, but ignored at runtime.
+To avoid false expectations, settings that require interaction are rejected,
+while settings with no runtime effect are accepted and ignored:
+
+- [`permission.task`](https://opencode.ai/docs/agents#task-permissions):
+  `ask` is rejected with a schema validation error (`allow`/`deny` only),
+  because `ask` is an interactive approval mode in OpenCode
+  ([docs](https://opencode.ai/docs/permissions#what-ask-does)).
+- [`hidden`](https://opencode.ai/docs/agents#hidden) is accepted for
+  compatibility, but ignored at runtime.
 
 ## Integration
 
 This crate only loads and validates agent configs.
-Pass [`AgentCatalog`] to your runtime adapter (for example, `llm-coding-tools-serdesai`) to build registries and Task tooling.
+Pass [`AgentCatalog`] to your runtime adapter (for example,
+`llm-coding-tools-serdesai`) to build registries and Task tooling.
+
+If you want to validate `model` strings against a catalog, call
+[`AgentConfig::model_parts`] and pass the returned `(provider, model)` into
+your lookup layer.
 
 [`Ruleset`]: llm_coding_tools_core::permissions::Ruleset
diff --git a/src/llm-coding-tools-agents/src/types/config.rs b/src/llm-coding-tools-agents/src/types/config.rs
index 78010529..1d4a74ab 100644
--- a/src/llm-coding-tools-agents/src/types/config.rs
+++ b/src/llm-coding-tools-agents/src/types/config.rs
@@ -108,6 +108,8 @@ pub struct AgentConfig {
     #[serde(default)]
     pub description: String,
     /// Optional model override (format: "provider/model-id").
+    ///
+    /// Use [`AgentConfig::model_parts`] to split this into `(provider, model)` before catalog lookup.
     #[serde(default)]
     pub model: Option<String>,
     /// Legacy visibility flag accepted for compatibility only.
@@ -136,6 +138,18 @@ pub struct AgentConfig {
 }
 
 impl AgentConfig {
+    /// Splits the configured `model` at its first `/` into `(provider, model)`.
+    ///
+    /// Returns `None` when `model` is unset, contains no `/`, or has an empty
+    /// part on either side of that first `/`; later `/`s stay in the model part.
+    #[inline]
+    pub fn model_parts(&self) -> Option<(&str, &str)> {
+        match self.model.as_deref()?.split_once('/')? {
+            ("", _) | (_, "") => None,
+            parts => Some(parts),
+        }
+    }
+
     /// Creates an [`AgentConfig`] from raw frontmatter and parsed prompt body.
     pub(crate) fn from_raw(default_name: String, raw: RawFrontmatter, prompt: String) -> Self {
         Self {
@@ -152,3 +166,65 @@ impl AgentConfig {
         }
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::{AgentConfig, AgentMode};
+    use ahash::AHashMap;
+    use indexmap::IndexMap;
+
+    /// Builds a minimal [`AgentConfig`] whose only varying field is `model`.
+    fn config_with_model(model: Option<&str>) -> AgentConfig {
+        AgentConfig {
+            name: "example".to_string(),
+            mode: AgentMode::All,
+            description: String::new(),
+            model: model.map(str::to_string),
+            hidden: false,
+            temperature: None,
+            top_p: None,
+            permission: IndexMap::new(),
+            options: AHashMap::new(),
+            prompt: String::new(),
+        }
+    }
+
+    /// Only the first `/` separates provider from model; later ones stay in the model part.
+    #[test]
+    fn model_parts_returns_provider_and_model() {
+        let config = config_with_model(Some("synthetic/hf:moonshotai/Kimi-K2.5"));
+
+        assert_eq!(
+            config.model_parts(),
+            Some(("synthetic", "hf:moonshotai/Kimi-K2.5"))
+        );
+    }
+
+    #[test]
+    fn model_parts_rejects_missing_separator() {
+        let config = config_with_model(Some("synthetic-only"));
+
+        assert_eq!(config.model_parts(), None);
+    }
+
+    #[test]
+    fn model_parts_rejects_empty_provider() {
+        let config = config_with_model(Some("/model-only"));
+
+        assert_eq!(config.model_parts(), None);
+    }
+
+    #[test]
+    fn model_parts_rejects_empty_model() {
+        let config = config_with_model(Some("provider-only/"));
+
+        assert_eq!(config.model_parts(), None);
+    }
+
+    #[test]
+    fn model_parts_handles_absent_model() {
+        let config = config_with_model(None);
+
+        assert_eq!(config.model_parts(), None);
+    }
+}
diff --git a/src/llm-coding-tools-core/Cargo.toml b/src/llm-coding-tools-core/Cargo.toml
index 3238bfa4..3f7ec206 100644
--- a/src/llm-coding-tools-core/Cargo.toml
+++ b/src/llm-coding-tools-core/Cargo.toml
@@ -40,6 +40,9 @@ serde_json = "1.0"
 # Zero overhead compile time bitflag generation
 bitflags = "2.11.0"
 
+# Fast binary serialization for catalog cache types
+bitcode = "0.6.9"
+
 # Compile-time generated packed bitfield structs for model metadata
 bitfields = "1.0.2"
 
@@ -51,6 +54,7 @@ hashbrown = "0.16"
 
 # Inline string storage for patterns
 tinyvec_string = { version = "0.3", features = ["alloc"] }
+tinyvec = { version = "1.10", features = ["alloc"] }
 
 # Efficient immutable string table for provider URLs and env vars
 lite-strtab = "0.2"
diff --git a/src/llm-coding-tools-core/benches/model_catalog_builder.rs b/src/llm-coding-tools-core/benches/model_catalog_builder.rs
index 4aedbac3..cc83be5e 100644
--- a/src/llm-coding-tools-core/benches/model_catalog_builder.rs
+++ b/src/llm-coding-tools-core/benches/model_catalog_builder.rs
@@ -3,16 +3,36 @@
 use core::hint::black_box;
 use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
 use llm_coding_tools_core::models::{
-    Modality, ModelCatalog, ModelInfo, ProviderInfo, ProviderModelSource, ProviderSource,
-    ProviderType,
+    Modality, ModelCatalog, ModelInfo, ProviderIdx, ProviderInfo, ProviderModelSource,
+    ProviderSource, ProviderType,
 };
 
+struct ProviderModelSpec {
+    provider_idx: ProviderIdx,
+    model_key: String,
+    model: ModelInfo,
+}
+
 struct Dataset {
     providers: Vec<ProviderSource>,
-    provider_models: Vec<ProviderModelSource>,
+    provider_models: Vec<ProviderModelSpec>,
+}
+
+impl Dataset {
+    /// Returns builder inputs whose model keys borrow from this dataset.
+    fn provider_model_sources(&self) -> Vec<ProviderModelSource<'_>> {
+        // `provider_idx` and `model` are copied out of each spec; only the
+        // key is borrowed, so the result is valid while `self` is alive.
+        self.provider_models
+            .iter()
+            .map(|spec| {
+                ProviderModelSource::new(spec.provider_idx, spec.model_key.as_str(), spec.model)
+            })
+            .collect()
+    }
 }
 
-fn make_dataset(provider_count: usize, model_count: usize) -> Dataset {
+fn make_dataset(provider_count: usize, model_count: usize, with_env_vars: bool) -> Dataset {
     debug_assert!(provider_count > 0);
 
     let mut providers = Vec::with_capacity(provider_count);
@@ -21,7 +41,11 @@ fn make_dataset(provider_count: usize, model_count: usize) -> Dataset {
             format!("provider-{i}"),
             ProviderInfo {
                 api_url: format!("https://provider-{i}.example/v1"),
-                env_vars: vec![format!("PROVIDER_{i}_API_KEY")],
+                env_vars: if with_env_vars {
+                    vec![format!("PROVIDER_{i}_API_KEY")]
+                } else {
+                    Vec::new()
+                },
                 api_type: if (i & 1) == 0 {
                     ProviderType::OpenAiCompletions
                 } else {
@@ -34,7 +58,7 @@ fn make_dataset(provider_count: usize, model_count: usize) -> Dataset {
     let mut provider_models = Vec::with_capacity(model_count);
     let unique_cfg_count = (model_count / 5).max(1);
     for i in 0..model_count {
-        let provider_idx = i % provider_count;
+        let provider_idx = ProviderIdx::new((i % provider_count) as u16);
         let cfg = i % unique_cfg_count;
         let temperature = if (cfg & 1) == 0 {
             Some(1.0 + ((cfg % 5000) as f32 * 0.001))
@@ -47,17 +71,17 @@ fn make_dataset(provider_count: usize, model_count: usize) -> Dataset {
             None
         };
 
-        provider_models.push(ProviderModelSource::new(
-            format!("provider-{provider_idx}"),
-            format!("org-{}/model-{i}", i % 17),
-            ModelInfo {
+        provider_models.push(ProviderModelSpec {
+            provider_idx,
+            model_key: format!("org-{}/model-{i}", i % 17),
+            model: ModelInfo {
                 modalities: Modality::TEXT,
                 max_input: 4096 + ((cfg as u32) * 32),
                 max_output: 512 + ((cfg as u32) * 8),
                 temperature,
                 top_p,
             },
-        ));
+        });
     }
 
     Dataset {
@@ -66,9 +90,8 @@ fn make_dataset(provider_count: usize, model_count: usize) -> Dataset {
     }
 }
 
-fn construct_batch(dataset: &Dataset) {
-    let catalog =
-        ModelCatalog::build(&dataset.providers, &dataset.provider_models).expect("batch build");
+fn construct_batch(providers: &[ProviderSource], provider_models: &[ProviderModelSource<'_>]) {
+    let catalog = ModelCatalog::build(providers, provider_models).expect("batch build");
 
     black_box((
         catalog.provider_count(),
@@ -80,17 +103,23 @@ fn construct_batch(dataset: &Dataset) {
 fn benchmark_builder_construction(c: &mut Criterion) {
     let mut group = c.benchmark_group("model_catalog_builder_construct");
 
-    for (name, provider_count, model_count) in [
-        ("models_dev_snapshot", 96usize, 3031usize),
-        ("max", 16384usize, 65535usize),
+    for (name, provider_count, model_count, with_env_vars) in [
+        ("models_dev_snapshot", 96usize, 3031usize, true),
+        ("max", 16384usize, 65535usize, false),
     ] {
-        let dataset = make_dataset(provider_count, model_count);
+        let dataset = make_dataset(provider_count, model_count, with_env_vars);
+        let provider_model_sources = dataset.provider_model_sources();
         group.throughput(Throughput::Elements(
             (provider_count + dataset.provider_models.len()) as u64,
         ));
 
         group.bench_with_input(BenchmarkId::new("batch", name), &dataset, |b, input| {
-            b.iter(|| construct_batch(black_box(input)))
+            b.iter(|| {
+                construct_batch(
+                    black_box(&input.providers),
+                    black_box(&provider_model_sources),
+                )
+            })
         });
     }
 
diff --git a/src/llm-coding-tools-core/src/internal/hash63.rs b/src/llm-coding-tools-core/src/internal/hash63.rs
index 56df018f..a81934bc 100644
--- a/src/llm-coding-tools-core/src/internal/hash63.rs
+++ b/src/llm-coding-tools-core/src/internal/hash63.rs
@@ -18,14 +18,12 @@ impl Hash63 {
     ///
     /// The caller is responsible for ensuring bit 63 is 0.
     #[inline]
-    #[allow(dead_code)] // internal public API
     pub(crate) const fn from_u64(value: u64) -> Self {
         Self(value)
     }
 
     /// Returns the underlying u64 value.
     #[inline]
-    #[allow(dead_code)] // internal public API
     pub(crate) const fn as_u64(&self) -> u64 {
         self.0
     }
diff --git a/src/llm-coding-tools-core/src/internal/hash64.rs b/src/llm-coding-tools-core/src/internal/hash64.rs
index e880c044..6c96ea78 100644
--- a/src/llm-coding-tools-core/src/internal/hash64.rs
+++ b/src/llm-coding-tools-core/src/internal/hash64.rs
@@ -11,14 +11,12 @@ pub(crate) struct Hash64(u64);
 impl Hash64 {
     /// Creates a new Hash64 from a raw u64 value.
     #[inline]
-    #[allow(dead_code)] // internal public API
     pub(crate) fn from_u64(value: u64) -> Self {
         Self(value)
     }
 
     /// Returns the underlying u64 value.
     #[inline]
-    #[allow(dead_code)] // internal public API
     pub(crate) fn as_u64(&self) -> u64 {
         self.0
     }
@@ -26,14 +24,12 @@ impl Hash64 {
 
 /// Hashes a string to Hash64 using ahash64.
 #[inline(always)]
-#[allow(dead_code)] // internal public API
 pub(crate) fn hash_u64(s: &str) -> Hash64 {
     hash_u64_bytes(s.as_bytes())
 }
 
 /// Hashes raw bytes to Hash64 using ahash64.
 #[inline(always)]
-#[allow(dead_code)] // internal public API
 pub(crate) fn hash_u64_bytes(bytes: &[u8]) -> Hash64 {
     Hash64(ahash::RandomState::with_seed(0xDEAD_CAFE).hash_one(bytes))
 }
diff --git a/src/llm-coding-tools-core/src/models/catalog/internal/builder.rs b/src/llm-coding-tools-core/src/models/catalog/internal/builder.rs
index eb6b1fe5..db1f59cf 100644
--- a/src/llm-coding-tools-core/src/models/catalog/internal/builder.rs
+++ b/src/llm-coding-tools-core/src/models/catalog/internal/builder.rs
@@ -68,7 +68,7 @@ fn build_state_with_capacity(
 #[inline]
 pub(crate) fn build_from_source(
     providers: &[ProviderSource],
-    provider_models: &[ProviderModelSource],
+    provider_models: &[ProviderModelSource<'_>],
 ) -> Result<ModelCatalog, ModelCatalogBuildError> {
     let provider_stats = analyze_provider_sources(providers)?;
     let mut state = build_state_with_capacity(provider_stats.provider_count, provider_models.len());
@@ -90,65 +90,53 @@ pub(crate) fn build_from_source(
 fn populate_tables_once(
     state: &mut BuildState,
     providers: &[ProviderSource],
-    provider_models: &[ProviderModelSource],
+    provider_models: &[ProviderModelSource<'_>],
 ) -> Result<(), ModelCatalogBuildError> {
     let mut env_start: u16 = 0;
-    let mut provider_idx_by_key: AHashMap<&str, ProviderIdx> =
-        AHashMap::with_capacity(providers.len());
-    let mut seen_provider_models: AHashSet<(&str, &str)> =
+    let mut seen_provider_keys: AHashSet<&str> = AHashSet::with_capacity(providers.len());
+    let mut seen_provider_models: AHashSet<(ProviderIdx, &str)> =
         AHashSet::with_capacity(provider_models.len());
 
     for provider in providers {
         let provider_info = &provider.provider;
         let env_count = provider_info.env_vars.len() as u8;
 
-        match provider_idx_by_key.entry(provider.provider_key.as_str()) {
-            MapEntry::Occupied(_) => {
-                return Err(ModelCatalogBuildError::DuplicateKey {
-                    table: LookupTableKind::Provider,
-                    key: provider.provider_key.clone(),
-                });
-            }
-            MapEntry::Vacant(e) => {
-                let provider_idx = insert_provider(
-                    state,
-                    &provider.provider_key,
-                    env_start,
-                    env_count,
-                    provider_info.api_type,
-                )?;
-                e.insert(provider_idx);
-            }
+        if !seen_provider_keys.insert(provider.provider_key.as_str()) {
+            return Err(ModelCatalogBuildError::DuplicateKey {
+                table: LookupTableKind::Provider,
+                key: provider.provider_key.clone(),
+            });
         }
 
-        // SAFETY: analyze_provider_sources bounds env_start and env_count (<= 3).
+        insert_provider(
+            state,
+            &provider.provider_key,
+            env_start,
+            env_count,
+            provider_info.api_type,
+        )?;
+
+        // SAFETY: analyze_provider_sources bounds env_start and env_count (<= 7).
         env_start += u16::from(env_count);
     }
 
     for provider_model in provider_models {
-        // Validate provider exists before inserting model.
-        if !provider_idx_by_key.contains_key(provider_model.provider_key.as_str()) {
-            return Err(ModelCatalogBuildError::ProviderKeyNotFoundForModel {
-                provider_key: provider_model.provider_key.clone(),
-                model_key: provider_model.model_key.clone(),
-            });
-        }
-
-        // Check for duplicate (provider_key, model_key) pair.
-        let key = (
-            provider_model.provider_key.as_str(),
-            provider_model.model_key.as_str(),
-        );
+        // Build the out-of-range error lazily so the success path never clones `model_key`.
+        let provider = providers
+            .get(provider_model.provider_idx.as_usize())
+            .ok_or_else(|| ModelCatalogBuildError::ProviderIdxOutOfRangeForModel {
+                provider_idx: provider_model.provider_idx,
+                model_key: provider_model.model_key.to_owned(),
+            })?;
+        // Check for duplicate (provider_idx, model_key) pair.
+        let key = (provider_model.provider_idx, provider_model.model_key);
         if !seen_provider_models.insert(key) {
             return Err(ModelCatalogBuildError::DuplicateKey {
                 table: LookupTableKind::ProviderModel,
-                key: format!(
-                    "{}/{}",
-                    provider_model.provider_key, provider_model.model_key
-                ),
+                key: format!("{}/{}", provider.provider_key, provider_model.model_key),
             });
         }
-        insert_provider_model(state, provider_model)?;
+        insert_provider_model(state, provider.provider_key.as_str(), provider_model)?;
     }
 
     Ok(())
@@ -197,7 +185,8 @@ fn insert_provider(
 #[inline]
 fn insert_provider_model(
     state: &mut BuildState,
-    provider_model: &ProviderModelSource,
+    provider_key: &str,
+    provider_model: &ProviderModelSource<'_>,
 ) -> Result<(), ModelCatalogBuildError> {
     let info = provider_model.model;
 
@@ -238,11 +227,7 @@ fn insert_provider_model(
         }
     };
 
-    let key = hash_provider_model_key(
-        &state.hash_state,
-        &provider_model.provider_key,
-        &provider_model.model_key,
-    );
+    let key = hash_provider_model_key(&state.hash_state, provider_key, provider_model.model_key);
     let hash48 = PackedProviderModelTableEntry::truncate_hash48(key.as_u64());
 
     // Insert provider-model entry.
@@ -346,7 +331,7 @@ fn analyze_provider_sources(
 
     for provider in providers {
         // SAFETY: total_env_keys is the start index for this provider.
-        // It must fit the 14-bit PackedEnvRange start field.
+        // It must fit the 13-bit PackedEnvRange start field.
         if total_env_keys > max_env_start {
             return Err(ModelCatalogBuildError::TooManyEnvVarKeys {
                 count: total_env_keys,
@@ -356,7 +341,7 @@ fn analyze_provider_sources(
 
         let provider_info = &provider.provider;
         let env_count = provider_info.env_vars.len();
-        // SAFETY: per-provider count must fit the 2-bit count field.
+        // SAFETY: per-provider count must fit the 3-bit count field.
         if env_count > max_env_count {
             return Err(
                 ModelCatalogBuildError::TooManyProviderEnvVarsForOneProvider {
@@ -427,7 +412,7 @@ fn build_provider_env_key_table(
 mod tests {
     use super::build_from_source;
     use crate::models::catalog::{
-        LookupTableKind, Modality, ModelCatalogBuildError, ModelInfo, ProviderInfo,
+        LookupTableKind, Modality, ModelCatalogBuildError, ModelInfo, ProviderIdx, ProviderInfo,
         ProviderModelSource, ProviderSource,
     };
     use crate::models::ProviderType;
@@ -454,15 +439,15 @@ mod tests {
         ProviderSource::new(provider_key, provider)
     }
 
-    fn provider_model_source(
-        provider_key: &str,
-        model_key: &str,
+    fn provider_model_source<'a>(
+        provider_idx: ProviderIdx,
+        model_key: &'a str,
         model: ModelInfo,
-    ) -> ProviderModelSource {
-        ProviderModelSource::new(provider_key, model_key, model)
+    ) -> ProviderModelSource<'a> {
+        ProviderModelSource::new(provider_idx, model_key, model)
     }
 
-    fn test_sources() -> (Vec<ProviderSource>, Vec<ProviderModelSource>) {
+    fn test_sources() -> (Vec<ProviderSource>, Vec<ProviderModelSource<'static>>) {
         (
             vec![provider_source(
                 "alpha",
@@ -472,7 +457,11 @@ mod tests {
                     ProviderType::OpenAiCompletions,
                 ),
             )],
-            vec![provider_model_source("alpha", "m1", info(4096, 512))],
+            vec![provider_model_source(
+                ProviderIdx::new(0),
+                "m1",
+                info(4096, 512),
+            )],
         )
     }
 
@@ -499,7 +488,11 @@ mod tests {
                 provider("https://beta.example", &["BETA_KEY"], ProviderType::Azure),
             ),
         ];
-        let provider_models = vec![provider_model_source("alpha", "m1", info(4096, 512))];
+        let provider_models = vec![provider_model_source(
+            ProviderIdx::new(0),
+            "m1",
+            info(4096, 512),
+        )];
 
         match build_from_source(&providers, &provider_models) {
             Err(err) => {
@@ -522,8 +515,8 @@ mod tests {
             provider("https://alpha.example", &["ALPHA_KEY"], ProviderType::Azure),
         )];
         let provider_models = vec![
-            provider_model_source("alpha", "m1", info(4096, 512)),
-            provider_model_source("alpha", "m1", info(4096, 512)),
+            provider_model_source(ProviderIdx::new(0), "m1", info(4096, 512)),
+            provider_model_source(ProviderIdx::new(0), "m1", info(4096, 512)),
         ];
 
         match build_from_source(&providers, &provider_models) {
@@ -554,7 +547,7 @@ mod tests {
         ];
         let provider_models = vec![
             provider_model_source(
-                "alpha",
+                ProviderIdx::new(0),
                 "m1",
                 ModelInfo {
                     modalities: Modality::TEXT,
@@ -565,7 +558,7 @@ mod tests {
                 },
             ),
             provider_model_source(
-                "beta",
+                ProviderIdx::new(1),
                 "m1",
                 ModelInfo {
                     modalities: Modality::TEXT,
@@ -592,14 +585,18 @@ mod tests {
             "alpha",
             provider("https://alpha.example", &["ALPHA_KEY"], ProviderType::Azure),
         )];
-        let provider_models = vec![provider_model_source("beta", "m1", info(4096, 512))];
+        let provider_models = vec![provider_model_source(
+            ProviderIdx::new(1),
+            "m1",
+            info(4096, 512),
+        )];
 
         match build_from_source(&providers, &provider_models) {
             Err(err) => {
                 assert_eq!(
                     err,
-                    ModelCatalogBuildError::ProviderKeyNotFoundForModel {
-                        provider_key: "beta".to_string(),
+                    ModelCatalogBuildError::ProviderIdxOutOfRangeForModel {
+                        provider_idx: ProviderIdx::new(1),
                         model_key: "m1".to_string(),
                     }
                 );
@@ -614,19 +611,23 @@ mod tests {
             "alpha",
             provider(
                 "https://alpha.example",
-                &["A", "B", "C", "D"],
+                &["A", "B", "C", "D", "E", "F", "G", "H"],
                 ProviderType::Azure,
             ),
         )];
-        let provider_models = vec![provider_model_source("alpha", "m1", info(4096, 512))];
+        let provider_models = vec![provider_model_source(
+            ProviderIdx::new(0),
+            "m1",
+            info(4096, 512),
+        )];
 
         match build_from_source(&providers, &provider_models) {
             Err(err) => {
                 assert_eq!(
                     err,
                     ModelCatalogBuildError::TooManyProviderEnvVarsForOneProvider {
-                        count: 4,
-                        max: 3,
+                        count: 8,
+                        max: 7,
                     }
                 );
             }
@@ -639,7 +640,7 @@ mod tests {
         let (providers, _) = test_sources();
         let max_output = super::MAX_OUTPUT_TOKENS;
         let provider_models = vec![provider_model_source(
-            "alpha",
+            ProviderIdx::new(0),
             "m1",
             info(4096, max_output.saturating_add(1)),
         )];
@@ -663,7 +664,7 @@ mod tests {
         let (providers, _) = test_sources();
         let max_input = super::MAX_INPUT_TOKENS;
         let provider_models = vec![provider_model_source(
-            "alpha",
+            ProviderIdx::new(0),
             "m1",
             info(max_input.saturating_add(1), 512),
         )];
@@ -684,29 +685,29 @@ mod tests {
 
     #[test]
     fn too_many_total_env_vars_returns_error() {
-        // 5462 providers * 3 env vars = 16386, so the 5463rd provider would have
-        // a start index of 16386, which exceeds MAX_ENV_START (16383).
-        let mut providers = Vec::with_capacity(5463);
-        for i in 0..5463usize {
+        // 8192 providers * 1 env var = 8192, so the 8193rd provider would have
+        // a start index of 8192, which exceeds MAX_ENV_START (8191).
+        let mut providers = Vec::with_capacity(8193);
+        for i in 0..8193usize {
             providers.push(provider_source(
                 &format!("provider_{}", i),
-                provider(
-                    "https://example.com",
-                    &["VAR1", "VAR2", "VAR3"],
-                    ProviderType::Azure,
-                ),
+                provider("https://example.com", &["VAR1"], ProviderType::Azure),
             ));
         }
         let mut provider_models = Vec::with_capacity(1);
-        provider_models.push(provider_model_source("provider_0", "m1", info(4096, 512)));
+        provider_models.push(provider_model_source(
+            ProviderIdx::new(0),
+            "m1",
+            info(4096, 512),
+        ));
 
         match build_from_source(&providers, &provider_models) {
             Err(err) => {
                 assert_eq!(
                     err,
                     ModelCatalogBuildError::TooManyEnvVarKeys {
-                        count: 16_386,
-                        max: 16_383,
+                        count: 8_192,
+                        max: 8_191,
                     }
                 );
             }
@@ -715,34 +716,43 @@ mod tests {
     }
 
     #[test]
-    fn max_14bit_start_with_tail_entries_succeeds() {
-        // The last provider's start index can be 16383 and still be valid when it
-        // contributes 3 keys at indices 16383, 16384, and 16385.
-        let mut providers = Vec::with_capacity(5462);
-        for i in 0..5462usize {
+    fn max_13bit_start_with_tail_entries_succeeds() {
+        // 1170 providers * 7 env vars + 1 = 8191 keys precede the last provider, so it
+        // starts exactly at MAX_ENV_START (8191) and contributes indices 8191..=8197.
+        let mut providers = Vec::with_capacity(1172);
+        for i in 0..1170usize {
             providers.push(provider_source(
                 &format!("provider_{}", i),
                 provider(
                     "https://example.com",
-                    &["VAR1", "VAR2", "VAR3"],
+                    &["VAR1", "VAR2", "VAR3", "VAR4", "VAR5", "VAR6", "VAR7"],
                     ProviderType::Azure,
                 ),
             ));
         }
-        let last_provider_key = format!("provider_{}", 5461usize);
-        let mut provider_models = Vec::with_capacity(1);
-        provider_models.push(provider_model_source(
-            &last_provider_key,
-            "m1",
-            info(4096, 512),
+        providers.push(provider_source(
+            "provider_1170",
+            provider("https://example.com", &["VAR1"], ProviderType::Azure),
+        ));
+        providers.push(provider_source(
+            "provider_1171",
+            provider(
+                "https://example.com",
+                &["VAR1", "VAR2", "VAR3", "VAR4", "VAR5", "VAR6", "VAR7"],
+                ProviderType::Azure,
+            ),
         ));
+        let provider_models = Vec::new();
 
         let catalog =
             build_from_source(&providers, &provider_models).expect("boundary case should pass");
-        let (provider, _) = catalog
-            .lookup(&last_provider_key, "m1")
+        let provider = catalog
+            .provider_from_index(ProviderIdx::new(1171))
             .expect("last provider should be addressable");
 
-        assert_eq!(provider.env_vars(), &["VAR1", "VAR2", "VAR3"]);
+        assert_eq!(
+            provider.env_vars(),
+            &["VAR1", "VAR2", "VAR3", "VAR4", "VAR5", "VAR6", "VAR7"]
+        );
     }
 }
diff --git a/src/llm-coding-tools-core/src/models/catalog/internal/packed_env_range.rs b/src/llm-coding-tools-core/src/models/catalog/internal/packed_env_range.rs
index bf7b982b..6122010f 100644
--- a/src/llm-coding-tools-core/src/models/catalog/internal/packed_env_range.rs
+++ b/src/llm-coding-tools-core/src/models/catalog/internal/packed_env_range.rs
@@ -1,15 +1,15 @@
 //! Packed env-var range entry for provider-to-env-key mapping.
 //!
 //! Layout (`u16`):
-//! - `14` bits: start index into provider_env_keys StringTable
-//! - `2` bits: count of env keys for this provider (0..=3)
+//! - `13` bits: start index into provider_env_keys StringTable
+//! - `3` bits: count of env keys for this provider (0..=7)
 
 use bitfields::bitfield;
 
 /// Maximum env-var count per provider representable by PackedEnvRange.
-pub const MAX_ENV_RANGE_COUNT: u8 = 3;
-/// Maximum start index representable by PackedEnvRange (14 bits).
-pub const MAX_ENV_START: u16 = (1u16 << 14) - 1; // 16383
+pub const MAX_ENV_RANGE_COUNT: u8 = 7;
+/// Maximum start index representable by PackedEnvRange (13 bits).
+pub const MAX_ENV_START: u16 = (1u16 << 13) - 1; // 8191
 
 /// Packed env-var range entry.
 ///
@@ -17,9 +17,9 @@ pub const MAX_ENV_START: u16 = (1u16 << 14) - 1; // 16383
 #[bitfield(u16)]
 #[derive(Clone, Copy, PartialEq, Eq, Hash)]
 pub struct PackedEnvRange {
-    #[bits(14)]
+    #[bits(13)]
     start: u16,
-    #[bits(2)]
+    #[bits(3)]
     count: u8,
 }
 
@@ -27,7 +27,7 @@ impl PackedEnvRange {
     /// Creates one packed env-var range entry.
     ///
     /// SAFETY: The `start` parameter is not validated here. The caller must ensure
-    /// `start` fits within 14 bits (max 16383). This invariant is enforced in
+    /// `start` fits within 13 bits (max 8191). This invariant is enforced in
     /// `analyze_provider_sources` before `populate_tables_once` calls this function.
     #[inline]
     pub fn from_parts(start: u16, count: u8) -> Self {
@@ -56,7 +56,7 @@ mod tests {
 
     #[test]
     fn count_capped_at_max() {
-        let packed = PackedEnvRange::from_parts(0, 5);
-        assert_eq!(packed.count(), 3); // capped to MAX_ENV_RANGE_COUNT
+        let packed = PackedEnvRange::from_parts(0, 8);
+        assert_eq!(packed.count(), 7); // capped to MAX_ENV_RANGE_COUNT
     }
 }
diff --git a/src/llm-coding-tools-core/src/models/catalog/mod.rs b/src/llm-coding-tools-core/src/models/catalog/mod.rs
index 9d99f090..79ae717f 100644
--- a/src/llm-coding-tools-core/src/models/catalog/mod.rs
+++ b/src/llm-coding-tools-core/src/models/catalog/mod.rs
@@ -27,7 +27,7 @@
 //!
 //! - [`ModelCatalog::build`] - Batch builder entry point
 //! - [`ProviderSource`] - Provider key + metadata input
-//! - [`ProviderModelSource`] - Model key + metadata input for a provider
+//! - [`ProviderModelSource`] - Model key + metadata input keyed by [`ProviderIdx`] and model key
 //! - [`ModelInfo`] - Model metadata input (modalities, token limits, sampling)
 //! - [`ProviderInfo`] - Provider metadata input (API URL, env vars, type)
 //! - [`Modality`] - Content modality flags (text, image, audio, video)
@@ -151,8 +151,8 @@
 //! | ------------------------- | ----------: | ------------------------------------------------ |
 //! | Max providers             |      65,536 | Addressable by 16-bit provider index             |
 //! | Max model configs         |      65,536 | Addressable by 16-bit model configuration index  |
-//! | Max provider env vars     |      16,384 | Global env-var pool offset (14-bit)              |
-//! | Max env vars per provider |           3 | Count field in provider range entry (2-bit)      |
+//! | Max provider env vars     |       8,192 | Global env-var pool offset (13-bit)              |
+//! | Max env vars per provider |           7 | Count field in provider range entry (3-bit)      |
 //! | Max input tokens          | 536,870,911 | 29-bit packed field (≈536M)                      |
 //! | Max output tokens         | 134,217,727 | 27-bit packed field (≈134M)                      |
 //! | Hash bits retained        |          48 | Truncated from 64-bit hash output                |
@@ -231,6 +231,7 @@ use internal::{
     PackedEnvRange, PackedModelEntry, PackedProviderModelTableEntry, PackedProviderTableEntry,
 };
 use lite_strtab::{StringId, StringTable};
+use public::{ProviderEnvVars, INLINE_PROVIDER_ENV_VARS};
 
 pub use public::builder_types::{ModelCatalogBuildError, ProviderModelSource, ProviderSource};
 pub use public::*;
@@ -286,19 +287,20 @@ impl ModelCatalog {
     /// # Parameters
     ///
     /// * `providers` - [`ProviderSource`] values keyed by provider identifier.
-    /// * `provider_models` - [`ProviderModelSource`] values keyed by provider and model.
+    /// * `provider_models` - [`ProviderModelSource`] values keyed by [`ProviderIdx`] and model key.
+    ///   The `provider_idx` must point at an element in the `providers` slice.
     ///
     /// # Errors
     ///
     /// Returns [`ModelCatalogBuildError`] when:
     /// - input exceeds supported numeric limits,
     /// - token limits cannot be represented in packed model entries,
-    /// - provider model sources reference unknown providers,
+    /// - provider model sources reference out-of-range provider indices,
     /// - or all seed-retry attempts still result in collisions.
     #[inline]
     pub fn build(
         providers: &[ProviderSource],
-        provider_models: &[ProviderModelSource],
+        provider_models: &[ProviderModelSource<'_>],
     ) -> Result<Self, ModelCatalogBuildError> {
         build_from_source(providers, provider_models)
     }
@@ -471,21 +473,16 @@ impl ModelCatalog {
         let start = range.start();
         let count = range.count() as usize;
 
-        let mut env_vars = ["", "", ""];
-        #[allow(clippy::needless_range_loop)]
+        let mut env_vars: ProviderEnvVars<'_> =
+            ProviderEnvVars::with_capacity(count.max(INLINE_PROVIDER_ENV_VARS));
         for x in 0..count {
-            env_vars[x] = self
-                .provider_env_keys
-                .get(StringId::new(ProviderIdx::new(start + x as u16)))?;
+            env_vars.push(
+                self.provider_env_keys
+                    .get(StringId::new(ProviderIdx::new(start + x as u16)))?,
+            );
         }
 
-        Some(Provider::new(
-            provider_idx,
-            api_url,
-            env_vars,
-            count as u8,
-            api_type,
-        ))
+        Some(Provider::new(provider_idx, api_url, env_vars, api_type))
     }
 
     /// Looks up a model by its configuration index.
@@ -518,7 +515,7 @@ impl ModelCatalog {
 mod tests {
     use super::*;
     use crate::models::catalog::{
-        Modality, ModelInfo, ProviderInfo, ProviderModelSource, ProviderSource,
+        Modality, ModelInfo, ProviderIdx, ProviderInfo, ProviderModelSource, ProviderSource,
     };
 
     fn provider(api_url: &str, env_vars: &[&str], api_type: ProviderType) -> ProviderInfo {
@@ -562,10 +559,16 @@ mod tests {
             .into_iter()
             .map(|(key, info)| ProviderSource::new(key, info))
             .collect();
-        let provider_model_sources: Vec<ProviderModelSource> = provider_models
+        let provider_model_sources: Vec<ProviderModelSource<'_>> = provider_models
             .into_iter()
             .map(|(provider_key, model_key, info)| {
-                ProviderModelSource::new(provider_key, model_key, info)
+                let provider_idx = ProviderIdx::new(
+                    provider_sources
+                        .iter()
+                        .position(|provider| provider.provider_key == provider_key)
+                        .expect("provider key should exist") as u16,
+                );
+                ProviderModelSource::new(provider_idx, model_key, info)
             })
             .collect();
         ModelCatalog::build(&provider_sources, &provider_model_sources)
diff --git a/src/llm-coding-tools-core/src/models/catalog/public/builder_types.rs b/src/llm-coding-tools-core/src/models/catalog/public/builder_types.rs
index 7167c3cc..b1749660 100644
--- a/src/llm-coding-tools-core/src/models/catalog/public/builder_types.rs
+++ b/src/llm-coding-tools-core/src/models/catalog/public/builder_types.rs
@@ -3,6 +3,7 @@
 //! [`ModelCatalog`]: crate::models::catalog::ModelCatalog
 
 use super::Modality;
+use super::ProviderIdx;
 use crate::models::ProviderType;
 use thiserror::Error;
 
@@ -80,47 +81,57 @@ impl From<(String, ProviderInfo)> for ProviderSource {
 ///
 /// This wrapper keeps builder input self-documenting and avoids tuple-position
 /// ambiguity at call sites.
+///
+/// The `model_key` is borrowed because the catalog builder hashes it during
+/// construction and does not retain it afterward. Callers must therefore keep
+/// the referenced string alive until [`crate::models::catalog::ModelCatalog::build`]
+/// returns.
+///
+/// The `provider_idx` must correspond to an entry in the `providers` slice passed
+/// to [`ModelCatalog::build`].
+///
+/// [`ModelCatalog::build`]: crate::models::catalog::ModelCatalog::build
 #[derive(Debug, Clone, PartialEq)]
-pub struct ProviderModelSource {
-    /// Provider identifier used by lookups (for example, `"openai"`).
-    pub provider_key: String,
-    /// Model identifier used by lookups (for example, `"gpt-4"`).
-    pub model_key: String,
+pub struct ProviderModelSource<'a> {
+    /// Index into the `providers` slice passed to [`ModelCatalog::build`].
+    ///
+    /// [`ModelCatalog::build`]: crate::models::catalog::ModelCatalog::build
+    pub provider_idx: ProviderIdx,
+    /// Borrowed model identifier used by lookups (for example, `"gpt-4"`).
+    pub model_key: &'a str,
     /// Model metadata associated with [`Self::model_key`].
     pub model: ModelInfo,
 }
 
-impl ProviderModelSource {
+impl<'a> ProviderModelSource<'a> {
     /// Creates a provider model source.
     ///
     /// # Parameters
     ///
-    /// * `provider_key` - Provider identifier used during provider lookup.
+    /// * `provider_idx` - Index into the `providers` slice passed to [`ModelCatalog::build`].
     /// * `model_key` - Model identifier used during model lookup for this provider.
     /// * `model` - Model metadata for this provider model.
     ///
     /// # Returns
     ///
     /// A new [`ProviderModelSource`].
+    ///
+    /// [`ModelCatalog::build`]: crate::models::catalog::ModelCatalog::build
     #[inline]
-    pub fn new(
-        provider_key: impl Into<String>,
-        model_key: impl Into<String>,
-        model: ModelInfo,
-    ) -> Self {
+    pub fn new(provider_idx: ProviderIdx, model_key: &'a str, model: ModelInfo) -> Self {
         Self {
-            provider_key: provider_key.into(),
-            model_key: model_key.into(),
+            provider_idx,
+            model_key,
             model,
         }
     }
 }
 
-impl From<(String, String, ModelInfo)> for ProviderModelSource {
+impl<'a> From<(ProviderIdx, &'a str, ModelInfo)> for ProviderModelSource<'a> {
     #[inline]
-    fn from((provider_key, model_key, model): (String, String, ModelInfo)) -> Self {
+    fn from((provider_idx, model_key, model): (ProviderIdx, &'a str, ModelInfo)) -> Self {
         Self {
-            provider_key,
+            provider_idx,
             model_key,
             model,
         }
@@ -164,7 +175,7 @@ pub enum ModelCatalogBuildError {
         /// Maximum supported unique model configuration count.
         max: usize,
     },
-    /// One provider has too many env vars for the packed count field (max 3).
+    /// One provider has too many env vars for the packed count field (max 7).
     #[error("provider env-var count {count} exceeds supported maximum {max}")]
     TooManyProviderEnvVarsForOneProvider {
         /// Number of env vars supplied for one provider.
@@ -172,11 +183,11 @@ pub enum ModelCatalogBuildError {
         /// Maximum supported env vars for one provider.
         max: usize,
     },
-    /// A provider model source references a provider key that does not exist.
-    #[error("provider model source references unknown provider_key={provider_key:?} for model_key={model_key:?}")]
-    ProviderKeyNotFoundForModel {
-        /// Provider key from the provider model source.
-        provider_key: String,
+    /// A provider model source references a provider index that does not exist.
+    #[error("provider model source references out-of-range provider_idx={} for model_key={model_key:?}", provider_idx.as_usize())]
+    ProviderIdxOutOfRangeForModel {
+        /// Provider index from the provider model source.
+        provider_idx: ProviderIdx,
         /// Model key from the provider model source.
         model_key: String,
     },
diff --git a/src/llm-coding-tools-core/src/models/catalog/public/entry.rs b/src/llm-coding-tools-core/src/models/catalog/public/entry.rs
index fc4f4060..e747381d 100644
--- a/src/llm-coding-tools-core/src/models/catalog/public/entry.rs
+++ b/src/llm-coding-tools-core/src/models/catalog/public/entry.rs
@@ -14,6 +14,11 @@
 use super::{Modality, ModelIdx, ProviderIdx};
 use crate::models::catalog::internal::Fixed4;
 use crate::models::ProviderType;
+use tinyvec::TinyVec;
+
+pub(crate) const INLINE_PROVIDER_ENV_VARS: usize = 2;
+
+pub(crate) type ProviderEnvVars<'a> = TinyVec<[&'a str; INLINE_PROVIDER_ENV_VARS]>;
 
 /// Provider lookup result.
 #[derive(Debug, Clone, PartialEq, Eq)]
@@ -23,9 +28,7 @@ pub struct Provider<'a> {
     /// Provider base URL.
     pub api_url: &'a str,
     /// Candidate environment variables used to resolve API keys.
-    env_vars: [&'a str; 3],
-    /// Number of valid entries in `env_vars`.
-    env_vars_count: u8,
+    env_vars: ProviderEnvVars<'a>,
     /// Type of API used by the provider.
     pub api_type: ProviderType,
 }
@@ -36,15 +39,13 @@ impl<'a> Provider<'a> {
     pub(crate) fn new(
         provider_idx: ProviderIdx,
         api_url: &'a str,
-        env_vars: [&'a str; 3],
-        env_vars_count: u8,
+        env_vars: ProviderEnvVars<'a>,
         api_type: ProviderType,
     ) -> Self {
         Self {
             provider_idx,
             api_url,
             env_vars,
-            env_vars_count,
             api_type,
         }
     }
@@ -52,7 +53,7 @@ impl<'a> Provider<'a> {
     /// Returns the candidate environment variables used to resolve API keys.
     #[inline]
     pub fn env_vars(&self) -> &[&'a str] {
-        &self.env_vars[..self.env_vars_count as usize]
+        self.env_vars.as_slice()
     }
 }
 
diff --git a/src/llm-coding-tools-core/src/models/catalog/public/mod.rs b/src/llm-coding-tools-core/src/models/catalog/public/mod.rs
index c6b0c044..adf55b2e 100644
--- a/src/llm-coding-tools-core/src/models/catalog/public/mod.rs
+++ b/src/llm-coding-tools-core/src/models/catalog/public/mod.rs
@@ -7,6 +7,7 @@
 
 pub use builder_types::{LookupTableKind, ModelInfo, ProviderInfo};
 pub use entry::{Model, Provider};
+pub(crate) use entry::{ProviderEnvVars, INLINE_PROVIDER_ENV_VARS};
 pub use modality::Modality;
 pub use model_idx::ModelIdx;
 pub use provider_idx::ProviderIdx;
diff --git a/src/llm-coding-tools-core/src/models/catalog/public/provider_idx.rs b/src/llm-coding-tools-core/src/models/catalog/public/provider_idx.rs
index ba3951e7..d82121a9 100644
--- a/src/llm-coding-tools-core/src/models/catalog/public/provider_idx.rs
+++ b/src/llm-coding-tools-core/src/models/catalog/public/provider_idx.rs
@@ -4,7 +4,7 @@
 ///
 /// Used to reference a specific provider in the catalog's
 /// packed provider entry tables and string tables.
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, bitcode::Encode, bitcode::Decode)]
 #[repr(transparent)]
 pub struct ProviderIdx(pub(crate) u16);
 
diff --git a/src/llm-coding-tools-core/src/models/mod.rs b/src/llm-coding-tools-core/src/models/mod.rs
index 17137b62..495de325 100644
--- a/src/llm-coding-tools-core/src/models/mod.rs
+++ b/src/llm-coding-tools-core/src/models/mod.rs
@@ -5,6 +5,6 @@ mod provider_type;
 
 pub use catalog::{
     LookupTableKind, Modality, Model, ModelCatalog, ModelCatalogBuildError, ModelInfo, Provider,
-    ProviderInfo, ProviderModelSource, ProviderSource,
+    ProviderIdx, ProviderInfo, ProviderModelSource, ProviderSource,
 };
 pub use provider_type::ProviderType;
diff --git a/src/llm-coding-tools-core/src/models/provider_type.rs b/src/llm-coding-tools-core/src/models/provider_type.rs
index af7c5d24..1b18e262 100644
--- a/src/llm-coding-tools-core/src/models/provider_type.rs
+++ b/src/llm-coding-tools-core/src/models/provider_type.rs
@@ -1,5 +1,5 @@
 /// Provider behavior profile used by model resolver logic.
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default, bitcode::Encode, bitcode::Decode)]
 #[repr(u8)]
 pub enum ProviderType {
     /// Unknown or unsupported provider package.
diff --git a/src/llm-coding-tools-models-dev/Cargo.toml b/src/llm-coding-tools-models-dev/Cargo.toml
new file mode 100644
index 00000000..85c65cd7
--- /dev/null
+++ b/src/llm-coding-tools-models-dev/Cargo.toml
@@ -0,0 +1,66 @@
+[package]
+name = "llm-coding-tools-models-dev"
+version = "0.1.0"
+edition = "2021"
+description = "models.dev catalog ingestion with online-first sync pipeline"
+repository = "https://github.com/Sewer56/llm-coding-tools"
+license = "Apache-2.0"
+include = ["src/**/*", "README.md"]
+readme = "README.md"
+
+[features]
+default = ["tokio"]
+# Base async signatures (enabled by runtime features)
+async = []
+# Async with tokio runtime
+tokio = ["async", "dep:tokio", "dep:reqwest", "llm-coding-tools-core/tokio"]
+# Blocking/sync mode - mutually exclusive with tokio/async
+blocking = [
+    "dep:reqwest",
+    "reqwest/blocking",
+    "llm-coding-tools-core/blocking",
+    "maybe-async/is_sync",
+]
+
+[dependencies]
+# Core library for ModelCatalog and related types
+llm-coding-tools-core = { path = "../llm-coding-tools-core", version = "0.2.0", default-features = false }
+
+# Cross-platform cache directory detection
+dirs = "6.0.0"
+
+# HTTP client for conditional GET requests
+reqwest = { version = "0.13", default-features = false, features = [
+    "rustls",
+    "rustls-native-certs",
+], optional = true }
+
+# Fast binary serialization
+bitcode = "0.6.9"
+
+# Compression for cache payload
+zstd = "0.13.3"
+
+# Shared async/sync implementation for load/cache APIs
+maybe-async = "0.2"
+
+# Endian-aware fixed-header serialization helpers
+endian-writer = "2.2.0"
+endian-writer-derive = "0.1.0"
+
+# JSON parsing for models.dev API responses
+serde = { version = "1.0.228", features = ["derive"] }
+serde_json = "1.0.145"
+
+# Ergonomic error definitions
+thiserror = "2.0.18"
+
+# Temp file with atomic rename support
+tempfile = "3.26"
+
+# Async runtime (when tokio feature enabled)
+tokio = { version = "1.49", features = ["fs", "io-util"], optional = true }
+
+[dev-dependencies]
+tokio = { version = "1.49", features = ["rt", "macros"] }
+serial_test = "3"
diff --git a/src/llm-coding-tools-models-dev/README.md b/src/llm-coding-tools-models-dev/README.md
new file mode 100644
index 00000000..45698787
--- /dev/null
+++ b/src/llm-coding-tools-models-dev/README.md
@@ -0,0 +1,151 @@
+# llm-coding-tools-models-dev
+
+Loads the online [models.dev][models.dev] catalog into an `llm-coding-tools-core`
+`ModelCatalog`, with ETag-based conditional requests and a cached fallback.
+
+## Why this exists
+
+If you run coding agents against many providers, you want to have fresh data.
+[models.dev][models.dev] is one such source of data.
+
+This crate downloads from models.dev, keeps only the fields we need, and
+builds a `llm_coding_tools_core::models::ModelCatalog`.
+
+## Usage
+
+### Load flow (simple)
+
+1. Read cache header (if present) and get the old ETag.
+2. Send request to models.dev with `If-None-Match` when ETag exists.
+3. If server returns `304 Not Modified`, load catalog from cache.
+4. If server returns `200 OK`, parse JSON, map it into catalog sources,
+   write fresh cache, then build catalog.
+5. If network fails, try cached data as fallback; if no valid cache exists,
+   return an error.
+
+### Non-blocking (`tokio`)
+
+```rust
+use llm_coding_tools_models_dev::{CatalogLoadSource, ModelsDevCatalog};
+
+#[cfg(feature = "tokio")]
+async fn load_catalog() -> Result<(), Box<dyn std::error::Error>> {
+    let result = ModelsDevCatalog::load().await?;
+
+    match result.source {
+        CatalogLoadSource::Downloaded => {
+            println!("Downloaded fresh catalog data.")
+        }
+        CatalogLoadSource::NotModifiedCache => {
+            println!("Cache is already up to date.")
+        }
+        CatalogLoadSource::FallbackCache => {
+            println!("Network unavailable, using cached catalog data.")
+        }
+    }
+
+    if let Some(entry) = result.catalog.lookup("openai", "gpt-4") {
+        println!("provider api url: {}", entry.0.api_url);
+        println!("max input tokens: {}", entry.1.max_input);
+    }
+
+    Ok(())
+}
+```
+
+### Blocking (`blocking`)
+
+```rust
+use llm_coding_tools_models_dev::{CatalogLoadSource, ModelsDevCatalog};
+
+#[cfg(feature = "blocking")]
+fn load_catalog() -> Result<(), Box<dyn std::error::Error>> {
+    let result = ModelsDevCatalog::load()?;
+
+    match result.source {
+        CatalogLoadSource::Downloaded => {
+            println!("Downloaded fresh catalog data.")
+        }
+        CatalogLoadSource::NotModifiedCache => {
+            println!("Cache is already up to date.")
+        }
+        CatalogLoadSource::FallbackCache => {
+            println!("Network unavailable, using cached catalog data.")
+        }
+    }
+
+    if let Some(entry) = result.catalog.lookup("openai", "gpt-4") {
+        println!("provider api url: {}", entry.0.api_url);
+        println!("max input tokens: {}", entry.1.max_input);
+    }
+
+    Ok(())
+}
+```
+
+### Load from a custom cache path
+
+```rust
+use llm_coding_tools_models_dev::ModelsDevCatalog;
+use std::path::PathBuf;
+
+#[cfg(feature = "tokio")]
+async fn load_catalog() -> Result<(), Box<dyn std::error::Error>> {
+    let cache_path = PathBuf::from("/tmp/models-dev.cache");
+    let _result = ModelsDevCatalog::load_at(&cache_path).await?;
+    Ok(())
+}
+
+#[cfg(feature = "blocking")]
+fn load_catalog() -> Result<(), Box<dyn std::error::Error>> {
+    let cache_path = PathBuf::from("/tmp/models-dev.cache");
+    let _result = ModelsDevCatalog::load_at(&cache_path)?;
+    Ok(())
+}
+```
+
+### Resolve the shared cache path
+
+```rust
+use llm_coding_tools_models_dev::shared_cache_path;
+
+fn print_cache_path() -> Result<(), Box<dyn std::error::Error>> {
+    let path = shared_cache_path()?;
+    println!("{}", path.display());
+    Ok(())
+}
+```
+
+## Cache location
+
+By default, cache is stored in the platform cache directory:
+
+- Linux: `~/.cache/llm-coding-tools/models.dev.catalog.v1.cache`
+- macOS: `~/Library/Caches/llm-coding-tools/models.dev.catalog.v1.cache`
+- Windows: `%LOCALAPPDATA%\llm-coding-tools\models.dev.catalog.v1.cache`
+
+Set `LLM_CODING_TOOLS_MODELS_DEV_CACHE_PATH` to override this path.
+
+## Cache size and performance
+
+Current ballpark from a recent `models.dev/api.json` snapshot:
+
+- Size: about `1.31 MiB` JSON -> `109 KiB` serialized payload -> `23.7 KiB` compressed cache
+- Compression: about `10.1 ms` with current `zstd` level `17`
+- Decompression: about `0.057 ms` (`57 us`) in `--release`
+- Cache load into `ModelCatalog`: about `0.31 ms` (`read + decompress + decode + build`)
+
+Measured on a single core of a Ryzen `9950X3D`; these are rough guidance numbers and will drift as the upstream catalog changes.
+
+## Feature flags
+
+- `tokio` (default): async runtime support.
+- `blocking`: synchronous runtime support; disable default features (`tokio`) to use it.
+
+Exactly one runtime mode must be enabled.
+
+## License
+
+Apache-2.0
+
+[models.dev]: https://models.dev
diff --git a/src/llm-coding-tools-models-dev/src/api/catalog_sources.rs b/src/llm-coding-tools-models-dev/src/api/catalog_sources.rs
new file mode 100644
index 00000000..a0968293
--- /dev/null
+++ b/src/llm-coding-tools-models-dev/src/api/catalog_sources.rs
@@ -0,0 +1,563 @@
+//! models.dev API -> `ModelCatalog` mapping.
+//!
+//! This module parses models.dev `api.json` and maps provider/model metadata into
+//! a [`CatalogCachePayload`]; a [`ModelCatalog`](llm_coding_tools_core::models::ModelCatalog) is built from that payload.
+//!
+//! Mapping policy:
+//! - missing limits default to `0`; a missing `modalities` object defaults to [`Modality::TEXT`];
+//! - model modalities are mapped from `modalities.input[]`/`modalities.output[]`
+//!   into directional [`Modality`] flags;
+//! - unknown npm package identifiers map to [`ProviderType::Unknown`];
+//! - unknown modality labels are ignored; if nothing maps, modalities remain
+//!   [`Modality::empty()`];
+//! - model rows remain provider-scoped; shared configurations are deduplicated by
+//!   core during catalog build.
+
+use super::schema::{parse_api_json, ApiModelEntry, ApiModelLimit, ApiModelModalities};
+use crate::cache::payload::{CachedModelRow, CachedProviderRow, CatalogCachePayload};
+use crate::error::{CatalogError, CatalogResult};
+use llm_coding_tools_core::models::{
+    Modality, ModelCatalogBuildError, ModelInfo, ProviderIdx, ProviderType,
+};
+
+pub(crate) fn cache_payload_from_api_json_bytes(
+    json_bytes: &[u8],
+) -> CatalogResult<CatalogCachePayload> {
+    let provider_entries = parse_api_json(json_bytes)?;
+
+    let provider_count = provider_entries.len();
+    if provider_count > (u16::MAX as usize) + 1 {
+        return Err(CatalogError::ModelCatalogBuild(
+            ModelCatalogBuildError::TooManyProviders {
+                count: provider_count,
+                max: (u16::MAX as usize) + 1,
+            },
+        ));
+    }
+
+    let mut providers = Vec::with_capacity(provider_count);
+    let mut models = Vec::with_capacity(
+        provider_entries
+            .values()
+            .map(|provider| provider.models.len())
+            .sum(),
+    );
+
+    for (provider_key, provider) in provider_entries {
+        let provider_idx = ProviderIdx::new(providers.len() as u16);
+        let api_type = provider_type_from_models_dev_npm(provider.npm.as_deref());
+
+        providers.push(CachedProviderRow {
+            provider_key,
+            api_url: provider.api.unwrap_or_default(),
+            env_vars: provider.env,
+            api_type,
+        });
+
+        for (model_key, model_entry) in provider.models {
+            let model = model_info_from_entry(&model_entry);
+            models.push(CachedModelRow {
+                provider_idx,
+                model_key,
+                modalities_bits: model.modalities.bits(),
+                max_input: model.max_input,
+                max_output: model.max_output,
+                temperature: model.temperature,
+                top_p: model.top_p,
+            });
+        }
+    }
+
+    Ok(CatalogCachePayload { providers, models })
+}
+
+#[inline]
+fn model_info_from_entry(model_entry: &ApiModelEntry) -> ModelInfo {
+    let (max_input, max_output) = match model_entry.limit.as_ref() {
+        Some(limit) => (model_max_input(limit), limit.output),
+        None => (0, 0),
+    };
+    let modalities = model_modalities(model_entry.modalities.as_ref());
+
+    ModelInfo {
+        modalities,
+        max_input,
+        max_output,
+        temperature: None,
+        top_p: None,
+    }
+}
+
+#[inline]
+fn model_modalities(raw: Option<&ApiModelModalities>) -> Modality {
+    let Some(raw) = raw else {
+        return Modality::TEXT;
+    };
+
+    let mut modalities = Modality::empty();
+    for label in &raw.input {
+        modalities |= input_modality_flag(label.as_str());
+    }
+    for label in &raw.output {
+        modalities |= output_modality_flag(label.as_str());
+    }
+
+    modalities
+}
+
+#[inline]
+fn input_modality_flag(label: &str) -> Modality {
+    match label {
+        "text" => Modality::TEXT_INPUT,
+        "image" => Modality::IMAGE_INPUT,
+        "audio" => Modality::AUDIO_INPUT,
+        "video" => Modality::VIDEO_INPUT,
+        _ => Modality::empty(), // pdf not supported
+    }
+}
+
+#[inline]
+fn output_modality_flag(label: &str) -> Modality {
+    match label {
+        "text" => Modality::TEXT_OUTPUT,
+        "image" => Modality::IMAGE_OUTPUT,
+        "audio" => Modality::AUDIO_OUTPUT,
+        "video" => Modality::VIDEO_OUTPUT,
+        _ => Modality::empty(),
+    }
+}
+
+#[inline]
+fn model_max_input(limit: &ApiModelLimit) -> u32 {
+    if limit.input == 0 {
+        limit.context
+    } else {
+        limit.input
+    }
+}
+
+#[inline]
+fn provider_type_from_models_dev_npm(npm_package: Option<&str>) -> ProviderType {
+    match npm_package {
+        Some("@ai-sdk/openai") => ProviderType::OpenAiCompletions,
+        Some("@ai-sdk/openai-responses") => ProviderType::OpenAiResponses,
+        Some("@ai-sdk/anthropic") => ProviderType::Anthropic,
+        Some("@ai-sdk/google") => ProviderType::Google,
+        Some("@ai-sdk/groq") => ProviderType::Groq,
+        Some("@ai-sdk/mistral") => ProviderType::Mistral,
+        Some("@ai-sdk/ollama") => ProviderType::Ollama,
+        Some("@ai-sdk/amazon-bedrock") => ProviderType::Bedrock,
+        Some("@ai-sdk/azure") => ProviderType::Azure,
+        Some("@openrouter/ai-sdk-provider") => ProviderType::OpenRouter,
+        Some("@ai-sdk/huggingface") => ProviderType::HuggingFace,
+        Some("@ai-sdk/cohere") => ProviderType::Cohere,
+        Some("@ai-sdk/chatgpt-oauth") => ProviderType::ChatGptOAuth,
+        Some("@ai-sdk/claude-code-oauth") => ProviderType::ClaudeCodeOAuth,
+        Some("@ai-sdk/antigravity") => ProviderType::Antigravity,
+        Some(_) | None => ProviderType::Unknown,
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::{cache_payload_from_api_json_bytes, provider_type_from_models_dev_npm};
+    use crate::cache::payload::catalog_from_cache_payload;
+    use llm_coding_tools_core::models::{Modality, ModelCatalog, ProviderIdx, ProviderType};
+
+    fn catalog_from_api_json_bytes(json_bytes: &[u8]) -> crate::error::CatalogResult<ModelCatalog> {
+        let payload = cache_payload_from_api_json_bytes(json_bytes)?;
+        catalog_from_cache_payload(payload)
+    }
+
+    fn catalog(json: &[u8]) -> ModelCatalog {
+        catalog_from_api_json_bytes(json).expect("API payload should map")
+    }
+
+    fn provider_snapshot(
+        catalog: &ModelCatalog,
+        provider_key: &str,
+    ) -> (String, Vec<String>, ProviderType) {
+        let provider = catalog
+            .lookup_provider(provider_key)
+            .expect("provider should exist");
+        (
+            provider.api_url.to_string(),
+            provider
+                .env_vars()
+                .iter()
+                .map(|env_var| (*env_var).to_string())
+                .collect(),
+            provider.api_type,
+        )
+    }
+
+    fn model_snapshot(
+        catalog: &ModelCatalog,
+        provider_key: &str,
+        model_key: &str,
+    ) -> (Modality, u32, u32, Option<f32>, Option<f32>) {
+        let model = catalog
+            .lookup_provider_model(provider_key, model_key)
+            .expect("provider model should exist");
+        (
+            model.modalities,
+            model.max_input,
+            model.max_output,
+            model.temperature(),
+            model.top_p(),
+        )
+    }
+
+    #[test]
+    fn cache_payload_maps_single_provider_with_models() {
+        let api_json = br#"
+        {
+            "openai": {
+                "npm": "@ai-sdk/openai",
+                "api": "https://api.openai.com/v1",
+                "env": ["OPENAI_API_KEY"],
+                "models": {
+                    "gpt-4": {
+                        "modalities": { "input": ["text"], "output": ["text"] },
+                        "limit": { "context": 8192, "output": 4096 }
+                    }
+                }
+            }
+        }
+        "#;
+
+        let payload = cache_payload_from_api_json_bytes(api_json).expect("payload should build");
+        assert_eq!(payload.providers.len(), 1);
+        assert_eq!(payload.models.len(), 1);
+
+        assert_eq!(payload.providers[0].provider_key, "openai");
+        assert_eq!(
+            payload.providers[0].api_type,
+            ProviderType::OpenAiCompletions
+        );
+
+        assert_eq!(payload.models[0].provider_idx, ProviderIdx::new(0));
+        assert_eq!(payload.models[0].model_key, "gpt-4");
+        assert_eq!(payload.models[0].modalities_bits, Modality::TEXT.bits());
+        assert_eq!(payload.models[0].max_input, 8192);
+        assert_eq!(payload.models[0].max_output, 4096);
+    }
+
+    #[test]
+    fn catalog_source_mapping_maps_provider_rows() {
+        let api_json = br#"
+        {
+            "alpha": {
+                "npm": "@ai-sdk/openai-responses",
+                "api": "https://alpha.example/v1",
+                "env": ["ALPHA_KEY"],
+                "models": {}
+            }
+        }
+        "#;
+        let catalog = catalog(api_json);
+
+        assert_eq!(catalog.provider_count(), 1);
+        let provider = catalog
+            .lookup_provider("alpha")
+            .expect("alpha provider should exist");
+        assert_eq!(provider.api_url, "https://alpha.example/v1");
+        assert_eq!(provider.env_vars(), ["ALPHA_KEY"]);
+        assert_eq!(provider.api_type, ProviderType::OpenAiResponses);
+    }
+
+    #[test]
+    fn catalog_source_mapping_defaults_missing_limits_to_zero() {
+        let api_json = br#"
+        {
+            "alpha": {
+                "npm": null,
+                "api": null,
+                "env": [],
+                "models": {
+                    "m1": {}
+                }
+            }
+        }
+        "#;
+        let catalog = catalog(api_json);
+
+        assert_eq!(catalog.provider_model_count(), 1);
+        let model = catalog
+            .lookup_provider_model("alpha", "m1")
+            .expect("alpha/m1 should exist");
+        assert_eq!(model.modalities, Modality::TEXT);
+        assert_eq!(model.max_input, 0);
+        assert_eq!(model.max_output, 0);
+    }
+
+    #[test]
+    fn catalog_source_mapping_uses_limit_input_when_present() {
+        let api_json = br#"
+        {
+            "alpha": {
+                "npm": null,
+                "api": null,
+                "env": [],
+                "models": {
+                    "m1": {
+                        "limit": {
+                            "context": 128000,
+                            "input": 124000,
+                            "output": 4096
+                        }
+                    }
+                }
+            }
+        }
+        "#;
+        let catalog = catalog(api_json);
+
+        let model = catalog
+            .lookup_provider_model("alpha", "m1")
+            .expect("alpha/m1 should exist");
+        assert_eq!(model.max_input, 124000);
+        assert_eq!(model.max_output, 4096);
+    }
+
+    #[test]
+    fn catalog_source_mapping_maps_directional_modalities() {
+        let api_json = br#"
+        {
+            "alpha": {
+                "npm": null,
+                "api": null,
+                "env": [],
+                "models": {
+                    "m1": {
+                        "modalities": {
+                            "input": ["text", "image", "pdf"],
+                            "output": ["text", "audio"]
+                        },
+                        "limit": { "context": 4096, "output": 512 }
+                    }
+                }
+            }
+        }
+        "#;
+
+        let catalog = catalog(api_json);
+        let model = catalog
+            .lookup_provider_model("alpha", "m1")
+            .expect("alpha/m1 should exist");
+        assert_eq!(
+            model.modalities,
+            Modality::TEXT_INPUT
+                | Modality::TEXT_OUTPUT
+                | Modality::IMAGE_INPUT
+                | Modality::AUDIO_OUTPUT
+        );
+    }
+
+    #[test]
+    fn catalog_source_mapping_maps_pdf_input_to_empty() {
+        let api_json = br#"
+        {
+            "alpha": {
+                "npm": null,
+                "api": null,
+                "env": [],
+                "models": {
+                    "m1": {
+                        "modalities": {
+                            "input": ["pdf"],
+                            "output": []
+                        }
+                    }
+                }
+            }
+        }
+        "#;
+
+        let catalog = catalog(api_json);
+        let model = catalog
+            .lookup_provider_model("alpha", "m1")
+            .expect("alpha/m1 should exist");
+        assert_eq!(model.modalities, Modality::empty());
+    }
+
+    #[test]
+    fn catalog_source_mapping_falls_back_to_empty_for_unknown_modalities() {
+        let api_json = br#"
+        {
+            "alpha": {
+                "npm": null,
+                "api": null,
+                "env": [],
+                "models": {
+                    "m1": {
+                        "modalities": {
+                            "input": ["binary"],
+                            "output": ["embedding"]
+                        }
+                    }
+                }
+            }
+        }
+        "#;
+
+        let catalog = catalog(api_json);
+        let model = catalog
+            .lookup_provider_model("alpha", "m1")
+            .expect("alpha/m1 should exist");
+        assert_eq!(model.modalities, Modality::empty());
+    }
+
+    #[test]
+    fn catalog_source_mapping_keeps_duplicate_model_ids_per_provider() {
+        let api_json = br#"
+        {
+            "alpha": {
+                "npm": "@ai-sdk/openai",
+                "api": null,
+                "env": [],
+                "models": {
+                    "m1": {
+                        "modalities": {
+                            "input": ["image"],
+                            "output": ["text"]
+                        },
+                        "limit": { "context": 4096, "output": 512 }
+                    }
+                }
+            },
+            "beta": {
+                "npm": "@ai-sdk/anthropic",
+                "api": null,
+                "env": [],
+                "models": {
+                    "m1": {
+                        "modalities": {
+                            "input": ["audio"],
+                            "output": ["video"]
+                        },
+                        "limit": { "context": 8192, "output": 256 }
+                    }
+                }
+            }
+        }
+        "#;
+        let catalog = catalog(api_json);
+
+        assert_eq!(catalog.provider_model_count(), 2);
+
+        let alpha_model = catalog
+            .lookup_provider_model("alpha", "m1")
+            .expect("alpha/m1 should exist");
+        assert_eq!(alpha_model.max_input, 4096);
+        assert_eq!(alpha_model.max_output, 512);
+        assert_eq!(
+            alpha_model.modalities,
+            Modality::IMAGE_INPUT | Modality::TEXT_OUTPUT
+        );
+
+        let beta_model = catalog
+            .lookup_provider_model("beta", "m1")
+            .expect("beta/m1 should exist");
+        assert_eq!(beta_model.max_input, 8192);
+        assert_eq!(beta_model.max_output, 256);
+        assert_eq!(
+            beta_model.modalities,
+            Modality::AUDIO_INPUT | Modality::VIDEO_OUTPUT
+        );
+    }
+
+    #[test]
+    fn catalog_source_mapping_keeps_same_data_for_different_input_key_order() {
+        let api_json_a = br#"
+        {
+            "beta": {
+                "npm": "@ai-sdk/anthropic",
+                "api": null,
+                "env": [],
+                "models": {
+                    "m2": { "limit": { "context": 2048, "output": 512 } }
+                }
+            },
+            "alpha": {
+                "npm": "@ai-sdk/openai",
+                "api": null,
+                "env": [],
+                "models": {
+                    "m1": { "limit": { "context": 1024, "output": 256 } }
+                }
+            }
+        }
+        "#;
+
+        let api_json_b = br#"
+        {
+            "alpha": {
+                "npm": "@ai-sdk/openai",
+                "api": null,
+                "env": [],
+                "models": {
+                    "m1": { "limit": { "context": 1024, "output": 256 } }
+                }
+            },
+            "beta": {
+                "npm": "@ai-sdk/anthropic",
+                "api": null,
+                "env": [],
+                "models": {
+                    "m2": { "limit": { "context": 2048, "output": 512 } }
+                }
+            }
+        }
+        "#;
+
+        let catalog_a = catalog(api_json_a);
+        let catalog_b = catalog(api_json_b);
+
+        assert_eq!(catalog_a.provider_count(), catalog_b.provider_count());
+        assert_eq!(
+            catalog_a.provider_model_count(),
+            catalog_b.provider_model_count()
+        );
+        assert_eq!(
+            catalog_a.model_config_count(),
+            catalog_b.model_config_count()
+        );
+        assert_eq!(
+            provider_snapshot(&catalog_a, "alpha"),
+            provider_snapshot(&catalog_b, "alpha")
+        );
+        assert_eq!(
+            provider_snapshot(&catalog_a, "beta"),
+            provider_snapshot(&catalog_b, "beta")
+        );
+        assert_eq!(
+            model_snapshot(&catalog_a, "alpha", "m1"),
+            model_snapshot(&catalog_b, "alpha", "m1")
+        );
+        assert_eq!(
+            model_snapshot(&catalog_a, "beta", "m2"),
+            model_snapshot(&catalog_b, "beta", "m2")
+        );
+    }
+
+    #[test]
+    fn provider_type_mapping_handles_known_and_unknown_packages() {
+        assert_eq!(
+            provider_type_from_models_dev_npm(Some("@ai-sdk/openai")),
+            ProviderType::OpenAiCompletions
+        );
+        assert_eq!(
+            provider_type_from_models_dev_npm(Some("@ai-sdk/google")),
+            ProviderType::Google
+        );
+        assert_eq!(
+            provider_type_from_models_dev_npm(Some("@ai-sdk/openai-compatible")),
+            ProviderType::Unknown
+        );
+        assert_eq!(
+            provider_type_from_models_dev_npm(None),
+            ProviderType::Unknown
+        );
+    }
+}
diff --git a/src/llm-coding-tools-models-dev/src/api/mod.rs b/src/llm-coding-tools-models-dev/src/api/mod.rs
new file mode 100644
index 00000000..730624ee
--- /dev/null
+++ b/src/llm-coding-tools-models-dev/src/api/mod.rs
@@ -0,0 +1,12 @@
+//! models.dev API parsing and catalog-source mapping.
+//!
+//! - [`schema`] parses upstream `https://models.dev/api.json` into a minimal
+//!   serde representation.
+//! - [`catalog_sources`] maps parsed data into a
+//!   [`llm_coding_tools_core::models::ModelCatalog`].
+//!
+//! Both modules intentionally keep only fields required by core catalog
+//! construction so ingest stays fast and memory-bounded.
+
+pub(crate) mod catalog_sources;
+pub(crate) mod schema;
diff --git a/src/llm-coding-tools-models-dev/src/api/schema.rs b/src/llm-coding-tools-models-dev/src/api/schema.rs
new file mode 100644
index 00000000..3e0f4c12
--- /dev/null
+++ b/src/llm-coding-tools-models-dev/src/api/schema.rs
@@ -0,0 +1,151 @@
+//! Minimal models.dev API schema used by this crate.
+//!
+//! We deserialize only fields needed for catalog-source mapping:
+//! provider metadata (`npm`, `api`, `env`) and model token limits
+//! (`limit.context`, `limit.input`, `limit.output`) plus directional modalities
+//! (`modalities.input[]`, `modalities.output[]`).
+//!
+//! Representative payload shape from `https://models.dev/api.json`:
+//!
+//! ```json
+//! {
+//!   "openai": {
+//!     "id": "openai",
+//!     "npm": "@ai-sdk/openai",
+//!     "api": null,
+//!     "env": ["OPENAI_API_KEY"],
+//!     "models": {
+//!       "gpt-4o": {
+//!         "id": "gpt-4o",
+//!         "modalities": {
+//!           "input": ["text", "image"],
+//!           "output": ["text"]
+//!         },
+//!         "limit": {
+//!           "context": 128000,
+//!           "output": 16384
+//!         }
+//!       }
+//!     }
+//!   }
+//! }
+//! ```
+//!
+//! Mapping into local structs:
+//! - top-level provider map entry -> [`ApiProviderEntry`]
+//! - `models.<model_id>` object -> [`ApiModelEntry`]
+//! - `models.<model_id>.modalities` object -> [`ApiModelModalities`]
+//! - `models.<model_id>.limit` object -> [`ApiModelLimit`]
+//!
+//! Unknown fields are intentionally ignored so we can drop large unused sections
+//! early and keep parse memory bounded.
+
+use crate::error::CatalogResult;
+use serde::Deserialize;
+use std::collections::HashMap;
+
+#[derive(Debug, Deserialize)]
+pub(crate) struct ApiProviderEntry {
+    #[serde(default)]
+    pub(crate) npm: Option<String>,
+    #[serde(default)]
+    pub(crate) api: Option<String>,
+    #[serde(default)]
+    pub(crate) env: Vec<String>,
+    #[serde(default)]
+    pub(crate) models: HashMap<String, ApiModelEntry>,
+}
+
+#[derive(Debug, Deserialize)]
+pub(crate) struct ApiModelEntry {
+    #[serde(default)]
+    pub(crate) limit: Option<ApiModelLimit>,
+    #[serde(default)]
+    pub(crate) modalities: Option<ApiModelModalities>,
+}
+
+#[derive(Debug, Deserialize)]
+pub(crate) struct ApiModelModalities {
+    #[serde(default)]
+    pub(crate) input: Vec<String>,
+    #[serde(default)]
+    pub(crate) output: Vec<String>,
+}
+
+#[derive(Debug, Deserialize)]
+pub(crate) struct ApiModelLimit {
+    #[serde(default)]
+    pub(crate) context: u32,
+    #[serde(default)]
+    pub(crate) input: u32,
+    #[serde(default)]
+    pub(crate) output: u32,
+}
+
+/// Deserializes the raw bytes of upstream `api.json` into a provider map.
+///
+/// The payload must be a flat top-level JSON object keyed by provider id, with
+/// each value deserialized as an [`ApiProviderEntry`].
+#[inline]
+pub(crate) fn parse_api_json(
+    json_bytes: &[u8],
+) -> CatalogResult<HashMap<String, ApiProviderEntry>> {
+    serde_json::from_slice(json_bytes).map_err(Into::into)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::parse_api_json;
+
+    #[test]
+    fn parse_api_json_supports_flat_provider_map() {
+        let api_json = br#"{"alpha":{"id":"alpha","npm":"@ai-sdk/openai","api":null,"env":["ALPHA_KEY"],"models":{"m1":{"modalities":{"input":["text","image"],"output":["text"]},"limit":{"context":4096,"output":512}}}}}"#;
+        let providers = parse_api_json(api_json).expect("API payload should parse");
+        let provider = providers.get("alpha").expect("provider should exist");
+
+        assert_eq!(provider.npm.as_deref(), Some("@ai-sdk/openai"));
+        assert_eq!(provider.env.as_slice(), ["ALPHA_KEY"]);
+
+        let model = provider.models.get("m1").expect("model should exist");
+        let modalities = model.modalities.as_ref().expect("modalities should exist");
+        let limit = model.limit.as_ref().expect("limit should exist");
+        assert_eq!(modalities.input.as_slice(), ["text", "image"]);
+        assert_eq!(modalities.output.as_slice(), ["text"]);
+        assert_eq!(limit.context, 4096);
+        assert_eq!(limit.output, 512);
+    }
+
+    #[test]
+    fn parse_api_json_ignores_unknown_fields() {
+        let api_json = br#"
+        {
+            "alpha": {
+                "id": "alpha",
+                "name": "Alpha",
+                "npm": "@ai-sdk/openai",
+                "api": "https://alpha.example/v1",
+                "env": ["ALPHA_KEY"],
+                "models": {
+                    "m1": {
+                        "description": "ignored",
+                        "limit": {
+                            "context": 128000,
+                            "input": 124000,
+                            "output": 4096
+                        }
+                    }
+                }
+            }
+        }
+        "#;
+
+        let providers = parse_api_json(api_json).expect("API payload should parse");
+        let provider = providers.get("alpha").expect("provider should exist");
+        let model = provider.models.get("m1").expect("model should exist");
+        let limit = model.limit.as_ref().expect("limit should exist");
+
+        assert_eq!(limit.context, 128000);
+        assert_eq!(limit.input, 124000);
+        assert_eq!(limit.output, 4096);
+    }
+}
diff --git a/src/llm-coding-tools-models-dev/src/cache/format.rs b/src/llm-coding-tools-models-dev/src/cache/format.rs
new file mode 100644
index 00000000..8059ac1b
--- /dev/null
+++ b/src/llm-coding-tools-models-dev/src/cache/format.rs
@@ -0,0 +1,472 @@
+//! Cache container layout and read/write helpers.
+//!
+//! The on-disk layout for `models.dev.catalog.v1.cache` is:
+//!
+//! ```text
+//! [0..12)   12-byte fixed prelude:
+//!           - [0..4)   etag_len: u32 little-endian
+//!           - [4..8)   payload_len_compressed: u32 little-endian
+//!           - [8..12)  payload_len_decompressed: u32 little-endian
+//! [12..N)   raw ETag bytes (etag_len bytes, may be 0)
+//! [N..EOF)  compressed payload (rest of file)
+//! ```
+//!
+//! Versioning is keyed by filename (`*.v1.cache`), so this prelude carries
+//! lengths only and no magic marker.
+//! `payload_len_compressed` is retained so reads can detect unexpected file
+//! truncation before decode.
+//!
+//! Read path intentionally keeps payload compressed. We read the whole file in
+//! one pre-sized allocation, then parse/slice into `prelude`, `etag`, and
+//! `payload` views without additional copying.
+//!
+//! ## Performance
+//!
+//! models.dev changes infrequently, so cache hits are expected to be common.
+//! [`crate::cache::payload`] documents typical compressed payload sizes of about
+//! 23-32 kB, which keeps the whole container small enough that a single
+//! sequential read is generally the faster, simpler hot path on modern
+//! NVMe-backed systems.
+//!
+//! ## Safety
+//!
+//! This is not a hardened parser: the file is assumed to have been created by
+//! the user. Erroneous data (e.g., maliciously crafted headers) is not validated;
+//! only accidental corruption/truncation (e.g., from partial writes) is detected.
+
+use crate::{
+    error::{CatalogError, CatalogResult},
+    fs,
+};
+use endian_writer::{EndianReader, EndianWriter, HasSize, LittleEndianReader, LittleEndianWriter};
+use endian_writer_derive::EndianWritable;
+use std::mem::size_of;
+use std::path::Path;
+use std::ptr::copy_nonoverlapping;
+
+/// Fixed v1 prelude, encoded little-endian.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, EndianWritable)]
+#[repr(C)]
+struct CachePreludeV1 {
+    /// Length in bytes of the optional ETag block.
+    etag_len: u32,
+    /// Length in bytes of compressed payload as written to disk.
+    payload_len_compressed: u32,
+    /// Length in bytes after decompression.
+    payload_len_decompressed: u32,
+}
+
+/// Input parameters for writing a cache container.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub(crate) struct CacheWriteInput<'a> {
+    /// Optional ETag bytes (e.g., HTTP ETag value).
+    pub(crate) etag: Option<&'a [u8]>,
+    /// Compressed payload bytes.
+    pub(crate) payload_compressed: &'a [u8],
+    /// Expected decompressed payload length in bytes.
+    pub(crate) payload_len_decompressed: usize,
+}
+
+/// Fixed prelude size for v1.
+const CACHE_HEADER_LEN: usize = <CachePreludeV1 as HasSize>::SIZE;
+
+// SAFETY: All modern platforms have usize >= 32 bits.
+// This lets us safely cast u32 lengths to usize without checked arithmetic.
+const _: () = assert!(size_of::<usize>() >= size_of::<u32>());
+
+/// Parsed cache container: prelude lengths plus the raw file bytes.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub(crate) struct CacheFileData {
+    /// Length in bytes of the ETag block that follows the fixed prelude.
+    etag_len: u32,
+    /// Length in bytes of the compressed payload, as declared in the prelude.
+    payload_len_compressed: u32,
+    /// Decompressed payload length declared in the prelude (allocation size hint).
+    payload_len_decompressed: u32,
+    /// Full file bytes laid out as `prelude || etag || payload_compressed`.
+    file_bytes: Box<[u8]>,
+}
+
+impl CacheFileData {
+    /// Borrows the ETag stored in the container.
+    ///
+    /// Yields `None` when the ETag block is empty.
+    #[inline]
+    pub(crate) fn etag_bytes(&self) -> Option<&[u8]> {
+        // The ETag block starts right after the fixed prelude.
+        let etag_end = CACHE_HEADER_LEN + self.etag_len as usize;
+        match &self.file_bytes[CACHE_HEADER_LEN..etag_end] {
+            [] => None,
+            etag => Some(etag),
+        }
+    }
+
+    /// Borrows the compressed payload, i.e. every byte after the ETag block.
+    #[inline]
+    pub(crate) fn payload_compressed(&self) -> &[u8] {
+        let etag_end = CACHE_HEADER_LEN + self.etag_len as usize;
+        &self.file_bytes[etag_end..]
+    }
+
+    /// Compressed payload length in bytes, as recorded in the prelude.
+    #[allow(dead_code)] // public API
+    #[inline]
+    pub(crate) fn payload_len_compressed(&self) -> u32 {
+        self.payload_len_compressed
+    }
+
+    /// Decompressed payload length in bytes, as recorded in the prelude.
+    #[inline]
+    pub(crate) fn payload_len_decompressed(&self) -> u32 {
+        self.payload_len_decompressed
+    }
+}
+
+/// Loads a cache container from `path` and validates its framing.
+///
+/// The whole file is read in one shot. Only the fixed prelude is decoded; the
+/// ETag and the still-compressed payload stay in place as raw bytes. Declared
+/// block lengths must add up to the exact file size, so truncated files and
+/// files with trailing bytes are rejected before decode.
+///
+/// # Performance
+///
+/// The single whole-file read is intentional: cache hits are expected to be
+/// common and [`crate::cache::payload`] documents typical compressed payload
+/// sizes of about 23-32 kB.
+///
+/// # Errors
+///
+/// Returns [`CatalogError::CacheFormat`] when the prelude is truncated, when
+/// declared lengths overflow `usize`, or when they do not match the file size.
+#[maybe_async::maybe_async]
+pub(crate) async fn read_cache_file(path: &Path) -> CatalogResult<CacheFileData> {
+    let file_bytes = fs::read(path).await?;
+    if file_bytes.len() < CACHE_HEADER_LEN {
+        return Err(CatalogError::CacheFormat("cache prelude is truncated"));
+    }
+
+    let CachePreludeV1 {
+        etag_len,
+        payload_len_compressed,
+        payload_len_decompressed,
+    } = decode_prelude(&file_bytes[..CACHE_HEADER_LEN]);
+
+    // Prelude + ETag + payload must account for every byte of the file.
+    let declared_len = (etag_len as usize)
+        .checked_add(payload_len_compressed as usize)
+        .and_then(|blocks_len| blocks_len.checked_add(CACHE_HEADER_LEN))
+        .ok_or(CatalogError::CacheFormat(
+            "cache file size exceeds platform limits",
+        ))?;
+    if file_bytes.len() != declared_len {
+        return Err(CatalogError::CacheFormat(
+            "cache file size mismatch (possible truncation or trailing data)",
+        ));
+    }
+
+    Ok(CacheFileData {
+        etag_len,
+        payload_len_compressed,
+        payload_len_decompressed,
+        file_bytes,
+    })
+}
+
+/// Writes a cache container to disk atomically.
+///
+/// Uses `tempfile::NamedTempFile` to ensure unique temp files for concurrent
+/// writers and cross-platform atomic replacement via `persist()`.
+///
+/// # Errors
+///
+/// Returns [`CatalogError::CacheFormat`] if `path` has no parent directory or a
+/// block length exceeds v1 `u32` limits, or [`CatalogError::Io`] on I/O failure.
+#[maybe_async::maybe_async]
+pub(crate) async fn write_cache_file(
+    path: &Path,
+    input: &CacheWriteInput<'_>,
+) -> CatalogResult<()> {
+    let parent = path
+        .parent()
+        .ok_or_else(|| CatalogError::CacheFormat("cache path has no parent directory"))?;
+    fs::create_dir_all(parent).await?;
+
+    let etag_bytes = input.etag.unwrap_or(&[]);
+    let prelude = CachePreludeV1 {
+        etag_len: to_u32_limit(etag_bytes.len(), "etag exceeds v1 length limits")?,
+        payload_len_compressed: to_u32_limit(
+            input.payload_compressed.len(),
+            "compressed payload exceeds v1 length limits",
+        )?,
+        payload_len_decompressed: to_u32_limit(
+            input.payload_len_decompressed,
+            "decompressed payload exceeds v1 length limits",
+        )?,
+    };
+
+    let encoded_prelude = encode_prelude(prelude);
+
+    let encoded_len = CACHE_HEADER_LEN
+        .checked_add(etag_bytes.len())
+        .and_then(|value| value.checked_add(input.payload_compressed.len()))
+        .ok_or(CatalogError::CacheFormat(
+            "cache file exceeds platform length limits",
+        ))?;
+
+    let mut uninit = fs::alloc_uninit_u8_slice(encoded_len);
+    let ptr = uninit.as_mut_ptr().cast::<u8>();
+    // SAFETY: `uninit` must hold `encoded_len` bytes, the sum of the three copy lengths.
+    unsafe {
+        copy_nonoverlapping(encoded_prelude.as_ptr(), ptr, CACHE_HEADER_LEN);
+        copy_nonoverlapping(
+            etag_bytes.as_ptr(),
+            ptr.add(CACHE_HEADER_LEN),
+            etag_bytes.len(),
+        );
+        copy_nonoverlapping(
+            input.payload_compressed.as_ptr(),
+            ptr.add(CACHE_HEADER_LEN + etag_bytes.len()),
+            input.payload_compressed.len(),
+        );
+    }
+
+    let file_bytes = fs::assume_init_u8_slice(uninit);
+
+    #[cfg(feature = "blocking")]
+    {
+        use std::io::Write as _;
+        let mut temp = tempfile::NamedTempFile::new_in(parent)?;
+        temp.write_all(&file_bytes)?;
+        temp.persist(path).map_err(|e| e.error)?;
+    }
+
+    #[cfg(feature = "tokio")]
+    {
+        let file_bytes: Box<[u8]> = file_bytes;
+        let path = path.to_path_buf();
+        let parent = parent.to_path_buf();
+        tokio::task::spawn_blocking(move || {
+            use std::io::Write as _;
+            let mut temp = tempfile::NamedTempFile::new_in(&parent)?;
+            temp.write_all(&file_bytes)?;
+            temp.persist(&path).map_err(|e| e.error)
+        })
+        .await??;
+    }
+
+    Ok(())
+}
+
+#[inline]
+fn to_u32_limit(value: usize, msg: &'static str) -> CatalogResult<u32> {
+    u32::try_from(value).map_err(|_| CatalogError::CacheFormat(msg))
+}
+
+/// Serializes `prelude` into its fixed-size byte form.
+///
+/// Fields are written through a `LittleEndianWriter`.
+#[inline]
+fn encode_prelude(prelude: CachePreludeV1) -> [u8; CACHE_HEADER_LEN] {
+    let mut buf = [0_u8; CACHE_HEADER_LEN];
+    // SAFETY: `buf` is `CACHE_HEADER_LEN` bytes long, which is the derived
+    // serialized size of `CachePreludeV1`.
+    unsafe { LittleEndianWriter::new(buf.as_mut_ptr()).write(&prelude) };
+    buf
+}
+
+/// Decodes prelude from little-endian bytes.
+///
+/// Panics if `bytes` is shorter than `CACHE_HEADER_LEN`.
+#[inline]
+fn decode_prelude(bytes: &[u8]) -> CachePreludeV1 {
+    assert!(bytes.len() >= CACHE_HEADER_LEN, "cache prelude is truncated");
+    // SAFETY: The assert above guarantees `CACHE_HEADER_LEN` readable bytes.
+    unsafe { LittleEndianReader::new(bytes.as_ptr()).read() }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use tempfile::TempDir;
+
+    // Verifies prelude encoding/decoding preserves all fields.
+    #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))]
+    async fn prelude_layout_round_trips() {
+        let prelude = CachePreludeV1 {
+            etag_len: 13,
+            payload_len_compressed: 44,
+            payload_len_decompressed: 333,
+        };
+
+        let round_trip = decode_prelude(&encode_prelude(prelude));
+        assert_eq!(round_trip, prelude);
+    }
+
+    // Verifies full round-trip with ETag included.
+    #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))]
+    async fn write_then_read_round_trips_with_etag() {
+        let temp = TempDir::new().expect("tempdir");
+        let path = temp.path().join("models.dev.catalog.v1.cache");
+
+        let input = CacheWriteInput {
+            etag: Some(b"etag-123"),
+            payload_compressed: b"payload-zstd-bytes",
+            payload_len_decompressed: 2048,
+        };
+        write_cache_file(&path, &input).await.expect("write cache");
+        let data = read_cache_file(&path).await.expect("read cache");
+
+        assert_eq!(data.etag_bytes(), input.etag);
+        assert_eq!(data.payload_compressed(), input.payload_compressed);
+        assert_eq!(
+            data.payload_len_compressed(),
+            input.payload_compressed.len() as u32
+        );
+        assert_eq!(
+            data.payload_len_decompressed(),
+            input.payload_len_decompressed as u32
+        );
+    }
+
+    // Verifies full round-trip without ETag.
+    #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))]
+    async fn write_then_read_round_trips_without_etag() {
+        let temp = TempDir::new().expect("tempdir");
+        let path = temp.path().join("models.dev.catalog.v1.cache");
+
+        let input = CacheWriteInput {
+            etag: None,
+            payload_compressed: b"payload-only",
+            payload_len_decompressed: 1024,
+        };
+        write_cache_file(&path, &input).await.expect("write cache");
+        let data = read_cache_file(&path).await.expect("read cache");
+
+        assert_eq!(data.etag_bytes(), input.etag);
+        assert_eq!(data.payload_compressed(), input.payload_compressed);
+        assert_eq!(
+            data.payload_len_decompressed(),
+            input.payload_len_decompressed as u32
+        );
+    }
+
+    // Rejects files shorter than the fixed header.
+    #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))]
+    async fn read_rejects_truncated_prelude() {
+        // File is 1 byte shorter than required header
+        let temp = TempDir::new().expect("tempdir");
+        let path = temp.path().join("short-prelude.cache");
+
+        std::fs::write(&path, [0_u8; CACHE_HEADER_LEN - 1]).expect("write fixture");
+        let error = read_cache_file(&path)
+            .await
+            .expect_err("truncated prelude should fail");
+        assert!(matches!(error, CatalogError::CacheFormat(_)));
+    }
+
+    // Rejects when file ends before etag_len bytes after header.
+    #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))]
+    async fn read_rejects_short_etag_length() {
+        // Header claims 12 bytes of etag but only 4 provided
+        let temp = TempDir::new().expect("tempdir");
+        let path = temp.path().join("short-etag.cache");
+
+        let prelude = CachePreludeV1 {
+            etag_len: 12,
+            payload_len_compressed: 0,
+            payload_len_decompressed: 0,
+        };
+        let mut bytes = encode_prelude(prelude).to_vec();
+        bytes.extend_from_slice(b"tiny"); // 'tiny' etag is 4 bytes
+        std::fs::write(&path, bytes).expect("write fixture");
+
+        // Header claims 12 bytes of etag but only 4 'tiny' provided, so 8 bytes short.
+        let error = read_cache_file(&path)
+            .await
+            .expect_err("short etag should fail");
+        assert!(matches!(error, CatalogError::CacheFormat(_)));
+    }
+
+    // Accepts minimal valid file with all zero-length fields.
+    #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))]
+    async fn read_supports_empty_etag_and_payload() {
+        let temp = TempDir::new().expect("tempdir");
+        let path = temp.path().join("empty.cache");
+
+        let prelude = CachePreludeV1 {
+            etag_len: 0,
+            payload_len_compressed: 0,
+            payload_len_decompressed: 0,
+        };
+        std::fs::write(&path, encode_prelude(prelude)).expect("write fixture");
+        let data = read_cache_file(&path).await.expect("read empty cache");
+
+        assert_eq!(data.etag_bytes(), None);
+        assert!(data.payload_compressed().is_empty());
+        assert_eq!(data.payload_len_compressed(), 0);
+        assert_eq!(data.payload_len_decompressed(), 0);
+    }
+
+    // Rejects when declared compressed payload length does not match file size.
+    #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))]
+    async fn read_rejects_mismatched_payload_length() {
+        // Header claims 10 bytes payload but only 5 provided
+        let temp = TempDir::new().expect("tempdir");
+        let path = temp.path().join("mismatched-payload-len.cache");
+
+        let prelude = CachePreludeV1 {
+            etag_len: 4,
+            payload_len_compressed: 10,
+            payload_len_decompressed: 0,
+        };
+        let mut bytes = encode_prelude(prelude).to_vec();
+        bytes.extend_from_slice(b"etag");
+        bytes.extend_from_slice(b"short"); // only 5 bytes, not 10 here.
+        std::fs::write(&path, bytes).expect("write fixture");
+
+        let error = read_cache_file(&path)
+            .await
+            .expect_err("payload length mismatch should fail");
+        assert!(matches!(error, CatalogError::CacheFormat(_)));
+    }
+
+    // Verifies atomic replacement replaces existing cache file content.
+    #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))]
+    async fn write_replaces_existing_cache_atomically() {
+        let temp = TempDir::new().expect("tempdir");
+        let path = temp.path().join("atomic-test.cache");
+
+        // Write first payload
+        let first_input = CacheWriteInput {
+            etag: Some(b"etag-1"),
+            payload_compressed: b"first-payload",
+            payload_len_decompressed: 100,
+        };
+        write_cache_file(&path, &first_input)
+            .await
+            .expect("write first");
+
+        let first_data = read_cache_file(&path).await.expect("read first");
+        assert_eq!(first_data.etag_bytes(), Some(b"etag-1".as_slice()));
+        assert_eq!(first_data.payload_compressed(), b"first-payload");
+
+        // Write second payload (atomic replacement)
+        let second_input = CacheWriteInput {
+            etag: Some(b"etag-2"),
+            payload_compressed: b"second-payload-different",
+            payload_len_decompressed: 200,
+        };
+        write_cache_file(&path, &second_input)
+            .await
+            .expect("write second");
+
+        let second_data = read_cache_file(&path).await.expect("read second");
+        assert_eq!(second_data.etag_bytes(), Some(b"etag-2".as_slice()));
+        assert_eq!(
+            second_data.payload_compressed(),
+            b"second-payload-different"
+        );
+        assert_eq!(second_data.payload_len_decompressed(), 200);
+    }
+}
diff --git a/src/llm-coding-tools-models-dev/src/cache/mod.rs b/src/llm-coding-tools-models-dev/src/cache/mod.rs
new file mode 100644
index 00000000..43af19b8
--- /dev/null
+++ b/src/llm-coding-tools-models-dev/src/cache/mod.rs
@@ -0,0 +1,20 @@
+//! Cache path and container utilities for models.dev catalog data.
+//!
+//! Responsibilities are split by concern:
+//! - `path` resolves the shared cache location.
+//! - `format` defines the cache container layout and read/write helpers.
+//! - `payload` serializes catalog data into the compact cache payload.
+//!
+//! Runtime behavior follows crate features:
+//! - `tokio` (default): async file I/O APIs.
+//! - `blocking`: sync file I/O APIs.
+//!
+//! The public API currently exposes path resolution only; container helpers are
+//! crate-internal until the sync/load flow is wired.
+
+pub(crate) mod format;
+mod path;
+pub(crate) mod payload;
+
+pub use crate::error::CatalogResult;
+pub use path::{shared_cache_path, CACHE_PATH_ENV_VAR};
diff --git a/src/llm-coding-tools-models-dev/src/cache/path.rs b/src/llm-coding-tools-models-dev/src/cache/path.rs
new file mode 100644
index 00000000..e43d256e
--- /dev/null
+++ b/src/llm-coding-tools-models-dev/src/cache/path.rs
@@ -0,0 +1,63 @@
+//! Cross-platform cache path resolution.
+
+use crate::{error::CatalogResult, CatalogError};
+use std::path::PathBuf;
+
+/// Environment variable name for overriding the default cache path.
+pub const CACHE_PATH_ENV_VAR: &str = "LLM_CODING_TOOLS_MODELS_DEV_CACHE_PATH";
+
+const CACHE_SUBDIR: &str = "llm-coding-tools";
+const CACHE_FILENAME: &str = "models.dev.catalog.v1.cache";
+
+/// Returns the shared cache path for the models.dev catalog.
+///
+/// This function determines the appropriate cache location using the following
+/// precedence:
+///
+/// 1. `LLM_CODING_TOOLS_MODELS_DEV_CACHE_PATH` environment variable (if set; must not be empty)
+/// 2. Platform cache directory + `llm-coding-tools/models.dev.catalog.v1.cache`
+///
+/// # Platform Cache Locations
+///
+/// - **Linux**: `~/.cache/llm-coding-tools/models.dev.catalog.v1.cache`
+/// - **macOS**: `~/Library/Caches/llm-coding-tools/models.dev.catalog.v1.cache`
+/// - **Windows**: `%LOCALAPPDATA%\llm-coding-tools\models.dev.catalog.v1.cache`
+///
+/// # Returns
+///
+/// The full path to the cache file.
+///
+/// # Errors
+///
+/// - [`CatalogError::Configuration`]: the environment variable is set but empty.
+/// - [`CatalogError::CachePathNotFound`]: the environment variable is not set
+///   and the platform cache directory cannot be determined.
+///
+/// # Examples
+///
+/// ```
+/// use llm_coding_tools_models_dev::shared_cache_path;
+///
+/// # fn example() -> Result<(), Box<dyn std::error::Error>> {
+/// let path = shared_cache_path()?;
+/// println!("Cache location: {}", path.display());
+/// # Ok(())
+/// # }
+/// ```
+pub fn shared_cache_path() -> CatalogResult<PathBuf> {
+    // 1. An explicit override wins; an empty value is rejected, not ignored.
+    if let Some(os_str) = std::env::var_os(CACHE_PATH_ENV_VAR) {
+        if os_str.is_empty() {
+            return Err(CatalogError::Configuration(format!(
+                "{} is set but empty",
+                CACHE_PATH_ENV_VAR
+            )));
+        }
+        return Ok(PathBuf::from(&os_str));
+    }
+
+    // 2. Otherwise derive the path from the platform cache directory.
+    let cache_dir = dirs::cache_dir().ok_or(CatalogError::CachePathNotFound)?;
+
+    Ok(cache_dir.join(CACHE_SUBDIR).join(CACHE_FILENAME))
+}
diff --git a/src/llm-coding-tools-models-dev/src/cache/payload.rs b/src/llm-coding-tools-models-dev/src/cache/payload.rs
new file mode 100644
index 00000000..3a1e65b0
--- /dev/null
+++ b/src/llm-coding-tools-models-dev/src/cache/payload.rs
@@ -0,0 +1,276 @@
+//! Cache payload serialization for models.dev catalog data.
+//!
+//! The payload is stored as simple owned rows so it can be encoded compactly
+//! with bitcode and rebuilt into a [`ModelCatalog`]
+//! without reparsing the original JSON.
+//!
+//! ## Compression Benchmark
+//!
+//! Using a 1.26 MB `api.json` snapshot (models.dev), converted to bitcode
+//! then compressed with zstd at various levels:
+//!
+//! | Level          | Size      | % of JSON | Time    |
+//! |----------------|-----------|-----------|---------|
+//! | JSON           | 1260.7 KB | 100.00%   | -       |
+//! | (raw bitcode)  | 105.7 KB  | 8.39%     | -       |
+//! | 0              | 29.7 KB   | 2.36%     | 1.4ms   |
+//! | 1              | 32.1 KB   | 2.55%     | 1.0ms   |
+//! | 2              | 31.7 KB   | 2.51%     | 1.0ms   |
+//! | 3              | 29.7 KB   | 2.36%     | 1.1ms   |
+//! | 4              | 29.7 KB   | 2.36%     | 1.9ms   |
+//! | 5              | 27.5 KB   | 2.18%     | 2.9ms   |
+//! | 6              | 27.1 KB   | 2.15%     | 3.6ms   |
+//! | 7              | 26.6 KB   | 2.11%     | 4.8ms   |
+//! | 8              | 26.7 KB   | 2.12%     | 5.0ms   |
+//! | 9              | 26.7 KB   | 2.12%     | 6.3ms   |
+//! | 10             | 26.4 KB   | 2.09%     | 9.1ms   |
+//! | 11             | 26.1 KB   | 2.07%     | 8.5ms   |
+//! | 12             | 26.1 KB   | 2.07%     | 14.4ms  |
+//! | 13             | 26.0 KB   | 2.06%     | 12.0ms  |
+//! | 14             | 26.0 KB   | 2.06%     | 16.4ms  |
+//! | 15             | 25.9 KB   | 2.06%     | 21.6ms  |
+//! | 16             | 23.6 KB   | 1.87%     | 24.2ms  |
+//! | 17             | 23.2 KB   | 1.84%     | 27.6ms  |
+//! | 18             | 23.2 KB   | 1.84%     | 42.6ms  |
+//! | 19             | 23.1 KB   | 1.83%     | 81.3ms  |
+//! | 20             | 23.1 KB   | 1.83%     | 96.3ms  |
+//! | 21             | 23.1 KB   | 1.83%     | 125.4ms |
+//! | 22             | 23.1 KB   | 1.83%     | 207.5ms |
+//!
+//! Levels 1-3 offer the best speed/ratio tradeoff (~1ms, 2.4-2.6% of JSON).
+//! Levels 19-22 provide maximal compression but take 80-200ms.
+
+use crate::error::{CatalogError, CatalogResult};
+use llm_coding_tools_core::models::{
+    Modality, ModelCatalog, ModelInfo, ProviderIdx, ProviderInfo, ProviderModelSource,
+    ProviderSource, ProviderType,
+};
+
+/// Owned, bitcode-serializable snapshot of the models.dev catalog kept in the cache.
+#[derive(Debug, Clone, PartialEq, bitcode::Encode, bitcode::Decode)]
+pub(crate) struct CatalogCachePayload {
+    /// Provider rows in catalog order; model rows index into this list.
+    pub(crate) providers: Vec<CachedProviderRow>,
+    /// Model rows, each pointing at its provider through `provider_idx`.
+    pub(crate) models: Vec<CachedModelRow>,
+}
+
+/// One provider entry of the cache payload; turned into a [`ProviderSource`] on load.
+#[derive(Debug, Clone, PartialEq, Eq, bitcode::Encode, bitcode::Decode)]
+pub(crate) struct CachedProviderRow {
+    /// Stable key the provider is looked up by (for example `openai`).
+    pub(crate) provider_key: String,
+    /// Base API URL for requests to this provider.
+    pub(crate) api_url: String,
+    /// Names of environment variables that can supply credentials.
+    pub(crate) env_vars: Vec<String>,
+    /// Provider protocol or API shape.
+    pub(crate) api_type: ProviderType,
+}
+
+/// One model entry of the cache payload; turned into a [`ProviderModelSource`] on load.
+#[derive(Debug, Clone, PartialEq, bitcode::Encode, bitcode::Decode)]
+pub(crate) struct CachedModelRow {
+    /// Index into [`CatalogCachePayload::providers`].
+    pub(crate) provider_idx: ProviderIdx,
+    /// Stable model lookup key within the provider.
+    pub(crate) model_key: String,
+    /// Raw [`Modality`] bits; restored with `Modality::from_bits_retain` on load.
+    pub(crate) modalities_bits: u8,
+    /// Maximum supported input tokens.
+    pub(crate) max_input: u32,
+    /// Maximum supported output tokens.
+    pub(crate) max_output: u32,
+    /// Optional default temperature.
+    pub(crate) temperature: Option<f32>,
+    /// Optional default top-p value.
+    pub(crate) top_p: Option<f32>,
+}
+
+/// Serializes `payload` to bitcode bytes; the result is not compressed by this function.
+pub(crate) fn encode_cache_payload(payload: &CatalogCachePayload) -> Vec<u8> {
+    bitcode::encode(payload)
+}
+
+/// Parses `bytes` as a bitcode-encoded [`CatalogCachePayload`].
+///
+/// # Errors
+///
+/// Fails with [`CatalogError::BitcodeDecode`], carrying the decoder's message,
+/// if `bytes` is not a valid encoding of the payload.
+pub(crate) fn decode_cache_payload(bytes: &[u8]) -> CatalogResult<CatalogCachePayload> {
+    bitcode::decode(bytes).map_err(|err| CatalogError::BitcodeDecode(err.to_string()))
+}
+
+/// Turns decoded cache rows back into a [`ModelCatalog`].
+///
+/// # Errors
+///
+/// Returns [`CatalogError`] when [`ModelCatalog::build`] rejects the rows, such
+/// as when a model references an out-of-range provider.
+pub(crate) fn catalog_from_cache_payload(
+    payload: CatalogCachePayload,
+) -> CatalogResult<ModelCatalog> {
+    let provider_sources: Vec<_> = payload
+        .providers
+        .into_iter()
+        .map(|provider_row| ProviderSource {
+            provider_key: provider_row.provider_key,
+            provider: ProviderInfo {
+                api_url: provider_row.api_url,
+                env_vars: provider_row.env_vars,
+                api_type: provider_row.api_type,
+            },
+        })
+        .collect();
+
+    let model_sources: Vec<_> = payload
+        .models
+        .iter()
+        .map(|model_row| ProviderModelSource {
+            provider_idx: model_row.provider_idx,
+            model_key: model_row.model_key.as_str(),
+            model: ModelInfo {
+                modalities: Modality::from_bits_retain(model_row.modalities_bits),
+                max_input: model_row.max_input,
+                max_output: model_row.max_output,
+                temperature: model_row.temperature,
+                top_p: model_row.top_p,
+            },
+        })
+        .collect();
+
+    ModelCatalog::build(&provider_sources, &model_sources).map_err(CatalogError::from)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn sample_payload() -> CatalogCachePayload {
+        CatalogCachePayload {
+            providers: vec![CachedProviderRow {
+                provider_key: "openai".to_string(),
+                api_url: "https://api.openai.com/v1".to_string(),
+                env_vars: vec!["OPENAI_API_KEY".to_string()],
+                api_type: ProviderType::OpenAiCompletions,
+            }],
+            models: vec![CachedModelRow {
+                provider_idx: ProviderIdx::new(0),
+                model_key: "gpt-4".to_string(),
+                modalities_bits: Modality::TEXT.bits(),
+                max_input: 8192,
+                max_output: 4096,
+                temperature: Some(0.7),
+                top_p: Some(0.9),
+            }],
+        }
+    }
+
+    #[test]
+    fn payload_round_trip() {
+        let original = sample_payload();
+        let encoded = encode_cache_payload(&original);
+        let decoded = decode_cache_payload(&encoded).expect("decode should succeed");
+        assert_eq!(original, decoded);
+    }
+
+    #[test]
+    fn catalog_from_payload_reconstructs_provider() {
+        let payload = sample_payload();
+        let catalog = catalog_from_cache_payload(payload).expect("catalog build should succeed");
+
+        let provider = catalog
+            .lookup_provider("openai")
+            .expect("provider should exist");
+        assert_eq!(provider.api_url, "https://api.openai.com/v1");
+        assert_eq!(provider.api_type, ProviderType::OpenAiCompletions);
+    }
+
+    #[test]
+    fn catalog_from_payload_reconstructs_model() {
+        let payload = sample_payload();
+        let catalog = catalog_from_cache_payload(payload).expect("catalog build should succeed");
+
+        let model = catalog
+            .lookup_provider_model("openai", "gpt-4")
+            .expect("model should exist");
+        assert_eq!(model.max_input, 8192);
+        assert_eq!(model.max_output, 4096);
+        assert_eq!(model.modalities, Modality::TEXT);
+    }
+
+    #[test]
+    fn catalog_from_payload_rejects_out_of_range_provider_idx() {
+        use llm_coding_tools_core::models::ModelCatalogBuildError;
+
+        let payload = CatalogCachePayload {
+            providers: vec![CachedProviderRow {
+                provider_key: "test".to_string(),
+                api_url: "".to_string(),
+                env_vars: vec![],
+                api_type: ProviderType::Unknown,
+            }],
+            models: vec![CachedModelRow {
+                provider_idx: ProviderIdx::new(999),
+                model_key: "model".to_string(),
+                modalities_bits: Modality::TEXT.bits(),
+                max_input: 0,
+                max_output: 0,
+                temperature: None,
+                top_p: None,
+            }],
+        };
+
+        let result = catalog_from_cache_payload(payload);
+        assert!(matches!(
+            result,
+            Err(CatalogError::ModelCatalogBuild(
+                ModelCatalogBuildError::ProviderIdxOutOfRangeForModel { .. }
+            ))
+        ));
+    }
+
+    #[test]
+    fn all_known_provider_types_round_trip() {
+        let types = [
+            ProviderType::Unknown,
+            ProviderType::OpenAiCompletions,
+            ProviderType::OpenAiResponses,
+            ProviderType::Anthropic,
+            ProviderType::Google,
+            ProviderType::Groq,
+            ProviderType::Mistral,
+            ProviderType::Ollama,
+            ProviderType::Bedrock,
+            ProviderType::Azure,
+            ProviderType::OpenRouter,
+            ProviderType::HuggingFace,
+            ProviderType::Cohere,
+            ProviderType::ChatGptOAuth,
+            ProviderType::ClaudeCodeOAuth,
+            ProviderType::Antigravity,
+        ];
+        // Each type must survive a bitcode encode/decode pass and the catalog rebuild.
+        for provider_type in types {
+            let payload = CatalogCachePayload {
+                providers: vec![CachedProviderRow {
+                    provider_key: "test".to_string(),
+                    api_url: "".to_string(),
+                    env_vars: vec![],
+                    api_type: provider_type,
+                }],
+                models: vec![],
+            };
+            let decoded = decode_cache_payload(&encode_cache_payload(&payload)).expect("decode");
+            let catalog = catalog_from_cache_payload(decoded).expect("should succeed");
+            let provider = catalog
+                .lookup_provider("test")
+                .expect("provider should exist");
+            assert_eq!(
+                provider.api_type, provider_type,
+                "provider type should round-trip correctly"
+            );
+        }
+    }
+}
diff --git a/src/llm-coding-tools-models-dev/src/catalog/load_cache.rs b/src/llm-coding-tools-models-dev/src/catalog/load_cache.rs
new file mode 100644
index 00000000..8e807411
--- /dev/null
+++ b/src/llm-coding-tools-models-dev/src/catalog/load_cache.rs
@@ -0,0 +1,101 @@
+//! Loading a model catalog from cached on-disk data.
+//!
+//! This module handles the offline half of catalog loading: it decompresses the
+//! stored payload, decodes the serialized rows, and rebuilds a
+//! [`ModelCatalog`](llm_coding_tools_core::models::ModelCatalog).
+
+use crate::cache::format::CacheFileData;
+use crate::cache::payload::{catalog_from_cache_payload, decode_cache_payload};
+use crate::catalog::load_result::{CatalogLoadResult, CatalogLoadSource};
+use crate::error::{CatalogError, CatalogResult};
+
+/// Rebuilds a catalog from an already-read cache file.
+///
+/// The compressed payload is inflated with zstd, checked against the length
+/// recorded in the cache metadata, decoded from bitcode, and converted into a
+/// catalog that is returned tagged with `source`.
+///
+/// # Errors
+///
+/// Returns [`CatalogError`] if any of those steps fails.
+pub(crate) fn load_catalog_from_cache_file_data(
+    cache_file: &CacheFileData,
+    source: CatalogLoadSource,
+) -> CatalogResult<CatalogLoadResult> {
+    let declared_len = cache_file.payload_len_decompressed() as usize;
+    let raw_payload = zstd::bulk::decompress(cache_file.payload_compressed(), declared_len)
+        .map_err(|err| CatalogError::Zstd(err.to_string()))?;
+    if raw_payload.len() != declared_len {
+        const LENGTH_MISMATCH: &str = "cache payload length mismatch after decompression";
+        return Err(CatalogError::CacheFormat(LENGTH_MISMATCH));
+    }
+
+    let catalog = decode_cache_payload(&raw_payload).and_then(catalog_from_cache_payload)?;
+    Ok(CatalogLoadResult { catalog, source })
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::cache::format::{write_cache_file, CacheWriteInput};
+    use crate::cache::payload::{
+        encode_cache_payload, CachedModelRow, CachedProviderRow, CatalogCachePayload,
+    };
+    use llm_coding_tools_core::models::{Modality, ProviderIdx, ProviderType};
+    use tempfile::TempDir;
+
+    fn sample_payload() -> CatalogCachePayload {
+        CatalogCachePayload {
+            providers: vec![CachedProviderRow {
+                provider_key: "test".to_string(),
+                api_url: "https://test.example".to_string(),
+                env_vars: vec![],
+                api_type: ProviderType::OpenAiCompletions,
+            }],
+            models: vec![CachedModelRow {
+                provider_idx: ProviderIdx::new(0),
+                model_key: "model1".to_string(),
+                modalities_bits: Modality::TEXT.bits(),
+                max_input: 4096,
+                max_output: 2048,
+                temperature: None,
+                top_p: None,
+            }],
+        }
+    }
+
+    #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))]
+    async fn round_trip_through_cache_file() {
+        let temp = TempDir::new().expect("tempdir");
+        let path = temp.path().join("test.cache");
+
+        let payload = sample_payload();
+        let encoded = encode_cache_payload(&payload);
+        let compressed = zstd::bulk::compress(&encoded, 1).expect("compress");
+
+        write_cache_file(
+            &path,
+            &CacheWriteInput {
+                etag: Some(b"test-etag"),
+                payload_compressed: &compressed,
+                payload_len_decompressed: encoded.len(),
+            },
+        )
+        .await
+        .expect("write cache");
+
+        let cache_file = crate::cache::format::read_cache_file(&path)
+            .await
+            .expect("read cache");
+        let result =
+            load_catalog_from_cache_file_data(&cache_file, CatalogLoadSource::NotModifiedCache)
+                .expect("load from cache");
+
+        assert_eq!(result.source, CatalogLoadSource::NotModifiedCache);
+        let provider = result
+            .catalog
+            .lookup_provider("test")
+            .expect("provider should exist");
+        assert_eq!(provider.api_type, ProviderType::OpenAiCompletions);
+    }
+}
diff --git a/src/llm-coding-tools-models-dev/src/catalog/load_result.rs b/src/llm-coding-tools-models-dev/src/catalog/load_result.rs
new file mode 100644
index 00000000..aee2d46e
--- /dev/null
+++ b/src/llm-coding-tools-models-dev/src/catalog/load_result.rs
@@ -0,0 +1,52 @@
+//! Result types for catalog load operations.
+
+use llm_coding_tools_core::models::ModelCatalog;
+
+/// Outcome of a successful catalog load.
+///
+/// Bundles the catalog with a [`CatalogLoadSource`] telling whether it was
+/// downloaded, reused after a `304 Not Modified`, or served as a fallback.
+pub struct CatalogLoadResult {
+    /// The loaded model catalog ready for lookups.
+    pub catalog: ModelCatalog,
+
+    /// How this catalog was obtained.
+    pub source: CatalogLoadSource,
+}
+
+/// Describes where a successfully loaded catalog came from.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum CatalogLoadSource {
+    /// The server answered `200 OK`; the body was used and the cache rewritten.
+    Downloaded,
+
+    /// The server answered `304 Not Modified`; the on-disk cache was reused.
+    NotModifiedCache,
+
+    /// The request failed (or hit a transient `429`/`5xx` status), so a valid
+    /// cached copy was loaded as a fallback.
+    FallbackCache,
+}
+
+impl CatalogLoadSource {
+    /// Returns true only for [`Self::Downloaded`], i.e. data fetched from the network.
+    #[inline]
+    pub fn is_fresh(&self) -> bool {
+        *self == Self::Downloaded
+    }
+
+    /// Returns true if the catalog was read from the on-disk cache.
+    ///
+    /// Covers both cache-backed sources: [`Self::NotModifiedCache`] (revalidated
+    /// by the server) and [`Self::FallbackCache`] (used after a failure).
+    #[inline]
+    pub fn is_cached(&self) -> bool {
+        *self == Self::NotModifiedCache || *self == Self::FallbackCache
+    }
+
+    /// Returns true only for [`Self::FallbackCache`], i.e. the network path failed.
+    #[inline]
+    pub fn is_fallback(&self) -> bool {
+        *self == Self::FallbackCache
+    }
+}
diff --git a/src/llm-coding-tools-models-dev/src/catalog/mod.rs b/src/llm-coding-tools-models-dev/src/catalog/mod.rs
new file mode 100644
index 00000000..1764915d
--- /dev/null
+++ b/src/llm-coding-tools-models-dev/src/catalog/mod.rs
@@ -0,0 +1,215 @@
+//! Catalog loading and synchronization with models.dev.
+//!
+//! Flow is simple:
+//! - Try online sync first using conditional HTTP (`If-None-Match`)
+//! - Reuse cache on `304 Not Modified`
+//! - Fall back to cached data if the network path fails
+
+mod load_cache;
+mod load_result;
+mod sync;
+
+#[cfg(test)]
+mod test_utils;
+
+pub use load_result::{CatalogLoadResult, CatalogLoadSource};
+
+use crate::cache::shared_cache_path;
+use crate::error::CatalogError;
+use std::path::Path;
+
+/// Entry point for loading models.dev catalogs.
+///
+/// A unit struct used as a namespace: its associated functions load the catalog
+/// from the default shared cache location or from a caller-supplied path.
+pub struct ModelsDevCatalog;
+
+impl ModelsDevCatalog {
+    /// Loads the catalog from the default shared cache location.
+    ///
+    /// This is the primary entry point for most use cases. It will:
+    /// 1. Check for an existing cache and extract its ETag
+    /// 2. Send a conditional GET request with `If-None-Match`
+    /// 3. On `200 OK`: download, map the API payload into catalog sources,
+    ///    cache it, and return fresh data
+    /// 4. On `304 Not Modified`: decode and return cached data
+    /// 5. On a failed request or a transient `429`/`5xx` status: fall back to cached data if available
+    ///
+    /// The cache location is determined by:
+    /// - `LLM_CODING_TOOLS_MODELS_DEV_CACHE_PATH` environment variable (if set)
+    /// - Platform cache directory + `llm-coding-tools/models.dev.catalog.v1.cache`
+    ///
+    /// # Returns
+    ///
+    /// A [`CatalogLoadResult`] containing the loaded catalog and information
+    /// about how it was loaded (downloaded fresh, from cache, or fallback).
+    ///
+    /// # Errors
+    ///
+    /// Returns [`CatalogError`] when:
+    /// - The cache path cannot be resolved (empty override or no platform cache directory)
+    /// - An HTTP error occurs and no cache is available for fallback
+    /// - The cache is corrupted and cannot be decoded
+    /// - Catalog construction from mapped catalog sources fails
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use llm_coding_tools_models_dev::ModelsDevCatalog;
+    ///
+    /// # #[cfg(feature = "tokio")]
+    /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
+    /// let result = ModelsDevCatalog::load().await?;
+    ///
+    /// // Use the catalog
+    /// if let Some(entry) = result.catalog.lookup("openai", "gpt-4") {
+    ///     println!("API URL: {}", entry.0.api_url);
+    /// }
+    /// # Ok(())
+    /// # }
+    /// #
+    /// # #[cfg(feature = "blocking")]
+    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
+    /// # let result = ModelsDevCatalog::load()?;
+    /// # // Use the catalog
+    /// # if let Some(entry) = result.catalog.lookup("openai", "gpt-4") {
+    /// #     println!("API URL: {}", entry.0.api_url);
+    /// # }
+    /// # Ok(())
+    /// # }
+    /// ```
+    #[maybe_async::maybe_async]
+    pub async fn load() -> Result<CatalogLoadResult, CatalogError> {
+        let path = shared_cache_path()?;
+        Self::load_at(path).await
+    }
+
+    /// Loads the catalog from a specific cache file path.
+    ///
+    /// Behaves like [`load`](Self::load) (same endpoint, same sync flow) except
+    /// that the cache file lives at `path` instead of the shared location. Useful for:
+    /// - Testing with temporary cache files
+    /// - Custom deployment scenarios
+    /// - Isolated cache locations
+    ///
+    /// # Parameters
+    ///
+    /// * `path` - The path to the cache file. Parent directories will be
+    ///   created if they don't exist.
+    ///
+    /// # Returns
+    ///
+    /// A [`CatalogLoadResult`] containing the loaded catalog and source
+    /// information.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`CatalogError`] under the same conditions as [`load`](Self::load),
+    /// plus:
+    /// - The parent directory cannot be created
+    /// - The path is not a valid file path
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use llm_coding_tools_models_dev::ModelsDevCatalog;
+    /// use std::path::PathBuf;
+    ///
+    /// # #[cfg(feature = "tokio")]
+    /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
+    /// let cache_path = PathBuf::from("/tmp/my-cache.cache");
+    /// let result = ModelsDevCatalog::load_at(&cache_path).await?;
+    ///
+    /// // Use the catalog
+    /// if let Some(entry) = result.catalog.lookup("openai", "gpt-4") {
+    ///     println!("API URL: {}", entry.0.api_url);
+    /// }
+    /// # Ok(())
+    /// # }
+    /// #
+    /// # #[cfg(feature = "blocking")]
+    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
+    /// # let cache_path = PathBuf::from("/tmp/my-cache.cache");
+    /// # let result = ModelsDevCatalog::load_at(&cache_path)?;
+    /// # if let Some(entry) = result.catalog.lookup("openai", "gpt-4") {
+    /// #     println!("API URL: {}", entry.0.api_url);
+    /// # }
+    /// # Ok(())
+    /// # }
+    /// ```
+    #[maybe_async::maybe_async]
+    pub async fn load_at(path: impl AsRef<Path>) -> Result<CatalogLoadResult, CatalogError> {
+        sync::load_catalog_at_path(path.as_ref()).await
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::cache::CACHE_PATH_ENV_VAR;
+    use llm_coding_tools_core::models::ProviderType;
+    use tempfile::TempDir;
+
+    /// Sets the cache-path env var for a test and restores the previous state on drop.
+    struct EnvGuard {
+        cache_path_var: Option<std::ffi::OsString>,
+    }
+
+    impl EnvGuard {
+        fn new(value: Option<&str>) -> Self {
+            let cache_path_var = std::env::var_os(CACHE_PATH_ENV_VAR);
+            match value {
+                Some(v) => std::env::set_var(CACHE_PATH_ENV_VAR, v),
+                None => std::env::remove_var(CACHE_PATH_ENV_VAR),
+            }
+            Self { cache_path_var }
+        }
+    }
+
+    impl Drop for EnvGuard {
+        fn drop(&mut self) {
+            // Clear test URL override
+            super::sync::set_test_models_dev_api_url(None);
+
+            // Restore the saved value (kept as OsString so non-Unicode values survive)
+            match &self.cache_path_var {
+                Some(v) => std::env::set_var(CACHE_PATH_ENV_VAR, v),
+                None => std::env::remove_var(CACHE_PATH_ENV_VAR),
+            }
+        }
+    }
+
+    use super::test_utils::{sample_api_json, start_mock_server, MockResponse};
+
+    #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))]
+    #[serial_test::serial]
+    async fn facade_load_uses_shared_cache_path() {
+        let temp = TempDir::new().expect("tempdir");
+        let cache_path = temp.path().join("facade-test.cache");
+        let _guard = EnvGuard::new(Some(cache_path.to_str().unwrap()));
+
+        // Start mock server and set URL override
+        let body = String::from_utf8_lossy(sample_api_json()).to_string();
+        let (_handle, url) = start_mock_server(MockResponse::Ok {
+            etag: "\"facade-test-etag\"",
+            body,
+        });
+        super::sync::set_test_models_dev_api_url(Some(url));
+
+        // Call public facade
+        let result = ModelsDevCatalog::load().await.expect("load should succeed");
+
+        assert_eq!(result.source, CatalogLoadSource::Downloaded);
+        let provider = result
+            .catalog
+            .lookup_provider("openai")
+            .expect("openai provider should exist");
+        assert_eq!(provider.api_type, ProviderType::OpenAiCompletions);
+
+        // Verify cache was written
+        assert!(
+            cache_path.exists(),
+            "cache file should exist at shared path"
+        );
+    }
+}
diff --git a/src/llm-coding-tools-models-dev/src/catalog/sync.rs b/src/llm-coding-tools-models-dev/src/catalog/sync.rs
new file mode 100644
index 00000000..d63023d6
--- /dev/null
+++ b/src/llm-coding-tools-models-dev/src/catalog/sync.rs
@@ -0,0 +1,449 @@
+//! Catalog synchronization against the remote models.dev API.
+//!
+//! This module owns the online-first load path used by
+//! [`ModelsDevCatalog`](crate::catalog::ModelsDevCatalog). It reads any cached
+//! container in one shot, sends a conditional request with the cached ETag when
+//! available, refreshes the cache on `200 OK`, reuses it on `304 Not Modified`,
+//! and falls back to cached data when the request fails.
+
+use crate::api::catalog_sources::cache_payload_from_api_json_bytes;
+use crate::cache::format::{read_cache_file, write_cache_file, CacheFileData, CacheWriteInput};
+use crate::cache::payload::{catalog_from_cache_payload, encode_cache_payload};
+use crate::catalog::load_cache::load_catalog_from_cache_file_data;
+use crate::catalog::load_result::{CatalogLoadResult, CatalogLoadSource};
+use crate::error::{CatalogError, CatalogResult};
+use reqwest::header::{ETAG, IF_NONE_MATCH};
+use reqwest::StatusCode;
+use std::borrow::Cow;
+use std::io::ErrorKind;
+use std::path::Path;
+
+/// Default production endpoint for the models.dev catalog snapshot.
+const MODELS_DEV_API_URL: &str = "https://models.dev/api.json";
+
+/// Seconds allowed for both the connect phase and the whole HTTP request.
+const REQUEST_TIMEOUT_SECS: u64 = 30;
+
+#[cfg(test)]
+static TEST_MODELS_DEV_API_URL: std::sync::Mutex<Option<String>> = std::sync::Mutex::new(None);
+
+#[cfg(test)]
+/// Test-only: sets (`Some`) or clears (`None`) the process-wide endpoint override.
+pub(crate) fn set_test_models_dev_api_url(url: Option<String>) {
+    *TEST_MODELS_DEV_API_URL.lock().unwrap() = url;
+}
+
+/// Picks the endpoint to query: the test override when one is set, else the default.
+fn models_dev_api_url() -> Cow<'static, str> {
+    #[cfg(test)]
+    if let Some(test_url) = TEST_MODELS_DEV_API_URL.lock().unwrap().as_ref() {
+        return Cow::Owned(test_url.clone());
+    }
+
+    Cow::Borrowed(MODELS_DEV_API_URL)
+}
+
+/// Decides what to return once the HTTP request itself has failed.
+///
+/// Preference order: a catalog rebuilt from `cache_file` (reported as
+/// [`CatalogLoadSource::FallbackCache`]), then the error hit while reading the
+/// cache, and only then `request_error`, so callers keep working from the
+/// cached catalog whenever one can be loaded.
+fn load_after_request_failure(
+    request_error: reqwest::Error,
+    cache_file: Option<&CacheFileData>,
+    cache_error: Option<CatalogError>,
+) -> CatalogResult<CatalogLoadResult> {
+    match (cache_file, cache_error) {
+        (Some(cached), _) => {
+            load_catalog_from_cache_file_data(cached, CatalogLoadSource::FallbackCache)
+        }
+        (None, Some(read_error)) => Err(read_error),
+        (None, None) => Err(CatalogError::Reqwest(request_error)),
+    }
+}
+
+#[inline]
+fn is_transient_status(status: StatusCode) -> bool {
+    status.is_server_error() || status == StatusCode::TOO_MANY_REQUESTS // 5xx or 429
+}
+
+#[maybe_async::maybe_async]
+/// Loads the catalog cached at `path` from the active models.dev endpoint.
+///
+/// The endpoint comes from [`models_dev_api_url`] (production unless overridden in tests).
+///
+/// # Errors
+///
+/// Returns the same errors as [`load_catalog_from_url`].
+pub(crate) async fn load_catalog_at_path(path: &Path) -> CatalogResult<CatalogLoadResult> {
+    load_catalog_from_url(path, models_dev_api_url().as_ref()).await
+}
+
+#[maybe_async::maybe_async]
+/// Synchronizes the cache at `path` against `url` and returns a catalog.
+///
+/// The sync flow is:
+/// - read any existing cache file in one whole-file read
+/// - send `If-None-Match` when the cache includes an ETag
+/// - on `200 OK`, decode the response and rewrite the cache
+/// - on `304 Not Modified`, load the existing cache
+/// - on client-setup, request, response-body, or transient-status failure, fall back to the cache
+///
+/// # Performance
+///
+/// Cache probing performs one up-front whole-file read through
+/// [`read_cache_file`]. models.dev changes infrequently, so cache hits are
+/// expected to be common, and [`crate::cache::payload`] documents typical
+/// compressed payload sizes of about 23-32 kB. That makes a single sequential
+/// read generally the faster hot path on modern NVMe-backed systems.
+///
+/// # Errors
+///
+/// Returns [`CatalogError`] when the client, request, or cache read fails
+/// without a usable fallback, response data cannot be decoded, the cache cannot
+/// be written, or the server responds with an unexpected status.
+pub(crate) async fn load_catalog_from_url(
+    path: &Path,
+    url: &str,
+) -> CatalogResult<CatalogLoadResult> {
+    let mut cache_file = None;
+    let mut cache_error = None;
+    match read_cache_file(path).await {
+        Ok(file) => cache_file = Some(file),
+        Err(CatalogError::Io(error)) if error.kind() == ErrorKind::NotFound => {} // no cache yet
+        Err(error) => cache_error = Some(error),
+    }
+
+    // A client that cannot be built is handled like a failed request, not a panic.
+    #[cfg(feature = "tokio")]
+    let builder = reqwest::Client::builder();
+    #[cfg(feature = "blocking")]
+    let builder = reqwest::blocking::Client::builder();
+    let timeout = std::time::Duration::from_secs(REQUEST_TIMEOUT_SECS);
+    let client = match builder.connect_timeout(timeout).timeout(timeout).build() {
+        Ok(client) => client,
+        Err(error) => {
+            return load_after_request_failure(error, cache_file.as_ref(), cache_error);
+        }
+    };
+
+    let mut request = client.get(url);
+    if let Some(etag) = cache_file.as_ref().and_then(|file| file.etag_bytes()) {
+        request = request.header(IF_NONE_MATCH, etag);
+    }
+
+    let response = match request.send().await {
+        Ok(response) => response,
+        Err(error) => {
+            return load_after_request_failure(error, cache_file.as_ref(), cache_error);
+        }
+    };
+    match response.status() {
+        StatusCode::OK => {
+            let response_etag: Option<Vec<u8>> = response
+                .headers()
+                .get(ETAG)
+                .map(|value| value.as_bytes().to_vec());
+            let body = match response.bytes().await {
+                Ok(body) => body,
+                Err(error) => {
+                    return load_after_request_failure(error, cache_file.as_ref(), cache_error);
+                }
+            };
+            let payload = cache_payload_from_api_json_bytes(body.as_ref())?;
+            let payload_encoded = encode_cache_payload(&payload);
+            let catalog = catalog_from_cache_payload(payload)?;
+            let payload_compressed = zstd::bulk::compress(payload_encoded.as_slice(), 17)
+                .map_err(|error| CatalogError::Zstd(error.to_string()))?;
+
+            write_cache_file(
+                path,
+                &CacheWriteInput {
+                    etag: response_etag.as_deref(),
+                    payload_compressed: &payload_compressed,
+                    payload_len_decompressed: payload_encoded.len(),
+                },
+            )
+            .await?;
+
+            Ok(CatalogLoadResult {
+                catalog,
+                source: CatalogLoadSource::Downloaded,
+            })
+        }
+        StatusCode::NOT_MODIFIED => {
+            if let Some(cache_file) = cache_file.as_ref() {
+                load_catalog_from_cache_file_data(cache_file, CatalogLoadSource::NotModifiedCache)
+            } else if let Some(error) = cache_error {
+                Err(error)
+            } else {
+                Err(CatalogError::CacheFormat(
+                    "received 304 but no cached payload is available",
+                ))
+            }
+        }
+        status if is_transient_status(status) => {
+            if let Some(cache_file) = cache_file.as_ref() {
+                load_catalog_from_cache_file_data(cache_file, CatalogLoadSource::FallbackCache)
+            } else if let Some(error) = cache_error {
+                Err(error)
+            } else {
+                Err(CatalogError::Configuration(format!(
+                    "unexpected catalog sync status: {status}",
+                )))
+            }
+        }
+        status => Err(CatalogError::Configuration(format!(
+            "unexpected catalog sync status: {status}",
+        ))),
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::super::test_utils::{sample_api_json, start_mock_server, MockResponse};
+    use super::*;
+    use crate::cache::format::CacheWriteInput;
+    use crate::cache::payload::{
+        encode_cache_payload, CachedModelRow, CachedProviderRow, CatalogCachePayload,
+    };
+    use llm_coding_tools_core::models::{Modality, ProviderIdx, ProviderType};
+    use tempfile::TempDir;
+
+    // A `200 OK` response must yield a `Downloaded` catalog and persist the response ETag.
+    #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))]
+    async fn sync_returns_downloaded_on_200() {
+        let workdir = TempDir::new().expect("tempdir");
+        let cache_path = workdir.path().join("test.cache");
+
+        // Serve the sample API JSON together with a known ETag.
+        let fresh_json = String::from_utf8_lossy(sample_api_json()).into_owned();
+        let canned = MockResponse::Ok {
+            etag: "\"test-etag-123\"",
+            body: fresh_json,
+        };
+        let (_server, endpoint) = start_mock_server(canned);
+
+        let loaded = load_catalog_from_url(&cache_path, &endpoint)
+            .await
+            .expect("sync should succeed");
+
+        // The catalog has to come from the network, not from a cache file.
+        assert_eq!(loaded.source, CatalogLoadSource::Downloaded);
+        let openai = loaded
+            .catalog
+            .lookup_provider("openai")
+            .expect("openai provider should exist");
+        assert_eq!(openai.api_type, ProviderType::OpenAiCompletions);
+        assert_eq!(openai.api_url, "https://api.openai.com/v1");
+
+        // The cache file written during the sync must carry the server's ETag.
+        let written = read_cache_file(&cache_path)
+            .await
+            .expect("cache should exist");
+        let expected_etag: &[u8] = b"\"test-etag-123\"";
+        assert_eq!(written.etag_bytes(), Some(expected_etag));
+    }
+
+    // A `304 Not Modified` response must be answered from the pre-seeded cache file.
+    //
+    // NOTE(review): the mock server does not assert that `If-None-Match` was actually
+    // sent, so this test only pins the 304 handling path.
+    #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))]
+    async fn sync_returns_cached_on_304_with_if_none_match() {
+        let temp = TempDir::new().expect("tempdir");
+        let cache_path = temp.path().join("test.cache");
+
+        // Pre-seed the cache through the shared helper instead of duplicating its
+        // payload here; it writes the `openai`/`gpt-4` catalog with the ETag
+        // `"cached-etag-456"`.
+        seed_cache(&cache_path).await;
+
+        // Server returns 304 Not Modified, echoing the cached ETag.
+        let (_handle, url) = start_mock_server(MockResponse::NotModified {
+            etag: "\"cached-etag-456\"",
+        });
+
+        let result = load_catalog_from_url(&cache_path, &url)
+            .await
+            .expect("sync should succeed");
+
+        // Verify source is NotModifiedCache (loaded from local file)
+        assert_eq!(result.source, CatalogLoadSource::NotModifiedCache);
+        let provider = result
+            .catalog
+            .lookup_provider("openai")
+            .expect("openai provider should exist");
+        assert_eq!(provider.api_type, ProviderType::OpenAiCompletions);
+    }
+
+    // Returns a localhost URL whose port was bound once and released again, so a
+    // request to it is expected to be refused.
+    fn refused_local_url() -> String {
+        let port = {
+            // The probe listener is dropped at the end of this scope, freeing the port.
+            let probe = std::net::TcpListener::bind("127.0.0.1:0").expect("bind");
+            probe.local_addr().expect("local addr").port()
+        };
+        format!("http://127.0.0.1:{port}/api.json")
+    }
+
+    // Writes a valid cache file at `cache_path` containing one provider (`openai`) with
+    // one model (`gpt-4`), stored under the ETag `"cached-etag-456"`.
+    #[maybe_async::maybe_async]
+    async fn seed_cache(cache_path: &Path) {
+        let openai = CachedProviderRow {
+            provider_key: "openai".to_string(),
+            api_url: "https://api.openai.com/v1".to_string(),
+            env_vars: vec!["OPENAI_API_KEY".to_string()],
+            api_type: ProviderType::OpenAiCompletions,
+        };
+        let gpt4 = CachedModelRow {
+            provider_idx: ProviderIdx::new(0),
+            model_key: "gpt-4".to_string(),
+            modalities_bits: Modality::TEXT.bits(),
+            max_input: 8192,
+            max_output: 4096,
+            temperature: None,
+            top_p: None,
+        };
+        let payload = CatalogCachePayload {
+            providers: vec![openai],
+            models: vec![gpt4],
+        };
+
+        // Encode, then compress at level 1 before handing the bytes to the cache writer.
+        let raw = encode_cache_payload(&payload);
+        let packed = zstd::bulk::compress(&raw, 1).expect("compress");
+        let input = CacheWriteInput {
+            etag: Some(b"\"cached-etag-456\""),
+            payload_compressed: &packed,
+            payload_len_decompressed: raw.len(),
+        };
+
+        crate::cache::format::write_cache_file(cache_path, &input)
+            .await
+            .expect("seed cache");
+    }
+
+    // 429 and 503 count as transient; 404 does not.
+    #[test]
+    fn transient_status_detection_matches_retryable_responses() {
+        let retryable = [
+            StatusCode::TOO_MANY_REQUESTS,
+            StatusCode::SERVICE_UNAVAILABLE,
+        ];
+        for status in retryable {
+            assert!(is_transient_status(status), "{status:?} should be transient");
+        }
+
+        assert!(!is_transient_status(StatusCode::NOT_FOUND));
+    }
+
+    // A failed request with a valid cache on disk must fall back to that cache.
+    #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))]
+    async fn sync_returns_fallback_cache_on_request_failure_with_valid_cache() {
+        let workdir = TempDir::new().expect("tempdir");
+        let cache_path = workdir.path().join("test.cache");
+        seed_cache(&cache_path).await;
+
+        // The port behind this URL was released again, so the request should fail.
+        let unreachable_url = refused_local_url();
+        let loaded = load_catalog_from_url(&cache_path, &unreachable_url)
+            .await
+            .expect("fallback should succeed");
+
+        assert_eq!(loaded.source, CatalogLoadSource::FallbackCache);
+        let openai = loaded.catalog.lookup_provider("openai");
+        assert!(openai.is_some());
+    }
+
+    // A transient status (503) with a valid cache on disk must fall back to that cache.
+    #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))]
+    async fn sync_returns_fallback_cache_on_transient_status_with_valid_cache() {
+        let workdir = TempDir::new().expect("tempdir");
+        let cache_path = workdir.path().join("test.cache");
+        seed_cache(&cache_path).await;
+
+        let unavailable = MockResponse::Status {
+            code: 503,
+            reason: "Service Unavailable",
+        };
+        let (_server, endpoint) = start_mock_server(unavailable);
+
+        let loaded = load_catalog_from_url(&cache_path, &endpoint)
+            .await
+            .expect("fallback should succeed");
+
+        assert_eq!(loaded.source, CatalogLoadSource::FallbackCache);
+        let openai = loaded.catalog.lookup_provider("openai");
+        assert!(openai.is_some());
+    }
+
+    // A transient status (429) with an unreadable cache file must be reported as a
+    // `CacheFormat` error.
+    #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))]
+    async fn sync_returns_cache_error_on_transient_status_with_corrupt_cache() {
+        let workdir = TempDir::new().expect("tempdir");
+        let cache_path = workdir.path().join("corrupt.cache");
+
+        // Eleven zero bytes stand in for a corrupt cache file.
+        let garbage = [0_u8; 11];
+        std::fs::write(&cache_path, garbage).expect("write corrupt cache");
+
+        let throttled = MockResponse::Status {
+            code: 429,
+            reason: "Too Many Requests",
+        };
+        let (_server, endpoint) = start_mock_server(throttled);
+
+        let outcome = load_catalog_from_url(&cache_path, &endpoint).await;
+        let error = match outcome {
+            Ok(_) => panic!("transient status with corrupt cache should error"),
+            Err(error) => error,
+        };
+        assert!(matches!(error, CatalogError::CacheFormat(_)));
+    }
+
+    // A `200 OK` whose body is shorter than its declared length must fall back to the
+    // valid cache on disk.
+    #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))]
+    async fn sync_returns_fallback_cache_on_body_read_failure_with_valid_cache() {
+        let workdir = TempDir::new().expect("tempdir");
+        let cache_path = workdir.path().join("test.cache");
+        seed_cache(&cache_path).await;
+
+        // Declare 32 bytes but send a single one, so reading the body cannot complete.
+        let truncated = MockResponse::PartialOk {
+            etag: "\"fresh-etag\"",
+            body: "{".to_string(),
+            content_length: 32,
+        };
+        let (_server, endpoint) = start_mock_server(truncated);
+
+        let loaded = load_catalog_from_url(&cache_path, &endpoint)
+            .await
+            .expect("fallback should succeed");
+
+        assert_eq!(loaded.source, CatalogLoadSource::FallbackCache);
+        let openai = loaded.catalog.lookup_provider("openai");
+        assert!(openai.is_some());
+    }
+
+    #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))]
+    async fn sync_returns_request_error_when_request_fails_without_cache() {
+        let temp = TempDir::new().expect("tempdir");
+        let cache_path = temp.path().join("missing.cache");
+
+        match load_catalog_from_url(&cache_path, &refused_local_url()).await {
+            Err(error) => assert!(matches!(error, CatalogError::Reqwest(_))),
+            Ok(_) => panic!("request failure without cache should error"),
+        }
+    }
+
+    // A failed request with an unreadable cache file must be reported as a
+    // `CacheFormat` error rather than the request error.
+    #[maybe_async::test(feature = "blocking", async(feature = "tokio", tokio::test))]
+    async fn sync_returns_cache_error_when_request_fails_with_corrupt_cache() {
+        let workdir = TempDir::new().expect("tempdir");
+        let cache_path = workdir.path().join("corrupt.cache");
+
+        // Eleven zero bytes stand in for a corrupt cache file.
+        let garbage = [0_u8; 11];
+        std::fs::write(&cache_path, garbage).expect("write corrupt cache");
+
+        let unreachable_url = refused_local_url();
+        let outcome = load_catalog_from_url(&cache_path, &unreachable_url).await;
+        let error = match outcome {
+            Ok(_) => panic!("request failure with corrupt cache should error"),
+            Err(error) => error,
+        };
+        assert!(matches!(error, CatalogError::CacheFormat(_)));
+    }
+}
diff --git a/src/llm-coding-tools-models-dev/src/catalog/test_utils.rs b/src/llm-coding-tools-models-dev/src/catalog/test_utils.rs
new file mode 100644
index 00000000..a7fec883
--- /dev/null
+++ b/src/llm-coding-tools-models-dev/src/catalog/test_utils.rs
@@ -0,0 +1,108 @@
+use std::io::{BufRead, Write};
+
+/// Canned HTTP response that `start_mock_server` writes back to its single client.
+pub enum MockResponse {
+    /// `200 OK` with an `ETag` header and the complete `body`; the `Content-Length`
+    /// header is computed from `body.len()`.
+    Ok {
+        etag: &'static str,
+        body: String,
+    },
+    /// `200 OK` whose `Content-Length` header is the caller-supplied `content_length`
+    /// instead of the real body length, so the declared and sent sizes can differ.
+    PartialOk {
+        etag: &'static str,
+        body: String,
+        content_length: usize,
+    },
+    /// `304 Not Modified` carrying only an `ETag` header.
+    NotModified {
+        etag: &'static str,
+    },
+    /// Arbitrary status line built from `code` and `reason`, sent with
+    /// `Content-Length: 0` and no body.
+    Status {
+        code: u16,
+        reason: &'static str,
+    },
+}
+
+/// Returns a small JSON fixture with a single provider (`openai`) that exposes a
+/// single model (`gpt-4`), including its modalities and token limits.
+pub fn sample_api_json() -> &'static [u8] {
+    br#"
+        {
+            "openai": {
+                "id": "openai",
+                "npm": "@ai-sdk/openai",
+                "api": "https://api.openai.com/v1",
+                "env": ["OPENAI_API_KEY"],
+                "models": {
+                    "gpt-4": {
+                        "modalities": {
+                            "input": ["text"],
+                            "output": ["text"]
+                        },
+                        "limit": {
+                            "context": 8192,
+                            "input": 8192,
+                            "output": 4096
+                        }
+                    }
+                }
+            }
+        }
+        "#
+}
+
+/// Starts a one-shot HTTP mock server on an ephemeral localhost port.
+///
+/// The spawned thread accepts exactly one connection, consumes the request head up to
+/// the blank line (or EOF), writes the canned `response`, and then exits; the
+/// connection is dropped when the thread finishes.
+///
+/// Returns the server thread's join handle and the URL to request
+/// (`http://127.0.0.1:<port>/api.json`).
+///
+/// # Panics
+///
+/// Panics if binding or querying the local address fails. The server thread panics if
+/// accepting the connection, reading the request, or writing the response fails.
+pub fn start_mock_server(response: MockResponse) -> (std::thread::JoinHandle<()>, String) {
+    let listener = std::net::TcpListener::bind("127.0.0.1:0").expect("bind");
+    let port = listener.local_addr().expect("local addr").port();
+    let url = format!("http://127.0.0.1:{port}/api.json");
+
+    let handle = std::thread::spawn(move || {
+        let (mut stream, _) = listener.accept().expect("accept");
+
+        // Consume the request head. Its contents do not influence the canned reply, so
+        // the lines are discarded instead of being accumulated.
+        {
+            let mut reader = std::io::BufReader::new(&stream);
+            let mut line = String::new();
+            loop {
+                line.clear();
+                let read = reader.read_line(&mut line).expect("read line");
+                // `read == 0` is EOF; a bare CRLF terminates the header section.
+                if read == 0 || line == "\r\n" {
+                    break;
+                }
+            }
+        }
+
+        // Build the full response text first so it is written and flushed in one place.
+        let reply = match response {
+            MockResponse::Ok { etag, body } => ok_response_text(etag, body.len(), &body),
+            MockResponse::PartialOk {
+                etag,
+                body,
+                content_length,
+            } => ok_response_text(etag, content_length, &body),
+            MockResponse::NotModified { etag } => {
+                format!("HTTP/1.1 304 Not Modified\r\nETag: {etag}\r\n\r\n")
+            }
+            MockResponse::Status { code, reason } => {
+                format!("HTTP/1.1 {code} {reason}\r\nContent-Length: 0\r\n\r\n")
+            }
+        };
+
+        stream.write_all(reply.as_bytes()).expect("write");
+        stream.flush().expect("flush");
+    });
+
+    (handle, url)
+}
+
+/// Formats a `200 OK` JSON response with the given `ETag`, declared length, and body.
+fn ok_response_text(etag: &str, content_length: usize, body: &str) -> String {
+    format!(
+        "HTTP/1.1 200 OK\r\nContent-Type: application/json\r\nETag: {etag}\r\nContent-Length: {content_length}\r\n\r\n{body}"
+    )
+}
diff --git a/src/llm-coding-tools-models-dev/src/error.rs b/src/llm-coding-tools-models-dev/src/error.rs
new file mode 100644
index 00000000..3b3fdfb4
--- /dev/null
+++ b/src/llm-coding-tools-models-dev/src/error.rs
@@ -0,0 +1,52 @@
+//! Error types for models.dev catalog operations.
+
+use llm_coding_tools_core::models::ModelCatalogBuildError;
+use thiserror::Error;
+
+/// Errors that can occur during catalog loading and synchronization.
+#[derive(Debug, Error)]
+pub enum CatalogError {
+    /// The platform's cache directory could not be determined.
+    #[error("cache directory not found on this platform")]
+    CachePathNotFound,
+
+    /// A configuration error occurred (e.g., invalid environment variable).
+    ///
+    /// The sync path also uses this variant to report an unexpected HTTP status.
+    #[error("configuration error: {0}")]
+    Configuration(String),
+
+    /// An I/O error occurred while reading or writing the cache.
+    #[error("I/O error: {0}")]
+    Io(#[from] std::io::Error),
+
+    /// An HTTP error occurred during the sync request.
+    #[error("HTTP error: {0}")]
+    Reqwest(#[from] reqwest::Error),
+
+    /// A JSON parse error occurred while decoding models.dev API JSON.
+    #[error("JSON parse error: {0}")]
+    Json(#[from] serde_json::Error),
+
+    /// A zstd compression or decompression error occurred.
+    ///
+    /// The sync path maps compression failures to this variant as well, so the
+    /// message does not name a direction.
+    #[error("zstd error: {0}")]
+    Zstd(String),
+
+    /// A bitcode deserialization error occurred.
+    #[error("decode error: {0}")]
+    BitcodeDecode(String),
+
+    /// The on-disk cache file is malformed or incompatible.
+    #[error("cache format error: {0}")]
+    CacheFormat(&'static str),
+
+    /// The catalog failed to build from source rows.
+    #[error("catalog build error: {0}")]
+    ModelCatalogBuild(#[from] ModelCatalogBuildError),
+
+    /// A spawn_blocking task failed.
+    #[cfg(feature = "tokio")]
+    #[error("blocking task failed: {0}")]
+    JoinHandle(#[from] tokio::task::JoinError),
+}
+
+/// Convenience type alias for catalog operations.
+///
+/// Shorthand for `Result<T, CatalogError>`.
+pub type CatalogResult<T> = Result<T, CatalogError>;
diff --git a/src/llm-coding-tools-models-dev/src/fs/blocking_impl.rs b/src/llm-coding-tools-models-dev/src/fs/blocking_impl.rs
new file mode 100644
index 00000000..01252a9b
--- /dev/null
+++ b/src/llm-coding-tools-models-dev/src/fs/blocking_impl.rs
@@ -0,0 +1,36 @@
+//! Blocking/sync filesystem operations.
+
+use std::io::{ErrorKind, Read as _};
+use std::path::Path;
+
+/// Reads the whole file at `path` into a single allocation sized from its metadata.
+///
+/// # Errors
+///
+/// Returns any error from opening the file, querying its metadata, or `read_exact`,
+/// and an [`ErrorKind::InvalidData`] error when the file length does not fit in `usize`.
+///
+/// # Concurrent modification
+///
+/// The length is snapshotted before reading, so bytes appended after the metadata call
+/// would be missed. This relies on the codebase convention that every write goes to a
+/// temp file first and is then renamed onto the target, so files are never appended to
+/// in place and that race cannot occur.
+#[inline]
+pub(crate) fn read(path: impl AsRef<Path>) -> std::io::Result<Box<[u8]>> {
+    let mut file = std::fs::File::open(path)?;
+
+    let len = match usize::try_from(file.metadata()?.len()) {
+        Ok(len) => len,
+        Err(_) => {
+            return Err(std::io::Error::new(
+                ErrorKind::InvalidData,
+                "file is too large to fit in memory",
+            ));
+        }
+    };
+
+    let mut buffer = super::alloc_uninit_u8_slice(len);
+    // An empty file needs no read; the zero-length buffer is returned as is.
+    if len > 0 {
+        file.read_exact(super::uninit_u8_slice_as_mut_bytes(&mut buffer))?;
+    }
+
+    Ok(super::assume_init_u8_slice(buffer))
+}
+
+/// Creates a directory and all parent directories.
+///
+/// Thin wrapper that forwards `path` to [`std::fs::create_dir_all`] and returns its
+/// result unchanged.
+#[inline]
+pub(crate) fn create_dir_all(path: impl AsRef<Path>) -> std::io::Result<()> {
+    std::fs::create_dir_all(path)
+}
diff --git a/src/llm-coding-tools-models-dev/src/fs/mod.rs b/src/llm-coding-tools-models-dev/src/fs/mod.rs
new file mode 100644
index 00000000..e08a6304
--- /dev/null
+++ b/src/llm-coding-tools-models-dev/src/fs/mod.rs
@@ -0,0 +1,45 @@
+//! Filesystem abstraction layer.
+//!
+//! Provides unified APIs that work with both sync and async runtimes.
+//! Exactly one runtime feature must be enabled:
+//! - `tokio`: Async operations using the tokio runtime
+//! - `blocking`: Synchronous operations
+
+use std::mem::MaybeUninit;
+
+#[cfg(all(feature = "tokio", feature = "blocking"))]
+compile_error!("Features `tokio` and `blocking` are mutually exclusive.");
+
+#[cfg(not(any(feature = "tokio", feature = "blocking")))]
+compile_error!("Either `tokio` or `blocking` feature must be enabled for the fs module.");
+
+/// Allocates an uninitialized boxed byte slice with logical length `len`.
+///
+/// The elements are `MaybeUninit<u8>`; they must all be written before the box is
+/// converted with [`assume_init_u8_slice`].
+#[inline]
+pub(crate) fn alloc_uninit_u8_slice(len: usize) -> Box<[MaybeUninit<u8>]> {
+    Box::<[u8]>::new_uninit_slice(len)
+}
+
+/// Zero-fills a `MaybeUninit<u8>` slice and returns it as initialized mutable bytes.
+///
+/// The slice is filled with zeros before the `&mut [u8]` view is created, so the
+/// returned bytes are always initialized. This matters because the view is handed to
+/// `read_exact`, and the standard library's `Read` documentation requires the buffer
+/// to be initialized: an implementation is allowed to read from it.
+///
+/// Writes made through the returned slice land in `bytes`, which can afterwards be
+/// converted with [`assume_init_u8_slice`].
+#[inline]
+pub(crate) fn uninit_u8_slice_as_mut_bytes(bytes: &mut [MaybeUninit<u8>]) -> &mut [u8] {
+    // Initialize every element first; `MaybeUninit<u8>` is `Copy`, so `fill` applies.
+    bytes.fill(MaybeUninit::new(0));
+    // SAFETY: `MaybeUninit<u8>` has identical layout to `u8`, and every element was
+    // initialized by the `fill` above, so this is a valid `[u8]` of the same length.
+    unsafe { std::slice::from_raw_parts_mut(bytes.as_mut_ptr().cast::<u8>(), bytes.len()) }
+}
+
+/// Converts a fully-initialized boxed uninitialized slice into initialized bytes.
+///
+/// NOTE(review): this is a safe `fn` whose soundness depends on the caller having
+/// written every byte beforehand; consider whether it should be an `unsafe fn`.
+#[inline]
+pub(crate) fn assume_init_u8_slice(bytes: Box<[MaybeUninit<u8>]>) -> Box<[u8]> {
+    // SAFETY: caller guarantees all bytes were initialized.
+    unsafe { bytes.assume_init() }
+}
+
+#[cfg(feature = "tokio")]
+mod tokio_impl;
+#[cfg(feature = "tokio")]
+pub(crate) use tokio_impl::*;
+
+#[cfg(feature = "blocking")]
+mod blocking_impl;
+#[cfg(feature = "blocking")]
+pub(crate) use blocking_impl::*;
diff --git a/src/llm-coding-tools-models-dev/src/fs/tokio_impl.rs b/src/llm-coding-tools-models-dev/src/fs/tokio_impl.rs
new file mode 100644
index 00000000..29d04d2c
--- /dev/null
+++ b/src/llm-coding-tools-models-dev/src/fs/tokio_impl.rs
@@ -0,0 +1,37 @@
+//! Tokio-based async filesystem operations.
+
+use std::io::ErrorKind;
+use std::path::Path;
+use tokio::io::AsyncReadExt as _;
+
+/// Reads the whole file at `path` into a single allocation sized from its metadata.
+///
+/// # Errors
+///
+/// Returns any error from opening the file, querying its metadata, or `read_exact`,
+/// and an [`ErrorKind::InvalidData`] error when the file length does not fit in `usize`.
+///
+/// # Concurrent modification
+///
+/// The length is snapshotted before reading, so bytes appended after the metadata call
+/// would be missed. This relies on the codebase convention that every write goes to a
+/// temp file first and is then renamed onto the target, so files are never appended to
+/// in place and that race cannot occur.
+#[inline]
+pub(crate) async fn read(path: impl AsRef<Path>) -> std::io::Result<Box<[u8]>> {
+    let mut file = tokio::fs::File::open(path).await?;
+
+    let len = match usize::try_from(file.metadata().await?.len()) {
+        Ok(len) => len,
+        Err(_) => {
+            return Err(std::io::Error::new(
+                ErrorKind::InvalidData,
+                "file is too large to fit in memory",
+            ));
+        }
+    };
+
+    let mut buffer = super::alloc_uninit_u8_slice(len);
+    // An empty file needs no read; the zero-length buffer is returned as is.
+    if len > 0 {
+        file.read_exact(super::uninit_u8_slice_as_mut_bytes(&mut buffer))
+            .await?;
+    }
+
+    Ok(super::assume_init_u8_slice(buffer))
+}
+
+/// Creates a directory and all parent directories.
+///
+/// Thin async wrapper that forwards `path` to `tokio::fs::create_dir_all`, awaits it,
+/// and returns its result unchanged.
+#[inline]
+pub(crate) async fn create_dir_all(path: impl AsRef<Path>) -> std::io::Result<()> {
+    tokio::fs::create_dir_all(path).await
+}
diff --git a/src/llm-coding-tools-models-dev/src/lib.rs b/src/llm-coding-tools-models-dev/src/lib.rs
new file mode 100644
index 00000000..60fef51e
--- /dev/null
+++ b/src/llm-coding-tools-models-dev/src/lib.rs
@@ -0,0 +1,21 @@
+#![doc = include_str!(concat!("../", env!("CARGO_PKG_README")))]
+
+// Validate feature combinations at compile time.
+#[cfg(all(feature = "async", feature = "blocking"))]
+compile_error!("Features `async` and `blocking` are mutually exclusive.");
+
+#[cfg(not(any(feature = "async", feature = "blocking")))]
+compile_error!(concat!(
+    "Either an async runtime (e.g., `tokio`) or `blocking` feature ",
+    "must be enabled."
+));
+
+mod api;
+pub mod cache;
+pub mod catalog;
+pub mod error;
+mod fs;
+
+pub use cache::shared_cache_path;
+pub use catalog::{CatalogLoadResult, CatalogLoadSource, ModelsDevCatalog};
+pub use error::{CatalogError, CatalogResult};