From 5a030d053a549ea7fe24978947d276c09c55375d Mon Sep 17 00:00:00 2001 From: itdove Date: Mon, 6 Apr 2026 17:20:37 -0400 Subject: [PATCH 01/18] feat(providers): add Vertex AI provider type - Add vertex provider plugin with ANTHROPIC_VERTEX_PROJECT_ID credential - Add vertex inference profile with Anthropic-compatible protocols - Register vertex in provider registry and CLI - Add vertex to supported inference provider types - Fix scripts/podman.env to use correct env var names for local registry - Update docs for simplified CLI install workflow Known limitation: GCP OAuth authentication not yet implemented. Vertex provider can be created and configured but API calls will fail until OAuth token generation is added. --- crates/openshell-cli/src/main.rs | 2 + crates/openshell-core/src/inference.rs | 12 +++++ crates/openshell-providers/src/lib.rs | 2 + .../openshell-providers/src/providers/mod.rs | 1 + .../src/providers/vertex.rs | 47 +++++++++++++++++++ crates/openshell-server/src/inference.rs | 2 +- docs/get-started/install-podman-macos.md | 12 ++--- scripts/podman.env | 10 +++- 8 files changed, 78 insertions(+), 10 deletions(-) create mode 100644 crates/openshell-providers/src/providers/vertex.rs diff --git a/crates/openshell-cli/src/main.rs b/crates/openshell-cli/src/main.rs index 0d546c7b1..5277ab805 100644 --- a/crates/openshell-cli/src/main.rs +++ b/crates/openshell-cli/src/main.rs @@ -615,6 +615,7 @@ enum CliProviderType { Gitlab, Github, Outlook, + Vertex, } #[derive(Clone, Debug, ValueEnum)] @@ -646,6 +647,7 @@ impl CliProviderType { Self::Gitlab => "gitlab", Self::Github => "github", Self::Outlook => "outlook", + Self::Vertex => "vertex", } } } diff --git a/crates/openshell-core/src/inference.rs b/crates/openshell-core/src/inference.rs index a06c427f8..78fe72310 100644 --- a/crates/openshell-core/src/inference.rs +++ b/crates/openshell-core/src/inference.rs @@ -86,6 +86,16 @@ static NVIDIA_PROFILE: InferenceProviderProfile = InferenceProviderProfile { 
default_headers: &[], }; +static VERTEX_PROFILE: InferenceProviderProfile = InferenceProviderProfile { + provider_type: "vertex", + default_base_url: "https://us-central1-aiplatform.googleapis.com/v1", + protocols: ANTHROPIC_PROTOCOLS, + credential_key_names: &["ANTHROPIC_VERTEX_PROJECT_ID"], + base_url_config_keys: &["ANTHROPIC_VERTEX_REGION", "VERTEX_BASE_URL"], + auth: AuthHeader::Custom("x-api-key"), + default_headers: &[("anthropic-version", "2023-06-01")], +}; + /// Look up the inference provider profile for a given provider type. /// /// Returns `None` for provider types that don't support inference routing @@ -95,6 +105,7 @@ pub fn profile_for(provider_type: &str) -> Option<&'static InferenceProviderProf "openai" => Some(&OPENAI_PROFILE), "anthropic" => Some(&ANTHROPIC_PROFILE), "nvidia" => Some(&NVIDIA_PROFILE), + "vertex" => Some(&VERTEX_PROFILE), _ => None, } } @@ -176,6 +187,7 @@ mod tests { assert!(profile_for("openai").is_some()); assert!(profile_for("anthropic").is_some()); assert!(profile_for("nvidia").is_some()); + assert!(profile_for("vertex").is_some()); assert!(profile_for("OpenAI").is_some()); // case insensitive } diff --git a/crates/openshell-providers/src/lib.rs b/crates/openshell-providers/src/lib.rs index e2bcc0c09..2fa771950 100644 --- a/crates/openshell-providers/src/lib.rs +++ b/crates/openshell-providers/src/lib.rs @@ -86,6 +86,7 @@ impl ProviderRegistry { registry.register(providers::gitlab::GitlabProvider); registry.register(providers::github::GithubProvider); registry.register(providers::outlook::OutlookProvider); + registry.register(providers::vertex::VertexProvider); registry } @@ -138,6 +139,7 @@ pub fn normalize_provider_type(input: &str) -> Option<&'static str> { "gitlab" | "glab" => Some("gitlab"), "github" | "gh" => Some("github"), "outlook" => Some("outlook"), + "vertex" => Some("vertex"), _ => None, } } diff --git a/crates/openshell-providers/src/providers/mod.rs b/crates/openshell-providers/src/providers/mod.rs index 
6fe395135..19f9c54a5 100644 --- a/crates/openshell-providers/src/providers/mod.rs +++ b/crates/openshell-providers/src/providers/mod.rs @@ -12,3 +12,4 @@ pub mod nvidia; pub mod openai; pub mod opencode; pub mod outlook; +pub mod vertex; diff --git a/crates/openshell-providers/src/providers/vertex.rs b/crates/openshell-providers/src/providers/vertex.rs new file mode 100644 index 000000000..92e77002a --- /dev/null +++ b/crates/openshell-providers/src/providers/vertex.rs @@ -0,0 +1,47 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +use crate::{ + ProviderDiscoverySpec, ProviderError, ProviderPlugin, RealDiscoveryContext, discover_with_spec, +}; + +pub struct VertexProvider; + +pub const SPEC: ProviderDiscoverySpec = ProviderDiscoverySpec { + id: "vertex", + credential_env_vars: &["ANTHROPIC_VERTEX_PROJECT_ID"], +}; + +impl ProviderPlugin for VertexProvider { + fn id(&self) -> &'static str { + SPEC.id + } + + fn discover_existing(&self) -> Result, ProviderError> { + discover_with_spec(&SPEC, &RealDiscoveryContext) + } + + fn credential_env_vars(&self) -> &'static [&'static str] { + SPEC.credential_env_vars + } +} + +#[cfg(test)] +mod tests { + use super::SPEC; + use crate::discover_with_spec; + use crate::test_helpers::MockDiscoveryContext; + + #[test] + fn discovers_vertex_env_credentials() { + let ctx = MockDiscoveryContext::new() + .with_env("ANTHROPIC_VERTEX_PROJECT_ID", "my-gcp-project"); + let discovered = discover_with_spec(&SPEC, &ctx) + .expect("discovery") + .expect("provider"); + assert_eq!( + discovered.credentials.get("ANTHROPIC_VERTEX_PROJECT_ID"), + Some(&"my-gcp-project".to_string()) + ); + } +} diff --git a/crates/openshell-server/src/inference.rs b/crates/openshell-server/src/inference.rs index 0fb29bde5..5d4014b7a 100644 --- a/crates/openshell-server/src/inference.rs +++ b/crates/openshell-server/src/inference.rs @@ -237,7 +237,7 @@ fn 
resolve_provider_route(provider: &Provider) -> Result/dev/null; then export OPENSHELL_CONTAINER_RUNTIME=podman # Local development image registry - export OPENSHELL_REGISTRY="127.0.0.1:5000/openshell" + export OPENSHELL_IMAGE_REPO_BASE="127.0.0.1:5000/openshell" + export OPENSHELL_REGISTRY_HOST="127.0.0.1:5000" + export OPENSHELL_REGISTRY_NAMESPACE="openshell" + export OPENSHELL_REGISTRY_ENDPOINT="host.containers.internal:5000" + export OPENSHELL_REGISTRY_INSECURE="true" export OPENSHELL_CLUSTER_IMAGE="localhost/openshell/cluster:dev" echo "✓ Podman environment configured:" echo " CONTAINER_HOST=${CONTAINER_HOST}" echo " OPENSHELL_CONTAINER_RUNTIME=${OPENSHELL_CONTAINER_RUNTIME}" - echo " OPENSHELL_REGISTRY=${OPENSHELL_REGISTRY}" + echo " OPENSHELL_IMAGE_REPO_BASE=${OPENSHELL_IMAGE_REPO_BASE}" + echo " OPENSHELL_REGISTRY_HOST=${OPENSHELL_REGISTRY_HOST}" + echo " OPENSHELL_REGISTRY_INSECURE=${OPENSHELL_REGISTRY_INSECURE}" echo " OPENSHELL_CLUSTER_IMAGE=${OPENSHELL_CLUSTER_IMAGE}" fi else From dc3690350254ac84c78873529a44af34bef78451 Mon Sep 17 00:00:00 2001 From: itdove Date: Mon, 6 Apr 2026 17:26:45 -0400 Subject: [PATCH 02/18] docs: clarify that cluster:build:full also starts the gateway - Note that mise run cluster:build:full builds AND starts the gateway - Add verification step after build completes - Clarify that gateway is already running before sandbox creation --- docs/get-started/install-podman-macos.md | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/docs/get-started/install-podman-macos.md b/docs/get-started/install-podman-macos.md index 8a847a547..661abada0 100644 --- a/docs/get-started/install-podman-macos.md +++ b/docs/get-started/install-podman-macos.md @@ -90,12 +90,13 @@ mise run cluster:build:full ``` This command: -- Builds the gateway image +- Builds the gateway and cluster images - Starts a local container registry at `127.0.0.1:5000` -- Builds the cluster image -- Pushes images to the local registry +- Pushes 
the gateway image to the local registry - Bootstraps a k3s cluster inside a Podman container -- Deploys the OpenShell gateway +- Deploys and starts the OpenShell gateway + +**Note:** This command builds the images AND starts the gateway in one step. The gateway will be running when the command completes. Or run the script directly: @@ -119,10 +120,18 @@ cargo install --path crates/openshell-cli --root ~/.local ## Create a Sandbox +The gateway is now running. Create a sandbox to test it: + ```console openshell sandbox create ``` +Verify the gateway is healthy: + +```console +openshell gateway info +``` + ## Cleanup To remove all OpenShell resources and optionally the Podman machine: From a6cc6a4bd2debaee2ad26506308772c8edc7e0c6 Mon Sep 17 00:00:00 2001 From: itdove Date: Mon, 6 Apr 2026 17:54:14 -0400 Subject: [PATCH 03/18] docs: add Vertex AI provider to inference and provider docs - Add vertex to supported provider types table in manage-providers.md - Add Vertex AI provider tab in inference configuration docs - Clarify two usage modes: direct API calls vs inference.local routing - Document prerequisites (GCP project, Application Default Credentials) - Note OAuth limitation only affects inference routing, not direct calls - Keep Vertex docs in provider/inference pages, not installation guides --- docs/inference/configure.md | 21 +++++++++++++++++++++ docs/sandboxes/manage-providers.md | 1 + 2 files changed, 22 insertions(+) diff --git a/docs/inference/configure.md b/docs/inference/configure.md index 78065689e..4798bc09c 100644 --- a/docs/inference/configure.md +++ b/docs/inference/configure.md @@ -100,6 +100,27 @@ This reads `ANTHROPIC_API_KEY` from your environment. 
:::: +::::{tab-item} Google Cloud Vertex AI + +```console +$ export ANTHROPIC_VERTEX_PROJECT_ID=your-gcp-project-id +$ openshell provider create --name vertex-claude --type vertex --from-existing +``` + +This reads `ANTHROPIC_VERTEX_PROJECT_ID` from your environment and makes it available inside sandboxes. + +**Prerequisites:** +- Google Cloud project with Vertex AI API enabled +- Application Default Credentials configured: `gcloud auth application-default login` + +**Usage:** +- **Direct API calls:** Attach this provider to sandboxes to inject the project ID credential. Call Vertex AI directly from your code using the Anthropic SDK. +- **Inference routing:** Configure `inference.local` to proxy requests to Vertex AI (see "Set Inference Routing" section below). + +**Known Limitation:** When using inference routing, GCP OAuth authentication is not yet fully implemented. The provider can be created and configured, but API calls through `inference.local` will fail until OAuth token generation is implemented. Direct API calls from sandbox code using the Anthropic SDK work if you handle authentication yourself. + +:::: + ::::: ## Set Inference Routing diff --git a/docs/sandboxes/manage-providers.md b/docs/sandboxes/manage-providers.md index 6d35766bf..bd75b978f 100644 --- a/docs/sandboxes/manage-providers.md +++ b/docs/sandboxes/manage-providers.md @@ -179,6 +179,7 @@ The following provider types are supported. | `nvidia` | `NVIDIA_API_KEY` | NVIDIA API Catalog | | `openai` | `OPENAI_API_KEY` | Any OpenAI-compatible endpoint. Set `--config OPENAI_BASE_URL` to point to the provider. Refer to {doc}`/inference/configure`. | | `opencode` | `OPENCODE_API_KEY`, `OPENROUTER_API_KEY`, `OPENAI_API_KEY` | opencode tool | +| `vertex` | `ANTHROPIC_VERTEX_PROJECT_ID` | Google Cloud Vertex AI with Claude models. Requires GCP Application Default Credentials. **Note:** OAuth authentication not yet fully implemented. | :::{tip} Use the `generic` type for any service not listed above. 
You define the From 17bf43411f27258c0e3297b8fc2a8ed6c4a0aebc Mon Sep 17 00:00:00 2001 From: itdove Date: Mon, 6 Apr 2026 18:45:53 -0400 Subject: [PATCH 04/18] feat(vertex): implement GCP OAuth authentication for Vertex AI - Add gcp_auth dependency for OAuth token generation - Generate OAuth tokens from Application Default Credentials in vertex provider - Store tokens as VERTEX_OAUTH_TOKEN credential for router authentication - Update inference profile to use Bearer auth with OAuth tokens - Construct Vertex-specific URLs with :streamRawPredict endpoint - Support project ID from credentials for URL construction - Add model parameter to build_backend_url for Vertex routing --- Cargo.lock | 38 +++++++++++++++ crates/openshell-core/src/inference.rs | 11 +++-- crates/openshell-providers/Cargo.toml | 2 + .../src/providers/vertex.rs | 48 +++++++++++++++++-- crates/openshell-router/src/backend.rs | 41 +++++++++++++--- crates/openshell-server/src/inference.rs | 28 ++++++++++- 6 files changed, 153 insertions(+), 15 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 98797cc24..1e2b542ee 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1509,6 +1509,32 @@ dependencies = [ "slab", ] +[[package]] +name = "gcp_auth" +version = "0.12.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2b3d0b409a042a380111af38136310839af8ac1a0917fb6e84515ed1e4bf3ee" +dependencies = [ + "async-trait", + "base64 0.22.1", + "bytes", + "chrono", + "http", + "http-body-util", + "hyper", + "hyper-rustls", + "hyper-util", + "ring", + "rustls-pki-types", + "serde", + "serde_json", + "thiserror 2.0.18", + "tokio", + "tracing", + "tracing-futures", + "url", +] + [[package]] name = "generic-array" version = "0.14.7" @@ -2919,8 +2945,10 @@ dependencies = [ name = "openshell-providers" version = "0.0.0" dependencies = [ + "gcp_auth", "openshell-core", "thiserror 2.0.18", + "tokio", ] [[package]] @@ -5378,6 +5406,16 @@ dependencies = [ "valuable", ] +[[package]] +name = 
"tracing-futures" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97d095ae15e245a057c8e8451bab9b3ee1e1f68e9ba2b4fbc18d0ac5237835f2" +dependencies = [ + "pin-project", + "tracing", +] + [[package]] name = "tracing-log" version = "0.2.0" diff --git a/crates/openshell-core/src/inference.rs b/crates/openshell-core/src/inference.rs index 78fe72310..0973f25db 100644 --- a/crates/openshell-core/src/inference.rs +++ b/crates/openshell-core/src/inference.rs @@ -88,12 +88,15 @@ static NVIDIA_PROFILE: InferenceProviderProfile = InferenceProviderProfile { static VERTEX_PROFILE: InferenceProviderProfile = InferenceProviderProfile { provider_type: "vertex", + // Base URL template - actual URL constructed at request time with project/region/model default_base_url: "https://us-central1-aiplatform.googleapis.com/v1", protocols: ANTHROPIC_PROTOCOLS, - credential_key_names: &["ANTHROPIC_VERTEX_PROJECT_ID"], - base_url_config_keys: &["ANTHROPIC_VERTEX_REGION", "VERTEX_BASE_URL"], - auth: AuthHeader::Custom("x-api-key"), - default_headers: &[("anthropic-version", "2023-06-01")], + // Look for OAuth token first, fallback to project ID (for manual config) + credential_key_names: &["VERTEX_OAUTH_TOKEN", "ANTHROPIC_VERTEX_PROJECT_ID"], + base_url_config_keys: &["VERTEX_BASE_URL", "ANTHROPIC_VERTEX_REGION"], + // Vertex uses OAuth Bearer tokens, not x-api-key + auth: AuthHeader::Bearer, + default_headers: &[("anthropic-version", "vertex-2023-10-16")], }; /// Look up the inference provider profile for a given provider type. 
diff --git a/crates/openshell-providers/Cargo.toml b/crates/openshell-providers/Cargo.toml index 41f9ed6c0..0cf14ec2b 100644 --- a/crates/openshell-providers/Cargo.toml +++ b/crates/openshell-providers/Cargo.toml @@ -13,6 +13,8 @@ repository.workspace = true [dependencies] openshell-core = { path = "../openshell-core" } thiserror = { workspace = true } +gcp_auth = "0.12" +tokio = { workspace = true } [lints] workspace = true diff --git a/crates/openshell-providers/src/providers/vertex.rs b/crates/openshell-providers/src/providers/vertex.rs index 92e77002a..ef7758670 100644 --- a/crates/openshell-providers/src/providers/vertex.rs +++ b/crates/openshell-providers/src/providers/vertex.rs @@ -2,7 +2,8 @@ // SPDX-License-Identifier: Apache-2.0 use crate::{ - ProviderDiscoverySpec, ProviderError, ProviderPlugin, RealDiscoveryContext, discover_with_spec, + DiscoveredProvider, ProviderDiscoverySpec, ProviderError, ProviderPlugin, RealDiscoveryContext, + discover_with_spec, }; pub struct VertexProvider; @@ -12,13 +13,54 @@ pub const SPEC: ProviderDiscoverySpec = ProviderDiscoverySpec { credential_env_vars: &["ANTHROPIC_VERTEX_PROJECT_ID"], }; +// Additional config keys for Vertex AI +const VERTEX_CONFIG_KEYS: &[&str] = &["ANTHROPIC_VERTEX_REGION"]; + +/// Generate an OAuth token from GCP Application Default Credentials for Vertex AI. +/// +/// Returns `None` if ADC is not configured or token generation fails. +async fn generate_oauth_token() -> Option { + // Try to find an appropriate token provider (checks ADC, service account, metadata server, etc.) 
+ let provider = gcp_auth::provider().await.ok()?; + + // Get token for Vertex AI scope + // Vertex AI uses the Cloud Platform scope + let scopes = &["https://www.googleapis.com/auth/cloud-platform"]; + let token = provider.token(scopes).await.ok()?; + + Some(token.as_str().to_string()) +} + impl ProviderPlugin for VertexProvider { fn id(&self) -> &'static str { SPEC.id } - fn discover_existing(&self) -> Result, ProviderError> { - discover_with_spec(&SPEC, &RealDiscoveryContext) + fn discover_existing(&self) -> Result, ProviderError> { + let mut discovered = discover_with_spec(&SPEC, &RealDiscoveryContext)?; + + // Add region config if present + if let Some(ref mut provider) = discovered { + for &key in VERTEX_CONFIG_KEYS { + if let Ok(value) = std::env::var(key) { + provider.config.insert(key.to_string(), value); + } + } + + // Generate OAuth token from Application Default Credentials + // This replaces the project ID credential with an actual OAuth token + // that can be used for API authentication + let rt = tokio::runtime::Runtime::new() + .map_err(|e| ProviderError::UnsupportedProvider(format!("failed to create tokio runtime: {e}")))?; + + if let Some(token) = rt.block_on(generate_oauth_token()) { + // Store the OAuth token as VERTEX_OAUTH_TOKEN + // The inference router will use this as the Bearer token + provider.credentials.insert("VERTEX_OAUTH_TOKEN".to_string(), token); + } + } + + Ok(discovered) } fn credential_env_vars(&self) -> &'static [&'static str] { diff --git a/crates/openshell-router/src/backend.rs b/crates/openshell-router/src/backend.rs index d1d7092c0..3698441f7 100644 --- a/crates/openshell-router/src/backend.rs +++ b/crates/openshell-router/src/backend.rs @@ -95,7 +95,7 @@ async fn send_backend_request( headers: Vec<(String, String)>, body: bytes::Bytes, ) -> Result { - let url = build_backend_url(&route.endpoint, path); + let url = build_backend_url(&route.endpoint, path, &route.model); let reqwest_method: reqwest::Method = method .parse() 
@@ -241,7 +241,7 @@ pub async fn verify_backend_endpoint( if mock::is_mock_route(route) { return Ok(ValidatedEndpoint { - url: build_backend_url(&route.endpoint, probe.path), + url: build_backend_url(&route.endpoint, probe.path, &route.model), protocol: probe.protocol.to_string(), }); } @@ -306,7 +306,7 @@ async fn try_validation_request( details, }, })?; - let url = build_backend_url(&route.endpoint, path); + let url = build_backend_url(&route.endpoint, path, &route.model); if response.status().is_success() { return Ok(ValidatedEndpoint { @@ -418,8 +418,23 @@ pub async fn proxy_to_backend_streaming( }) } -fn build_backend_url(endpoint: &str, path: &str) -> String { +fn build_backend_url(endpoint: &str, path: &str, model: &str) -> String { let base = endpoint.trim_end_matches('/'); + + // Special handling for Vertex AI + if base.contains("aiplatform.googleapis.com") && path.starts_with("/v1/messages") { + // Vertex AI uses a different path structure: + // https://{region}-aiplatform.googleapis.com/v1/projects/{project}/locations/{region}/publishers/anthropic/models/{model}:streamRawPredict + // The base already has everything up to /models, so we append /{model}:streamRawPredict + let model_suffix = if model.is_empty() { + String::new() + } else { + format!("/{}", model) + }; + return format!("{}{}:streamRawPredict", base, model_suffix); + } + + // Deduplicate /v1 prefix for standard endpoints if base.ends_with("/v1") && (path == "/v1" || path.starts_with("/v1/")) { return format!("{base}{}", &path[3..]); } @@ -438,7 +453,7 @@ mod tests { #[test] fn build_backend_url_dedupes_v1_prefix() { assert_eq!( - build_backend_url("https://api.openai.com/v1", "/v1/chat/completions"), + build_backend_url("https://api.openai.com/v1", "/v1/chat/completions", "gpt-4"), "https://api.openai.com/v1/chat/completions" ); } @@ -446,15 +461,27 @@ mod tests { #[test] fn build_backend_url_preserves_non_versioned_base() { assert_eq!( - build_backend_url("https://api.anthropic.com", 
"/v1/messages"), + build_backend_url("https://api.anthropic.com", "/v1/messages", "claude-3"), "https://api.anthropic.com/v1/messages" ); } + #[test] + fn build_backend_url_handles_vertex_ai() { + assert_eq!( + build_backend_url( + "https://us-central1-aiplatform.googleapis.com/v1/projects/my-project/locations/us-central1/publishers/anthropic/models", + "/v1/messages", + "claude-3-5-sonnet-20241022" + ), + "https://us-central1-aiplatform.googleapis.com/v1/projects/my-project/locations/us-central1/publishers/anthropic/models/claude-3-5-sonnet-20241022:streamRawPredict" + ); + } + #[test] fn build_backend_url_handles_exact_v1_path() { assert_eq!( - build_backend_url("https://api.openai.com/v1", "/v1"), + build_backend_url("https://api.openai.com/v1", "/v1", "gpt-4"), "https://api.openai.com/v1" ); } diff --git a/crates/openshell-server/src/inference.rs b/crates/openshell-server/src/inference.rs index 5d4014b7a..5faa30518 100644 --- a/crates/openshell-server/src/inference.rs +++ b/crates/openshell-server/src/inference.rs @@ -250,11 +250,37 @@ fn resolve_provider_route(provider: &Provider) -> Result Date: Mon, 6 Apr 2026 18:50:57 -0400 Subject: [PATCH 05/18] fix(vertex): use separate thread for OAuth token generation Avoid tokio runtime nesting panic by spawning OAuth token generation in a separate OS thread with its own runtime. This allows provider discovery to work when called from within an existing tokio context. 
--- .../openshell-providers/src/providers/vertex.rs | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/crates/openshell-providers/src/providers/vertex.rs b/crates/openshell-providers/src/providers/vertex.rs index ef7758670..0669c8067 100644 --- a/crates/openshell-providers/src/providers/vertex.rs +++ b/crates/openshell-providers/src/providers/vertex.rs @@ -48,12 +48,17 @@ impl ProviderPlugin for VertexProvider { } // Generate OAuth token from Application Default Credentials - // This replaces the project ID credential with an actual OAuth token - // that can be used for API authentication - let rt = tokio::runtime::Runtime::new() - .map_err(|e| ProviderError::UnsupportedProvider(format!("failed to create tokio runtime: {e}")))?; - - if let Some(token) = rt.block_on(generate_oauth_token()) { + // Try to generate token, but don't fail if we're in a nested runtime context + let token = std::thread::spawn(|| { + tokio::runtime::Runtime::new() + .ok() + .and_then(|rt| rt.block_on(generate_oauth_token())) + }) + .join() + .ok() + .flatten(); + + if let Some(token) = token { // Store the OAuth token as VERTEX_OAUTH_TOKEN // The inference router will use this as the Bearer token provider.credentials.insert("VERTEX_OAUTH_TOKEN".to_string(), token); From f606dc37cf261ab29461a2da659bfc94a2a11c8f Mon Sep 17 00:00:00 2001 From: itdove Date: Mon, 6 Apr 2026 19:17:58 -0400 Subject: [PATCH 06/18] feat(scripts): improve cleanup script with sandbox deletion and better ordering - Delete all sandboxes before destroying gateway - Explicitly stop and remove cluster and registry containers by name - Remove images by specific tags (localhost/openshell/*) - Run cargo clean for build artifacts - Add reinstall instructions to completion message - Better error handling with 2>/dev/null redirects --- cleanup-openshell-podman-macos.sh | 46 +++++++++++++++++++++++++++---- 1 file changed, 40 insertions(+), 6 deletions(-) diff --git 
a/cleanup-openshell-podman-macos.sh b/cleanup-openshell-podman-macos.sh index 43efd8dd5..d6b80a411 100755 --- a/cleanup-openshell-podman-macos.sh +++ b/cleanup-openshell-podman-macos.sh @@ -11,19 +11,43 @@ set -e echo "=== OpenShell Podman Cleanup Script ===" echo "" +# Delete all sandboxes first (before destroying gateway) +echo "Deleting all sandboxes..." +if command -v openshell &>/dev/null; then + # Get list of sandboxes and delete each one + openshell sandbox list --no-header 2>/dev/null | awk '{print $1}' | while read -r sandbox; do + if [ -n "$sandbox" ]; then + echo " Deleting sandbox: $sandbox" + openshell sandbox delete "$sandbox" 2>/dev/null || true + fi + done +fi + # Destroy OpenShell gateway (if it exists) echo "Destroying OpenShell gateway..." if command -v openshell &>/dev/null; then openshell gateway destroy --name openshell 2>/dev/null || true fi -# Stop and remove any running OpenShell containers -echo "Stopping OpenShell containers..." -podman ps -a | grep openshell | awk '{print $1}' | xargs -r podman rm -f || true +# Stop and remove cluster container +echo "Stopping cluster container..." +podman stop openshell-cluster-openshell 2>/dev/null || true +podman rm openshell-cluster-openshell 2>/dev/null || true + +# Stop and remove local registry container +echo "Stopping local registry..." +podman stop openshell-local-registry 2>/dev/null || true +podman rm openshell-local-registry 2>/dev/null || true + +# Stop and remove any other OpenShell containers +echo "Cleaning up remaining OpenShell containers..." +podman ps -a | grep openshell | awk '{print $1}' | xargs -r podman rm -f 2>/dev/null || true # Remove OpenShell images echo "Removing OpenShell images..." 
-podman images | grep -E "openshell|cluster" | awk '{print $3}' | xargs -r podman rmi -f || true +podman rmi localhost/openshell/cluster:dev 2>/dev/null || true +podman rmi localhost/openshell/gateway:dev 2>/dev/null || true +podman images | grep -E "openshell|127.0.0.1:5000/openshell" | awk '{print $3}' | xargs -r podman rmi -f 2>/dev/null || true # Remove CLI binary echo "Removing CLI binary..." @@ -41,8 +65,11 @@ rm -rf ~/.openshell echo "Removing build artifacts..." SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" cd "$SCRIPT_DIR" -rm -rf target/ -rm -rf deploy/docker/.build/ +if command -v cargo &>/dev/null; then + echo " Running cargo clean..." + cargo clean 2>/dev/null || true +fi +rm -rf deploy/docker/.build/ 2>/dev/null || true # Clean Podman cache echo "Cleaning Podman build cache..." @@ -51,6 +78,13 @@ podman system prune -af --volumes echo "" echo "=== Cleanup Complete ===" echo "" +echo "OpenShell containers, images, and configuration have been removed." +echo "" +echo "To reinstall OpenShell:" +echo " 1. source scripts/podman.env" +echo " 2. mise run cluster:build:full" +echo " 3. cargo install --path crates/openshell-cli --root ~/.local" +echo "" echo "To completely remove the OpenShell Podman machine:" echo " podman machine stop openshell" echo " podman machine rm openshell" From d36e58b21ff50f5b410b6edb011cefe55ca27322 Mon Sep 17 00:00:00 2001 From: itdove Date: Mon, 6 Apr 2026 19:45:17 -0400 Subject: [PATCH 07/18] feat(sandbox): inject Vertex AI credentials as actual environment variables Add selective direct injection for provider credentials that need to be accessible as real environment variables (not placeholders). This allows tools like `claude` CLI to read Vertex AI credentials directly. 
Changes: - Add direct_inject_credentials() list for credentials requiring direct access - Modify from_provider_env() to support selective direct injection - Inject ANTHROPIC_VERTEX_PROJECT_ID, VERTEX_OAUTH_TOKEN, and ANTHROPIC_VERTEX_REGION as actual values instead of placeholders - Other credentials continue using openshell:resolve:env:* placeholders for HTTP proxy resolution Security note: Directly injected credentials are visible via /proc/*/environ, unlike placeholder-based credentials which are only resolved within HTTP requests. Only credentials essential for CLI tool compatibility are included. --- crates/openshell-sandbox/src/secrets.rs | 53 +++++++++++++++++++++++-- 1 file changed, 49 insertions(+), 4 deletions(-) diff --git a/crates/openshell-sandbox/src/secrets.rs b/crates/openshell-sandbox/src/secrets.rs index a27537c91..233056f07 100644 --- a/crates/openshell-sandbox/src/secrets.rs +++ b/crates/openshell-sandbox/src/secrets.rs @@ -10,6 +10,25 @@ const PLACEHOLDER_PREFIX: &str = "openshell:resolve:env:"; /// Public access to the placeholder prefix for fail-closed scanning in other modules. pub(crate) const PLACEHOLDER_PREFIX_PUBLIC: &str = PLACEHOLDER_PREFIX; +/// Credentials that should be injected as actual values into the sandbox environment +/// instead of being converted to placeholders. +/// +/// These credentials are needed by tools (like `claude` CLI) that read environment +/// variables directly rather than making HTTP requests through the proxy. +/// +/// **Security consideration**: These values are visible to all sandbox processes via +/// `/proc//environ`, unlike placeholder-based credentials which are only resolved +/// within HTTP requests. Only include credentials here when direct env var access is +/// required for tool compatibility. 
+fn direct_inject_credentials() -> &'static [&'static str] { + &[ + // Vertex AI credentials for claude CLI + "ANTHROPIC_VERTEX_PROJECT_ID", + "VERTEX_OAUTH_TOKEN", + "ANTHROPIC_VERTEX_REGION", + ] +} + /// Characters that are valid in an env var key name (used to extract /// placeholder boundaries within concatenated strings like path segments). fn is_env_key_char(b: u8) -> bool { @@ -69,6 +88,19 @@ pub struct SecretResolver { impl SecretResolver { pub(crate) fn from_provider_env( provider_env: HashMap, + ) -> (HashMap, Option) { + Self::from_provider_env_with_direct_inject(provider_env, &direct_inject_credentials()) + } + + /// Create a resolver from provider environment with selective direct injection. + /// + /// Credentials matching keys in `direct_inject` are injected as actual values + /// into the child environment (for tools like `claude` CLI that need real env vars). + /// All other credentials are converted to `openshell:resolve:env:*` placeholders + /// that get resolved by the HTTP proxy. 
+ pub(crate) fn from_provider_env_with_direct_inject( + provider_env: HashMap, + direct_inject: &[&str], ) -> (HashMap, Option) { if provider_env.is_empty() { return (HashMap::new(), None); @@ -78,12 +110,25 @@ impl SecretResolver { let mut by_placeholder = HashMap::with_capacity(provider_env.len()); for (key, value) in provider_env { - let placeholder = placeholder_for_env_key(&key); - child_env.insert(key, placeholder.clone()); - by_placeholder.insert(placeholder, value); + // Check if this credential should be injected directly + if direct_inject.contains(&key.as_str()) { + // Direct injection: put actual value in environment + child_env.insert(key, value); + } else { + // Placeholder: will be resolved by HTTP proxy + let placeholder = placeholder_for_env_key(&key); + child_env.insert(key, placeholder.clone()); + by_placeholder.insert(placeholder, value); + } } - (child_env, Some(Self { by_placeholder })) + let resolver = if by_placeholder.is_empty() { + None + } else { + Some(Self { by_placeholder }) + }; + + (child_env, resolver) } /// Resolve a placeholder string to the real secret value. From 2dd3438a165a898bf3ff8c72aabbfbabab231dd9 Mon Sep 17 00:00:00 2001 From: itdove Date: Mon, 6 Apr 2026 20:03:59 -0400 Subject: [PATCH 08/18] feat(vertex): auto-inject CLAUDE_CODE_USE_VERTEX for claude CLI - Add CLAUDE_CODE_USE_VERTEX to direct injection list - Automatically set CLAUDE_CODE_USE_VERTEX=1 in Vertex provider credentials - Enables claude CLI to auto-detect Vertex AI without manual config Now sandboxes with Vertex provider will automatically have: - ANTHROPIC_VERTEX_PROJECT_ID (from env) - VERTEX_OAUTH_TOKEN (generated from GCP ADC) - CLAUDE_CODE_USE_VERTEX=1 (auto-set) The claude CLI can now use Vertex AI with zero manual configuration. 
--- crates/openshell-providers/src/providers/vertex.rs | 4 ++++ crates/openshell-sandbox/src/secrets.rs | 1 + 2 files changed, 5 insertions(+) diff --git a/crates/openshell-providers/src/providers/vertex.rs b/crates/openshell-providers/src/providers/vertex.rs index 0669c8067..6daadd5f9 100644 --- a/crates/openshell-providers/src/providers/vertex.rs +++ b/crates/openshell-providers/src/providers/vertex.rs @@ -47,6 +47,10 @@ impl ProviderPlugin for VertexProvider { } } + // Set CLAUDE_CODE_USE_VERTEX=1 to enable Vertex AI in claude CLI + // Must be in credentials (not config) to be injected into sandbox environment + provider.credentials.insert("CLAUDE_CODE_USE_VERTEX".to_string(), "1".to_string()); + // Generate OAuth token from Application Default Credentials // Try to generate token, but don't fail if we're in a nested runtime context let token = std::thread::spawn(|| { diff --git a/crates/openshell-sandbox/src/secrets.rs b/crates/openshell-sandbox/src/secrets.rs index 233056f07..0cd188b6e 100644 --- a/crates/openshell-sandbox/src/secrets.rs +++ b/crates/openshell-sandbox/src/secrets.rs @@ -26,6 +26,7 @@ fn direct_inject_credentials() -> &'static [&'static str] { "ANTHROPIC_VERTEX_PROJECT_ID", "VERTEX_OAUTH_TOKEN", "ANTHROPIC_VERTEX_REGION", + "CLAUDE_CODE_USE_VERTEX", ] } From bc3342de1a58a54550b8a5c2360528c561111e94 Mon Sep 17 00:00:00 2001 From: itdove Date: Mon, 6 Apr 2026 20:06:01 -0400 Subject: [PATCH 09/18] feat(podman): increase default memory to 12 GB for better build performance - Change Podman machine default memory from 8 GB to 12 GB - Update documentation to reflect 12 GB default - Update troubleshooting to suggest 16 GB for build issues 12 GB provides better performance for Rust compilation and reduces out-of-memory issues during parallel builds. 
--- docs/get-started/install-podman-macos.md | 6 +++--- scripts/setup-podman-macos.sh | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/get-started/install-podman-macos.md b/docs/get-started/install-podman-macos.md index 661abada0..648f11564 100644 --- a/docs/get-started/install-podman-macos.md +++ b/docs/get-started/install-podman-macos.md @@ -51,7 +51,7 @@ brew install podman mise The `scripts/setup-podman-macos.sh` script automates Podman Machine configuration: -- Creates a dedicated `openshell` Podman machine (8 GB RAM, 4 CPUs) +- Creates a dedicated `openshell` Podman machine (12 GB RAM, 4 CPUs) - Configures cgroup delegation (required for the embedded k3s cluster) - Stops conflicting machines (only one can run at a time, with user confirmation) @@ -161,11 +161,11 @@ openshell sandbox create ### Build fails with memory errors -Increase the Podman machine memory allocation: +Increase the Podman machine memory allocation (default is 12 GB): ```console podman machine stop openshell -podman machine set openshell --memory 8192 +podman machine set openshell --memory 16384 podman machine start openshell ``` diff --git a/scripts/setup-podman-macos.sh b/scripts/setup-podman-macos.sh index 1538259f3..979a51e3e 100755 --- a/scripts/setup-podman-macos.sh +++ b/scripts/setup-podman-macos.sh @@ -9,7 +9,7 @@ set -euo pipefail MACHINE_NAME="${PODMAN_MACHINE_NAME:-openshell}" -MEMORY="${PODMAN_MEMORY:-8192}" +MEMORY="${PODMAN_MEMORY:-12288}" CPUS="${PODMAN_CPUS:-4}" echo "=== OpenShell Podman Setup for macOS ===" From b08de19e134b32147a7eb56b7eb7edfe134fea47 Mon Sep 17 00:00:00 2001 From: itdove Date: Mon, 6 Apr 2026 20:19:40 -0400 Subject: [PATCH 10/18] fix(scripts): update CLI installation command in setup script Replace manual 'cargo build + cp' with 'cargo install --path' Add verification step with 'openshell gateway info' Keep correct 'mise run cluster:build:full' command --- scripts/setup-podman-macos.sh | 6 +++--- 1 file changed, 3 insertions(+), 
3 deletions(-) diff --git a/scripts/setup-podman-macos.sh b/scripts/setup-podman-macos.sh index 979a51e3e..02fdf2343 100755 --- a/scripts/setup-podman-macos.sh +++ b/scripts/setup-podman-macos.sh @@ -108,9 +108,9 @@ echo "Podman machine '${MACHINE_NAME}' is ready!" echo "" echo "Next steps:" echo " 1. Set up environment: source scripts/podman.env" -echo " 2. Build and deploy: mise run cluster:build:full" -echo " 3. Build CLI: cargo build --release -p openshell-cli" -echo " 4. Install CLI: cp target/release/openshell ~/.local/bin/" +echo " 2. Build and deploy cluster: mise run cluster:build:full" +echo " 3. Install CLI: cargo install --path crates/openshell-cli --root ~/.local" +echo " 4. Verify installation: openshell gateway info" echo "" echo "To make the environment persistent, add to your shell profile (~/.zshrc):" echo " source $(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)/scripts/podman.env" From b56828e9efea9a60bd6e4e1b5cf7499373ec9ae1 Mon Sep 17 00:00:00 2001 From: itdove Date: Mon, 6 Apr 2026 23:15:18 -0400 Subject: [PATCH 11/18] fix(router): remove model field from Vertex AI request bodies Vertex AI's :streamRawPredict endpoint expects the model in the URL path, not in the request body. The router was incorrectly inserting the model field, causing "Extra inputs are not permitted" errors. Changes: - Router now detects Vertex AI endpoints and removes model field - Added bash 3 compatibility fix for cluster-deploy-fast.sh - Added scripts/rebuild-cluster.sh for development workflow - Updated documentation for Vertex AI setup and rebuild process Fixes inference routing to Vertex AI via inference.local endpoint. 
--- CONTRIBUTING.md | 17 ++++++++++++ crates/openshell-router/src/backend.rs | 19 ++++++++++--- docs/get-started/install-podman-macos.md | 25 ++++++++++++++++- docs/inference/configure.md | 15 ++++++----- docs/sandboxes/manage-providers.md | 3 ++- scripts/rebuild-cluster.sh | 34 ++++++++++++++++++++++++ tasks/scripts/cluster-deploy-fast.sh | 19 ++++++++++++- 7 files changed, 119 insertions(+), 13 deletions(-) create mode 100755 scripts/rebuild-cluster.sh diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 19a398a32..d759863a8 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -176,6 +176,23 @@ These are the primary `mise` tasks for day-to-day development: | `mise run docs` | Build and serve documentation locally | | `mise run clean` | Clean build artifacts | +## Rebuilding After Code Changes + +When developing OpenShell core components (gateway, router, sandbox supervisor), you need to rebuild the cluster to test your changes: + +```bash +bash scripts/rebuild-cluster.sh +``` + +This script stops the cluster, rebuilds the image with your changes, and restarts it. + +**After rebuilding:** +- Providers need to be recreated (gateway database was reset) +- Inference routing needs to be reconfigured +- Sandboxes need to be recreated + +For a complete cleanup, see the cleanup scripts in the `scripts/` directory. + ## Project Structure | Path | Purpose | diff --git a/crates/openshell-router/src/backend.rs b/crates/openshell-router/src/backend.rs index 3698441f7..9b5d1a000 100644 --- a/crates/openshell-router/src/backend.rs +++ b/crates/openshell-router/src/backend.rs @@ -137,13 +137,24 @@ async fn send_backend_request( // Set the "model" field in the JSON body to the route's configured model so the // backend receives the correct model ID regardless of what the client sent. + // + // Exception: Vertex AI's :streamRawPredict endpoint expects the model in the URL + // path (already handled in build_backend_url), not in the request body. 
+ let is_vertex_ai = route.endpoint.contains("aiplatform.googleapis.com"); + let body = match serde_json::from_slice::<serde_json::Value>(&body) { Ok(mut json) => { if let Some(obj) = json.as_object_mut() { - obj.insert( - "model".to_string(), - serde_json::Value::String(route.model.clone()), - ); + if is_vertex_ai { + // Remove model field for Vertex AI (it's in the URL path) + obj.remove("model"); + } else { + // Insert/override model field for standard backends + obj.insert( + "model".to_string(), + serde_json::Value::String(route.model.clone()), + ); + } } bytes::Bytes::from(serde_json::to_vec(&json).unwrap_or_else(|_| body.to_vec())) } diff --git a/docs/get-started/install-podman-macos.md b/docs/get-started/install-podman-macos.md index 648f11564..abc0a3ac6 100644 --- a/docs/get-started/install-podman-macos.md +++ b/docs/get-started/install-podman-macos.md @@ -132,14 +132,37 @@ Verify the gateway is healthy: openshell gateway info ``` +## Rebuilding After Code Changes + +If you're developing OpenShell and need to test code changes, use the rebuild script: + +```console +bash scripts/rebuild-cluster.sh +``` + +This stops the cluster, removes the old image, rebuilds with your changes, and restarts. After rebuilding: +1. Recreate providers (gateway database was reset) +2. Reconfigure inference routing if needed +3. Recreate sandboxes + ## Cleanup -To remove all OpenShell resources and optionally the Podman machine: +### Quick Rebuild (Development) + +```console +bash scripts/rebuild-cluster.sh +``` + +Rebuilds the cluster with latest code changes. Use this during development. + +### Full Cleanup (Start Fresh) ```console bash cleanup-openshell-podman-macos.sh ``` +Removes all OpenShell resources and optionally the Podman machine. Use this to completely reset your installation. 
+ ## Troubleshooting ### Environment variables not set diff --git a/docs/inference/configure.md b/docs/inference/configure.md index 4798bc09c..e13567135 100644 --- a/docs/inference/configure.md +++ b/docs/inference/configure.md @@ -104,23 +104,26 @@ This reads `ANTHROPIC_API_KEY` from your environment. ```console $ export ANTHROPIC_VERTEX_PROJECT_ID=your-gcp-project-id -$ openshell provider create --name vertex-claude --type vertex --from-existing +$ export ANTHROPIC_VERTEX_REGION=us-east5 # Optional, defaults to us-central1 +$ openshell provider create --name vertex --type vertex --from-existing ``` -This reads `ANTHROPIC_VERTEX_PROJECT_ID` from your environment and makes it available inside sandboxes. +This reads `ANTHROPIC_VERTEX_PROJECT_ID` and `ANTHROPIC_VERTEX_REGION` from your environment and automatically generates OAuth tokens from GCP Application Default Credentials. **Prerequisites:** -- Google Cloud project with Vertex AI API enabled +- Google Cloud project with Vertex AI API enabled and Claude models available - Application Default Credentials configured: `gcloud auth application-default login` +- The `~/.config/gcloud/` directory must be uploaded to sandboxes for OAuth token refresh **Usage:** -- **Direct API calls:** Attach this provider to sandboxes to inject the project ID credential. Call Vertex AI directly from your code using the Anthropic SDK. -- **Inference routing:** Configure `inference.local` to proxy requests to Vertex AI (see "Set Inference Routing" section below). +- **Direct API calls:** Tools like `claude` CLI automatically use Vertex AI when `CLAUDE_CODE_USE_VERTEX=1` is set +- **Inference routing:** Configure `inference.local` to proxy requests to Vertex AI (see "Set Inference Routing" section below) -**Known Limitation:** When using inference routing, GCP OAuth authentication is not yet fully implemented. 
The provider can be created and configured, but API calls through `inference.local` will fail until OAuth token generation is implemented. Direct API calls from sandbox code using the Anthropic SDK work if you handle authentication yourself. +**Model ID Format:** Use `@` separator for versions (e.g., `claude-sonnet-4-5@20250929`) :::: + ::::: ## Set Inference Routing diff --git a/docs/sandboxes/manage-providers.md b/docs/sandboxes/manage-providers.md index bd75b978f..716c16f5a 100644 --- a/docs/sandboxes/manage-providers.md +++ b/docs/sandboxes/manage-providers.md @@ -179,7 +179,7 @@ The following provider types are supported. | `nvidia` | `NVIDIA_API_KEY` | NVIDIA API Catalog | | `openai` | `OPENAI_API_KEY` | Any OpenAI-compatible endpoint. Set `--config OPENAI_BASE_URL` to point to the provider. Refer to {doc}`/inference/configure`. | | `opencode` | `OPENCODE_API_KEY`, `OPENROUTER_API_KEY`, `OPENAI_API_KEY` | opencode tool | -| `vertex` | `ANTHROPIC_VERTEX_PROJECT_ID` | Google Cloud Vertex AI with Claude models. Requires GCP Application Default Credentials. **Note:** OAuth authentication not yet fully implemented. | +| `vertex` | `ANTHROPIC_VERTEX_PROJECT_ID`, `VERTEX_OAUTH_TOKEN`, `CLAUDE_CODE_USE_VERTEX` | Google Cloud Vertex AI with Claude models. Automatically generates OAuth tokens from GCP Application Default Credentials. Set `ANTHROPIC_VERTEX_REGION` (optional, defaults to `us-central1`) to control the region. | :::{tip} Use the `generic` type for any service not listed above. You define the @@ -194,6 +194,7 @@ The following providers have been tested with `inference.local`. 
Any provider th |---|---|---|---|---| | NVIDIA API Catalog | `nvidia-prod` | `nvidia` | `https://integrate.api.nvidia.com/v1` | `NVIDIA_API_KEY` | | Anthropic | `anthropic-prod` | `anthropic` | `https://api.anthropic.com` | `ANTHROPIC_API_KEY` | +| Google Vertex AI | `vertex` | `vertex` | Auto-configured per region | `ANTHROPIC_VERTEX_PROJECT_ID` (OAuth auto-generated) | | Baseten | `baseten` | `openai` | `https://inference.baseten.co/v1` | `OPENAI_API_KEY` | | Bitdeer AI | `bitdeer` | `openai` | `https://api-inference.bitdeer.ai/v1` | `OPENAI_API_KEY` | | Deepinfra | `deepinfra` | `openai` | `https://api.deepinfra.com/v1/openai` | `OPENAI_API_KEY` | diff --git a/scripts/rebuild-cluster.sh b/scripts/rebuild-cluster.sh new file mode 100755 index 000000000..f836a832a --- /dev/null +++ b/scripts/rebuild-cluster.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash + +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Quick rebuild script for development +# Restarts the cluster container with the latest code changes + +set -euo pipefail + +echo "=== OpenShell Quick Rebuild ===" +echo "" + +# Stop and remove cluster container +echo "Stopping cluster container..." +podman stop openshell-cluster-openshell 2>/dev/null || true +podman rm openshell-cluster-openshell 2>/dev/null || true + +# Remove old cluster image +echo "Removing old cluster image..." +podman rmi localhost/openshell/cluster:dev 2>/dev/null || true + +# Rebuild and start cluster +echo "Rebuilding cluster with latest code..." +mise run cluster:build:full + +echo "" +echo "=== Rebuild Complete ===" +echo "" +echo "Next steps:" +echo " 1. Recreate provider: openshell provider create --name --type --from-existing" +echo " 2. Configure inference: openshell inference set --provider --model " +echo " 3. Recreate sandboxes: openshell sandbox create ..." 
+echo "" diff --git a/tasks/scripts/cluster-deploy-fast.sh b/tasks/scripts/cluster-deploy-fast.sh index 86fe9746d..9bdc6a604 100755 --- a/tasks/scripts/cluster-deploy-fast.sh +++ b/tasks/scripts/cluster-deploy-fast.sh @@ -28,6 +28,23 @@ log_duration() { echo "${label} took $((end - start))s" } +# Read lines into an array variable (bash 3 & 4 compatible) +# Usage: read_lines_into_array array_name < <(command) +read_lines_into_array() { + local array_name=$1 + if ((BASH_VERSINFO[0] >= 4)); then + # Bash 4+: use mapfile (faster) + mapfile -t "$array_name" + else + # Bash 3: use while loop + local line + eval "$array_name=()" + while IFS= read -r line; do + eval "$array_name+=(\"\$line\")" + done + fi +} + if ! $CONTAINER_RUNTIME ps -q --filter "name=^${CONTAINER_NAME}$" --filter "health=healthy" | grep -q .; then echo "Error: Cluster container '${CONTAINER_NAME}' is not running or not healthy." echo "Start the cluster first with: mise run cluster" @@ -86,7 +103,7 @@ fi declare -a changed_files=() detect_start=$(date +%s) -mapfile -t changed_files < <( +read_lines_into_array changed_files < <( { git diff --name-only git diff --name-only --cached From 308dc5cfd3f1358432e8d849460d1d6250877a3a Mon Sep 17 00:00:00 2001 From: itdove Date: Mon, 6 Apr 2026 23:22:08 -0400 Subject: [PATCH 12/18] docs: add Vertex AI example with network policy Added examples/vertex-ai/ directory with: - sandbox-policy.yaml: Network policy for Vertex AI endpoints - README.md: Quick start guide with links to full documentation Provides ready-to-use policy file for Vertex AI integration. 
--- examples/vertex-ai/README.md | 46 +++++++++++++++++++++ examples/vertex-ai/sandbox-policy.yaml | 55 ++++++++++++++++++++++++++ 2 files changed, 101 insertions(+) create mode 100644 examples/vertex-ai/README.md create mode 100644 examples/vertex-ai/sandbox-policy.yaml diff --git a/examples/vertex-ai/README.md b/examples/vertex-ai/README.md new file mode 100644 index 000000000..ec0cdf78a --- /dev/null +++ b/examples/vertex-ai/README.md @@ -0,0 +1,46 @@ +# Google Cloud Vertex AI Example + +This example demonstrates how to use OpenShell with Google Cloud Vertex AI to run Claude models via GCP infrastructure. + +## Quick Start + +```bash +# Configure GCP credentials +export ANTHROPIC_VERTEX_PROJECT_ID=your-gcp-project-id +gcloud auth application-default login + +# Create provider +openshell provider create --name vertex --type vertex --from-existing + +# Create sandbox with policy +openshell sandbox create --name vertex-test --provider vertex \ + --upload ~/.config/gcloud/:.config/gcloud/ \ + --policy examples/vertex-ai/sandbox-policy.yaml + +# Inside sandbox +claude # Automatically uses Vertex AI +``` + +## What's Included + +- **`sandbox-policy.yaml`**: Network policy allowing Google OAuth and Vertex AI endpoints + - Supports major GCP regions (us-east5, us-central1, us-west1, europe-west1, europe-west4, asia-northeast1) + - Enables direct Claude CLI usage + - Enables `inference.local` routing + +## Documentation + +For detailed setup instructions, troubleshooting, and configuration options, see: + +- [Vertex AI Provider Configuration](../../docs/inference/configure.md#google-cloud-vertex-ai) +- [Provider Management](../../docs/sandboxes/manage-providers.md) +- [Inference Routing](../../docs/inference/configure.md) + +## Adding Regions + +To support additional GCP regions, add them to `sandbox-policy.yaml`: + +```yaml +- host: asia-southeast1-aiplatform.googleapis.com + port: 443 +``` diff --git a/examples/vertex-ai/sandbox-policy.yaml 
b/examples/vertex-ai/sandbox-policy.yaml new file mode 100644 index 000000000..81fa36d10 --- /dev/null +++ b/examples/vertex-ai/sandbox-policy.yaml @@ -0,0 +1,55 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Sandbox policy for Google Cloud Vertex AI +# +# This policy allows sandboxes to access Google Cloud endpoints required for +# Vertex AI with Anthropic Claude models. + +version: 1 + +network_policies: + google_vertex: + name: google-vertex + endpoints: + # Google OAuth endpoints for authentication + - host: oauth2.googleapis.com + port: 443 + - host: accounts.google.com + port: 443 + - host: www.googleapis.com + port: 443 + + # Vertex AI endpoints (global and regional) + - host: aiplatform.googleapis.com + port: 443 + - host: us-east5-aiplatform.googleapis.com + port: 443 + - host: us-central1-aiplatform.googleapis.com + port: 443 + - host: us-west1-aiplatform.googleapis.com + port: 443 + - host: europe-west1-aiplatform.googleapis.com + port: 443 + - host: europe-west4-aiplatform.googleapis.com + port: 443 + - host: asia-northeast1-aiplatform.googleapis.com + port: 443 + + binaries: + # Claude CLI for direct Vertex AI usage + - path: /usr/local/bin/claude + # Python for Anthropic SDK usage + - path: /usr/bin/python3 + # curl for testing + - path: /usr/bin/curl + + inference_local: + name: inference-local + endpoints: + # Local inference routing endpoint + - host: inference.local + port: 80 + binaries: + - path: /usr/bin/curl + - path: /usr/bin/python3 From 83a94b9fbc61951e7997fbeeedf6ac2dbc787747 Mon Sep 17 00:00:00 2001 From: itdove Date: Tue, 7 Apr 2026 09:53:28 -0400 Subject: [PATCH 13/18] fix(build): handle Podman --push flag and array expansion Podman does not support --push flag in build command like Docker buildx. This commit fixes two issues: 1. 
docker-build-image.sh: Filter out --push flag and execute push as separate command after build completes 2. docker-publish-multiarch.sh: Use safe array expansion syntax to avoid unbound variable errors with set -u when EXTRA_TAGS is empty Note: Multi-arch builds with Podman still require manual workflow due to cross-compilation toolchain issues. Use /tmp/build-multiarch-local.sh for local multi-arch builds with QEMU emulation. Co-Authored-By: Claude Sonnet 4.5 --- tasks/scripts/docker-build-image.sh | 11 ++++++++++- tasks/scripts/docker-publish-multiarch.sh | 4 ++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/tasks/scripts/docker-build-image.sh b/tasks/scripts/docker-build-image.sh index 38b200a2e..a76b01d12 100755 --- a/tasks/scripts/docker-build-image.sh +++ b/tasks/scripts/docker-build-image.sh @@ -212,11 +212,13 @@ if [[ "${CONTAINER_RUNTIME}" == "podman" ]]; then ARCH_ARGS+=(--build-arg "BUILDARCH=${TARGETARCH}") fi - # Filter OUTPUT_ARGS: Podman stores images locally by default (no --load) + # Filter OUTPUT_ARGS: Podman doesn't support --load or --push in build command PODMAN_OUTPUT_ARGS=() + PODMAN_SHOULD_PUSH=0 for arg in ${OUTPUT_ARGS[@]+"${OUTPUT_ARGS[@]}"}; do case "${arg}" in --load) ;; # implicit in Podman + --push) PODMAN_SHOULD_PUSH=1 ;; # push after build *) PODMAN_OUTPUT_ARGS+=("${arg}") ;; esac done @@ -227,6 +229,13 @@ if [[ "${CONTAINER_RUNTIME}" == "podman" ]]; then ${TLS_ARGS[@]+"${TLS_ARGS[@]}"} \ ${PODMAN_OUTPUT_ARGS[@]+"${PODMAN_OUTPUT_ARGS[@]}"} \ . + + # Push after build if requested (Podman doesn't support --push in build) + if [[ "${PODMAN_SHOULD_PUSH}" == "1" && "${IS_FINAL_IMAGE}" == "1" ]]; then + echo "Pushing ${IMAGE_NAME}:${IMAGE_TAG}..." 
+ podman_local_tls_args "${IMAGE_NAME}" + podman push ${PODMAN_TLS_ARGS[@]+"${PODMAN_TLS_ARGS[@]}"} "${IMAGE_NAME}:${IMAGE_TAG}" + fi else # Docker: use buildx docker buildx build \ diff --git a/tasks/scripts/docker-publish-multiarch.sh b/tasks/scripts/docker-publish-multiarch.sh index f83a7c203..e8185a952 100755 --- a/tasks/scripts/docker-publish-multiarch.sh +++ b/tasks/scripts/docker-publish-multiarch.sh @@ -50,7 +50,7 @@ echo echo "Building multi-arch cluster image..." tasks/scripts/docker-build-image.sh cluster -TAGS_TO_APPLY=("${EXTRA_TAGS[@]}") +TAGS_TO_APPLY=(${EXTRA_TAGS[@]+"${EXTRA_TAGS[@]}"}) if [[ "${TAG_LATEST}" == "true" ]]; then TAGS_TO_APPLY+=("latest") fi @@ -58,7 +58,7 @@ fi if [[ ${#TAGS_TO_APPLY[@]} -gt 0 ]]; then for component in gateway cluster; do full_image="${REGISTRY}/${component}" - for tag in "${TAGS_TO_APPLY[@]}"; do + for tag in ${TAGS_TO_APPLY[@]+"${TAGS_TO_APPLY[@]}"}; do [[ "${tag}" == "${IMAGE_TAG}" ]] && continue echo "Tagging ${full_image}:${tag}..." if [[ "${CONTAINER_RUNTIME}" == "podman" ]]; then From b2d65457a193561ffcfde5ffce6545608c0e3f35 Mon Sep 17 00:00:00 2001 From: itdove Date: Tue, 7 Apr 2026 12:55:19 -0400 Subject: [PATCH 14/18] feat(build): add Podman multi-arch support to docker-publish-multiarch.sh Add Podman-specific multi-architecture build logic to complement existing Docker buildx support. Podman builds each platform sequentially using manifest lists, while Docker buildx builds in parallel. 
Changes: - Detect Podman and use manifest-based approach for multi-arch builds - Build each platform (arm64, amd64) separately with explicit TARGETARCH - Create and push manifest list combining all architectures - Preserve existing Docker buildx workflow unchanged - Add informative logging about sequential vs parallel builds Build times: - Podman: Sequential builds (~30-40 min on Linux, ~45-60 min on macOS) - Docker buildx: Parallel builds (~20-30 min) This enables multi-arch image publishing on systems using Podman as the container runtime, supporting both Apple Silicon and Intel architectures. --- tasks/scripts/docker-publish-multiarch.sh | 66 +++++++++++++++++++---- 1 file changed, 57 insertions(+), 9 deletions(-) diff --git a/tasks/scripts/docker-publish-multiarch.sh b/tasks/scripts/docker-publish-multiarch.sh index e8185a952..398c97c00 100755 --- a/tasks/scripts/docker-publish-multiarch.sh +++ b/tasks/scripts/docker-publish-multiarch.sh @@ -27,8 +27,56 @@ fi if [[ "${CONTAINER_RUNTIME}" == "podman" ]]; then echo "Using Podman for multi-arch build (podman manifest)" + echo "Note: Podman builds platforms sequentially (slower than Docker buildx)" export DOCKER_BUILDER="" + + # Podman: build each platform separately and create manifest + IFS=',' read -ra PLATFORM_ARRAY <<< "${PLATFORMS}" + + for component in gateway cluster; do + full_image="${REGISTRY}/${component}" + echo "" + echo "=== Building multi-arch ${component} image ===" + + # Create manifest list + podman manifest rm "${full_image}:${IMAGE_TAG}" 2>/dev/null || true + podman manifest create "${full_image}:${IMAGE_TAG}" + + # Build for each platform + for platform in "${PLATFORM_ARRAY[@]}"; do + arch="${platform##*/}" + case "${arch}" in + amd64) target_arch="amd64" ;; + arm64) target_arch="arm64" ;; + *) echo "Unsupported arch: ${arch}" >&2; exit 1 ;; + esac + + echo "Building ${component} for ${platform}..." 
+ + # Package Helm chart for cluster builds + if [[ "${component}" == "cluster" ]]; then + mkdir -p deploy/docker/.build/charts + helm package deploy/helm/openshell -d deploy/docker/.build/charts/ >/dev/null + fi + + # Build with explicit TARGETARCH/BUILDARCH to avoid cross-compilation + # (QEMU emulation handles running the different architecture) + podman build --platform "${platform}" \ + --build-arg TARGETARCH="${target_arch}" \ + --build-arg BUILDARCH="${target_arch}" \ + --manifest "${full_image}:${IMAGE_TAG}" \ + -f deploy/docker/Dockerfile.images \ + --target "${component}" \ + . + done + + # Push manifest + echo "Pushing ${full_image}:${IMAGE_TAG}..." + podman manifest push "${full_image}:${IMAGE_TAG}" \ + "docker://${full_image}:${IMAGE_TAG}" + done else + # Docker: use buildx BUILDER_NAME=${DOCKER_BUILDER:-multiarch} if docker buildx inspect "${BUILDER_NAME}" >/dev/null 2>&1; then echo "Using existing buildx builder: ${BUILDER_NAME}" @@ -38,17 +86,17 @@ else docker buildx create --name "${BUILDER_NAME}" --use --bootstrap fi export DOCKER_BUILDER="${BUILDER_NAME}" -fi -export DOCKER_PLATFORM="${PLATFORMS}" -export DOCKER_PUSH=1 -export IMAGE_REGISTRY="${REGISTRY}" + export DOCKER_PLATFORM="${PLATFORMS}" + export DOCKER_PUSH=1 + export IMAGE_REGISTRY="${REGISTRY}" -echo "Building multi-arch gateway image..." -tasks/scripts/docker-build-image.sh gateway + echo "Building multi-arch gateway image..." + tasks/scripts/docker-build-image.sh gateway -echo -echo "Building multi-arch cluster image..." -tasks/scripts/docker-build-image.sh cluster + echo + echo "Building multi-arch cluster image..." 
+ tasks/scripts/docker-build-image.sh cluster +fi TAGS_TO_APPLY=(${EXTRA_TAGS[@]+"${EXTRA_TAGS[@]}"}) if [[ "${TAG_LATEST}" == "true" ]]; then From 8a27b2fa20dd1a882e7553986fe0fc9a90945f33 Mon Sep 17 00:00:00 2001 From: itdove Date: Tue, 7 Apr 2026 14:47:26 -0400 Subject: [PATCH 15/18] fix: apply cargo fmt formatting to vertex provider Fix CI formatting check failures: - Split long .insert() calls across multiple lines - Reformat MockDiscoveryContext initialization No functional changes, formatting only. --- crates/openshell-providers/src/providers/vertex.rs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/crates/openshell-providers/src/providers/vertex.rs b/crates/openshell-providers/src/providers/vertex.rs index 6daadd5f9..de8d45d31 100644 --- a/crates/openshell-providers/src/providers/vertex.rs +++ b/crates/openshell-providers/src/providers/vertex.rs @@ -49,7 +49,9 @@ impl ProviderPlugin for VertexProvider { // Set CLAUDE_CODE_USE_VERTEX=1 to enable Vertex AI in claude CLI // Must be in credentials (not config) to be injected into sandbox environment - provider.credentials.insert("CLAUDE_CODE_USE_VERTEX".to_string(), "1".to_string()); + provider + .credentials + .insert("CLAUDE_CODE_USE_VERTEX".to_string(), "1".to_string()); // Generate OAuth token from Application Default Credentials // Try to generate token, but don't fail if we're in a nested runtime context @@ -65,7 +67,9 @@ impl ProviderPlugin for VertexProvider { if let Some(token) = token { // Store the OAuth token as VERTEX_OAUTH_TOKEN // The inference router will use this as the Bearer token - provider.credentials.insert("VERTEX_OAUTH_TOKEN".to_string(), token); + provider + .credentials + .insert("VERTEX_OAUTH_TOKEN".to_string(), token); } } @@ -85,8 +89,8 @@ mod tests { #[test] fn discovers_vertex_env_credentials() { - let ctx = MockDiscoveryContext::new() - .with_env("ANTHROPIC_VERTEX_PROJECT_ID", "my-gcp-project"); + let ctx = + 
MockDiscoveryContext::new().with_env("ANTHROPIC_VERTEX_PROJECT_ID", "my-gcp-project"); let discovered = discover_with_spec(&SPEC, &ctx) .expect("discovery") .expect("provider"); From 8241dc702323efd89281a42b458e84e22cd5b2b1 Mon Sep 17 00:00:00 2001 From: itdove Date: Tue, 7 Apr 2026 16:16:06 -0400 Subject: [PATCH 16/18] refactor: remove OAuth token storage from Vertex provider Remove short-lived OAuth token generation and storage in gateway database. Tokens are now generated on-demand inside sandboxes from uploaded ADC files. Changes: - Remove generate_oauth_token() function and gcp_auth dependency - Remove VERTEX_OAUTH_TOKEN from direct credential injection - Remove OAuth token insertion in discover_existing() - Add unset IMAGE_TAG/TAG_LATEST in podman.env to prevent build conflicts - Update Cargo.lock to remove gcp_auth dependency tree Benefits: - No stale token pollution in database - Tokens generated fresh on-demand (auto-refresh via ADC) - Simpler provider creation (synchronous, no async OAuth) - Reduced dependency footprint (removes 32 packages) - Better security (tokens not persisted in database) Token lifecycle: - Provider stores only ANTHROPIC_VERTEX_PROJECT_ID and region - Sandboxes require --upload ~/.config/gcloud/ for token generation - Claude CLI uses gcp_auth to generate/refresh tokens from ADC - Tokens valid for 1 hour, automatically refreshed via refresh token --- Cargo.lock | 38 ------------------- crates/openshell-providers/Cargo.toml | 2 - .../src/providers/vertex.rs | 37 ++---------------- crates/openshell-sandbox/src/secrets.rs | 4 +- scripts/podman.env | 5 +++ 5 files changed, 12 insertions(+), 74 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1e2b542ee..98797cc24 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1509,32 +1509,6 @@ dependencies = [ "slab", ] -[[package]] -name = "gcp_auth" -version = "0.12.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"c2b3d0b409a042a380111af38136310839af8ac1a0917fb6e84515ed1e4bf3ee" -dependencies = [ - "async-trait", - "base64 0.22.1", - "bytes", - "chrono", - "http", - "http-body-util", - "hyper", - "hyper-rustls", - "hyper-util", - "ring", - "rustls-pki-types", - "serde", - "serde_json", - "thiserror 2.0.18", - "tokio", - "tracing", - "tracing-futures", - "url", -] - [[package]] name = "generic-array" version = "0.14.7" @@ -2945,10 +2919,8 @@ dependencies = [ name = "openshell-providers" version = "0.0.0" dependencies = [ - "gcp_auth", "openshell-core", "thiserror 2.0.18", - "tokio", ] [[package]] @@ -5406,16 +5378,6 @@ dependencies = [ "valuable", ] -[[package]] -name = "tracing-futures" -version = "0.2.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97d095ae15e245a057c8e8451bab9b3ee1e1f68e9ba2b4fbc18d0ac5237835f2" -dependencies = [ - "pin-project", - "tracing", -] - [[package]] name = "tracing-log" version = "0.2.0" diff --git a/crates/openshell-providers/Cargo.toml b/crates/openshell-providers/Cargo.toml index 0cf14ec2b..41f9ed6c0 100644 --- a/crates/openshell-providers/Cargo.toml +++ b/crates/openshell-providers/Cargo.toml @@ -13,8 +13,6 @@ repository.workspace = true [dependencies] openshell-core = { path = "../openshell-core" } thiserror = { workspace = true } -gcp_auth = "0.12" -tokio = { workspace = true } [lints] workspace = true diff --git a/crates/openshell-providers/src/providers/vertex.rs b/crates/openshell-providers/src/providers/vertex.rs index de8d45d31..5b2ecdf9d 100644 --- a/crates/openshell-providers/src/providers/vertex.rs +++ b/crates/openshell-providers/src/providers/vertex.rs @@ -16,21 +16,6 @@ pub const SPEC: ProviderDiscoverySpec = ProviderDiscoverySpec { // Additional config keys for Vertex AI const VERTEX_CONFIG_KEYS: &[&str] = &["ANTHROPIC_VERTEX_REGION"]; -/// Generate an OAuth token from GCP Application Default Credentials for Vertex AI. -/// -/// Returns `None` if ADC is not configured or token generation fails. 
-async fn generate_oauth_token() -> Option<String> {
-    // Try to find an appropriate token provider (checks ADC, service account, metadata server, etc.)
-    let provider = gcp_auth::provider().await.ok()?;
-
-    // Get token for Vertex AI scope
-    // Vertex AI uses the Cloud Platform scope
-    let scopes = &["https://www.googleapis.com/auth/cloud-platform"];
-    let token = provider.token(scopes).await.ok()?;
-
-    Some(token.as_str().to_string())
-}
-
 impl ProviderPlugin for VertexProvider {
     fn id(&self) -> &'static str {
         SPEC.id
@@ -53,24 +38,10 @@ impl ProviderPlugin for VertexProvider {
                 .credentials
                 .insert("CLAUDE_CODE_USE_VERTEX".to_string(), "1".to_string());
 
-            // Generate OAuth token from Application Default Credentials
-            // Try to generate token, but don't fail if we're in a nested runtime context
-            let token = std::thread::spawn(|| {
-                tokio::runtime::Runtime::new()
-                    .ok()
-                    .and_then(|rt| rt.block_on(generate_oauth_token()))
-            })
-            .join()
-            .ok()
-            .flatten();
-
-            if let Some(token) = token {
-                // Store the OAuth token as VERTEX_OAUTH_TOKEN
-                // The inference router will use this as the Bearer token
-                provider
-                    .credentials
-                    .insert("VERTEX_OAUTH_TOKEN".to_string(), token);
-            }
+            // NOTE: We do NOT generate/store VERTEX_OAUTH_TOKEN here.
+            // OAuth tokens are short-lived (~1 hour) and storing them leads to stale token pollution.
+            // Instead, sandboxes generate fresh tokens on-demand from the uploaded ADC file
+            // (requires --upload ~/.config/gcloud/:.config/gcloud/ when creating sandbox).
} Ok(discovered) diff --git a/crates/openshell-sandbox/src/secrets.rs b/crates/openshell-sandbox/src/secrets.rs index 0cd188b6e..87c353c83 100644 --- a/crates/openshell-sandbox/src/secrets.rs +++ b/crates/openshell-sandbox/src/secrets.rs @@ -23,8 +23,10 @@ pub(crate) const PLACEHOLDER_PREFIX_PUBLIC: &str = PLACEHOLDER_PREFIX; fn direct_inject_credentials() -> &'static [&'static str] { &[ // Vertex AI credentials for claude CLI + // NOTE: VERTEX_OAUTH_TOKEN is NOT included here - sandboxes generate + // fresh tokens on-demand from the uploaded ADC file instead of using + // a pre-generated (and likely expired) token from the provider database. "ANTHROPIC_VERTEX_PROJECT_ID", - "VERTEX_OAUTH_TOKEN", "ANTHROPIC_VERTEX_REGION", "CLAUDE_CODE_USE_VERTEX", ] diff --git a/scripts/podman.env b/scripts/podman.env index 5aba469b2..459627c0e 100644 --- a/scripts/podman.env +++ b/scripts/podman.env @@ -8,6 +8,11 @@ MACHINE_NAME="${PODMAN_MACHINE_NAME:-openshell}" +# Clear variables from other build workflows that would interfere with local development +unset IMAGE_TAG +unset TAG_LATEST +unset REGISTRY + # Get Podman socket path from the machine if command -v podman &>/dev/null; then SOCKET_PATH=$(podman machine inspect "${MACHINE_NAME}" --format '{{.ConnectionInfo.PodmanSocket.Path}}' 2>/dev/null) From 987b2a0e4d2d6154aa3ba19634c0a6eed843b609 Mon Sep 17 00:00:00 2001 From: itdove Date: Tue, 7 Apr 2026 16:27:27 -0400 Subject: [PATCH 17/18] docs(vertex): improve ADC detection and troubleshooting docs - Check for ADC in both GOOGLE_APPLICATION_CREDENTIALS and default location - Add critical warning about --upload ~/.config/gcloud/ requirement - Document security model for credential injection strategy - Add comprehensive troubleshooting section with solutions for: - Authentication failures (missing ADC) - Project not found errors - Region not supported errors --- .../src/providers/vertex.rs | 28 ++++++ examples/vertex-ai/README.md | 93 +++++++++++++++++-- 2 files changed, 115 
insertions(+), 6 deletions(-) diff --git a/crates/openshell-providers/src/providers/vertex.rs b/crates/openshell-providers/src/providers/vertex.rs index 5b2ecdf9d..38d54a24e 100644 --- a/crates/openshell-providers/src/providers/vertex.rs +++ b/crates/openshell-providers/src/providers/vertex.rs @@ -42,6 +42,34 @@ impl ProviderPlugin for VertexProvider { // OAuth tokens are short-lived (~1 hour) and storing them leads to stale token pollution. // Instead, sandboxes generate fresh tokens on-demand from the uploaded ADC file // (requires --upload ~/.config/gcloud/:.config/gcloud/ when creating sandbox). + + // Warn if ADC doesn't exist on host + let adc_exists = if let Ok(custom_path) = std::env::var("GOOGLE_APPLICATION_CREDENTIALS") { + std::path::Path::new(&custom_path).exists() + } else { + let default_path = format!( + "{}/.config/gcloud/application_default_credentials.json", + std::env::var("HOME").unwrap_or_default() + ); + std::path::Path::new(&default_path).exists() + }; + + if !adc_exists { + eprintln!(); + eprintln!("⚠️ Warning: GCP Application Default Credentials not found"); + eprintln!(" Sandboxes will need ADC uploaded to generate OAuth tokens."); + eprintln!(); + eprintln!(" Configure ADC with:"); + eprintln!(" gcloud auth application-default login"); + eprintln!(); + eprintln!(" Or use a service account key:"); + eprintln!(" export GOOGLE_APPLICATION_CREDENTIALS=/path/to/key.json"); + eprintln!(); + eprintln!(" Then upload credentials when creating sandboxes:"); + eprintln!(" openshell sandbox create --provider vertex \\"); + eprintln!(" --upload ~/.config/gcloud/:.config/gcloud/"); + eprintln!(); + } } Ok(discovered) diff --git a/examples/vertex-ai/README.md b/examples/vertex-ai/README.md index ec0cdf78a..2423c3d04 100644 --- a/examples/vertex-ai/README.md +++ b/examples/vertex-ai/README.md @@ -2,22 +2,32 @@ This example demonstrates how to use OpenShell with Google Cloud Vertex AI to run Claude models via GCP infrastructure. 
+## ⚠️ Critical Requirement
+
+Vertex AI sandboxes **MUST** upload GCP credentials to generate OAuth tokens:
+
+```bash
+--upload ~/.config/gcloud/:.config/gcloud/
+```
+
+Without this upload, token generation will fail and sandboxes cannot connect to Vertex AI.
+
 ## Quick Start
 
 ```bash
-# Configure GCP credentials
+# 1. Configure GCP credentials
 export ANTHROPIC_VERTEX_PROJECT_ID=your-gcp-project-id
 gcloud auth application-default login
 
-# Create provider
+# 2. Create provider
 openshell provider create --name vertex --type vertex --from-existing
 
-# Create sandbox with policy
+# 3. Create sandbox with credentials uploaded (REQUIRED for OAuth token generation)
 openshell sandbox create --name vertex-test --provider vertex \
-  --upload ~/.config/gcloud/:.config/gcloud/ \
+  --upload ~/.config/gcloud/:.config/gcloud/ \
   --policy examples/vertex-ai/sandbox-policy.yaml
 
-# Inside sandbox
+# 4. Inside sandbox
 claude # Automatically uses Vertex AI
 ```
 
@@ -28,9 +38,80 @@ claude # Automatically uses Vertex AI
 - Enables direct Claude CLI usage
 - Enables `inference.local` routing
 
+## Security Model
+
+### Credential Injection
+
+Vertex AI uses selective credential injection for CLI tool compatibility:
+
+**Directly injected (visible in `/proc/<pid>/environ`):**
+- `ANTHROPIC_VERTEX_PROJECT_ID` - Not sensitive (public project ID, visible in API URLs)
+- `CLAUDE_CODE_USE_VERTEX` - Configuration flag (boolean)
+- `ANTHROPIC_VERTEX_REGION` - Public metadata (region name)
+
+**Generated in sandbox (not stored in gateway database):**
+- OAuth access tokens - Generated on-demand from uploaded ADC file, automatically refreshed
+
+**Trade-off:** Direct injection required for Claude CLI compatibility (cannot use HTTP proxy placeholders). Risk is low since no secrets are exposed via environment variables.
+
+## Troubleshooting
+
+### "Authentication failed" or "invalid credentials"
+
+**Cause:** Sandbox cannot generate OAuth tokens (ADC file not uploaded or missing).
+
+**Solution:**
+1. Verify ADC exists on host:
+   ```bash
+   ls -la ~/.config/gcloud/application_default_credentials.json
+   ```
+
+2. If missing, configure ADC:
+   ```bash
+   gcloud auth application-default login
+   ```
+
+3. Ensure sandbox creation includes upload:
+   ```bash
+   openshell sandbox create --provider vertex \
+     --upload ~/.config/gcloud/:.config/gcloud/  # ← Required
+   ```
+
+### "Project not found" errors
+
+**Cause:** Invalid or inaccessible GCP project ID.
+
+**Solution:**
+1. Verify project exists and you have access:
+   ```bash
+   gcloud projects describe $ANTHROPIC_VERTEX_PROJECT_ID
+   ```
+
+2. Check Vertex AI API is enabled:
+   ```bash
+   gcloud services list --enabled --project=$ANTHROPIC_VERTEX_PROJECT_ID | grep aiplatform
+   ```
+
+3. Enable if needed:
+   ```bash
+   gcloud services enable aiplatform.googleapis.com --project=$ANTHROPIC_VERTEX_PROJECT_ID
+   ```
+
+### "Region not supported" errors
+
+**Cause:** Vertex AI endpoint for your region not in network policy.
+
+**Solution:** Add region to `sandbox-policy.yaml`:
+```yaml
+- host: your-region-aiplatform.googleapis.com
+  port: 443
+```
+
+Supported regions: us-central1, us-east5, us-west1, europe-west1, europe-west4, asia-northeast1, asia-southeast1
+
 ## Documentation
 
-For detailed setup instructions, troubleshooting, and configuration options, see:
+For detailed setup instructions and configuration options, see:
 
 - [Vertex AI Provider Configuration](../../docs/inference/configure.md#google-cloud-vertex-ai)
 - [Provider Management](../../docs/sandboxes/manage-providers.md)

From c58f3c7eec90b8dd252e4943ee1c9f062e42515f Mon Sep 17 00:00:00 2001
From: itdove
Date: Tue, 7 Apr 2026 16:29:40 -0400
Subject: [PATCH 18/18] style(vertex): apply cargo fmt formatting

---
 .../src/providers/vertex.rs | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/crates/openshell-providers/src/providers/vertex.rs b/crates/openshell-providers/src/providers/vertex.rs
index 38d54a24e..f5b5b67d0 100644
--- a/crates/openshell-providers/src/providers/vertex.rs
+++ b/crates/openshell-providers/src/providers/vertex.rs
@@ -44,15 +44,16 @@ impl ProviderPlugin for VertexProvider {
             // (requires --upload ~/.config/gcloud/:.config/gcloud/ when creating sandbox).
 
             // Warn if ADC doesn't exist on host
-            let adc_exists = if let Ok(custom_path) = std::env::var("GOOGLE_APPLICATION_CREDENTIALS") {
-                std::path::Path::new(&custom_path).exists()
-            } else {
-                let default_path = format!(
-                    "{}/.config/gcloud/application_default_credentials.json",
-                    std::env::var("HOME").unwrap_or_default()
-                );
-                std::path::Path::new(&default_path).exists()
-            };
+            let adc_exists =
+                if let Ok(custom_path) = std::env::var("GOOGLE_APPLICATION_CREDENTIALS") {
+                    std::path::Path::new(&custom_path).exists()
+                } else {
+                    let default_path = format!(
+                        "{}/.config/gcloud/application_default_credentials.json",
+                        std::env::var("HOME").unwrap_or_default()
+                    );
+                    std::path::Path::new(&default_path).exists()
+                };
 
             if !adc_exists {
                 eprintln!();