From 5a030d053a549ea7fe24978947d276c09c55375d Mon Sep 17 00:00:00 2001 From: itdove Date: Mon, 6 Apr 2026 17:20:37 -0400 Subject: [PATCH 01/18] feat(providers): add Vertex AI provider type - Add vertex provider plugin with ANTHROPIC_VERTEX_PROJECT_ID credential - Add vertex inference profile with Anthropic-compatible protocols - Register vertex in provider registry and CLI - Add vertex to supported inference provider types - Fix scripts/podman.env to use correct env var names for local registry - Update docs for simplified CLI install workflow Known limitation: GCP OAuth authentication not yet implemented. Vertex provider can be created and configured but API calls will fail until OAuth token generation is added. --- crates/openshell-cli/src/main.rs | 2 + crates/openshell-core/src/inference.rs | 12 +++++ crates/openshell-providers/src/lib.rs | 2 + .../openshell-providers/src/providers/mod.rs | 1 + .../src/providers/vertex.rs | 47 +++++++++++++++++++ crates/openshell-server/src/inference.rs | 2 +- docs/get-started/install-podman-macos.md | 12 ++--- scripts/podman.env | 10 +++- 8 files changed, 78 insertions(+), 10 deletions(-) create mode 100644 crates/openshell-providers/src/providers/vertex.rs diff --git a/crates/openshell-cli/src/main.rs b/crates/openshell-cli/src/main.rs index 0d546c7b1..5277ab805 100644 --- a/crates/openshell-cli/src/main.rs +++ b/crates/openshell-cli/src/main.rs @@ -615,6 +615,7 @@ enum CliProviderType { Gitlab, Github, Outlook, + Vertex, } #[derive(Clone, Debug, ValueEnum)] @@ -646,6 +647,7 @@ impl CliProviderType { Self::Gitlab => "gitlab", Self::Github => "github", Self::Outlook => "outlook", + Self::Vertex => "vertex", } } } diff --git a/crates/openshell-core/src/inference.rs b/crates/openshell-core/src/inference.rs index a06c427f8..78fe72310 100644 --- a/crates/openshell-core/src/inference.rs +++ b/crates/openshell-core/src/inference.rs @@ -86,6 +86,16 @@ static NVIDIA_PROFILE: InferenceProviderProfile = InferenceProviderProfile { 
default_headers: &[], }; +static VERTEX_PROFILE: InferenceProviderProfile = InferenceProviderProfile { + provider_type: "vertex", + default_base_url: "https://us-central1-aiplatform.googleapis.com/v1", + protocols: ANTHROPIC_PROTOCOLS, + credential_key_names: &["ANTHROPIC_VERTEX_PROJECT_ID"], + base_url_config_keys: &["ANTHROPIC_VERTEX_REGION", "VERTEX_BASE_URL"], + auth: AuthHeader::Custom("x-api-key"), + default_headers: &[("anthropic-version", "2023-06-01")], +}; + /// Look up the inference provider profile for a given provider type. /// /// Returns `None` for provider types that don't support inference routing @@ -95,6 +105,7 @@ pub fn profile_for(provider_type: &str) -> Option<&'static InferenceProviderProf "openai" => Some(&OPENAI_PROFILE), "anthropic" => Some(&ANTHROPIC_PROFILE), "nvidia" => Some(&NVIDIA_PROFILE), + "vertex" => Some(&VERTEX_PROFILE), _ => None, } } @@ -176,6 +187,7 @@ mod tests { assert!(profile_for("openai").is_some()); assert!(profile_for("anthropic").is_some()); assert!(profile_for("nvidia").is_some()); + assert!(profile_for("vertex").is_some()); assert!(profile_for("OpenAI").is_some()); // case insensitive } diff --git a/crates/openshell-providers/src/lib.rs b/crates/openshell-providers/src/lib.rs index e2bcc0c09..2fa771950 100644 --- a/crates/openshell-providers/src/lib.rs +++ b/crates/openshell-providers/src/lib.rs @@ -86,6 +86,7 @@ impl ProviderRegistry { registry.register(providers::gitlab::GitlabProvider); registry.register(providers::github::GithubProvider); registry.register(providers::outlook::OutlookProvider); + registry.register(providers::vertex::VertexProvider); registry } @@ -138,6 +139,7 @@ pub fn normalize_provider_type(input: &str) -> Option<&'static str> { "gitlab" | "glab" => Some("gitlab"), "github" | "gh" => Some("github"), "outlook" => Some("outlook"), + "vertex" => Some("vertex"), _ => None, } } diff --git a/crates/openshell-providers/src/providers/mod.rs b/crates/openshell-providers/src/providers/mod.rs index 
6fe395135..19f9c54a5 100644 --- a/crates/openshell-providers/src/providers/mod.rs +++ b/crates/openshell-providers/src/providers/mod.rs @@ -12,3 +12,4 @@ pub mod nvidia; pub mod openai; pub mod opencode; pub mod outlook; +pub mod vertex; diff --git a/crates/openshell-providers/src/providers/vertex.rs b/crates/openshell-providers/src/providers/vertex.rs new file mode 100644 index 000000000..92e77002a --- /dev/null +++ b/crates/openshell-providers/src/providers/vertex.rs @@ -0,0 +1,47 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +use crate::{ + ProviderDiscoverySpec, ProviderError, ProviderPlugin, RealDiscoveryContext, discover_with_spec, +}; + +pub struct VertexProvider; + +pub const SPEC: ProviderDiscoverySpec = ProviderDiscoverySpec { + id: "vertex", + credential_env_vars: &["ANTHROPIC_VERTEX_PROJECT_ID"], +}; + +impl ProviderPlugin for VertexProvider { + fn id(&self) -> &'static str { + SPEC.id + } + + fn discover_existing(&self) -> Result, ProviderError> { + discover_with_spec(&SPEC, &RealDiscoveryContext) + } + + fn credential_env_vars(&self) -> &'static [&'static str] { + SPEC.credential_env_vars + } +} + +#[cfg(test)] +mod tests { + use super::SPEC; + use crate::discover_with_spec; + use crate::test_helpers::MockDiscoveryContext; + + #[test] + fn discovers_vertex_env_credentials() { + let ctx = MockDiscoveryContext::new() + .with_env("ANTHROPIC_VERTEX_PROJECT_ID", "my-gcp-project"); + let discovered = discover_with_spec(&SPEC, &ctx) + .expect("discovery") + .expect("provider"); + assert_eq!( + discovered.credentials.get("ANTHROPIC_VERTEX_PROJECT_ID"), + Some(&"my-gcp-project".to_string()) + ); + } +} diff --git a/crates/openshell-server/src/inference.rs b/crates/openshell-server/src/inference.rs index 0fb29bde5..5d4014b7a 100644 --- a/crates/openshell-server/src/inference.rs +++ b/crates/openshell-server/src/inference.rs @@ -237,7 +237,7 @@ fn 
resolve_provider_route(provider: &Provider) -> Result/dev/null; then export OPENSHELL_CONTAINER_RUNTIME=podman # Local development image registry - export OPENSHELL_REGISTRY="127.0.0.1:5000/openshell" + export OPENSHELL_IMAGE_REPO_BASE="127.0.0.1:5000/openshell" + export OPENSHELL_REGISTRY_HOST="127.0.0.1:5000" + export OPENSHELL_REGISTRY_NAMESPACE="openshell" + export OPENSHELL_REGISTRY_ENDPOINT="host.containers.internal:5000" + export OPENSHELL_REGISTRY_INSECURE="true" export OPENSHELL_CLUSTER_IMAGE="localhost/openshell/cluster:dev" echo "✓ Podman environment configured:" echo " CONTAINER_HOST=${CONTAINER_HOST}" echo " OPENSHELL_CONTAINER_RUNTIME=${OPENSHELL_CONTAINER_RUNTIME}" - echo " OPENSHELL_REGISTRY=${OPENSHELL_REGISTRY}" + echo " OPENSHELL_IMAGE_REPO_BASE=${OPENSHELL_IMAGE_REPO_BASE}" + echo " OPENSHELL_REGISTRY_HOST=${OPENSHELL_REGISTRY_HOST}" + echo " OPENSHELL_REGISTRY_INSECURE=${OPENSHELL_REGISTRY_INSECURE}" echo " OPENSHELL_CLUSTER_IMAGE=${OPENSHELL_CLUSTER_IMAGE}" fi else From dc3690350254ac84c78873529a44af34bef78451 Mon Sep 17 00:00:00 2001 From: itdove Date: Mon, 6 Apr 2026 17:26:45 -0400 Subject: [PATCH 02/18] docs: clarify that cluster:build:full also starts the gateway - Note that mise run cluster:build:full builds AND starts the gateway - Add verification step after build completes - Clarify that gateway is already running before sandbox creation --- docs/get-started/install-podman-macos.md | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/docs/get-started/install-podman-macos.md b/docs/get-started/install-podman-macos.md index 8a847a547..661abada0 100644 --- a/docs/get-started/install-podman-macos.md +++ b/docs/get-started/install-podman-macos.md @@ -90,12 +90,13 @@ mise run cluster:build:full ``` This command: -- Builds the gateway image +- Builds the gateway and cluster images - Starts a local container registry at `127.0.0.1:5000` -- Builds the cluster image -- Pushes images to the local registry +- Pushes 
the gateway image to the local registry - Bootstraps a k3s cluster inside a Podman container -- Deploys the OpenShell gateway +- Deploys and starts the OpenShell gateway + +**Note:** This command builds the images AND starts the gateway in one step. The gateway will be running when the command completes. Or run the script directly: @@ -119,10 +120,18 @@ cargo install --path crates/openshell-cli --root ~/.local ## Create a Sandbox +The gateway is now running. Create a sandbox to test it: + ```console openshell sandbox create ``` +Verify the gateway is healthy: + +```console +openshell gateway info +``` + ## Cleanup To remove all OpenShell resources and optionally the Podman machine: From a6cc6a4bd2debaee2ad26506308772c8edc7e0c6 Mon Sep 17 00:00:00 2001 From: itdove Date: Mon, 6 Apr 2026 17:54:14 -0400 Subject: [PATCH 03/18] docs: add Vertex AI provider to inference and provider docs - Add vertex to supported provider types table in manage-providers.md - Add Vertex AI provider tab in inference configuration docs - Clarify two usage modes: direct API calls vs inference.local routing - Document prerequisites (GCP project, Application Default Credentials) - Note OAuth limitation only affects inference routing, not direct calls - Keep Vertex docs in provider/inference pages, not installation guides --- docs/inference/configure.md | 21 +++++++++++++++++++++ docs/sandboxes/manage-providers.md | 1 + 2 files changed, 22 insertions(+) diff --git a/docs/inference/configure.md b/docs/inference/configure.md index 78065689e..4798bc09c 100644 --- a/docs/inference/configure.md +++ b/docs/inference/configure.md @@ -100,6 +100,27 @@ This reads `ANTHROPIC_API_KEY` from your environment. 
:::: +::::{tab-item} Google Cloud Vertex AI + +```console +$ export ANTHROPIC_VERTEX_PROJECT_ID=your-gcp-project-id +$ openshell provider create --name vertex-claude --type vertex --from-existing +``` + +This reads `ANTHROPIC_VERTEX_PROJECT_ID` from your environment and makes it available inside sandboxes. + +**Prerequisites:** +- Google Cloud project with Vertex AI API enabled +- Application Default Credentials configured: `gcloud auth application-default login` + +**Usage:** +- **Direct API calls:** Attach this provider to sandboxes to inject the project ID credential. Call Vertex AI directly from your code using the Anthropic SDK. +- **Inference routing:** Configure `inference.local` to proxy requests to Vertex AI (see "Set Inference Routing" section below). + +**Known Limitation:** When using inference routing, GCP OAuth authentication is not yet fully implemented. The provider can be created and configured, but API calls through `inference.local` will fail until OAuth token generation is implemented. Direct API calls from sandbox code using the Anthropic SDK work if you handle authentication yourself. + +:::: + ::::: ## Set Inference Routing diff --git a/docs/sandboxes/manage-providers.md b/docs/sandboxes/manage-providers.md index 6d35766bf..bd75b978f 100644 --- a/docs/sandboxes/manage-providers.md +++ b/docs/sandboxes/manage-providers.md @@ -179,6 +179,7 @@ The following provider types are supported. | `nvidia` | `NVIDIA_API_KEY` | NVIDIA API Catalog | | `openai` | `OPENAI_API_KEY` | Any OpenAI-compatible endpoint. Set `--config OPENAI_BASE_URL` to point to the provider. Refer to {doc}`/inference/configure`. | | `opencode` | `OPENCODE_API_KEY`, `OPENROUTER_API_KEY`, `OPENAI_API_KEY` | opencode tool | +| `vertex` | `ANTHROPIC_VERTEX_PROJECT_ID` | Google Cloud Vertex AI with Claude models. Requires GCP Application Default Credentials. **Note:** OAuth authentication not yet fully implemented. | :::{tip} Use the `generic` type for any service not listed above. 
You define the From 17bf43411f27258c0e3297b8fc2a8ed6c4a0aebc Mon Sep 17 00:00:00 2001 From: itdove Date: Mon, 6 Apr 2026 18:45:53 -0400 Subject: [PATCH 04/18] feat(vertex): implement GCP OAuth authentication for Vertex AI - Add gcp_auth dependency for OAuth token generation - Generate OAuth tokens from Application Default Credentials in vertex provider - Store tokens as VERTEX_OAUTH_TOKEN credential for router authentication - Update inference profile to use Bearer auth with OAuth tokens - Construct Vertex-specific URLs with :streamRawPredict endpoint - Support project ID from credentials for URL construction - Add model parameter to build_backend_url for Vertex routing --- Cargo.lock | 38 +++++++++++++++ crates/openshell-core/src/inference.rs | 11 +++-- crates/openshell-providers/Cargo.toml | 2 + .../src/providers/vertex.rs | 48 +++++++++++++++++-- crates/openshell-router/src/backend.rs | 41 +++++++++++++--- crates/openshell-server/src/inference.rs | 28 ++++++++++- 6 files changed, 153 insertions(+), 15 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 98797cc24..1e2b542ee 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1509,6 +1509,32 @@ dependencies = [ "slab", ] +[[package]] +name = "gcp_auth" +version = "0.12.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2b3d0b409a042a380111af38136310839af8ac1a0917fb6e84515ed1e4bf3ee" +dependencies = [ + "async-trait", + "base64 0.22.1", + "bytes", + "chrono", + "http", + "http-body-util", + "hyper", + "hyper-rustls", + "hyper-util", + "ring", + "rustls-pki-types", + "serde", + "serde_json", + "thiserror 2.0.18", + "tokio", + "tracing", + "tracing-futures", + "url", +] + [[package]] name = "generic-array" version = "0.14.7" @@ -2919,8 +2945,10 @@ dependencies = [ name = "openshell-providers" version = "0.0.0" dependencies = [ + "gcp_auth", "openshell-core", "thiserror 2.0.18", + "tokio", ] [[package]] @@ -5378,6 +5406,16 @@ dependencies = [ "valuable", ] +[[package]] +name = 
"tracing-futures" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97d095ae15e245a057c8e8451bab9b3ee1e1f68e9ba2b4fbc18d0ac5237835f2" +dependencies = [ + "pin-project", + "tracing", +] + [[package]] name = "tracing-log" version = "0.2.0" diff --git a/crates/openshell-core/src/inference.rs b/crates/openshell-core/src/inference.rs index 78fe72310..0973f25db 100644 --- a/crates/openshell-core/src/inference.rs +++ b/crates/openshell-core/src/inference.rs @@ -88,12 +88,15 @@ static NVIDIA_PROFILE: InferenceProviderProfile = InferenceProviderProfile { static VERTEX_PROFILE: InferenceProviderProfile = InferenceProviderProfile { provider_type: "vertex", + // Base URL template - actual URL constructed at request time with project/region/model default_base_url: "https://us-central1-aiplatform.googleapis.com/v1", protocols: ANTHROPIC_PROTOCOLS, - credential_key_names: &["ANTHROPIC_VERTEX_PROJECT_ID"], - base_url_config_keys: &["ANTHROPIC_VERTEX_REGION", "VERTEX_BASE_URL"], - auth: AuthHeader::Custom("x-api-key"), - default_headers: &[("anthropic-version", "2023-06-01")], + // Look for OAuth token first, fallback to project ID (for manual config) + credential_key_names: &["VERTEX_OAUTH_TOKEN", "ANTHROPIC_VERTEX_PROJECT_ID"], + base_url_config_keys: &["VERTEX_BASE_URL", "ANTHROPIC_VERTEX_REGION"], + // Vertex uses OAuth Bearer tokens, not x-api-key + auth: AuthHeader::Bearer, + default_headers: &[("anthropic-version", "vertex-2023-10-16")], }; /// Look up the inference provider profile for a given provider type. 
diff --git a/crates/openshell-providers/Cargo.toml b/crates/openshell-providers/Cargo.toml index 41f9ed6c0..0cf14ec2b 100644 --- a/crates/openshell-providers/Cargo.toml +++ b/crates/openshell-providers/Cargo.toml @@ -13,6 +13,8 @@ repository.workspace = true [dependencies] openshell-core = { path = "../openshell-core" } thiserror = { workspace = true } +gcp_auth = "0.12" +tokio = { workspace = true } [lints] workspace = true diff --git a/crates/openshell-providers/src/providers/vertex.rs b/crates/openshell-providers/src/providers/vertex.rs index 92e77002a..ef7758670 100644 --- a/crates/openshell-providers/src/providers/vertex.rs +++ b/crates/openshell-providers/src/providers/vertex.rs @@ -2,7 +2,8 @@ // SPDX-License-Identifier: Apache-2.0 use crate::{ - ProviderDiscoverySpec, ProviderError, ProviderPlugin, RealDiscoveryContext, discover_with_spec, + DiscoveredProvider, ProviderDiscoverySpec, ProviderError, ProviderPlugin, RealDiscoveryContext, + discover_with_spec, }; pub struct VertexProvider; @@ -12,13 +13,54 @@ pub const SPEC: ProviderDiscoverySpec = ProviderDiscoverySpec { credential_env_vars: &["ANTHROPIC_VERTEX_PROJECT_ID"], }; +// Additional config keys for Vertex AI +const VERTEX_CONFIG_KEYS: &[&str] = &["ANTHROPIC_VERTEX_REGION"]; + +/// Generate an OAuth token from GCP Application Default Credentials for Vertex AI. +/// +/// Returns `None` if ADC is not configured or token generation fails. +async fn generate_oauth_token() -> Option { + // Try to find an appropriate token provider (checks ADC, service account, metadata server, etc.) 
+ let provider = gcp_auth::provider().await.ok()?; + + // Get token for Vertex AI scope + // Vertex AI uses the Cloud Platform scope + let scopes = &["https://www.googleapis.com/auth/cloud-platform"]; + let token = provider.token(scopes).await.ok()?; + + Some(token.as_str().to_string()) +} + impl ProviderPlugin for VertexProvider { fn id(&self) -> &'static str { SPEC.id } - fn discover_existing(&self) -> Result, ProviderError> { - discover_with_spec(&SPEC, &RealDiscoveryContext) + fn discover_existing(&self) -> Result, ProviderError> { + let mut discovered = discover_with_spec(&SPEC, &RealDiscoveryContext)?; + + // Add region config if present + if let Some(ref mut provider) = discovered { + for &key in VERTEX_CONFIG_KEYS { + if let Ok(value) = std::env::var(key) { + provider.config.insert(key.to_string(), value); + } + } + + // Generate OAuth token from Application Default Credentials + // This replaces the project ID credential with an actual OAuth token + // that can be used for API authentication + let rt = tokio::runtime::Runtime::new() + .map_err(|e| ProviderError::UnsupportedProvider(format!("failed to create tokio runtime: {e}")))?; + + if let Some(token) = rt.block_on(generate_oauth_token()) { + // Store the OAuth token as VERTEX_OAUTH_TOKEN + // The inference router will use this as the Bearer token + provider.credentials.insert("VERTEX_OAUTH_TOKEN".to_string(), token); + } + } + + Ok(discovered) } fn credential_env_vars(&self) -> &'static [&'static str] { diff --git a/crates/openshell-router/src/backend.rs b/crates/openshell-router/src/backend.rs index d1d7092c0..3698441f7 100644 --- a/crates/openshell-router/src/backend.rs +++ b/crates/openshell-router/src/backend.rs @@ -95,7 +95,7 @@ async fn send_backend_request( headers: Vec<(String, String)>, body: bytes::Bytes, ) -> Result { - let url = build_backend_url(&route.endpoint, path); + let url = build_backend_url(&route.endpoint, path, &route.model); let reqwest_method: reqwest::Method = method .parse() 
@@ -241,7 +241,7 @@ pub async fn verify_backend_endpoint( if mock::is_mock_route(route) { return Ok(ValidatedEndpoint { - url: build_backend_url(&route.endpoint, probe.path), + url: build_backend_url(&route.endpoint, probe.path, &route.model), protocol: probe.protocol.to_string(), }); } @@ -306,7 +306,7 @@ async fn try_validation_request( details, }, })?; - let url = build_backend_url(&route.endpoint, path); + let url = build_backend_url(&route.endpoint, path, &route.model); if response.status().is_success() { return Ok(ValidatedEndpoint { @@ -418,8 +418,23 @@ pub async fn proxy_to_backend_streaming( }) } -fn build_backend_url(endpoint: &str, path: &str) -> String { +fn build_backend_url(endpoint: &str, path: &str, model: &str) -> String { let base = endpoint.trim_end_matches('/'); + + // Special handling for Vertex AI + if base.contains("aiplatform.googleapis.com") && path.starts_with("/v1/messages") { + // Vertex AI uses a different path structure: + // https://{region}-aiplatform.googleapis.com/v1/projects/{project}/locations/{region}/publishers/anthropic/models/{model}:streamRawPredict + // The base already has everything up to /models, so we append /{model}:streamRawPredict + let model_suffix = if model.is_empty() { + String::new() + } else { + format!("/{}", model) + }; + return format!("{}{}:streamRawPredict", base, model_suffix); + } + + // Deduplicate /v1 prefix for standard endpoints if base.ends_with("/v1") && (path == "/v1" || path.starts_with("/v1/")) { return format!("{base}{}", &path[3..]); } @@ -438,7 +453,7 @@ mod tests { #[test] fn build_backend_url_dedupes_v1_prefix() { assert_eq!( - build_backend_url("https://api.openai.com/v1", "/v1/chat/completions"), + build_backend_url("https://api.openai.com/v1", "/v1/chat/completions", "gpt-4"), "https://api.openai.com/v1/chat/completions" ); } @@ -446,15 +461,27 @@ mod tests { #[test] fn build_backend_url_preserves_non_versioned_base() { assert_eq!( - build_backend_url("https://api.anthropic.com", 
"/v1/messages"), + build_backend_url("https://api.anthropic.com", "/v1/messages", "claude-3"), "https://api.anthropic.com/v1/messages" ); } + #[test] + fn build_backend_url_handles_vertex_ai() { + assert_eq!( + build_backend_url( + "https://us-central1-aiplatform.googleapis.com/v1/projects/my-project/locations/us-central1/publishers/anthropic/models", + "/v1/messages", + "claude-3-5-sonnet-20241022" + ), + "https://us-central1-aiplatform.googleapis.com/v1/projects/my-project/locations/us-central1/publishers/anthropic/models/claude-3-5-sonnet-20241022:streamRawPredict" + ); + } + #[test] fn build_backend_url_handles_exact_v1_path() { assert_eq!( - build_backend_url("https://api.openai.com/v1", "/v1"), + build_backend_url("https://api.openai.com/v1", "/v1", "gpt-4"), "https://api.openai.com/v1" ); } diff --git a/crates/openshell-server/src/inference.rs b/crates/openshell-server/src/inference.rs index 5d4014b7a..5faa30518 100644 --- a/crates/openshell-server/src/inference.rs +++ b/crates/openshell-server/src/inference.rs @@ -250,11 +250,37 @@ fn resolve_provider_route(provider: &Provider) -> Result Date: Mon, 6 Apr 2026 18:50:57 -0400 Subject: [PATCH 05/18] fix(vertex): use separate thread for OAuth token generation Avoid tokio runtime nesting panic by spawning OAuth token generation in a separate OS thread with its own runtime. This allows provider discovery to work when called from within an existing tokio context. 
--- .../openshell-providers/src/providers/vertex.rs | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/crates/openshell-providers/src/providers/vertex.rs b/crates/openshell-providers/src/providers/vertex.rs index ef7758670..0669c8067 100644 --- a/crates/openshell-providers/src/providers/vertex.rs +++ b/crates/openshell-providers/src/providers/vertex.rs @@ -48,12 +48,17 @@ impl ProviderPlugin for VertexProvider { } // Generate OAuth token from Application Default Credentials - // This replaces the project ID credential with an actual OAuth token - // that can be used for API authentication - let rt = tokio::runtime::Runtime::new() - .map_err(|e| ProviderError::UnsupportedProvider(format!("failed to create tokio runtime: {e}")))?; - - if let Some(token) = rt.block_on(generate_oauth_token()) { + // Try to generate token, but don't fail if we're in a nested runtime context + let token = std::thread::spawn(|| { + tokio::runtime::Runtime::new() + .ok() + .and_then(|rt| rt.block_on(generate_oauth_token())) + }) + .join() + .ok() + .flatten(); + + if let Some(token) = token { // Store the OAuth token as VERTEX_OAUTH_TOKEN // The inference router will use this as the Bearer token provider.credentials.insert("VERTEX_OAUTH_TOKEN".to_string(), token); From f606dc37cf261ab29461a2da659bfc94a2a11c8f Mon Sep 17 00:00:00 2001 From: itdove Date: Mon, 6 Apr 2026 19:17:58 -0400 Subject: [PATCH 06/18] feat(scripts): improve cleanup script with sandbox deletion and better ordering - Delete all sandboxes before destroying gateway - Explicitly stop and remove cluster and registry containers by name - Remove images by specific tags (localhost/openshell/*) - Run cargo clean for build artifacts - Add reinstall instructions to completion message - Better error handling with 2>/dev/null redirects --- cleanup-openshell-podman-macos.sh | 46 +++++++++++++++++++++++++++---- 1 file changed, 40 insertions(+), 6 deletions(-) diff --git 
a/cleanup-openshell-podman-macos.sh b/cleanup-openshell-podman-macos.sh index 43efd8dd5..d6b80a411 100755 --- a/cleanup-openshell-podman-macos.sh +++ b/cleanup-openshell-podman-macos.sh @@ -11,19 +11,43 @@ set -e echo "=== OpenShell Podman Cleanup Script ===" echo "" +# Delete all sandboxes first (before destroying gateway) +echo "Deleting all sandboxes..." +if command -v openshell &>/dev/null; then + # Get list of sandboxes and delete each one + openshell sandbox list --no-header 2>/dev/null | awk '{print $1}' | while read -r sandbox; do + if [ -n "$sandbox" ]; then + echo " Deleting sandbox: $sandbox" + openshell sandbox delete "$sandbox" 2>/dev/null || true + fi + done +fi + # Destroy OpenShell gateway (if it exists) echo "Destroying OpenShell gateway..." if command -v openshell &>/dev/null; then openshell gateway destroy --name openshell 2>/dev/null || true fi -# Stop and remove any running OpenShell containers -echo "Stopping OpenShell containers..." -podman ps -a | grep openshell | awk '{print $1}' | xargs -r podman rm -f || true +# Stop and remove cluster container +echo "Stopping cluster container..." +podman stop openshell-cluster-openshell 2>/dev/null || true +podman rm openshell-cluster-openshell 2>/dev/null || true + +# Stop and remove local registry container +echo "Stopping local registry..." +podman stop openshell-local-registry 2>/dev/null || true +podman rm openshell-local-registry 2>/dev/null || true + +# Stop and remove any other OpenShell containers +echo "Cleaning up remaining OpenShell containers..." +podman ps -a | grep openshell | awk '{print $1}' | xargs -r podman rm -f 2>/dev/null || true # Remove OpenShell images echo "Removing OpenShell images..." 
-podman images | grep -E "openshell|cluster" | awk '{print $3}' | xargs -r podman rmi -f || true +podman rmi localhost/openshell/cluster:dev 2>/dev/null || true +podman rmi localhost/openshell/gateway:dev 2>/dev/null || true +podman images | grep -E "openshell|127.0.0.1:5000/openshell" | awk '{print $3}' | xargs -r podman rmi -f 2>/dev/null || true # Remove CLI binary echo "Removing CLI binary..." @@ -41,8 +65,11 @@ rm -rf ~/.openshell echo "Removing build artifacts..." SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" cd "$SCRIPT_DIR" -rm -rf target/ -rm -rf deploy/docker/.build/ +if command -v cargo &>/dev/null; then + echo " Running cargo clean..." + cargo clean 2>/dev/null || true +fi +rm -rf deploy/docker/.build/ 2>/dev/null || true # Clean Podman cache echo "Cleaning Podman build cache..." @@ -51,6 +78,13 @@ podman system prune -af --volumes echo "" echo "=== Cleanup Complete ===" echo "" +echo "OpenShell containers, images, and configuration have been removed." +echo "" +echo "To reinstall OpenShell:" +echo " 1. source scripts/podman.env" +echo " 2. mise run cluster:build:full" +echo " 3. cargo install --path crates/openshell-cli --root ~/.local" +echo "" echo "To completely remove the OpenShell Podman machine:" echo " podman machine stop openshell" echo " podman machine rm openshell" From d36e58b21ff50f5b410b6edb011cefe55ca27322 Mon Sep 17 00:00:00 2001 From: itdove Date: Mon, 6 Apr 2026 19:45:17 -0400 Subject: [PATCH 07/18] feat(sandbox): inject Vertex AI credentials as actual environment variables Add selective direct injection for provider credentials that need to be accessible as real environment variables (not placeholders). This allows tools like `claude` CLI to read Vertex AI credentials directly. 
Changes: - Add direct_inject_credentials() list for credentials requiring direct access - Modify from_provider_env() to support selective direct injection - Inject ANTHROPIC_VERTEX_PROJECT_ID, VERTEX_OAUTH_TOKEN, and ANTHROPIC_VERTEX_REGION as actual values instead of placeholders - Other credentials continue using openshell:resolve:env:* placeholders for HTTP proxy resolution Security note: Directly injected credentials are visible via /proc/*/environ, unlike placeholder-based credentials which are only resolved within HTTP requests. Only credentials essential for CLI tool compatibility are included. --- crates/openshell-sandbox/src/secrets.rs | 53 +++++++++++++++++++++++-- 1 file changed, 49 insertions(+), 4 deletions(-) diff --git a/crates/openshell-sandbox/src/secrets.rs b/crates/openshell-sandbox/src/secrets.rs index a27537c91..233056f07 100644 --- a/crates/openshell-sandbox/src/secrets.rs +++ b/crates/openshell-sandbox/src/secrets.rs @@ -10,6 +10,25 @@ const PLACEHOLDER_PREFIX: &str = "openshell:resolve:env:"; /// Public access to the placeholder prefix for fail-closed scanning in other modules. pub(crate) const PLACEHOLDER_PREFIX_PUBLIC: &str = PLACEHOLDER_PREFIX; +/// Credentials that should be injected as actual values into the sandbox environment +/// instead of being converted to placeholders. +/// +/// These credentials are needed by tools (like `claude` CLI) that read environment +/// variables directly rather than making HTTP requests through the proxy. +/// +/// **Security consideration**: These values are visible to all sandbox processes via +/// `/proc//environ`, unlike placeholder-based credentials which are only resolved +/// within HTTP requests. Only include credentials here when direct env var access is +/// required for tool compatibility. 
+fn direct_inject_credentials() -> &'static [&'static str] { + &[ + // Vertex AI credentials for claude CLI + "ANTHROPIC_VERTEX_PROJECT_ID", + "VERTEX_OAUTH_TOKEN", + "ANTHROPIC_VERTEX_REGION", + ] +} + /// Characters that are valid in an env var key name (used to extract /// placeholder boundaries within concatenated strings like path segments). fn is_env_key_char(b: u8) -> bool { @@ -69,6 +88,19 @@ pub struct SecretResolver { impl SecretResolver { pub(crate) fn from_provider_env( provider_env: HashMap, + ) -> (HashMap, Option) { + Self::from_provider_env_with_direct_inject(provider_env, &direct_inject_credentials()) + } + + /// Create a resolver from provider environment with selective direct injection. + /// + /// Credentials matching keys in `direct_inject` are injected as actual values + /// into the child environment (for tools like `claude` CLI that need real env vars). + /// All other credentials are converted to `openshell:resolve:env:*` placeholders + /// that get resolved by the HTTP proxy. 
+ pub(crate) fn from_provider_env_with_direct_inject( + provider_env: HashMap, + direct_inject: &[&str], ) -> (HashMap, Option) { if provider_env.is_empty() { return (HashMap::new(), None); @@ -78,12 +110,25 @@ impl SecretResolver { let mut by_placeholder = HashMap::with_capacity(provider_env.len()); for (key, value) in provider_env { - let placeholder = placeholder_for_env_key(&key); - child_env.insert(key, placeholder.clone()); - by_placeholder.insert(placeholder, value); + // Check if this credential should be injected directly + if direct_inject.contains(&key.as_str()) { + // Direct injection: put actual value in environment + child_env.insert(key, value); + } else { + // Placeholder: will be resolved by HTTP proxy + let placeholder = placeholder_for_env_key(&key); + child_env.insert(key, placeholder.clone()); + by_placeholder.insert(placeholder, value); + } } - (child_env, Some(Self { by_placeholder })) + let resolver = if by_placeholder.is_empty() { + None + } else { + Some(Self { by_placeholder }) + }; + + (child_env, resolver) } /// Resolve a placeholder string to the real secret value. From 2dd3438a165a898bf3ff8c72aabbfbabab231dd9 Mon Sep 17 00:00:00 2001 From: itdove Date: Mon, 6 Apr 2026 20:03:59 -0400 Subject: [PATCH 08/18] feat(vertex): auto-inject CLAUDE_CODE_USE_VERTEX for claude CLI - Add CLAUDE_CODE_USE_VERTEX to direct injection list - Automatically set CLAUDE_CODE_USE_VERTEX=1 in Vertex provider credentials - Enables claude CLI to auto-detect Vertex AI without manual config Now sandboxes with Vertex provider will automatically have: - ANTHROPIC_VERTEX_PROJECT_ID (from env) - VERTEX_OAUTH_TOKEN (generated from GCP ADC) - CLAUDE_CODE_USE_VERTEX=1 (auto-set) The claude CLI can now use Vertex AI with zero manual configuration. 
--- crates/openshell-providers/src/providers/vertex.rs | 4 ++++ crates/openshell-sandbox/src/secrets.rs | 1 + 2 files changed, 5 insertions(+) diff --git a/crates/openshell-providers/src/providers/vertex.rs b/crates/openshell-providers/src/providers/vertex.rs index 0669c8067..6daadd5f9 100644 --- a/crates/openshell-providers/src/providers/vertex.rs +++ b/crates/openshell-providers/src/providers/vertex.rs @@ -47,6 +47,10 @@ impl ProviderPlugin for VertexProvider { } } + // Set CLAUDE_CODE_USE_VERTEX=1 to enable Vertex AI in claude CLI + // Must be in credentials (not config) to be injected into sandbox environment + provider.credentials.insert("CLAUDE_CODE_USE_VERTEX".to_string(), "1".to_string()); + // Generate OAuth token from Application Default Credentials // Try to generate token, but don't fail if we're in a nested runtime context let token = std::thread::spawn(|| { diff --git a/crates/openshell-sandbox/src/secrets.rs b/crates/openshell-sandbox/src/secrets.rs index 233056f07..0cd188b6e 100644 --- a/crates/openshell-sandbox/src/secrets.rs +++ b/crates/openshell-sandbox/src/secrets.rs @@ -26,6 +26,7 @@ fn direct_inject_credentials() -> &'static [&'static str] { "ANTHROPIC_VERTEX_PROJECT_ID", "VERTEX_OAUTH_TOKEN", "ANTHROPIC_VERTEX_REGION", + "CLAUDE_CODE_USE_VERTEX", ] } From bc3342de1a58a54550b8a5c2360528c561111e94 Mon Sep 17 00:00:00 2001 From: itdove Date: Mon, 6 Apr 2026 20:06:01 -0400 Subject: [PATCH 09/18] feat(podman): increase default memory to 12 GB for better build performance - Change Podman machine default memory from 8 GB to 12 GB - Update documentation to reflect 12 GB default - Update troubleshooting to suggest 16 GB for build issues 12 GB provides better performance for Rust compilation and reduces out-of-memory issues during parallel builds. 
--- docs/get-started/install-podman-macos.md | 6 +++--- scripts/setup-podman-macos.sh | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/get-started/install-podman-macos.md b/docs/get-started/install-podman-macos.md index 661abada0..648f11564 100644 --- a/docs/get-started/install-podman-macos.md +++ b/docs/get-started/install-podman-macos.md @@ -51,7 +51,7 @@ brew install podman mise The `scripts/setup-podman-macos.sh` script automates Podman Machine configuration: -- Creates a dedicated `openshell` Podman machine (8 GB RAM, 4 CPUs) +- Creates a dedicated `openshell` Podman machine (12 GB RAM, 4 CPUs) - Configures cgroup delegation (required for the embedded k3s cluster) - Stops conflicting machines (only one can run at a time, with user confirmation) @@ -161,11 +161,11 @@ openshell sandbox create ### Build fails with memory errors -Increase the Podman machine memory allocation: +Increase the Podman machine memory allocation (default is 12 GB): ```console podman machine stop openshell -podman machine set openshell --memory 8192 +podman machine set openshell --memory 16384 podman machine start openshell ``` diff --git a/scripts/setup-podman-macos.sh b/scripts/setup-podman-macos.sh index 1538259f3..979a51e3e 100755 --- a/scripts/setup-podman-macos.sh +++ b/scripts/setup-podman-macos.sh @@ -9,7 +9,7 @@ set -euo pipefail MACHINE_NAME="${PODMAN_MACHINE_NAME:-openshell}" -MEMORY="${PODMAN_MEMORY:-8192}" +MEMORY="${PODMAN_MEMORY:-12288}" CPUS="${PODMAN_CPUS:-4}" echo "=== OpenShell Podman Setup for macOS ===" From b08de19e134b32147a7eb56b7eb7edfe134fea47 Mon Sep 17 00:00:00 2001 From: itdove Date: Mon, 6 Apr 2026 20:19:40 -0400 Subject: [PATCH 10/18] fix(scripts): update CLI installation command in setup script Replace manual 'cargo build + cp' with 'cargo install --path' Add verification step with 'openshell gateway info' Keep correct 'mise run cluster:build:full' command --- scripts/setup-podman-macos.sh | 6 +++--- 1 file changed, 3 insertions(+), 
3 deletions(-) diff --git a/scripts/setup-podman-macos.sh b/scripts/setup-podman-macos.sh index 979a51e3e..02fdf2343 100755 --- a/scripts/setup-podman-macos.sh +++ b/scripts/setup-podman-macos.sh @@ -108,9 +108,9 @@ echo "Podman machine '${MACHINE_NAME}' is ready!" echo "" echo "Next steps:" echo " 1. Set up environment: source scripts/podman.env" -echo " 2. Build and deploy: mise run cluster:build:full" -echo " 3. Build CLI: cargo build --release -p openshell-cli" -echo " 4. Install CLI: cp target/release/openshell ~/.local/bin/" +echo " 2. Build and deploy cluster: mise run cluster:build:full" +echo " 3. Install CLI: cargo install --path crates/openshell-cli --root ~/.local" +echo " 4. Verify installation: openshell gateway info" echo "" echo "To make the environment persistent, add to your shell profile (~/.zshrc):" echo " source $(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)/scripts/podman.env" From b56828e9efea9a60bd6e4e1b5cf7499373ec9ae1 Mon Sep 17 00:00:00 2001 From: itdove Date: Mon, 6 Apr 2026 23:15:18 -0400 Subject: [PATCH 11/18] fix(router): remove model field from Vertex AI request bodies Vertex AI's :streamRawPredict endpoint expects the model in the URL path, not in the request body. The router was incorrectly inserting the model field, causing "Extra inputs are not permitted" errors. Changes: - Router now detects Vertex AI endpoints and removes model field - Added bash 3 compatibility fix for cluster-deploy-fast.sh - Added scripts/rebuild-cluster.sh for development workflow - Updated documentation for Vertex AI setup and rebuild process Fixes inference routing to Vertex AI via inference.local endpoint. 
--- CONTRIBUTING.md | 17 ++++++++++++ crates/openshell-router/src/backend.rs | 19 ++++++++++--- docs/get-started/install-podman-macos.md | 25 ++++++++++++++++- docs/inference/configure.md | 15 ++++++----- docs/sandboxes/manage-providers.md | 3 ++- scripts/rebuild-cluster.sh | 34 ++++++++++++++++++++++++ tasks/scripts/cluster-deploy-fast.sh | 19 ++++++++++++- 7 files changed, 119 insertions(+), 13 deletions(-) create mode 100755 scripts/rebuild-cluster.sh diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 19a398a32..d759863a8 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -176,6 +176,23 @@ These are the primary `mise` tasks for day-to-day development: | `mise run docs` | Build and serve documentation locally | | `mise run clean` | Clean build artifacts | +## Rebuilding After Code Changes + +When developing OpenShell core components (gateway, router, sandbox supervisor), you need to rebuild the cluster to test your changes: + +```bash +bash scripts/rebuild-cluster.sh +``` + +This script stops the cluster, rebuilds the image with your changes, and restarts it. + +**After rebuilding:** +- Providers need to be recreated (gateway database was reset) +- Inference routing needs to be reconfigured +- Sandboxes need to be recreated + +For a complete cleanup, see the cleanup scripts in the `scripts/` directory. + ## Project Structure | Path | Purpose | diff --git a/crates/openshell-router/src/backend.rs b/crates/openshell-router/src/backend.rs index 3698441f7..9b5d1a000 100644 --- a/crates/openshell-router/src/backend.rs +++ b/crates/openshell-router/src/backend.rs @@ -137,13 +137,24 @@ async fn send_backend_request( // Set the "model" field in the JSON body to the route's configured model so the // backend receives the correct model ID regardless of what the client sent. + // + // Exception: Vertex AI's :streamRawPredict endpoint expects the model in the URL + // path (already handled in build_backend_url), not in the request body. 
+ let is_vertex_ai = route.endpoint.contains("aiplatform.googleapis.com"); + let body = match serde_json::from_slice::<serde_json::Value>(&body) { Ok(mut json) => { if let Some(obj) = json.as_object_mut() { - obj.insert( - "model".to_string(), - serde_json::Value::String(route.model.clone()), - ); + if is_vertex_ai { + // Remove model field for Vertex AI (it's in the URL path) + obj.remove("model"); + } else { + // Insert/override model field for standard backends + obj.insert( + "model".to_string(), + serde_json::Value::String(route.model.clone()), + ); + } } bytes::Bytes::from(serde_json::to_vec(&json).unwrap_or_else(|_| body.to_vec())) } diff --git a/docs/get-started/install-podman-macos.md b/docs/get-started/install-podman-macos.md index 648f11564..abc0a3ac6 100644 --- a/docs/get-started/install-podman-macos.md +++ b/docs/get-started/install-podman-macos.md @@ -132,14 +132,37 @@ Verify the gateway is healthy: openshell gateway info ``` +## Rebuilding After Code Changes + +If you're developing OpenShell and need to test code changes, use the rebuild script: + +```console +bash scripts/rebuild-cluster.sh +``` + +This stops the cluster, removes the old image, rebuilds with your changes, and restarts. After rebuilding: +1. Recreate providers (gateway database was reset) +2. Reconfigure inference routing if needed +3. Recreate sandboxes + ## Cleanup -To remove all OpenShell resources and optionally the Podman machine: +### Quick Rebuild (Development) + +```console +bash scripts/rebuild-cluster.sh +``` + +Rebuilds the cluster with latest code changes. Use this during development. + +### Full Cleanup (Start Fresh) ```console bash cleanup-openshell-podman-macos.sh ``` +Removes all OpenShell resources and optionally the Podman machine. Use this to completely reset your installation. 
+ ## Troubleshooting ### Environment variables not set diff --git a/docs/inference/configure.md b/docs/inference/configure.md index 4798bc09c..e13567135 100644 --- a/docs/inference/configure.md +++ b/docs/inference/configure.md @@ -104,23 +104,26 @@ This reads `ANTHROPIC_API_KEY` from your environment. ```console $ export ANTHROPIC_VERTEX_PROJECT_ID=your-gcp-project-id -$ openshell provider create --name vertex-claude --type vertex --from-existing +$ export ANTHROPIC_VERTEX_REGION=us-east5 # Optional, defaults to us-central1 +$ openshell provider create --name vertex --type vertex --from-existing ``` -This reads `ANTHROPIC_VERTEX_PROJECT_ID` from your environment and makes it available inside sandboxes. +This reads `ANTHROPIC_VERTEX_PROJECT_ID` and `ANTHROPIC_VERTEX_REGION` from your environment and automatically generates OAuth tokens from GCP Application Default Credentials. **Prerequisites:** -- Google Cloud project with Vertex AI API enabled +- Google Cloud project with Vertex AI API enabled and Claude models available - Application Default Credentials configured: `gcloud auth application-default login` +- The `~/.config/gcloud/` directory must be uploaded to sandboxes for OAuth token refresh **Usage:** -- **Direct API calls:** Attach this provider to sandboxes to inject the project ID credential. Call Vertex AI directly from your code using the Anthropic SDK. -- **Inference routing:** Configure `inference.local` to proxy requests to Vertex AI (see "Set Inference Routing" section below). +- **Direct API calls:** Tools like `claude` CLI automatically use Vertex AI when `CLAUDE_CODE_USE_VERTEX=1` is set +- **Inference routing:** Configure `inference.local` to proxy requests to Vertex AI (see "Set Inference Routing" section below) -**Known Limitation:** When using inference routing, GCP OAuth authentication is not yet fully implemented. 
The provider can be created and configured, but API calls through `inference.local` will fail until OAuth token generation is implemented. Direct API calls from sandbox code using the Anthropic SDK work if you handle authentication yourself. +**Model ID Format:** Use `@` separator for versions (e.g., `claude-sonnet-4-5@20250929`) :::: + ::::: ## Set Inference Routing diff --git a/docs/sandboxes/manage-providers.md b/docs/sandboxes/manage-providers.md index bd75b978f..716c16f5a 100644 --- a/docs/sandboxes/manage-providers.md +++ b/docs/sandboxes/manage-providers.md @@ -179,7 +179,7 @@ The following provider types are supported. | `nvidia` | `NVIDIA_API_KEY` | NVIDIA API Catalog | | `openai` | `OPENAI_API_KEY` | Any OpenAI-compatible endpoint. Set `--config OPENAI_BASE_URL` to point to the provider. Refer to {doc}`/inference/configure`. | | `opencode` | `OPENCODE_API_KEY`, `OPENROUTER_API_KEY`, `OPENAI_API_KEY` | opencode tool | -| `vertex` | `ANTHROPIC_VERTEX_PROJECT_ID` | Google Cloud Vertex AI with Claude models. Requires GCP Application Default Credentials. **Note:** OAuth authentication not yet fully implemented. | +| `vertex` | `ANTHROPIC_VERTEX_PROJECT_ID`, `VERTEX_OAUTH_TOKEN`, `CLAUDE_CODE_USE_VERTEX` | Google Cloud Vertex AI with Claude models. Automatically generates OAuth tokens from GCP Application Default Credentials. Set `ANTHROPIC_VERTEX_REGION` (optional, defaults to `us-central1`) to control the region. | :::{tip} Use the `generic` type for any service not listed above. You define the @@ -194,6 +194,7 @@ The following providers have been tested with `inference.local`. 
Any provider th |---|---|---|---|---| | NVIDIA API Catalog | `nvidia-prod` | `nvidia` | `https://integrate.api.nvidia.com/v1` | `NVIDIA_API_KEY` | | Anthropic | `anthropic-prod` | `anthropic` | `https://api.anthropic.com` | `ANTHROPIC_API_KEY` | +| Google Vertex AI | `vertex` | `vertex` | Auto-configured per region | `ANTHROPIC_VERTEX_PROJECT_ID` (OAuth auto-generated) | | Baseten | `baseten` | `openai` | `https://inference.baseten.co/v1` | `OPENAI_API_KEY` | | Bitdeer AI | `bitdeer` | `openai` | `https://api-inference.bitdeer.ai/v1` | `OPENAI_API_KEY` | | Deepinfra | `deepinfra` | `openai` | `https://api.deepinfra.com/v1/openai` | `OPENAI_API_KEY` | diff --git a/scripts/rebuild-cluster.sh b/scripts/rebuild-cluster.sh new file mode 100755 index 000000000..f836a832a --- /dev/null +++ b/scripts/rebuild-cluster.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash + +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Quick rebuild script for development +# Restarts the cluster container with the latest code changes + +set -euo pipefail + +echo "=== OpenShell Quick Rebuild ===" +echo "" + +# Stop and remove cluster container +echo "Stopping cluster container..." +podman stop openshell-cluster-openshell 2>/dev/null || true +podman rm openshell-cluster-openshell 2>/dev/null || true + +# Remove old cluster image +echo "Removing old cluster image..." +podman rmi localhost/openshell/cluster:dev 2>/dev/null || true + +# Rebuild and start cluster +echo "Rebuilding cluster with latest code..." +mise run cluster:build:full + +echo "" +echo "=== Rebuild Complete ===" +echo "" +echo "Next steps:" +echo " 1. Recreate provider: openshell provider create --name --type --from-existing" +echo " 2. Configure inference: openshell inference set --provider --model " +echo " 3. Recreate sandboxes: openshell sandbox create ..." 
+echo "" diff --git a/tasks/scripts/cluster-deploy-fast.sh b/tasks/scripts/cluster-deploy-fast.sh index 86fe9746d..9bdc6a604 100755 --- a/tasks/scripts/cluster-deploy-fast.sh +++ b/tasks/scripts/cluster-deploy-fast.sh @@ -28,6 +28,23 @@ log_duration() { echo "${label} took $((end - start))s" } +# Read lines into an array variable (bash 3 & 4 compatible) +# Usage: read_lines_into_array array_name < <(command) +read_lines_into_array() { + local array_name=$1 + if ((BASH_VERSINFO[0] >= 4)); then + # Bash 4+: use mapfile (faster) + mapfile -t "$array_name" + else + # Bash 3: use while loop + local line + eval "$array_name=()" + while IFS= read -r line; do + eval "$array_name+=(\"\$line\")" + done + fi +} + if ! $CONTAINER_RUNTIME ps -q --filter "name=^${CONTAINER_NAME}$" --filter "health=healthy" | grep -q .; then echo "Error: Cluster container '${CONTAINER_NAME}' is not running or not healthy." echo "Start the cluster first with: mise run cluster" @@ -86,7 +103,7 @@ fi declare -a changed_files=() detect_start=$(date +%s) -mapfile -t changed_files < <( +read_lines_into_array changed_files < <( { git diff --name-only git diff --name-only --cached From 308dc5cfd3f1358432e8d849460d1d6250877a3a Mon Sep 17 00:00:00 2001 From: itdove Date: Mon, 6 Apr 2026 23:22:08 -0400 Subject: [PATCH 12/18] docs: add Vertex AI example with network policy Added examples/vertex-ai/ directory with: - sandbox-policy.yaml: Network policy for Vertex AI endpoints - README.md: Quick start guide with links to full documentation Provides ready-to-use policy file for Vertex AI integration. 
--- examples/vertex-ai/README.md | 46 +++++++++++++++++++++ examples/vertex-ai/sandbox-policy.yaml | 55 ++++++++++++++++++++++++++ 2 files changed, 101 insertions(+) create mode 100644 examples/vertex-ai/README.md create mode 100644 examples/vertex-ai/sandbox-policy.yaml diff --git a/examples/vertex-ai/README.md b/examples/vertex-ai/README.md new file mode 100644 index 000000000..ec0cdf78a --- /dev/null +++ b/examples/vertex-ai/README.md @@ -0,0 +1,46 @@ +# Google Cloud Vertex AI Example + +This example demonstrates how to use OpenShell with Google Cloud Vertex AI to run Claude models via GCP infrastructure. + +## Quick Start + +```bash +# Configure GCP credentials +export ANTHROPIC_VERTEX_PROJECT_ID=your-gcp-project-id +gcloud auth application-default login + +# Create provider +openshell provider create --name vertex --type vertex --from-existing + +# Create sandbox with policy +openshell sandbox create --name vertex-test --provider vertex \ + --upload ~/.config/gcloud/:.config/gcloud/ \ + --policy examples/vertex-ai/sandbox-policy.yaml + +# Inside sandbox +claude # Automatically uses Vertex AI +``` + +## What's Included + +- **`sandbox-policy.yaml`**: Network policy allowing Google OAuth and Vertex AI endpoints + - Supports major GCP regions (us-east5, us-central1, us-west1, europe-west1, europe-west4, asia-northeast1) + - Enables direct Claude CLI usage + - Enables `inference.local` routing + +## Documentation + +For detailed setup instructions, troubleshooting, and configuration options, see: + +- [Vertex AI Provider Configuration](../../docs/inference/configure.md#google-cloud-vertex-ai) +- [Provider Management](../../docs/sandboxes/manage-providers.md) +- [Inference Routing](../../docs/inference/configure.md) + +## Adding Regions + +To support additional GCP regions, add them to `sandbox-policy.yaml`: + +```yaml +- host: asia-southeast1-aiplatform.googleapis.com + port: 443 +``` diff --git a/examples/vertex-ai/sandbox-policy.yaml 
b/examples/vertex-ai/sandbox-policy.yaml new file mode 100644 index 000000000..81fa36d10 --- /dev/null +++ b/examples/vertex-ai/sandbox-policy.yaml @@ -0,0 +1,55 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Sandbox policy for Google Cloud Vertex AI +# +# This policy allows sandboxes to access Google Cloud endpoints required for +# Vertex AI with Anthropic Claude models. + +version: 1 + +network_policies: + google_vertex: + name: google-vertex + endpoints: + # Google OAuth endpoints for authentication + - host: oauth2.googleapis.com + port: 443 + - host: accounts.google.com + port: 443 + - host: www.googleapis.com + port: 443 + + # Vertex AI endpoints (global and regional) + - host: aiplatform.googleapis.com + port: 443 + - host: us-east5-aiplatform.googleapis.com + port: 443 + - host: us-central1-aiplatform.googleapis.com + port: 443 + - host: us-west1-aiplatform.googleapis.com + port: 443 + - host: europe-west1-aiplatform.googleapis.com + port: 443 + - host: europe-west4-aiplatform.googleapis.com + port: 443 + - host: asia-northeast1-aiplatform.googleapis.com + port: 443 + + binaries: + # Claude CLI for direct Vertex AI usage + - path: /usr/local/bin/claude + # Python for Anthropic SDK usage + - path: /usr/bin/python3 + # curl for testing + - path: /usr/bin/curl + + inference_local: + name: inference-local + endpoints: + # Local inference routing endpoint + - host: inference.local + port: 80 + binaries: + - path: /usr/bin/curl + - path: /usr/bin/python3 From 83a94b9fbc61951e7997fbeeedf6ac2dbc787747 Mon Sep 17 00:00:00 2001 From: itdove Date: Tue, 7 Apr 2026 09:53:28 -0400 Subject: [PATCH 13/18] fix(build): handle Podman --push flag and array expansion Podman does not support --push flag in build command like Docker buildx. This commit fixes two issues: 1. 
docker-build-image.sh: Filter out --push flag and execute push as separate command after build completes 2. docker-publish-multiarch.sh: Use safe array expansion syntax to avoid unbound variable errors with set -u when EXTRA_TAGS is empty Note: Multi-arch builds with Podman still require manual workflow due to cross-compilation toolchain issues. Use /tmp/build-multiarch-local.sh for local multi-arch builds with QEMU emulation. Co-Authored-By: Claude Sonnet 4.5 --- tasks/scripts/docker-build-image.sh | 11 ++++++++++- tasks/scripts/docker-publish-multiarch.sh | 4 ++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/tasks/scripts/docker-build-image.sh b/tasks/scripts/docker-build-image.sh index 38b200a2e..a76b01d12 100755 --- a/tasks/scripts/docker-build-image.sh +++ b/tasks/scripts/docker-build-image.sh @@ -212,11 +212,13 @@ if [[ "${CONTAINER_RUNTIME}" == "podman" ]]; then ARCH_ARGS+=(--build-arg "BUILDARCH=${TARGETARCH}") fi - # Filter OUTPUT_ARGS: Podman stores images locally by default (no --load) + # Filter OUTPUT_ARGS: Podman doesn't support --load or --push in build command PODMAN_OUTPUT_ARGS=() + PODMAN_SHOULD_PUSH=0 for arg in ${OUTPUT_ARGS[@]+"${OUTPUT_ARGS[@]}"}; do case "${arg}" in --load) ;; # implicit in Podman + --push) PODMAN_SHOULD_PUSH=1 ;; # push after build *) PODMAN_OUTPUT_ARGS+=("${arg}") ;; esac done @@ -227,6 +229,13 @@ if [[ "${CONTAINER_RUNTIME}" == "podman" ]]; then ${TLS_ARGS[@]+"${TLS_ARGS[@]}"} \ ${PODMAN_OUTPUT_ARGS[@]+"${PODMAN_OUTPUT_ARGS[@]}"} \ . + + # Push after build if requested (Podman doesn't support --push in build) + if [[ "${PODMAN_SHOULD_PUSH}" == "1" && "${IS_FINAL_IMAGE}" == "1" ]]; then + echo "Pushing ${IMAGE_NAME}:${IMAGE_TAG}..." 
+ podman_local_tls_args "${IMAGE_NAME}" + podman push ${PODMAN_TLS_ARGS[@]+"${PODMAN_TLS_ARGS[@]}"} "${IMAGE_NAME}:${IMAGE_TAG}" + fi else # Docker: use buildx docker buildx build \ diff --git a/tasks/scripts/docker-publish-multiarch.sh b/tasks/scripts/docker-publish-multiarch.sh index f83a7c203..e8185a952 100755 --- a/tasks/scripts/docker-publish-multiarch.sh +++ b/tasks/scripts/docker-publish-multiarch.sh @@ -50,7 +50,7 @@ echo echo "Building multi-arch cluster image..." tasks/scripts/docker-build-image.sh cluster -TAGS_TO_APPLY=("${EXTRA_TAGS[@]}") +TAGS_TO_APPLY=(${EXTRA_TAGS[@]+"${EXTRA_TAGS[@]}"}) if [[ "${TAG_LATEST}" == "true" ]]; then TAGS_TO_APPLY+=("latest") fi @@ -58,7 +58,7 @@ fi if [[ ${#TAGS_TO_APPLY[@]} -gt 0 ]]; then for component in gateway cluster; do full_image="${REGISTRY}/${component}" - for tag in "${TAGS_TO_APPLY[@]}"; do + for tag in ${TAGS_TO_APPLY[@]+"${TAGS_TO_APPLY[@]}"}; do [[ "${tag}" == "${IMAGE_TAG}" ]] && continue echo "Tagging ${full_image}:${tag}..." if [[ "${CONTAINER_RUNTIME}" == "podman" ]]; then From b2d65457a193561ffcfde5ffce6545608c0e3f35 Mon Sep 17 00:00:00 2001 From: itdove Date: Tue, 7 Apr 2026 12:55:19 -0400 Subject: [PATCH 14/18] feat(build): add Podman multi-arch support to docker-publish-multiarch.sh Add Podman-specific multi-architecture build logic to complement existing Docker buildx support. Podman builds each platform sequentially using manifest lists, while Docker buildx builds in parallel. 
Changes: - Detect Podman and use manifest-based approach for multi-arch builds - Build each platform (arm64, amd64) separately with explicit TARGETARCH - Create and push manifest list combining all architectures - Preserve existing Docker buildx workflow unchanged - Add informative logging about sequential vs parallel builds Build times: - Podman: Sequential builds (~30-40 min on Linux, ~45-60 min on macOS) - Docker buildx: Parallel builds (~20-30 min) This enables multi-arch image publishing on systems using Podman as the container runtime, supporting both Apple Silicon and Intel architectures. --- tasks/scripts/docker-publish-multiarch.sh | 66 +++++++++++++++++++---- 1 file changed, 57 insertions(+), 9 deletions(-) diff --git a/tasks/scripts/docker-publish-multiarch.sh b/tasks/scripts/docker-publish-multiarch.sh index e8185a952..398c97c00 100755 --- a/tasks/scripts/docker-publish-multiarch.sh +++ b/tasks/scripts/docker-publish-multiarch.sh @@ -27,8 +27,56 @@ fi if [[ "${CONTAINER_RUNTIME}" == "podman" ]]; then echo "Using Podman for multi-arch build (podman manifest)" + echo "Note: Podman builds platforms sequentially (slower than Docker buildx)" export DOCKER_BUILDER="" + + # Podman: build each platform separately and create manifest + IFS=',' read -ra PLATFORM_ARRAY <<< "${PLATFORMS}" + + for component in gateway cluster; do + full_image="${REGISTRY}/${component}" + echo "" + echo "=== Building multi-arch ${component} image ===" + + # Create manifest list + podman manifest rm "${full_image}:${IMAGE_TAG}" 2>/dev/null || true + podman manifest create "${full_image}:${IMAGE_TAG}" + + # Build for each platform + for platform in "${PLATFORM_ARRAY[@]}"; do + arch="${platform##*/}" + case "${arch}" in + amd64) target_arch="amd64" ;; + arm64) target_arch="arm64" ;; + *) echo "Unsupported arch: ${arch}" >&2; exit 1 ;; + esac + + echo "Building ${component} for ${platform}..." 
+ + # Package Helm chart for cluster builds + if [[ "${component}" == "cluster" ]]; then + mkdir -p deploy/docker/.build/charts + helm package deploy/helm/openshell -d deploy/docker/.build/charts/ >/dev/null + fi + + # Build with explicit TARGETARCH/BUILDARCH to avoid cross-compilation + # (QEMU emulation handles running the different architecture) + podman build --platform "${platform}" \ + --build-arg TARGETARCH="${target_arch}" \ + --build-arg BUILDARCH="${target_arch}" \ + --manifest "${full_image}:${IMAGE_TAG}" \ + -f deploy/docker/Dockerfile.images \ + --target "${component}" \ + . + done + + # Push manifest + echo "Pushing ${full_image}:${IMAGE_TAG}..." + podman manifest push "${full_image}:${IMAGE_TAG}" \ + "docker://${full_image}:${IMAGE_TAG}" + done else + # Docker: use buildx BUILDER_NAME=${DOCKER_BUILDER:-multiarch} if docker buildx inspect "${BUILDER_NAME}" >/dev/null 2>&1; then echo "Using existing buildx builder: ${BUILDER_NAME}" @@ -38,17 +86,17 @@ else docker buildx create --name "${BUILDER_NAME}" --use --bootstrap fi export DOCKER_BUILDER="${BUILDER_NAME}" -fi -export DOCKER_PLATFORM="${PLATFORMS}" -export DOCKER_PUSH=1 -export IMAGE_REGISTRY="${REGISTRY}" + export DOCKER_PLATFORM="${PLATFORMS}" + export DOCKER_PUSH=1 + export IMAGE_REGISTRY="${REGISTRY}" -echo "Building multi-arch gateway image..." -tasks/scripts/docker-build-image.sh gateway + echo "Building multi-arch gateway image..." + tasks/scripts/docker-build-image.sh gateway -echo -echo "Building multi-arch cluster image..." -tasks/scripts/docker-build-image.sh cluster + echo + echo "Building multi-arch cluster image..." 
+ tasks/scripts/docker-build-image.sh cluster +fi TAGS_TO_APPLY=(${EXTRA_TAGS[@]+"${EXTRA_TAGS[@]}"}) if [[ "${TAG_LATEST}" == "true" ]]; then From 8a27b2fa20dd1a882e7553986fe0fc9a90945f33 Mon Sep 17 00:00:00 2001 From: itdove Date: Tue, 7 Apr 2026 14:47:26 -0400 Subject: [PATCH 15/18] fix: apply cargo fmt formatting to vertex provider Fix CI formatting check failures: - Split long .insert() calls across multiple lines - Reformat MockDiscoveryContext initialization No functional changes, formatting only. --- crates/openshell-providers/src/providers/vertex.rs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/crates/openshell-providers/src/providers/vertex.rs b/crates/openshell-providers/src/providers/vertex.rs index 6daadd5f9..de8d45d31 100644 --- a/crates/openshell-providers/src/providers/vertex.rs +++ b/crates/openshell-providers/src/providers/vertex.rs @@ -49,7 +49,9 @@ impl ProviderPlugin for VertexProvider { // Set CLAUDE_CODE_USE_VERTEX=1 to enable Vertex AI in claude CLI // Must be in credentials (not config) to be injected into sandbox environment - provider.credentials.insert("CLAUDE_CODE_USE_VERTEX".to_string(), "1".to_string()); + provider + .credentials + .insert("CLAUDE_CODE_USE_VERTEX".to_string(), "1".to_string()); // Generate OAuth token from Application Default Credentials // Try to generate token, but don't fail if we're in a nested runtime context @@ -65,7 +67,9 @@ impl ProviderPlugin for VertexProvider { if let Some(token) = token { // Store the OAuth token as VERTEX_OAUTH_TOKEN // The inference router will use this as the Bearer token - provider.credentials.insert("VERTEX_OAUTH_TOKEN".to_string(), token); + provider + .credentials + .insert("VERTEX_OAUTH_TOKEN".to_string(), token); } } @@ -85,8 +89,8 @@ mod tests { #[test] fn discovers_vertex_env_credentials() { - let ctx = MockDiscoveryContext::new() - .with_env("ANTHROPIC_VERTEX_PROJECT_ID", "my-gcp-project"); + let ctx = + 
MockDiscoveryContext::new().with_env("ANTHROPIC_VERTEX_PROJECT_ID", "my-gcp-project"); let discovered = discover_with_spec(&SPEC, &ctx) .expect("discovery") .expect("provider"); From 8241dc702323efd89281a42b458e84e22cd5b2b1 Mon Sep 17 00:00:00 2001 From: itdove Date: Tue, 7 Apr 2026 16:16:06 -0400 Subject: [PATCH 16/18] refactor: remove OAuth token storage from Vertex provider Remove short-lived OAuth token generation and storage in gateway database. Tokens are now generated on-demand inside sandboxes from uploaded ADC files. Changes: - Remove generate_oauth_token() function and gcp_auth dependency - Remove VERTEX_OAUTH_TOKEN from direct credential injection - Remove OAuth token insertion in discover_existing() - Add unset IMAGE_TAG/TAG_LATEST in podman.env to prevent build conflicts - Update Cargo.lock to remove gcp_auth dependency tree Benefits: - No stale token pollution in database - Tokens generated fresh on-demand (auto-refresh via ADC) - Simpler provider creation (synchronous, no async OAuth) - Reduced dependency footprint (removes 32 packages) - Better security (tokens not persisted in database) Token lifecycle: - Provider stores only ANTHROPIC_VERTEX_PROJECT_ID and region - Sandboxes require --upload ~/.config/gcloud/ for token generation - Claude CLI uses gcp_auth to generate/refresh tokens from ADC - Tokens valid for 1 hour, automatically refreshed via refresh token --- Cargo.lock | 38 ------------------- crates/openshell-providers/Cargo.toml | 2 - .../src/providers/vertex.rs | 37 ++---------------- crates/openshell-sandbox/src/secrets.rs | 4 +- scripts/podman.env | 5 +++ 5 files changed, 12 insertions(+), 74 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1e2b542ee..98797cc24 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1509,32 +1509,6 @@ dependencies = [ "slab", ] -[[package]] -name = "gcp_auth" -version = "0.12.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"c2b3d0b409a042a380111af38136310839af8ac1a0917fb6e84515ed1e4bf3ee" -dependencies = [ - "async-trait", - "base64 0.22.1", - "bytes", - "chrono", - "http", - "http-body-util", - "hyper", - "hyper-rustls", - "hyper-util", - "ring", - "rustls-pki-types", - "serde", - "serde_json", - "thiserror 2.0.18", - "tokio", - "tracing", - "tracing-futures", - "url", -] - [[package]] name = "generic-array" version = "0.14.7" @@ -2945,10 +2919,8 @@ dependencies = [ name = "openshell-providers" version = "0.0.0" dependencies = [ - "gcp_auth", "openshell-core", "thiserror 2.0.18", - "tokio", ] [[package]] @@ -5406,16 +5378,6 @@ dependencies = [ "valuable", ] -[[package]] -name = "tracing-futures" -version = "0.2.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97d095ae15e245a057c8e8451bab9b3ee1e1f68e9ba2b4fbc18d0ac5237835f2" -dependencies = [ - "pin-project", - "tracing", -] - [[package]] name = "tracing-log" version = "0.2.0" diff --git a/crates/openshell-providers/Cargo.toml b/crates/openshell-providers/Cargo.toml index 0cf14ec2b..41f9ed6c0 100644 --- a/crates/openshell-providers/Cargo.toml +++ b/crates/openshell-providers/Cargo.toml @@ -13,8 +13,6 @@ repository.workspace = true [dependencies] openshell-core = { path = "../openshell-core" } thiserror = { workspace = true } -gcp_auth = "0.12" -tokio = { workspace = true } [lints] workspace = true diff --git a/crates/openshell-providers/src/providers/vertex.rs b/crates/openshell-providers/src/providers/vertex.rs index de8d45d31..5b2ecdf9d 100644 --- a/crates/openshell-providers/src/providers/vertex.rs +++ b/crates/openshell-providers/src/providers/vertex.rs @@ -16,21 +16,6 @@ pub const SPEC: ProviderDiscoverySpec = ProviderDiscoverySpec { // Additional config keys for Vertex AI const VERTEX_CONFIG_KEYS: &[&str] = &["ANTHROPIC_VERTEX_REGION"]; -/// Generate an OAuth token from GCP Application Default Credentials for Vertex AI. -/// -/// Returns `None` if ADC is not configured or token generation fails. 
-async fn generate_oauth_token() -> Option<String> {
-    // Try to find an appropriate token provider (checks ADC, service account, metadata server, etc.)
-    let provider = gcp_auth::provider().await.ok()?;
-
-    // Get token for Vertex AI scope
-    // Vertex AI uses the Cloud Platform scope
-    let scopes = &["https://www.googleapis.com/auth/cloud-platform"];
-    let token = provider.token(scopes).await.ok()?;
-
-    Some(token.as_str().to_string())
-}
-
 impl ProviderPlugin for VertexProvider {
     fn id(&self) -> &'static str {
         SPEC.id
@@ -53,24 +38,10 @@ impl ProviderPlugin for VertexProvider {
                 .credentials
                 .insert("CLAUDE_CODE_USE_VERTEX".to_string(), "1".to_string());
 
-            // Generate OAuth token from Application Default Credentials
-            // Try to generate token, but don't fail if we're in a nested runtime context
-            let token = std::thread::spawn(|| {
-                tokio::runtime::Runtime::new()
-                    .ok()
-                    .and_then(|rt| rt.block_on(generate_oauth_token()))
-            })
-            .join()
-            .ok()
-            .flatten();
-
-            if let Some(token) = token {
-                // Store the OAuth token as VERTEX_OAUTH_TOKEN
-                // The inference router will use this as the Bearer token
-                provider
-                    .credentials
-                    .insert("VERTEX_OAUTH_TOKEN".to_string(), token);
-            }
+            // NOTE: We do NOT generate/store VERTEX_OAUTH_TOKEN here.
+            // OAuth tokens are short-lived (~1 hour) and storing them leads to stale token pollution.
+            // Instead, sandboxes generate fresh tokens on-demand from the uploaded ADC file
+            // (requires --upload ~/.config/gcloud/:.config/gcloud/ when creating sandbox).
} Ok(discovered) diff --git a/crates/openshell-sandbox/src/secrets.rs b/crates/openshell-sandbox/src/secrets.rs index 0cd188b6e..87c353c83 100644 --- a/crates/openshell-sandbox/src/secrets.rs +++ b/crates/openshell-sandbox/src/secrets.rs @@ -23,8 +23,10 @@ pub(crate) const PLACEHOLDER_PREFIX_PUBLIC: &str = PLACEHOLDER_PREFIX; fn direct_inject_credentials() -> &'static [&'static str] { &[ // Vertex AI credentials for claude CLI + // NOTE: VERTEX_OAUTH_TOKEN is NOT included here - sandboxes generate + // fresh tokens on-demand from the uploaded ADC file instead of using + // a pre-generated (and likely expired) token from the provider database. "ANTHROPIC_VERTEX_PROJECT_ID", - "VERTEX_OAUTH_TOKEN", "ANTHROPIC_VERTEX_REGION", "CLAUDE_CODE_USE_VERTEX", ] diff --git a/scripts/podman.env b/scripts/podman.env index 5aba469b2..459627c0e 100644 --- a/scripts/podman.env +++ b/scripts/podman.env @@ -8,6 +8,11 @@ MACHINE_NAME="${PODMAN_MACHINE_NAME:-openshell}" +# Clear variables from other build workflows that would interfere with local development +unset IMAGE_TAG +unset TAG_LATEST +unset REGISTRY + # Get Podman socket path from the machine if command -v podman &>/dev/null; then SOCKET_PATH=$(podman machine inspect "${MACHINE_NAME}" --format '{{.ConnectionInfo.PodmanSocket.Path}}' 2>/dev/null) From 987b2a0e4d2d6154aa3ba19634c0a6eed843b609 Mon Sep 17 00:00:00 2001 From: itdove Date: Tue, 7 Apr 2026 16:27:27 -0400 Subject: [PATCH 17/18] docs(vertex): improve ADC detection and troubleshooting docs - Check for ADC in both GOOGLE_APPLICATION_CREDENTIALS and default location - Add critical warning about --upload ~/.config/gcloud/ requirement - Document security model for credential injection strategy - Add comprehensive troubleshooting section with solutions for: - Authentication failures (missing ADC) - Project not found errors - Region not supported errors --- .../src/providers/vertex.rs | 28 ++++++ examples/vertex-ai/README.md | 93 +++++++++++++++++-- 2 files changed, 115 
insertions(+), 6 deletions(-) diff --git a/crates/openshell-providers/src/providers/vertex.rs b/crates/openshell-providers/src/providers/vertex.rs index 5b2ecdf9d..38d54a24e 100644 --- a/crates/openshell-providers/src/providers/vertex.rs +++ b/crates/openshell-providers/src/providers/vertex.rs @@ -42,6 +42,34 @@ impl ProviderPlugin for VertexProvider { // OAuth tokens are short-lived (~1 hour) and storing them leads to stale token pollution. // Instead, sandboxes generate fresh tokens on-demand from the uploaded ADC file // (requires --upload ~/.config/gcloud/:.config/gcloud/ when creating sandbox). + + // Warn if ADC doesn't exist on host + let adc_exists = if let Ok(custom_path) = std::env::var("GOOGLE_APPLICATION_CREDENTIALS") { + std::path::Path::new(&custom_path).exists() + } else { + let default_path = format!( + "{}/.config/gcloud/application_default_credentials.json", + std::env::var("HOME").unwrap_or_default() + ); + std::path::Path::new(&default_path).exists() + }; + + if !adc_exists { + eprintln!(); + eprintln!("⚠️ Warning: GCP Application Default Credentials not found"); + eprintln!(" Sandboxes will need ADC uploaded to generate OAuth tokens."); + eprintln!(); + eprintln!(" Configure ADC with:"); + eprintln!(" gcloud auth application-default login"); + eprintln!(); + eprintln!(" Or use a service account key:"); + eprintln!(" export GOOGLE_APPLICATION_CREDENTIALS=/path/to/key.json"); + eprintln!(); + eprintln!(" Then upload credentials when creating sandboxes:"); + eprintln!(" openshell sandbox create --provider vertex \\"); + eprintln!(" --upload ~/.config/gcloud/:.config/gcloud/"); + eprintln!(); + } } Ok(discovered) diff --git a/examples/vertex-ai/README.md b/examples/vertex-ai/README.md index ec0cdf78a..2423c3d04 100644 --- a/examples/vertex-ai/README.md +++ b/examples/vertex-ai/README.md @@ -2,22 +2,32 @@ This example demonstrates how to use OpenShell with Google Cloud Vertex AI to run Claude models via GCP infrastructure. 
+## ⚠️ Critical Requirement
+
+Vertex AI sandboxes **MUST** upload GCP credentials to generate OAuth tokens:
+
+```bash
+--upload ~/.config/gcloud/:.config/gcloud/
+```
+
+Without this upload, token generation will fail and sandboxes cannot connect to Vertex AI.
+
 ## Quick Start
 
 ```bash
-# Configure GCP credentials
+# 1. Configure GCP credentials
 export ANTHROPIC_VERTEX_PROJECT_ID=your-gcp-project-id
 gcloud auth application-default login
 
-# Create provider
+# 2. Create provider
 openshell provider create --name vertex --type vertex --from-existing
 
-# Create sandbox with policy
+# 3. Create sandbox with credentials uploaded (REQUIRED for OAuth token generation)
 openshell sandbox create --name vertex-test --provider vertex \
-  --upload ~/.config/gcloud/:.config/gcloud/ \
+  --upload ~/.config/gcloud/:.config/gcloud/ \
   --policy examples/vertex-ai/sandbox-policy.yaml
 
-# Inside sandbox
+# 4. Inside sandbox
 claude # Automatically uses Vertex AI
 ```
 
@@ -28,9 +38,80 @@ claude # Automatically uses Vertex AI
 - Enables direct Claude CLI usage
 - Enables `inference.local` routing
 
+## Security Model
+
+### Credential Injection
+
+Vertex AI uses selective credential injection for CLI tool compatibility:
+
+**Directly injected (visible in `/proc/<pid>/environ`):**
+- `ANTHROPIC_VERTEX_PROJECT_ID` - Not sensitive (public project ID, visible in API URLs)
+- `CLAUDE_CODE_USE_VERTEX` - Configuration flag (boolean)
+- `ANTHROPIC_VERTEX_REGION` - Public metadata (region name)
+
+**Generated in sandbox (not stored in gateway database):**
+- OAuth access tokens - Generated on-demand from uploaded ADC file, automatically refreshed
+
+**Trade-off:** Direct injection required for Claude CLI compatibility (cannot use HTTP proxy placeholders). Risk is low since no secrets are exposed via environment variables.
+
+## Troubleshooting
+
+### "Authentication failed" or "invalid credentials"
+
+**Cause:** Sandbox cannot generate OAuth tokens (ADC file not uploaded or missing).
+
+**Solution:**
+1. Verify ADC exists on host:
+   ```bash
+   ls -la ~/.config/gcloud/application_default_credentials.json
+   ```
+
+2. If missing, configure ADC:
+   ```bash
+   gcloud auth application-default login
+   ```
+
+3. Ensure sandbox creation includes upload:
+   ```bash
+   openshell sandbox create --provider vertex \
+     --upload ~/.config/gcloud/:.config/gcloud/  # ← Required
+   ```
+
+### "Project not found" errors
+
+**Cause:** Invalid or inaccessible GCP project ID.
+
+**Solution:**
+1. Verify project exists and you have access:
+   ```bash
+   gcloud projects describe $ANTHROPIC_VERTEX_PROJECT_ID
+   ```
+
+2. Check Vertex AI API is enabled:
+   ```bash
+   gcloud services list --enabled --project=$ANTHROPIC_VERTEX_PROJECT_ID | grep aiplatform
+   ```
+
+3. Enable if needed:
+   ```bash
+   gcloud services enable aiplatform.googleapis.com --project=$ANTHROPIC_VERTEX_PROJECT_ID
+   ```
+
+### "Region not supported" errors
+
+**Cause:** Vertex AI endpoint for your region not in network policy.
+
+**Solution:** Add region to `sandbox-policy.yaml`:
+```yaml
+- host: your-region-aiplatform.googleapis.com
+  port: 443
+```
+
+Supported regions: us-central1, us-east5, us-west1, europe-west1, europe-west4, asia-northeast1, asia-southeast1
+
 ## Documentation
 
-For detailed setup instructions, troubleshooting, and configuration options, see:
+For detailed setup instructions and configuration options, see:
 
 - [Vertex AI Provider Configuration](../../docs/inference/configure.md#google-cloud-vertex-ai)
 - [Provider Management](../../docs/sandboxes/manage-providers.md)

From c58f3c7eec90b8dd252e4943ee1c9f062e42515f Mon Sep 17 00:00:00 2001
From: itdove
Date: Tue, 7 Apr 2026 16:29:40 -0400
Subject: [PATCH 18/18] style(vertex): apply cargo fmt formatting

---
 .../src/providers/vertex.rs | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/crates/openshell-providers/src/providers/vertex.rs b/crates/openshell-providers/src/providers/vertex.rs
index 38d54a24e..f5b5b67d0 100644
--- a/crates/openshell-providers/src/providers/vertex.rs
+++ b/crates/openshell-providers/src/providers/vertex.rs
@@ -44,15 +44,16 @@ impl ProviderPlugin for VertexProvider {
             // (requires --upload ~/.config/gcloud/:.config/gcloud/ when creating sandbox).
 
             // Warn if ADC doesn't exist on host
-            let adc_exists = if let Ok(custom_path) = std::env::var("GOOGLE_APPLICATION_CREDENTIALS") {
-                std::path::Path::new(&custom_path).exists()
-            } else {
-                let default_path = format!(
-                    "{}/.config/gcloud/application_default_credentials.json",
-                    std::env::var("HOME").unwrap_or_default()
-                );
-                std::path::Path::new(&default_path).exists()
-            };
+            let adc_exists =
+                if let Ok(custom_path) = std::env::var("GOOGLE_APPLICATION_CREDENTIALS") {
+                    std::path::Path::new(&custom_path).exists()
+                } else {
+                    let default_path = format!(
+                        "{}/.config/gcloud/application_default_credentials.json",
+                        std::env::var("HOME").unwrap_or_default()
+                    );
+                    std::path::Path::new(&default_path).exists()
+                };
 
             if !adc_exists {
                 eprintln!();