Skip to content

Commit c46a516

Browse files
authored
feat(gateway): support adding remote and local gateways (#262)
1 parent 3f8cf43 commit c46a516

File tree

6 files changed

+630
-149
lines changed

6 files changed

+630
-149
lines changed

crates/navigator-bootstrap/src/docker.rs

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,71 @@ pub async fn create_ssh_docker_client(remote: &RemoteOptions) -> Result<Docker>
121121
.wrap_err("failed to negotiate Docker API version with remote daemon")
122122
}
123123

124+
/// Find the running openshell gateway container by image name.
125+
///
126+
/// Lists all running containers and returns the name of the one whose image
127+
/// contains `openshell/cluster`. When `port` is provided, only containers
128+
/// with a matching host port binding are considered — this disambiguates
129+
/// when multiple gateway containers are running on the same host.
130+
///
131+
/// Fails if zero or multiple containers match.
132+
pub async fn find_gateway_container(docker: &Docker, port: Option<u16>) -> Result<String> {
133+
let containers = docker
134+
.list_containers(Some(ListContainersOptionsBuilder::new().all(false).build()))
135+
.await
136+
.into_diagnostic()
137+
.wrap_err("failed to list Docker containers")?;
138+
139+
let is_gateway_image = |c: &bollard::models::ContainerSummary| {
140+
c.image
141+
.as_deref()
142+
.is_some_and(|img| img.contains("openshell/cluster"))
143+
};
144+
145+
let has_port = |c: &bollard::models::ContainerSummary, p: u16| {
146+
c.ports
147+
.as_deref()
148+
.unwrap_or_default()
149+
.iter()
150+
.any(|binding| binding.public_port == Some(p))
151+
};
152+
153+
let container_name = |c: &bollard::models::ContainerSummary| {
154+
c.names
155+
.as_ref()
156+
.and_then(|n| n.first())
157+
.map(|n| n.trim_start_matches('/').to_string())
158+
};
159+
160+
let matches: Vec<String> = containers
161+
.iter()
162+
.filter(|c| is_gateway_image(c) && port.map_or(true, |p| has_port(c, p)))
163+
.filter_map(container_name)
164+
.collect();
165+
166+
match matches.len() {
167+
0 => {
168+
let hint = if let Some(p) = port {
169+
format!(
170+
"No openshell gateway container found listening on port {p}.\n\
171+
Is the gateway running? Check with: docker ps"
172+
)
173+
} else {
174+
"No openshell gateway container found.\n\
175+
Is the gateway running? Check with: docker ps"
176+
.to_string()
177+
};
178+
Err(miette::miette!("{hint}"))
179+
}
180+
1 => Ok(matches.into_iter().next().unwrap()),
181+
_ => Err(miette::miette!(
182+
"Found multiple openshell gateway containers: {}\n\
183+
Specify the port in the endpoint URL to select one (e.g. https://host:8080).",
184+
matches.join(", ")
185+
)),
186+
}
187+
}
188+
124189
pub async fn ensure_network(docker: &Docker) -> Result<()> {
125190
// Always remove and recreate the network to guarantee a clean state.
126191
// Stale Docker networks (e.g., from a previous interrupted destroy or

crates/navigator-bootstrap/src/lib.rs

Lines changed: 29 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,7 @@ use crate::docker::{
3333
ensure_image, ensure_network, ensure_volume, start_container, stop_container,
3434
};
3535
use crate::metadata::{
36-
create_gateway_metadata, create_gateway_metadata_with_host, extract_host_from_ssh_destination,
37-
local_gateway_host, resolve_ssh_hostname,
36+
create_gateway_metadata, create_gateway_metadata_with_host, local_gateway_host,
3837
};
3938
use crate::mtls::store_pki_bundle;
4039
use crate::pki::generate_pki;
@@ -46,9 +45,10 @@ use crate::runtime::{
4645
pub use crate::constants::container_name;
4746
pub use crate::docker::{ExistingGatewayInfo, create_ssh_docker_client};
4847
pub use crate::metadata::{
49-
GatewayMetadata, clear_active_gateway, get_gateway_metadata, list_gateways,
50-
load_active_gateway, load_gateway_metadata, load_last_sandbox, remove_gateway_metadata,
51-
save_active_gateway, save_last_sandbox, store_gateway_metadata,
48+
GatewayMetadata, clear_active_gateway, extract_host_from_ssh_destination, get_gateway_metadata,
49+
list_gateways, load_active_gateway, load_gateway_metadata, load_last_sandbox,
50+
remove_gateway_metadata, resolve_ssh_hostname, save_active_gateway, save_last_sandbox,
51+
store_gateway_metadata,
5252
};
5353

5454
/// Options for remote SSH deployment.
@@ -479,6 +479,30 @@ pub async fn gateway_handle(name: &str, remote: Option<&RemoteOptions>) -> Resul
479479
})
480480
}
481481

482+
/// Extract mTLS certificates from an existing gateway container and store
483+
/// them locally so the CLI can connect.
484+
///
485+
/// Connects to Docker (local or remote via SSH), auto-discovers the running
486+
/// gateway container by image name (narrowed by `port` when provided), reads
487+
/// the PKI bundle from Kubernetes secrets inside it, and writes the client
488+
/// materials (ca.crt, tls.crt, tls.key) to the gateway config directory.
489+
pub async fn extract_and_store_pki(
490+
name: &str,
491+
remote: Option<&RemoteOptions>,
492+
port: Option<u16>,
493+
) -> Result<()> {
494+
let docker = match remote {
495+
Some(r) => create_ssh_docker_client(r).await?,
496+
None => Docker::connect_with_local_defaults().into_diagnostic()?,
497+
};
498+
let cname = docker::find_gateway_container(&docker, port).await?;
499+
let bundle = load_existing_pki_bundle(&docker, &cname, constants::KUBECONFIG_PATH)
500+
.await
501+
.map_err(|e| miette::miette!("Failed to extract TLS certificates: {e}"))?;
502+
store_pki_bundle(name, &bundle)?;
503+
Ok(())
504+
}
505+
482506
pub async fn ensure_gateway_image(version: &str, registry_token: Option<&str>) -> Result<String> {
483507
let docker = Docker::connect_with_local_defaults().into_diagnostic()?;
484508
let image_ref = format!("{}:{version}", image::DEFAULT_GATEWAY_IMAGE);

crates/navigator-cli/src/bootstrap.rs

Lines changed: 83 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,14 @@
44
//! Auto-bootstrap helpers for sandbox creation.
55
//!
66
//! When `sandbox create` cannot reach a gateway, these helpers determine whether
7-
//! to offer gateway bootstrap, prompt the user for confirmation, and execute the
8-
//! local or remote bootstrap flow.
7+
//! to attempt gateway bootstrap and execute the local or remote bootstrap flow.
8+
//! Bootstrap proceeds automatically unless the user opts out with `--no-bootstrap`.
99
10-
use crate::tls::TlsOptions;
11-
use dialoguer::Confirm;
10+
use std::time::Duration;
11+
12+
use crate::tls::{TlsOptions, grpc_client};
1213
use miette::Result;
1314
use owo_colors::OwoColorize;
14-
use std::io::IsTerminal;
1515

1616
use crate::run::{deploy_gateway_with_panel, print_deploy_summary};
1717

@@ -95,46 +95,55 @@ fn is_connectivity_error(error: &miette::Report) -> bool {
9595
connectivity_patterns.iter().any(|p| lower.contains(p))
9696
}
9797

98-
/// Prompt the user to confirm gateway bootstrap.
98+
/// Decide whether gateway bootstrap should proceed.
9999
///
100-
/// When `override_value` is `Some(true)` or `Some(false)`, the decision is
101-
/// made immediately (from `--bootstrap` / `--no-bootstrap`). Otherwise,
102-
/// prompts interactively when stdin is a terminal, or returns an error in
103-
/// non-interactive mode.
100+
/// When `override_value` is `Some(false)` (from `--no-bootstrap`), returns
101+
/// `false` to skip bootstrap. Otherwise returns `true` — a gateway is created
102+
/// automatically without prompting the user.
104103
pub fn confirm_bootstrap(override_value: Option<bool>) -> Result<bool> {
105-
// Explicit flag takes precedence over interactive detection.
106-
if let Some(value) = override_value {
107-
return Ok(value);
108-
}
109-
110-
if !std::io::stdin().is_terminal() {
111-
return Err(miette::miette!(
112-
"Gateway not reachable and bootstrap requires confirmation from an interactive terminal.\n\
113-
Pass --bootstrap to auto-confirm, or run 'openshell gateway start' first."
114-
));
104+
if let Some(false) = override_value {
105+
return Ok(false);
115106
}
107+
Ok(true)
108+
}
116109

117-
let confirmed = Confirm::new()
118-
.with_prompt(format!(
119-
"{} No gateway available to launch sandbox in. Create one now?",
120-
"!".yellow()
121-
))
122-
.default(true)
123-
.interact()
124-
.map_err(|e| miette::miette!("failed to read confirmation: {e}"))?;
125-
126-
Ok(confirmed)
110+
/// Resolve the gateway name for bootstrap.
111+
///
112+
/// Respects `$OPENSHELL_GATEWAY` if set, otherwise falls back to the default.
113+
fn resolve_bootstrap_name() -> String {
114+
std::env::var("OPENSHELL_GATEWAY")
115+
.ok()
116+
.filter(|v| !v.trim().is_empty())
117+
.unwrap_or_else(|| DEFAULT_GATEWAY_NAME.to_string())
127118
}
128119

129120
/// Bootstrap a local gateway and return refreshed TLS options that pick up the
130-
/// newly-written mTLS certificates.
121+
/// newly-written mTLS certificates, along with the gateway name used.
131122
pub async fn run_bootstrap(
132123
remote: Option<&str>,
133124
ssh_key: Option<&str>,
134-
) -> Result<(TlsOptions, String)> {
125+
) -> Result<(TlsOptions, String, String)> {
126+
let gateway_name = resolve_bootstrap_name();
135127
let location = if remote.is_some() { "remote" } else { "local" };
136128

137-
let mut options = navigator_bootstrap::DeployOptions::new(DEFAULT_GATEWAY_NAME);
129+
eprintln!();
130+
eprintln!(
131+
"{} No gateway found — starting one automatically.",
132+
"ℹ".cyan().bold()
133+
);
134+
eprintln!();
135+
eprintln!(" The Gateway provides a secure control plane for OpenShell. It streamlines");
136+
eprintln!(" access for humans and agents alike — handles sandbox orchestration, and");
137+
eprintln!(" enables secure, concurrent agent workflows.");
138+
eprintln!();
139+
eprintln!(
140+
" Manage it later with: {} or {}",
141+
"openshell gateway status".bold(),
142+
"openshell gateway stop".bold(),
143+
);
144+
eprintln!();
145+
146+
let mut options = navigator_bootstrap::DeployOptions::new(&gateway_name);
138147
if let Some(dest) = remote {
139148
let mut remote_opts = navigator_bootstrap::RemoteOptions::new(dest);
140149
if let Some(key) = ssh_key {
@@ -151,23 +160,59 @@ pub async fn run_bootstrap(
151160
options = options.with_registry_token(token);
152161
}
153162

154-
let handle = deploy_gateway_with_panel(options, DEFAULT_GATEWAY_NAME, location).await?;
163+
let handle = deploy_gateway_with_panel(options, &gateway_name, location).await?;
155164
let server = handle.gateway_endpoint().to_string();
156165

157-
print_deploy_summary(DEFAULT_GATEWAY_NAME, &handle);
166+
print_deploy_summary(&gateway_name, &handle);
158167

159168
// Auto-activate the bootstrapped gateway.
160-
if let Err(err) = navigator_bootstrap::save_active_gateway(DEFAULT_GATEWAY_NAME) {
169+
if let Err(err) = navigator_bootstrap::save_active_gateway(&gateway_name) {
161170
tracing::debug!("failed to set active gateway after bootstrap: {err}");
162171
}
163172

164173
// Build fresh TLS options that resolve the newly-written mTLS certs from
165174
// the default XDG path for this gateway, using the gateway name directly.
166175
let tls = TlsOptions::default()
167-
.with_gateway_name(DEFAULT_GATEWAY_NAME)
176+
.with_gateway_name(&gateway_name)
168177
.with_default_paths(&server);
169178

170-
Ok((tls, server))
179+
// Wait for the gateway gRPC endpoint to accept connections before
180+
// handing back to the caller. The Docker health check may pass before
181+
// the gRPC listener is fully ready, so retry with backoff.
182+
wait_for_grpc_ready(&server, &tls).await?;
183+
184+
Ok((tls, server, gateway_name))
185+
}
186+
187+
/// Retry connecting to the gateway gRPC endpoint until it succeeds or a
188+
/// timeout is reached. Uses exponential backoff starting at 500 ms, doubling
189+
/// up to 4 s, with a total deadline of 30 s.
190+
async fn wait_for_grpc_ready(server: &str, tls: &TlsOptions) -> Result<()> {
191+
const MAX_WAIT: Duration = Duration::from_secs(30);
192+
const INITIAL_BACKOFF: Duration = Duration::from_millis(500);
193+
194+
let start = std::time::Instant::now();
195+
let mut backoff = INITIAL_BACKOFF;
196+
let mut last_err = None;
197+
198+
while start.elapsed() < MAX_WAIT {
199+
match grpc_client(server, tls).await {
200+
Ok(_client) => return Ok(()),
201+
Err(err) => {
202+
tracing::debug!(
203+
elapsed = ?start.elapsed(),
204+
"gateway not yet accepting connections: {err:#}"
205+
);
206+
last_err = Some(err);
207+
}
208+
}
209+
tokio::time::sleep(backoff).await;
210+
backoff = (backoff * 2).min(Duration::from_secs(4));
211+
}
212+
213+
Err(last_err
214+
.unwrap_or_else(|| miette::miette!("timed out waiting for gateway"))
215+
.wrap_err("gateway deployed but not accepting connections after 30 s"))
171216
}
172217

173218
#[cfg(test)]

0 commit comments

Comments
 (0)