Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion crates/openshell-sandbox/data/sandbox-policy.rego
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ deny_reason := reason if {
policy := data.network_policies[name]
endpoint_allowed(policy, input.network)
not binary_allowed(policy, input.exec)
r := sprintf("binary '%s' (ancestors: [%s], cmdline: [%s]) not allowed in policy '%s'", [input.exec.path, ancestors_str, cmdline_str, name])
r := sprintf("binary '%s' not allowed in policy '%s' (ancestors: [%s], cmdline: [%s]). SYMLINK HINT: the binary path is the kernel-resolved target from /proc/<pid>/exe, not the symlink. If your policy specifies a symlink (e.g., /usr/bin/python3) but the actual binary is /usr/bin/python3.11, either: (1) use the canonical path in your policy (run 'readlink -f /usr/bin/python3' inside the sandbox), or (2) ensure symlink resolution is working (check sandbox logs for 'Cannot access container filesystem')", [input.exec.path, name, ancestors_str, cmdline_str])
]
all_reasons := array.concat(endpoint_misses, binary_misses)
count(all_reasons) > 0
Expand Down
70 changes: 59 additions & 11 deletions crates/openshell-sandbox/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ pub async fn run_sandbox(
// Load policy and initialize OPA engine
let openshell_endpoint_for_proxy = openshell_endpoint.clone();
let sandbox_name_for_agg = sandbox.clone();
let (policy, opa_engine) = load_policy(
let (policy, opa_engine, retained_proto) = load_policy(
sandbox_id.clone(),
sandbox,
openshell_endpoint.clone(),
Expand Down Expand Up @@ -568,22 +568,54 @@ pub async fn run_sandbox(
entrypoint_pid.store(handle.pid(), Ordering::Release);
info!(pid = handle.pid(), "Process started");

// Resolve policy binary symlinks now that the container filesystem is
// accessible via /proc/<pid>/root/. This expands symlinks like
// /usr/bin/python3 → /usr/bin/python3.11 in the OPA policy data so that
// either path matches at evaluation time.
//
// If /proc/<pid>/root/ is inaccessible (restricted ptrace, rootless
// container, etc.), resolve_binary_in_container logs a warning per binary
// and falls back to literal path matching. The reload itself still
// succeeds — only the symlink expansion is skipped.
if let (Some(engine), Some(proto)) = (&opa_engine, &retained_proto) {
let pid = handle.pid();
if let Err(e) = engine.reload_from_proto_with_pid(proto, pid) {
warn!(
error = %e,
"Failed to rebuild OPA engine with symlink resolution (non-fatal, \
falling back to literal path matching)"
);
} else {
info!(
pid = pid,
"Policy binary symlink resolution attempted via container filesystem \
(check logs above for per-binary results)"
);
}
}

// Spawn background policy poll task (gRPC mode only).
if let (Some(id), Some(endpoint), Some(engine)) =
(&sandbox_id, &openshell_endpoint, &opa_engine)
{
let poll_id = id.clone();
let poll_endpoint = endpoint.clone();
let poll_engine = engine.clone();
let poll_pid = entrypoint_pid.clone();
let poll_interval_secs: u64 = std::env::var("OPENSHELL_POLICY_POLL_INTERVAL_SECS")
.ok()
.and_then(|v| v.parse().ok())
.unwrap_or(10);

tokio::spawn(async move {
if let Err(e) =
run_policy_poll_loop(&poll_endpoint, &poll_id, &poll_engine, poll_interval_secs)
.await
if let Err(e) = run_policy_poll_loop(
&poll_endpoint,
&poll_id,
&poll_engine,
&poll_pid,
poll_interval_secs,
)
.await
{
warn!(error = %e, "Policy poll loop exited with error");
}
Expand Down Expand Up @@ -1158,13 +1190,21 @@ mod baseline_tests {
/// 2. If `sandbox_id` and `openshell_endpoint` are provided, fetch via gRPC
/// 3. If the server returns no policy, discover from disk or use restrictive default
/// 4. Otherwise, return an error
///
/// Returns the policy, the OPA engine, and (for gRPC mode) the original proto
/// policy; file mode returns `None` in that third slot. The proto is retained
/// so the OPA engine can be rebuilt with symlink resolution after the
/// container entrypoint starts.
async fn load_policy(
sandbox_id: Option<String>,
sandbox: Option<String>,
openshell_endpoint: Option<String>,
policy_rules: Option<String>,
policy_data: Option<String>,
) -> Result<(SandboxPolicy, Option<Arc<OpaEngine>>)> {
) -> Result<(
SandboxPolicy,
Option<Arc<OpaEngine>>,
Option<openshell_core::proto::SandboxPolicy>,
)> {
// File mode: load OPA engine from rego rules + YAML data (dev override)
if let (Some(policy_file), Some(data_file)) = (&policy_rules, &policy_data) {
info!(
Expand All @@ -1188,7 +1228,7 @@ async fn load_policy(
process: config.process,
};
enrich_sandbox_baseline_paths(&mut policy);
return Ok((policy, Some(Arc::new(engine))));
return Ok((policy, Some(Arc::new(engine)), None));
}

// gRPC mode: fetch typed proto policy, construct OPA engine from baked rules + proto data
Expand Down Expand Up @@ -1244,11 +1284,14 @@ async fn load_policy(
// Build OPA engine from baked-in rules + typed proto data.
// In cluster mode, proxy networking is always enabled so OPA is
// always required for allow/deny decisions.
// The initial load runs without an entrypoint PID (pid=0, i.e. no symlink
// resolution) because the container hasn't started yet. After the
// entrypoint spawns, the engine is rebuilt via `reload_from_proto_with_pid`
// with the real PID for symlink resolution.
info!("Creating OPA engine from proto policy data");
let opa_engine = Some(Arc::new(OpaEngine::from_proto(&proto_policy)?));

let policy = SandboxPolicy::try_from(proto_policy)?;
return Ok((policy, opa_engine));
let policy = SandboxPolicy::try_from(proto_policy.clone())?;
return Ok((policy, opa_engine, Some(proto_policy)));
}

// No policy source available
Expand Down Expand Up @@ -1505,12 +1548,16 @@ async fn flush_proposals_to_gateway(
Ok(())
}

/// `reload_from_proto()`. Reports load success/failure back to the server.
/// On failure, the previous engine is untouched (LKG behavior).
/// `reload_from_proto_with_pid()`. Reports load success/failure back to the
/// server. On failure, the previous engine is untouched (LKG behavior).
///
/// Each reload reads the current entrypoint PID from `entrypoint_pid`. When
/// the entrypoint PID is available, policy reloads include symlink
/// resolution for binary paths via the container filesystem.
async fn run_policy_poll_loop(
endpoint: &str,
sandbox_id: &str,
opa_engine: &Arc<OpaEngine>,
entrypoint_pid: &Arc<AtomicU32>,
interval_secs: u64,
) -> Result<()> {
use crate::grpc_client::CachedOpenShellClient;
Expand Down Expand Up @@ -1580,7 +1627,8 @@ async fn run_policy_poll_loop(
continue;
};

match opa_engine.reload_from_proto(policy) {
let pid = entrypoint_pid.load(Ordering::Acquire);
match opa_engine.reload_from_proto_with_pid(policy, pid) {
Ok(()) => {
if result.global_policy_version > 0 {
info!(
Expand Down
Loading
Loading