Skip to content

Commit 3a418e2

Browse files
authored
Merge pull request #70 from Sewer56/optimize-a-bit
Changed: Trim hot path overhead across bash and sandbox tools
2 parents bdc72b0 + 3ad8a77 commit 3a418e2

21 files changed

Lines changed: 713 additions & 411 deletions

File tree

src/llm-coding-tools-bubblewrap/src/probe.rs

Lines changed: 57 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,9 @@
55
use crate::path_util::normalize_path;
66
use crate::{Availability, LinuxBwrapError, Preset};
77
use parking_lot::RwLock;
8+
use std::collections::HashSet;
89
use std::env;
9-
use std::ffi::OsString;
10+
use std::ffi::{OsStr, OsString};
1011
use std::path::{Path, PathBuf};
1112
use std::process::{Command, Output, Stdio};
1213
use std::sync::Arc;
@@ -91,10 +92,10 @@ fn profile_name(preset: Option<Preset>) -> &'static str {
9192
}
9293
}
9394

94-
/// Searches `PATH` directories for a file named `name` and returns the first match.
95-
pub(crate) fn find_binary_on_path(name: &str) -> Option<Box<Path>> {
96-
let path = env::var_os("PATH")?;
97-
for dir in env::split_paths(&path) {
95+
#[inline]
96+
fn find_binary_on_path_in(name: &str, path: Option<&OsStr>) -> Option<Box<Path>> {
97+
let path = path?;
98+
for dir in env::split_paths(path) {
9899
if !dir.is_absolute() || dir.as_os_str().is_empty() {
99100
continue;
100101
}
@@ -115,29 +116,59 @@ pub(crate) fn first_shell_candidate_with<F, R>(mut classify: F) -> Option<(Box<P
115116
where
116117
F: FnMut(&Path) -> Option<R>,
117118
{
119+
first_shell_candidate_with_in(env::var_os("PATH").as_deref(), &mut classify)
120+
}
121+
122+
fn first_shell_candidate_with_in<F, R>(
123+
env_path: Option<&OsStr>,
124+
classify: &mut F,
125+
) -> Option<(Box<Path>, R)>
126+
where
127+
F: FnMut(&Path) -> Option<R>,
128+
{
129+
let mut seen = HashSet::with_capacity(10);
130+
118131
for name in ["bash", "sh"] {
119-
if let Some(path) = find_binary_on_path(name) {
120-
let path = normalize_path(path.as_ref());
121-
if let Some(r) = classify(path.as_ref()) {
122-
return Some((path, r));
132+
if let Some(shell_path) = find_binary_on_path_in(name, env_path) {
133+
if let Some(result) = classify_shell_candidate(classify, &mut seen, shell_path) {
134+
return Some(result);
123135
}
124136
}
125137
}
138+
126139
for candidate in SHELL_CANDIDATES {
127-
let path = PathBuf::from(candidate);
128-
if path.is_file() {
129-
let path = normalize_path(&path);
130-
if let Some(r) = classify(path.as_ref()) {
131-
return Some((path, r));
140+
let candidate_path = PathBuf::from(candidate);
141+
if candidate_path.is_file() {
142+
if let Some(result) =
143+
classify_shell_candidate(classify, &mut seen, candidate_path.into_boxed_path())
144+
{
145+
return Some(result);
132146
}
133147
}
134148
}
149+
135150
None
136151
}
137152

138-
/// Returns any available host shell (`bash` preferred, then `sh`).
139-
pub(crate) fn resolve_host_shell() -> Option<Box<Path>> {
140-
first_shell_candidate_with(|_| Some(())).map(|(path, _)| path)
153+
#[inline]
154+
fn classify_shell_candidate<F, R>(
155+
classify: &mut F,
156+
seen: &mut HashSet<Box<Path>>,
157+
path: Box<Path>,
158+
) -> Option<(Box<Path>, R)>
159+
where
160+
F: FnMut(&Path) -> Option<R>,
161+
{
162+
let path = normalize_path(path.as_ref());
163+
if !seen.insert(path.clone()) {
164+
return None;
165+
}
166+
classify(path.as_ref()).map(|result| (path, result))
167+
}
168+
169+
#[inline]
170+
fn resolve_host_shell_in(path: Option<&OsStr>) -> Option<Box<Path>> {
171+
first_shell_candidate_with_in(path, &mut |_| Some(())).map(|(path, _)| path)
141172
}
142173

143174
fn probe_backend() -> LinuxBwrapBackend {
@@ -157,7 +188,7 @@ fn probe_backend() -> LinuxBwrapBackend {
157188
}
158189
}
159190

160-
let backend = probe_backend_uncached();
191+
let backend = probe_backend_uncached(path.as_deref());
161192
*cache.write() = Some((path, backend.clone()));
162193
backend
163194
}
@@ -166,31 +197,14 @@ fn probe_backend() -> LinuxBwrapBackend {
166197
///
167198
/// The probe binds the host root read-only and runs a tiny shell command. That
168199
/// checks both namespace support and shell visibility on FHS and Nix systems.
169-
fn probe_backend_uncached() -> LinuxBwrapBackend {
170-
let Some(bwrap) = find_binary_on_path("bwrap") else {
200+
fn probe_backend_uncached(path: Option<&OsStr>) -> LinuxBwrapBackend {
201+
let Some(bwrap) = find_binary_on_path_in("bwrap", path) else {
171202
return LinuxBwrapBackend::MissingBinary {
172203
reason: Box::from("`bwrap` was not found on PATH"),
173204
};
174205
};
175206

176-
let version = Command::new(bwrap.as_os_str())
177-
.arg("--version")
178-
.stdin(Stdio::null())
179-
.stdout(Stdio::null())
180-
.stderr(Stdio::piped())
181-
.output();
182-
let Ok(version) = version else {
183-
return LinuxBwrapBackend::MissingBinary {
184-
reason: format!("failed to execute {}", bwrap.display()).into_boxed_str(),
185-
};
186-
};
187-
if !version.status.success() {
188-
return LinuxBwrapBackend::Unusable {
189-
reason: probe_failure_reason(&version, "`bwrap --version` failed"),
190-
};
191-
}
192-
193-
let Some(shell) = resolve_host_shell() else {
207+
let Some(shell) = resolve_host_shell_in(path) else {
194208
return LinuxBwrapBackend::Unusable {
195209
reason: Box::from("no usable host shell (`bash` or `sh`) was found"),
196210
};
@@ -257,6 +271,11 @@ mod tests {
257271
use serial_test::serial;
258272
use tempfile::TempDir;
259273

274+
/// Searches `PATH` directories for a file named `name` and returns the first match.
275+
fn find_binary_on_path(name: &str) -> Option<Box<Path>> {
276+
find_binary_on_path_in(name, env::var_os("PATH").as_deref())
277+
}
278+
260279
// These tests swap PATH and exercise a process-wide availability cache, so
261280
// cases that probe `bwrap` run serially to avoid cross-test contamination.
262281

@@ -298,14 +317,6 @@ mod tests {
298317
// Make `bwrap` look installed, then fail the "can it sandbox?" probe.
299318
let script = format!(
300319
r#"#!/bin/sh
301-
# Handle --version probe
302-
for arg in "$@"; do
303-
if [ "$arg" = "--version" ]; then
304-
echo "bubblewrap 0.8.0"
305-
exit 0
306-
fi
307-
done
308-
# Fail the "can it sandbox?" probe
309320
echo "{}" >&2
310321
exit 1
311322
"#,

src/llm-coding-tools-bubblewrap/src/profile/types.rs

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -468,6 +468,25 @@ impl Profile {
468468
)))
469469
}
470470

471+
/// Returns true only for exact path matches against prevalidated directories:
472+
/// workspace(), synthetic_home(), cache_root() (when mount_cache_root()),
473+
/// TmpBacking::BindHost host_dir, and entries in read_only_mounts() and read_write_mounts().
474+
#[inline]
475+
pub(crate) fn is_prevalidated_workdir(&self, workdir: &Path) -> bool {
476+
workdir == self.workspace()
477+
|| workdir == self.synthetic_home()
478+
|| (self.mount_cache_root() && workdir == self.cache_root())
479+
|| matches!(self.tmp_backing(), TmpBacking::BindHost(host_dir) if workdir == host_dir.as_ref())
480+
|| self
481+
.read_only_mounts()
482+
.iter()
483+
.any(|mount| workdir == mount.as_ref())
484+
|| self
485+
.read_write_mounts()
486+
.iter()
487+
.any(|mount| workdir == mount.as_ref())
488+
}
489+
471490
fn sandbox_layout(&self) -> SandboxLayout<'_> {
472491
SandboxLayout {
473492
workspace: self.workspace(),
@@ -541,4 +560,50 @@ mod tests {
541560
Cow::Owned(mapped) => assert_eq!(mapped, Path::new("/workspace/subdir")),
542561
}
543562
}
563+
564+
#[test]
565+
fn is_prevalidated_workdir_accepts_exact_profile_owned_roots() {
566+
let profile = Profile {
567+
preset: None,
568+
workspace: Box::from(Path::new("/host/workspace")),
569+
workspace_dest: Box::from(Path::new("/workspace")),
570+
synthetic_home: Box::from(Path::new("/host/home")),
571+
synthetic_home_dest: Box::from(Path::new("/home/sandbox")),
572+
cache_root: Box::from(Path::new("/host/cache")),
573+
tmp_backing: TmpBacking::BindHost(Box::from(Path::new("/host/tmp"))),
574+
mount_cache_root: true,
575+
compat_symlinks: Arc::new([]),
576+
read_only_mounts: Arc::from([Box::from(Path::new("/host/ro"))]),
577+
read_write_mounts: Arc::from([Box::from(Path::new("/host/rw"))]),
578+
tmpfs_overlays: Arc::new([]),
579+
file_overlays: Arc::new([]),
580+
credential_file_mounts: Arc::new([]),
581+
read_only_host_rootfs: false,
582+
network_policy: NetworkPolicy::Disabled,
583+
clear_env: false,
584+
default_env: Arc::new([]),
585+
extra_env: Arc::new([]),
586+
availability: Availability::Unknown,
587+
bwrap_program: Arc::from(Box::from(Path::new("/usr/bin/bwrap"))),
588+
shell: Box::from(Path::new("/bin/sh")),
589+
static_args: Arc::<[OsString]>::from([]),
590+
};
591+
592+
assert!(profile.is_prevalidated_workdir(Path::new("/host/workspace")));
593+
assert!(profile.is_prevalidated_workdir(Path::new("/host/home")));
594+
assert!(profile.is_prevalidated_workdir(Path::new("/host/cache")));
595+
assert!(profile.is_prevalidated_workdir(Path::new("/host/tmp")));
596+
assert!(profile.is_prevalidated_workdir(Path::new("/host/ro")));
597+
assert!(profile.is_prevalidated_workdir(Path::new("/host/rw")));
598+
}
599+
600+
#[test]
601+
fn is_prevalidated_workdir_rejects_nested_or_unmounted_paths() {
602+
let profile = profile_with_workspace_dest("/workspace");
603+
604+
assert!(!profile.is_prevalidated_workdir(Path::new("/host/workspace/subdir")));
605+
assert!(!profile.is_prevalidated_workdir(Path::new("/host/home/subdir")));
606+
assert!(!profile.is_prevalidated_workdir(Path::new("/host/cache/subdir")));
607+
assert!(!profile.is_prevalidated_workdir(Path::new("/outside")));
608+
}
544609
}

src/llm-coding-tools-bubblewrap/src/wrap/command.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,11 @@ fn resolve_sandbox_cwd<'a>(
6262
profile: &'a Profile,
6363
workdir: Option<&'a Path>,
6464
) -> Result<Cow<'a, Path>, LinuxBwrapError> {
65-
validate_workdir(workdir)?;
65+
if let Some(dir) = workdir {
66+
if !profile.is_prevalidated_workdir(dir) {
67+
validate_workdir(Some(dir))?;
68+
}
69+
}
6670
profile.map_workdir_to_sandbox(workdir)
6771
}
6872

src/llm-coding-tools-core/src/tools/bash/blocking_impl.rs

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
//! Blocking shell command execution.
22
33
use super::{
4-
timeout_error_with_kill_failure, timeout_message_with_buffered_output, validate_workdir,
5-
BashExecutionMode, BashOutput, PIPE_BUFFER_CAPACITY,
4+
string_from_utf8_or_lossy, timeout_error_with_kill_failure,
5+
timeout_message_with_buffered_output, validate_workdir, BashExecutionMode, BashOutput,
6+
PIPE_BUFFER_CAPACITY,
67
};
78
use crate::error::{ToolError, ToolResult};
89
#[cfg(all(feature = "linux-bubblewrap", target_os = "linux"))]
@@ -14,6 +15,11 @@ use std::process::Stdio;
1415
use std::thread;
1516
use std::time::{Duration, Instant};
1617

18+
/// Initial sleep between non-blocking wait polls.
19+
const INITIAL_POLL_INTERVAL_MS: u64 = 1;
20+
/// Upper bound for wait poll backoff.
21+
const MAX_POLL_INTERVAL_MS: u64 = 10;
22+
1723
enum WaitOutcome {
1824
Exited(std::process::ExitStatus),
1925
TimedOut { kill_error: Option<std::io::Error> },
@@ -96,19 +102,24 @@ pub(in crate::tools::bash) fn run_wrapped_command(
96102
});
97103

98104
let start = Instant::now();
105+
let mut poll_interval_ms = INITIAL_POLL_INTERVAL_MS;
99106

100107
// Poll for completion with timeout.
101108
let wait_outcome = loop {
102109
match child.try_wait() {
103110
Ok(Some(status)) => break WaitOutcome::Exited(status),
104111
Ok(None) => {
105-
if start.elapsed() >= timeout {
112+
let elapsed = start.elapsed();
113+
if elapsed >= timeout {
106114
// Kill entire process tree via Job Object (Windows) or process group (Unix)
107115
break WaitOutcome::TimedOut {
108116
kill_error: child.kill().err(),
109117
};
110118
}
111-
thread::sleep(Duration::from_millis(10));
119+
120+
let remaining = timeout.saturating_sub(elapsed);
121+
thread::sleep(remaining.min(Duration::from_millis(poll_interval_ms)));
122+
poll_interval_ms = (poll_interval_ms << 1).min(MAX_POLL_INTERVAL_MS);
112123
}
113124
Err(e) => break WaitOutcome::WaitError(e),
114125
}
@@ -126,8 +137,8 @@ pub(in crate::tools::bash) fn run_wrapped_command(
126137
match wait_outcome {
127138
WaitOutcome::Exited(status) => Ok(BashOutput {
128139
exit_code: status.code(),
129-
stdout: String::from_utf8_lossy(&stdout_data).into_owned(),
130-
stderr: String::from_utf8_lossy(&stderr_data).into_owned(),
140+
stdout: string_from_utf8_or_lossy(stdout_data),
141+
stderr: string_from_utf8_or_lossy(stderr_data),
131142
}),
132143
WaitOutcome::TimedOut { kill_error } => Err(timeout_error_with_kill_failure(
133144
timeout_message_with_buffered_output(timeout, &stdout_data, &stderr_data),

src/llm-coding-tools-core/src/tools/bash/mod.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ use crate::error::{ToolError, ToolResult};
3535
use crate::ToolOutput;
3636
use core::fmt::Write;
3737
use serde::Serialize;
38+
use std::borrow::Cow;
3839
use std::path::Path;
3940
use std::time::Duration;
4041
#[cfg(feature = "tokio")]
@@ -65,6 +66,17 @@ pub enum BashExecutionMode {
6566
/// 32KB covers typical command output without reallocations.
6667
const PIPE_BUFFER_CAPACITY: usize = 32 * 1024;
6768

69+
#[inline]
70+
fn string_from_utf8_or_lossy(bytes: Vec<u8>) -> String {
71+
match String::from_utf8(bytes) {
72+
Ok(text) => text,
73+
Err(error) => match String::from_utf8_lossy(&error.into_bytes()) {
74+
Cow::Borrowed(text) => text.to_owned(),
75+
Cow::Owned(text) => text,
76+
},
77+
}
78+
}
79+
6880
#[inline]
6981
fn timeout_message_with_buffered_output(
7082
timeout: Duration,

src/llm-coding-tools-core/src/tools/bash/tokio_impl.rs

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
//! Tokio-based async shell command execution.
22
33
use super::{
4-
timeout_error_with_kill_failure, timeout_message_with_buffered_output, validate_workdir,
5-
BashExecutionMode, BashOutput, PIPE_BUFFER_CAPACITY,
4+
string_from_utf8_or_lossy, timeout_error_with_kill_failure,
5+
timeout_message_with_buffered_output, validate_workdir, BashExecutionMode, BashOutput,
6+
PIPE_BUFFER_CAPACITY,
67
};
78
use crate::error::{ToolError, ToolResult};
89
#[cfg(all(feature = "linux-bubblewrap", target_os = "linux"))]
@@ -55,7 +56,7 @@ where
5556
fn take_pipe_buffer(buffer: SharedPipeBuffer) -> Vec<u8> {
5657
match Arc::try_unwrap(buffer) {
5758
Ok(mutex) => mutex.into_inner(),
58-
Err(shared) => shared.lock().clone(),
59+
Err(shared) => core::mem::take(&mut *shared.lock()),
5960
}
6061
}
6162

@@ -165,8 +166,8 @@ pub(in crate::tools::bash) async fn run_wrapped_command(
165166

166167
Ok(BashOutput {
167168
exit_code: status.code(),
168-
stdout: String::from_utf8_lossy(&stdout_data).into_owned(),
169-
stderr: String::from_utf8_lossy(&stderr_data).into_owned(),
169+
stdout: string_from_utf8_or_lossy(stdout_data),
170+
stderr: string_from_utf8_or_lossy(stderr_data),
170171
})
171172
}
172173
None => {

0 commit comments

Comments
 (0)