diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 947fa82..52aa648 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -11,7 +11,7 @@ jobs: unit-tests: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - name: Install uv run: | curl -fL https://github.com/astral-sh/uv/releases/latest/download/uv-x86_64-unknown-linux-gnu.tar.gz \ @@ -22,7 +22,7 @@ jobs: runs-on: ubuntu-latest timeout-minutes: 20 steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - name: Install uv run: | curl -fL https://github.com/astral-sh/uv/releases/latest/download/uv-x86_64-unknown-linux-gnu.tar.gz \ @@ -44,7 +44,7 @@ jobs: - name: Upload logs on failure if: failure() - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v5 with: name: e2e-linux-logs path: | @@ -53,9 +53,9 @@ jobs: e2e-macos: runs-on: macos-latest - timeout-minutes: 15 + timeout-minutes: 20 steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - name: Install uv run: | curl -fL https://github.com/astral-sh/uv/releases/latest/download/uv-aarch64-apple-darwin.tar.gz \ @@ -68,10 +68,11 @@ jobs: run: uv run pytest tests/test_e2e.py -v -s env: VM_STATE_DIR: ${{ runner.temp }}/vm-state + QEMU_ACCEL: tcg # HVF is unavailable on GitHub-hosted macOS runners - name: Upload logs on failure if: failure() - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v5 with: name: e2e-macos-logs path: | diff --git a/README.md b/README.md index af8d730..75c6df5 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Agent VM +# less-lethal: userspace LLM agent VM A sandboxed Debian VM with no direct internet access. All traffic is forced through a host-side [mitmproxy](https://mitmproxy.org/) that enforces an allowlist, giving full visibility and control over what the guest can reach. Runs on macOS (Hypervisor.framework) and Linux (KVM or software emulation). No sudo required. diff --git a/tests/test_e2e.py b/tests/test_e2e.py index 2dd4394..97bb6f3 100644 --- a/tests/test_e2e.py +++ b/tests/test_e2e.py @@ -237,10 +237,15 @@ def test_cloud_init_success(running_vm): SSH subprocess open during the entire cloud-init run (which includes package installation and can take several minutes in TCG mode). """ - deadline = time.monotonic() + 300 + deadline = time.monotonic() + 600 last_detail = "" while time.monotonic() < deadline: - r = _vm_ssh("cloud-init status --long 2>&1", timeout=15) + try: + r = _vm_ssh("cloud-init status --long 2>&1", timeout=30) + except subprocess.TimeoutExpired: + remaining = int(deadline - time.monotonic()) + _progress(f"cloud-init ({remaining}s left): (SSH timed out, retrying)") + continue remaining = int(deadline - time.monotonic()) # Compact multi-line status into a single progress line. detail = " | ".join( diff --git a/vm.py b/vm.py index 02adef5..f346a30 100755 --- a/vm.py +++ b/vm.py @@ -147,18 +147,27 @@ def launch_qemu(self, qemu_args: list[str]) -> subprocess.Popen: # --------------------------------------------------------------------------- class DarwinBackend(Backend): - """macOS backend: HVF acceleration, Homebrew firmware paths.""" + """macOS backend: HVF acceleration (with TCG fallback), Homebrew firmware paths.""" def __init__(self, brew: Path, arch: Arch, proxy_port: int = PROXY_PORT, ssh_host_port: int = SSH_HOST_PORT) -> None: super().__init__(arch, proxy_port, ssh_host_port) self._brew = brew + override = os.environ.get("QEMU_ACCEL") + if override: + self._accel = override + else: + r = subprocess.run(["sysctl", "-n", "kern.hv_support"], + capture_output=True, text=True) + self._accel = "hvf" if r.returncode == 0 and r.stdout.strip() == "1" else "tcg" @property def machine_args(self) -> list[str]: if self.arch == Arch.ARM64: - return ["-machine", "virt,accel=hvf", "-cpu", "host"] - return ["-machine", "q35,accel=hvf", "-cpu", "host"] + cpu = "host" if self._accel == "hvf" else "cortex-a57" + return ["-machine", f"virt,accel={self._accel}", "-cpu", cpu] + cpu = "host" if self._accel == "hvf" else "qemu64" + return ["-machine", f"q35,accel={self._accel}", "-cpu", cpu] def prepare_efi(self, state_dir: Path) -> tuple[Path, Path]: code_src = self._brew / "share/qemu/edk2-aarch64-code.fd" @@ -178,7 +187,13 @@ class LinuxBackend(Backend): def __init__(self, arch: Arch, proxy_port: int = PROXY_PORT, ssh_host_port: int = SSH_HOST_PORT) -> None: super().__init__(arch, proxy_port, ssh_host_port) - self._accel = "kvm" if Path("/dev/kvm").exists() else "tcg" + override = os.environ.get("QEMU_ACCEL") + if override: + self._accel = override + elif os.access("/dev/kvm", os.R_OK | os.W_OK): + self._accel = "kvm" + else: + self._accel = "tcg" @property def machine_args(self) -> list[str]: @@ -446,13 +461,17 @@ def start_mitmproxy(proxy_port: int = PROXY_PORT) -> subprocess.Popen: log_file = log_path.open("w") print(f"Starting mitmproxy on port {proxy_port} (log: .vm/mitmdump.log)...") proc = subprocess.Popen(cmd, stdout=log_file, stderr=log_file) - time.sleep(1) - if proc.poll() is not None: - log_file.flush() - sys.exit( - f"mitmdump failed to start (exit code {proc.returncode}). " - f"Check {log_path} — port {proxy_port} may already be in use." - ) + + # Poll for up to 3 seconds to catch fast failures (e.g. port in use). + for _ in range(15): + time.sleep(0.2) + if proc.poll() is not None: + log_file.flush() + log_tail = log_path.read_text(errors="replace").strip() + sys.exit( + f"mitmdump failed to start (exit code {proc.returncode}).\n" + f"{log_tail}" + ) return proc