Patch summary (text ahead of the first "diff --git" line is skipped by git apply and patch):
  * mk/shim.mk: delete shim.bin and fail the build when it still starts with a Mach-O magic.
  * mk/tests.mk: add the test-proctitle-low-stack target and run it from `check`.
  * src/main.c: refuse to start unless DCZID_EL0 reports DZP=0 and BS=4 (64-byte DC ZVA block).
  * src/runtime/proctitle.c: rewrite only the contiguous argv block with byte stores; drop the
    environ scan and environ duplication.
  * tests/lib/test-runner.sh: define timeout() from TIMEOUT_BIN, gtimeout, or Homebrew paths.
  * tests/test-busybox.sh: probe `--list` once and skip applets missing from the build.
  * tests/test-proctitle-low-stack.sh: new regression script.
NOTE(review): the header names on the `#include` lines of the first src/main.c hunk are missing
in this copy of the patch; restore them from the repository before applying.
NOTE(review): indentation and blank lines were rebuilt from the hunk line counts; confirm
whitespace against the tree. The `<elfuse> <busybox>` usage placeholders in
tests/test-proctitle-low-stack.sh are inferred from the two positional parameters it reads.

diff --git a/mk/shim.mk b/mk/shim.mk
index 32ffb8a..d14ad23 100644
--- a/mk/shim.mk
+++ b/mk/shim.mk
@@ -9,6 +9,15 @@ $(BUILD_DIR)/shim.o: src/core/shim.S | $(BUILD_DIR)
 $(BUILD_DIR)/shim.bin: $(BUILD_DIR)/shim.o
 	@echo " OBJCOPY $@"
 	$(Q)$(OBJCOPY) -O binary $< $@
+	$(Q)magic=$$(od -An -N4 -tx1 $@ | tr -d '[:space:]'); \
+	case "$$magic" in \
+	    cffaedfe|cefaedfe|feedface|feedfacf|cafebabe|bebafeca|cafebabf|bfbafeca) \
+	        echo "ERROR: $@ still has a Mach-O header (magic $$magic)."; \
+	        echo " $(OBJCOPY) does not strip Mach-O containers in -O binary mode."; \
+	        echo " Install GNU binutils (brew install binutils) and rebuild, or"; \
+	        echo " set OBJCOPY=/opt/homebrew/opt/binutils/bin/objcopy."; \
+	        rm -f $@; exit 1;; \
+	esac
 
 $(BUILD_DIR)/shim_blob.h: $(BUILD_DIR)/shim.bin
 	@echo " GEN $@"
diff --git a/mk/tests.mk b/mk/tests.mk
index 2268273..cd86dc3 100644
--- a/mk/tests.mk
+++ b/mk/tests.mk
@@ -6,6 +6,7 @@
 	test-glibc-coreutils test-perf \
 	test-matrix test-matrix-elfuse-aarch64 test-matrix-qemu-aarch64 \
 	test-full test-multi-vcpu test-rwx test-sysroot-rename \
+	test-proctitle-low-stack \
 	test-sysroot-procfs-exec test-timeout-disable \
 	test-sysroot-nofollow test-sysroot-chdir perf
 
@@ -17,6 +18,8 @@ test-hello: $(ELFUSE_BIN) $(TEST_HELLO_DEP)
 ## Run the unit test suite plus busybox applet validation
 check: $(ELFUSE_BIN) $(TEST_DEPS)
 	@bash tests/driver.sh -e $(ELFUSE_BIN) -d $(TEST_DIR) -v
+	@printf "\n$(BLUE)━━━ proctitle low-stack regression ━━━$(RESET)\n"
+	@$(MAKE) --no-print-directory test-proctitle-low-stack
 	@printf "\n$(BLUE)━━━ busybox applet validation ━━━$(RESET)\n"
 	@$(MAKE) --no-print-directory test-busybox
 	@printf "\n$(BLUE)━━━ sysroot procfs exec validation ━━━$(RESET)\n"
@@ -205,6 +208,14 @@ test-busybox: $(ELFUSE_BIN) $(BUSYBOX_DEPS)
 	fi
 	@bash tests/test-busybox.sh $(ELFUSE_BIN) $(BUSYBOX_BIN)
 
+## Run the low-stack argv rewrite regression on busybox startup
+test-proctitle-low-stack: $(ELFUSE_BIN) $(BUSYBOX_DEPS)
+	@if [ ! -x "$(BUSYBOX_BIN)" ]; then \
+		printf "$(RED)✗ Busybox not found.$(RESET) Set BUSYBOX_BIN=/path/to/busybox.\n"; \
+		exit 1; \
+	fi
+	@bash tests/test-proctitle-low-stack.sh $(ELFUSE_BIN) $(BUSYBOX_BIN)
+
 # ── Static binary integration tests ──────────────────────────────
 
 ifdef GUEST_STATIC_BINS
diff --git a/src/main.c b/src/main.c
index a4966ff..f82ca45 100644
--- a/src/main.c
+++ b/src/main.c
@@ -19,6 +19,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -75,6 +76,37 @@ static void cleanup_main_resources(guest_t *g,
 /* Build-time version string (generated by make into build/version.h) */
 #include "version.h"
 
+/* Verify the host CPU's DC ZVA granule matches the shim's hardcoded value.
+ *
+ * DCZID_EL0 is readable from EL0 without trapping, so guest libc reads the
+ * host's value directly and uses it as the stride for memset(0) loops. The
+ * shim emulates each trapped DC ZVA by zeroing exactly 64 bytes
+ * (src/core/shim.S). Apple Silicon M1..M4 report DCZID_EL0.BS=4 (64 bytes);
+ * any future host that advertises a different granule would cause silent
+ * partial-zero corruption of guest memory. Abort here so the mismatch
+ * surfaces at startup instead of as data corruption later.
+ */
+static int host_dc_zva_assert(void)
+{
+    uint64_t dczid;
+    __asm__ volatile("mrs %0, DCZID_EL0" : "=r"(dczid));
+    if (dczid & (1ULL << 4)) {
+        log_error(
+            "host CPU prohibits DC ZVA (DCZID_EL0.DZP=1); cannot run "
+            "guests that depend on it");
+        return -1;
+    }
+    unsigned bs = (unsigned) (dczid & 0xF);
+    if (bs != 4) {
+        log_error(
+            "host DCZID_EL0.BS=%u (%u-byte DC ZVA block) but the shim "
+            "emulates 64 bytes; update src/core/shim.S before running",
+            bs, 1u << (bs + 2));
+        return -1;
+    }
+    return 0;
+}
+
 int main(int argc, char **argv)
 {
     log_init();
@@ -114,6 +146,9 @@ int main(int argc, char **argv)
         }
     }
 
+    if (host_dc_zva_assert() < 0)
+        return 1;
+
     /* Parse elfuse options until the first guest argv element. */
     while (arg_start < argc && argv[arg_start][0] == '-') {
         if (!strcmp(argv[arg_start], "--verbose") ||
diff --git a/src/runtime/proctitle.c b/src/runtime/proctitle.c
index 4e296dc..61be8c2 100644
--- a/src/runtime/proctitle.c
+++ b/src/runtime/proctitle.c
@@ -15,68 +15,35 @@
 
 #include "runtime/proctitle.h"
 
-static char *runtime_find_argv_environ_end(int argc, char **argv, char **envp)
+/* Return the contiguous argv block size starting at argv[0].
+ *
+ * Stop at the first non-contiguous argv entry and exclude the environment block
+ * entirely. Rewriting through envp is unsafe on Apple Silicon because libc's
+ * optimized memset may zero in cache-line chunks and step past the top of the
+ * stack when argv/env reach the stack ceiling under a small RLIMIT_STACK.
+ */
+static size_t runtime_argv_block_size(int argc, char **argv)
 {
-    char *end = argv[0];
+    char *next = argv[0];
 
     for (int i = 0; i < argc; i++) {
-        if (!argv[i])
-            continue;
-
-        char *next = argv[i] + strlen(argv[i]) + 1;
-        if (next > end)
-            end = next;
-    }
-
-    for (int i = 0; envp[i]; i++) {
-        char *next = envp[i] + strlen(envp[i]) + 1;
-        if (next > end)
-            end = next;
-    }
-
-    return end;
-}
-
-static bool runtime_duplicate_environment(char ***out_envp)
-{
-    extern char **environ;
-    int env_count = 0;
-
-    while (environ[env_count])
-        env_count++;
-
-    char **new_environ =
-        (char **) malloc((size_t) (env_count + 1) * sizeof(char *));
-    if (!new_environ)
-        return false;
-
-    for (int i = 0; i < env_count; i++) {
-        new_environ[i] = strdup(environ[i]);
-        if (new_environ[i])
-            continue;
-
-        for (int j = 0; j < i; j++)
-            free(new_environ[j]);
-        free(new_environ);
-        return false;
+        if (!argv[i] || argv[i] != next)
+            break;
+        next = argv[i] + strlen(argv[i]) + 1;
     }
 
-    new_environ[env_count] = NULL;
-    *out_envp = new_environ;
-    return true;
+    return (size_t) (next - argv[0]);
 }
 
 void runtime_set_process_title(int argc, char **argv, const char *elf_path)
 {
-    extern char **environ;
-    char **new_environ = NULL;
     size_t avail;
     const char *arch = "aarch64";
     char title[256];
     char thread_name[64];
     size_t title_len;
 
-    if (argc <= 0 || !argv || !argv[0] || !elf_path || !environ)
+    if (argc <= 0 || !argv || !argv[0] || !elf_path)
         return;
 
     const char *slash = strrchr(elf_path, '/');
@@ -90,19 +57,23 @@ void runtime_set_process_title(int argc, char **argv, const char *elf_path)
     snprintf(thread_name, sizeof(thread_name), "%s (%s-linux)", bin, arch);
     pthread_setname_np(thread_name);
 
-    avail =
-        (size_t) (runtime_find_argv_environ_end(argc, argv, environ) - argv[0]);
+    avail = runtime_argv_block_size(argc, argv);
     if (avail == 0)
         return;
 
-    if (!runtime_duplicate_environment(&new_environ))
-        return;
-    environ = new_environ;
+    /* Write the argv block with explicit byte stores through a volatile
+     * destination. The libc memcpy/memset on Apple Silicon are free to use
+     * cache-line-aligned stp/DC ZVA stores; using single-byte STRB removes
+     * any chance of touching the byte past avail, which on a Linux-style
+     * initial stack is the first character of envp[0].
+     */
+    size_t copy = title_len < avail ? title_len : avail - 1;
+    volatile char *dst = (volatile char *) argv[0];
+    for (size_t i = 0; i < copy; i++)
+        dst[i] = title[i];
+    for (size_t i = copy; i < avail; i++)
+        dst[i] = '\0';
 
-    if (title_len < avail) {
-        memcpy(argv[0], title, title_len);
-        memset(argv[0] + title_len, '\0', avail - title_len);
-    }
     for (int i = 1; i < argc; i++)
         argv[i] = NULL;
 }
diff --git a/tests/lib/test-runner.sh b/tests/lib/test-runner.sh
index b5322e1..aa6ef6e 100644
--- a/tests/lib/test-runner.sh
+++ b/tests/lib/test-runner.sh
@@ -10,6 +10,41 @@
 : "${TEST_LABEL_WIDTH:=14}"
 : "${TEST_TIMEOUT:=10}"
 
+# Resolve a working `timeout` binary. macOS doesn't ship one, so fall back to
+# GNU coreutils' gtimeout. Wrap as a function so callers keep using the bare
+# name `timeout`. Resolution order: TIMEOUT_BIN env override, `timeout` on
+# PATH, `gtimeout` on PATH, then Homebrew's stable opt symlinks for ARM and
+# Intel macOS (the install prefix differs between the two).
+if [ -n "${TIMEOUT_BIN:-}" ]; then
+    timeout()
+    {
+        "$TIMEOUT_BIN" "$@"
+    }
+elif ! command -v timeout > /dev/null 2>&1; then
+    _timeout_bin=
+    if command -v gtimeout > /dev/null 2>&1; then
+        _timeout_bin=gtimeout
+    else
+        for _candidate in /opt/homebrew/opt/coreutils/bin/gtimeout \
+            /usr/local/opt/coreutils/bin/gtimeout; do
+            if [ -x "$_candidate" ]; then
+                _timeout_bin="$_candidate"
+                break
+            fi
+        done
+    fi
+    if [ -n "$_timeout_bin" ]; then
+        # shellcheck disable=SC2317 # Invoked indirectly via `timeout` callers.
+        eval "timeout() { \"$_timeout_bin\" \"\$@\"; }"
+    else
+        echo "test-runner: no 'timeout' or 'gtimeout' in PATH." >&2
+        echo " Install GNU coreutils (brew install coreutils), put gtimeout" >&2
+        echo " on PATH, or set TIMEOUT_BIN=/path/to/timeout." >&2
+        exit 127
+    fi
+    unset _timeout_bin _candidate
+fi
+
 if [ -t 1 ]; then
     # Use ANSI-C quoting so the variables hold real ESC bytes, not the literal
     # 4-char "\033" sequence. Without this, callers that pass colors as printf
diff --git a/tests/test-busybox.sh b/tests/test-busybox.sh
index e5dbe07..9969912 100755
--- a/tests/test-busybox.sh
+++ b/tests/test-busybox.sh
@@ -37,9 +37,50 @@ test_tool_path()
     printf "%s" "$1"
 }
 
+# Probe which applets this busybox binary actually carries. The Debian
+# busybox-static drops a handful of applets (e.g. comm) compared to a
+# full build, and tests for them must skip rather than fail. Hard-fail
+# the whole suite if the probe itself fails so a broken elfuse/busybox
+# does not silently degrade to "all SKIP".
+if ! _bb_list=$(timeout "$TEST_TIMEOUT" "$ELFUSE" "$BB" --list 2>&1); then
+    printf "test-busybox: probing '%s --list' under elfuse failed:\n%s\n" \
+        "$BB" "$_bb_list" >&2
+    exit 1
+fi
+BB_APPLETS=" $(printf '%s\n' "$_bb_list" | tr '\n' ' ') "
+# Sanity: a usable busybox should expose at least one of these common
+# applets. A reduced build may legitimately omit sh, so accept any of
+# the small universal set; only fail if --list produced nothing usable.
+case "$BB_APPLETS" in
+    *" sh "* | *" echo "* | *" cat "* | *" ls "* | *" true "*) ;;
+    *)
+        printf "test-busybox: applet list from '%s --list' looks empty or malformed:\n%s\n" \
+            "$BB" "$_bb_list" >&2
+        exit 1
+        ;;
+esac
+unset _bb_list
+
+# Override: skip if the requested applet isn't compiled into this busybox.
+# shellcheck disable=SC2329 # Invoked indirectly by tests/lib/test-runner.sh.
+test_skip_missing_tool()
+{
+    local tool="$1"
+    case "$BB_APPLETS" in
+        *" $tool "*) return 1 ;;
+    esac
+    run_skip "$tool" "applet not in this busybox build"
+    return 0
+}
+
 run_nc_http_check()
 {
     local applet="nc" output rc server_pid port_file port
+
+    if test_skip_missing_tool "$applet"; then
+        return
+    fi
+
     port_file=$(mktemp "${TMPDIR}/nc-http-port.XXXXXX") || {
         test_report skip "$applet" " (failed to create port file)"
         skip=$((skip + 1))
diff --git a/tests/test-proctitle-low-stack.sh b/tests/test-proctitle-low-stack.sh
new file mode 100644
index 0000000..276dc70
--- /dev/null
+++ b/tests/test-proctitle-low-stack.sh
@@ -0,0 +1,55 @@
+#!/usr/bin/env bash
+# test-proctitle-low-stack.sh — Regress Apple Silicon argv/env stack overwrite
+#
+# Copyright 2026 elfuse contributors
+# SPDX-License-Identifier: Apache-2.0
+#
+# Usage: tests/test-proctitle-low-stack.sh <elfuse> <busybox>
+
+set -euo pipefail
+
+ELFUSE="${1:?Usage: $0 <elfuse> <busybox>}"
+BB="${2:?Usage: $0 <elfuse> <busybox>}"
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+# shellcheck disable=SC2034 # Consumed by tests/lib/test-runner.sh.
+TEST_TIMEOUT="${TEST_TIMEOUT:-10}"
+# shellcheck source=tests/lib/test-runner.sh
+source "$SCRIPT_DIR/lib/test-runner.sh"
+
+output=
+if output="$(
+    # shellcheck disable=SC2016 # Positional params are expanded by the child shell.
+    timeout "$TEST_TIMEOUT" sh -c '
+        current_stack=$(ulimit -S -s)
+        case "$current_stack" in
+            unlimited) ulimit -S -s 8192 ;;
+            "" | *[!0-9]*) ;;
+            *)
+                if [ "$current_stack" -gt 8192 ]; then
+                    ulimit -S -s 8192
+                fi
+                ;;
+        esac
+        exec "$1" "$2" echo hello
+    ' sh "$ELFUSE" "$BB"
+)"; then
+    :
+else
+    rc=$?
+    if [ "$rc" -eq 124 ]; then
+        printf "test-proctitle-low-stack: elfuse hung under low stack (timeout after %ss)\n" \
+            "$TEST_TIMEOUT" >&2
+        exit 1
+    fi
+    printf "test-proctitle-low-stack: elfuse failed under low stack (rc=%d)\n" \
+        "$rc" >&2
+    exit "$rc"
+fi
+
+if [ "$output" != "hello" ]; then
+    printf "test-proctitle-low-stack: unexpected output under low stack: %s\n" \
+        "$output" >&2
+    exit 1
+fi
+
+printf "test-proctitle-low-stack: PASS\n"