From 8096e1add16cb8e549a6431794d6609fa950016e Mon Sep 17 00:00:00 2001 From: Gunnar Kreitz Date: Wed, 27 May 2026 16:39:39 +0200 Subject: [PATCH] Add command line option for system call restrictions --- isolate.1.txt | 10 ++++++++-- isolate.c | 21 +++++++++++++++------ 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/isolate.1.txt b/isolate.1.txt index acc0727..60989e1 100644 --- a/isolate.1.txt +++ b/isolate.1.txt @@ -315,6 +315,11 @@ The following options can be useful in special cases. and directories) created inside the sandbox. If you need them, this option disables that behavior, but you need to carefully check what you open. +*--syscalls=*'flags':: + Override the value of `syscall_flags` from the configuration file for this run. See + "System call restrictions" below for a description of which system calls are affected + and what the individual flags mean. + *--as-uid=*'uid', *--as-gid=*'gid':: Act on behalf of the specified user and group (only if Isolate was invoked by root). This is used in scenarios where a root-controlled process manages creation of sandboxes @@ -480,8 +485,9 @@ two sandboxes running in parallel, or between two instances of the same sandbox one after another. To avoid such information leaks, Isolate forbids the use of a few system calls. -This is controlled by the `syscall_flags` setting in the configuration file, -which contains a sum of the following flags: +This is controlled by the `syscall_flags` setting in the configuration file, which can be +overridden on the command line for a single run via `--syscalls`. The value of the setting is a +sum of the following flags: * *Keyrings (flag 1)* -- disables the `keyctl` system call which maintains keyrings that store cryptographic material. 
Keyrings can be used to establish system-wide diff --git a/isolate.c b/isolate.c index e7a7aa8..57b8ca2 100644 --- a/isolate.c +++ b/isolate.c @@ -84,6 +84,7 @@ static bool special_files; static bool wait_if_busy; static int as_uid = -1; static int as_gid = -1; +static int syscall_flags_opt = -1; /* Overrides cf_syscall_flags when != -1 */ int cg_enable; int cg_memory_limit; @@ -821,7 +822,9 @@ setup_seccomp(void) * We install a simple seccomp filter to disallow these syscalls. */ - if (!cf_syscall_flags) + int syscall_flags = syscall_flags_opt == -1 ? cf_syscall_flags : syscall_flags_opt; + + if (!syscall_flags) return; int err; @@ -833,7 +836,7 @@ setup_seccomp(void) /* * Consider allowing syscalls for legacy architectures. */ - if (!(cf_syscall_flags & CF_SYSCALL_LEGACY_ARCH)) + if (!(syscall_flags & CF_SYSCALL_LEGACY_ARCH)) { uint32_t native_arch = seccomp_arch_native(); if (native_arch == SCMP_ARCH_X86_64) @@ -855,7 +858,7 @@ setup_seccomp(void) * Disable keyctl(), because it can be used to establish system-wide * persistent memory. */ - if (cf_syscall_flags & CF_SYSCALL_KEYCTL) + if (syscall_flags & CF_SYSCALL_KEYCTL) { err = seccomp_rule_add(ctx, SCMP_ACT_ERRNO(ENOSYS), SCMP_SYS(keyctl), 0); if (err < 0) @@ -866,7 +869,7 @@ setup_seccomp(void) * Disable creation of AF_VSOCK sockets, which are not namespaced, so they * can be used to cross boundaries between sandboxes. */ - if (cf_syscall_flags & CF_SYSCALL_VSOCK) + if (syscall_flags & CF_SYSCALL_VSOCK) { err = seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EAFNOSUPPORT), SCMP_SYS(socket), 1, SCMP_A0(SCMP_CMP_EQ, AF_VSOCK)); if (err < 0) @@ -879,7 +882,7 @@ setup_seccomp(void) * * Similarly for flock. */ - if (cf_syscall_flags & CF_SYSCALL_FCNTL) + if (syscall_flags & CF_SYSCALL_FCNTL) { static const int fcntl_cmds[] = { F_SETLK, @@ -906,7 +909,7 @@ setup_seccomp(void) * Disable io_uring_setup() as the io_uring can be used to create sockets * and it's unlikely to be used in programming contests. 
*/ - if (cf_syscall_flags & CF_SYSCALL_IO_URING) + if (syscall_flags & CF_SYSCALL_IO_URING) { err = seccomp_rule_add(ctx, SCMP_ACT_ERRNO(ENOSYS), SCMP_SYS(io_uring_setup), 0); if (err < 0) @@ -1248,6 +1251,7 @@ Options:\n\ -i, --stdin=<file>\tRedirect stdin from <file>\n\ -o, --stdout=<file>\tRedirect stdout to <file>\n\ -p, --processes[=<max>]\tEnable multiple processes (at most <max> of them); needs --cg\n\ + --syscalls=<flags>\tSet syscall_flags (see \"System call restrictions\" in man isolate)\n\ -t, --time=