From a99a776ed22aba0523c21e3d1e2b008efde5aeba Mon Sep 17 00:00:00 2001 From: Laurent Bercot Date: Mon, 17 Nov 2025 15:59:20 +0100 Subject: [PATCH 1/3] Add compatibility mode for s6 support In order to avoid breaking things for the 3 users who use the old s6 support, add compatibility. When command= is empty and a service directory exists in /var/svc.d, use that service directory instead of creating it from the service file data. --- s6-guide.md | 24 +++++++++++++++++------- sh/s6.sh | 16 ++++++++++++---- 2 files changed, 29 insertions(+), 11 deletions(-) diff --git a/s6-guide.md b/s6-guide.md index 9e14f65cc..c5f76abd2 100644 --- a/s6-guide.md +++ b/s6-guide.md @@ -97,23 +97,33 @@ timeout_kill means no SIGKILL will be sent. ## How it works internally, starting with 0.63 +If the `command` variable is empty *and* there is a user-provided +service directory in `/var/svc.d` with the same name as the service +being called, then everything works as it did previously: the +`/var/svc.d/foo` service directory is linked into the scan directory, +and that's it: you are in full manual control of your service directory. +You can still use the `timeout_ready` and `timeout_down` variables to +tune OpenRC's behaviour, but the other variables have no impact. + +The rest of this section assumes that the `command` variable is not +empty. In that case, you don't need to provide a service directory in +the s6 format: OpenRC will craft one for you. + The first time start() is called, OpenRC uses all the variables in the -service file to build a service directory for s6: a run script, possibly -a notification-fd file, etc. This service directory is then linked into +service file to build a service directory: a run script, possibly a +notification-fd file, etc. This service directory is then linked into the scan directory and s6-svscan is told to register it and spawn a s6-supervise process on it. 
This means that all the information needed for your service should be given, declaratively, in your service file (and your configuration file -if you have one). You do not need to build your service directory -yourself anymore, this is done automatically: in true OpenRC fashion, -the service file is the One True Source of information for running -your service. +if you have one). In true OpenRC fashion, the service file is the One +True Source of information for running your service. The run script for the s6 service directory is built with in the execline language, because execline makes script generation easier than sh. However, the daemon execution itself is still done via - sh -c "$command $command_args $command_args_foreground" + `sh -c "$command $command_args $command_args_foreground"` for compatibility with other backends. In other words: you can forget that execline is even there, all the user-facing parts use sh as their interpreter and it's all you need to worry about. diff --git a/sh/s6.sh b/sh/s6.sh index 5a63fca0e..dc8d003a6 100644 --- a/sh/s6.sh +++ b/sh/s6.sh @@ -79,6 +79,10 @@ _s6_force_stop() { s6-svunlink -- "$_scandir" "$name" } +_s6_have_legacy_servicedir() { + test -z "$command" && test -x "/var/svc.d/$name/run" +} + _s6_servicedir_creation_needed() { local dir="$_servicedirs/$name" conffile="{$RC_SERVICE%/*}/../conf.d/${RC_SERVICE##*/}" if ! test -e "$dir" ; then @@ -163,21 +167,25 @@ _s6_servicedir_create() { s6_start() { - local r waitcommand waitname + local servicepath r waitcommand waitname _s6_set_variables if ! 
_s6_sanity_checks ; then eerror "s6 sanity checks failed, cannot start service" return 1 fi - if _s6_servicedir_creation_needed ; then + servicepath="$_servicedirs/$name" + if _s6_have_legacy_servicedir ; then + servicepath="/var/svc.d/$name" + ebegin "Starting $name (via user-provided service directory)" + elif _s6_servicedir_creation_needed ; then ebegin "Starting $name" _s6_servicedir_create else ebegin "Starting $name (using cached service directory)" fi - if s6-svlink -- "$_scandir" "$_servicedirs/$name" ; then : ; else + if s6-svlink -- "$_scandir" "$servicepath" ; then : ; else r=$? - eend $r "Failed to s6-svlink $name into $_scandir" + eend $r "Failed to s6-svlink $servicepath into $_scandir" return $r fi if test -n "$timeout_ready" ; then From a769dd89a92bf51dd087037b6db9ab55f8bde400 Mon Sep 17 00:00:00 2001 From: Laurent Bercot Date: Tue, 18 Nov 2025 09:15:18 +0100 Subject: [PATCH 2/3] start-stop-daemon.c: fix failure case on --notify fd:4 When the created notification pipe (often on fd 4) is the same number as the notification fd target, dup2() does not clear the FD_CLOEXEC flag, so the fd gets closed right when exec'ing the daemon, reporting readiness failure. Fix this by explicitly testing for the case and clearing the flag when necessary. Note that this happens because of a call to close_range() right before the test. close_range() is the real problem, it should never be used and this bug is a perfect illustration of why; but getting rid of close_range() is a much more invasive change that I don't want to commit to right now, especially since navi's plan is to eventually deprecate start-stop-daemon. 
--- src/start-stop-daemon/start-stop-daemon.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/start-stop-daemon/start-stop-daemon.c b/src/start-stop-daemon/start-stop-daemon.c index 34ce08c48..fdcd90326 100644 --- a/src/start-stop-daemon/start-stop-daemon.c +++ b/src/start-stop-daemon/start-stop-daemon.c @@ -1130,13 +1130,25 @@ int main(int argc, char **argv) || rc_yesno(getenv("EINFO_QUIET"))) dup2(stderr_fd, STDERR_FILENO); - cloexec_fds_from(3); + cloexec_fds_from(3); /* FIXME: this is problematic, see right below */ if (notify.type == NOTIFY_FD) { if (close(notify.pipe[0]) == -1) eerrorx("%s: failed to close notify pipe[0]: %s", applet, strerror(errno)); if (dup2(notify.pipe[1], notify.fd) == -1) eerrorx("%s: failed to initialize notify fd: %s", applet, strerror(errno)); + + /* if notify.pipe[1] == notify.fd then the FD_CLOEXEC flag is not cleared by dup2, + leading to failure. The workaround here is to clear it manually, but the + real fix is that we should never close/cloexec fds in bulk like this */ + if (notify.pipe[1] == notify.fd) { + int flags = fcntl(notify.fd, F_GETFD, 0); + if (flags == -1) + eerrorx("%s: failed to get flags for notify fd: %s", applet, strerror(errno)); + if (fcntl(notify.fd, F_SETFD, flags & ~FD_CLOEXEC) == -1) + eerrorx("%s: failed to set flags for notify fd: %s", applet, strerror(errno)); + } + } if (scheduler != NULL) { From 4b780a89056ead15b52bb245cc45d29d5d3dc3ee Mon Sep 17 00:00:00 2001 From: Laurent Bercot Date: Tue, 18 Nov 2025 09:20:46 +0100 Subject: [PATCH 3/3] s6-svscanboot: launch s6-svscan with notify=fd:4 Now that the bug that made it fail is fixed, use the start-stop-daemon readiness notification mechanism when launching s6-svscan. (We do not run s6-svscan under supervise-daemon because that would needlessly lengthen the supervision chain. Yo dawg, we heard you like supervisors.) 
--- init.d/s6-svscan.in | 2 +- sh/s6-svscanboot.sh.in | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/init.d/s6-svscan.in b/init.d/s6-svscan.in index 8d1499d11..f16bffbbf 100644 --- a/init.d/s6-svscan.in +++ b/init.d/s6-svscan.in @@ -18,7 +18,7 @@ command_args="$RC_SVCDIR" command_background=yes pidfile=/var/run/s6-svscan.pid umask=022 -# notify=fd:4 # when notify=fd is fixed, uncomment here and add -d4 in svscanboot +notify=fd:4 depend() { need localmount diff --git a/sh/s6-svscanboot.sh.in b/sh/s6-svscanboot.sh.in index af824a19a..c18e75df7 100644 --- a/sh/s6-svscanboot.sh.in +++ b/sh/s6-svscanboot.sh.in @@ -17,7 +17,6 @@ s6-log -bpd3 -- t s1048576 n10 "$logdir" EOF chmod 0755 "$scandir/s6-svscan-log/run" -# when notify=fd is fixed, add -d4 after -X3 exec redirfd -wnb 1 "$scandir/s6-svscan-log/fifo" \ fdmove -c 2 1 \ -s6-svscan -X3 -- "$scandir" 0/dev/console +s6-svscan -X3 -d4 -- "$scandir" 0/dev/console