Skip to content

Commit 592c6a6

Browse files
miltalexteo
authored and committed
[executor] kill process using PID from OCC
1 parent c77c101 commit 592c6a6

2 files changed

Lines changed: 79 additions & 6 deletions

File tree

executor/executable/controllabletask.go

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -425,7 +425,7 @@ func (t *ControllableTask) Kill() error {
425425

426426
log.Debug("end transition loop done")
427427

428-
pid := t.rpc.TaskCmd.Process.Pid
428+
pid := int(response.GetPid())
429429
_ = t.rpc.Close()
430430
t.rpc = nil
431431

@@ -439,9 +439,10 @@ func (t *ControllableTask) Kill() error {
439439

440440
killErrCh := make(chan error)
441441
// When killing we must always use syscall.Kill with a negative PID, in order to kill all
442-
// children which were assigned the same PGID at launch
442+
// children which were assigned the same PGID at launch. Since we kill the child process,
443+
// the shell that is wrapping the command should terminate as well, so we signal the PID itself instead of the negative PID.
443444
go func() {
444-
err := syscall.Kill(-pid, syscall.SIGTERM)
445+
err := syscall.Kill(pid, syscall.SIGTERM)
445446
if err != nil {
446447
log.WithError(err).
447448
WithField("taskId", t.ti.GetTaskID()).
@@ -450,18 +451,31 @@ func (t *ControllableTask) Kill() error {
450451
killErrCh <- err
451452
}()
452453

453-
454454
// Set a small timeout for SIGTERM; if SIGTERM fails or the timeout passes,
455455
// we perform a SIGKILL.
456456
select {
457457
case killErr := <- killErrCh:
458458
if killErr == nil {
459-
return killErr
459+
time.Sleep(10 * time.Second)
460+
if pidExists(pid) {
461+
// SIGINT for the "Waiting for graceful device shutdown.
462+
// Hit Ctrl-C again to abort immediately" message.
463+
killErr = syscall.Kill(pid, syscall.SIGINT)
464+
if killErr != nil {
465+
log.WithError(killErr).
466+
WithField("taskId", t.ti.GetTaskID()).
467+
Warning("could not gracefully kill task")
468+
}
469+
}
470+
time.Sleep(10 * time.Second)
471+
if !pidExists(pid) {
472+
return killErr
473+
}
460474
}
461475
case <-time.After(10 * time.Second):
462476
}
463477

464-
killErr := syscall.Kill(-pid, syscall.SIGKILL)
478+
killErr := syscall.Kill(pid, syscall.SIGKILL)
465479
if killErr != nil {
466480
log.WithError(killErr).
467481
WithField("taskId", t.ti.GetTaskID()).

executor/executable/pid_util.go

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
/*
2+
* === This file is part of ALICE O² ===
3+
*
4+
* Copyright 2020 CERN and copyright holders of ALICE O².
5+
* Author: Miltiadis Alexis <miltiadis.alexis@cern.ch>
6+
*
7+
* This program is free software: you can redistribute it and/or modify
8+
* it under the terms of the GNU General Public License as published by
9+
* the Free Software Foundation, either version 3 of the License, or
10+
* (at your option) any later version.
11+
*
12+
* This program is distributed in the hope that it will be useful,
13+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
14+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15+
* GNU General Public License for more details.
16+
*
17+
* You should have received a copy of the GNU General Public License
18+
* along with this program. If not, see <http://www.gnu.org/licenses/>.
19+
*
20+
* In applying this license CERN does not waive the privileges and
21+
* immunities granted to it by virtue of its status as an
22+
* Intergovernmental Organization or submit itself to any jurisdiction.
23+
*/
24+
25+
package executable
26+
27+
import (
	"errors"
	"os"
	"syscall"
)
31+
32+
// pidExists reports whether a process with the given PID currently exists.
//
// The process is probed with signal 0, which asks the OS to run its usual
// existence and permission checks without actually delivering a signal.
//
// It returns true when the probe succeeds, or when it fails with EPERM (the
// process is there, but we are not allowed to signal it). It returns false
// for non-positive PIDs, when the process handle cannot be obtained, when the
// process has already finished, and for any other error.
func pidExists(pid int) bool {
	// A non-positive value never names a single process, so never report it as existing.
	if pid <= 0 {
		return false
	}

	proc, err := os.FindProcess(pid)
	if err != nil {
		return false
	}

	err = proc.Signal(syscall.Signal(0))
	if err == nil {
		return true
	}

	// Compare against the sentinel rather than its message text, so the check
	// keeps working if the wording changes or the error gets wrapped.
	if errors.Is(err, os.ErrProcessDone) {
		return false
	}

	// EPERM is the only errno that still implies a live process; ESRCH and
	// anything unexpected are treated as "not running".
	var errno syscall.Errno
	if errors.As(err, &errno) {
		return errno == syscall.EPERM
	}
	return false
}

0 commit comments

Comments
 (0)