@@ -46,8 +46,9 @@ import (
4646)
4747
4848const (
49- KILL_TIMEOUT = 2 * time .Second
50- KILL_TRANSITION_TIMEOUT = 3 * time .Second
49+ SIGTERM_TIMEOUT = 1 * time .Second
50+ SIGINT_TIMEOUT = 3 * time .Second
51+ KILL_TRANSITION_TIMEOUT = 1 * time .Second
5152 TRANSITION_TIMEOUT = 10 * time .Second
5253)
5354
@@ -449,7 +450,8 @@ func (t *ControllableTask) Kill() error {
449450 select {
450451 case commitResponse = <- commitDone :
451452 case <- time .After (KILL_TRANSITION_TIMEOUT ):
452- log .Error ("deadline exceeded" )
453+ log .WithField ("task" , t .ti .TaskID .Value ).
454+ Warn ("teardown transition sequence timed out" )
453455 }
454456 // on timeout we should break
455457 if commitResponse == nil {
@@ -458,22 +460,28 @@ func (t *ControllableTask) Kill() error {
458460
459461 log .WithField ("newState" , commitResponse .newState ).
460462 WithError (commitResponse .transitionError ).
463+ WithField ("task" , t .ti .TaskID .Value ).
461464 Debug ("transition committed" )
462465 if commitResponse .transitionError != nil || len (cmd .Event ) == 0 {
463- log .WithError (commitResponse .transitionError ).Error ("cannot gracefully end task" )
466+ log .WithError (commitResponse .transitionError ).
467+ WithField ("task" , t .ti .TaskID .Value ).
468+ Warn ("teardown transition sequence error" )
464469 break
465470 }
466471 reachedState = commitResponse .newState
467472 }
468473
469- log .Debug ("end transition loop done" )
474+ log .WithField ("task" , t .ti .TaskID .Value ).
475+ Debug ("teardown transition sequence done" )
470476 pid = int (response .GetPid ())
471477 if pid == 0 {
472478 // t.knownPid must be valid because GetState was sure to have been successful in the past
473479 pid = t .knownPid
474480 }
475- } else {
476- log .WithError (err ).WithField ("taskId" , t .ti .GetTaskID ()).Warn ("cannot query task status for graceful process termination" )
481+ } else { // Task status query failed: fall back to the last known PID, if any
482+ log .WithError (err ).
483+ WithField ("taskId" , t .ti .GetTaskID ()).
484+ Warn ("cannot query task status for graceful process termination" )
477485 pid = t .knownPid
478486 if pid == 0 {
479487 // The pid was never known through a successful `GetState` in the lifetime
@@ -486,18 +494,21 @@ func (t *ControllableTask) Kill() error {
486494 // terminate the shell that is wrapping the command, so we avoid using
487495 // negative PID in all other cases in order to allow FairMQ cleanup to
488496 // run.
489- log .WithError (err ).WithField ("taskId" , t .ti .GetTaskID ()).Warn ("task PID not known from task, using containing shell PGID" )
497+ log .WithError (err ).WithField ("taskId" , t .ti .GetTaskID ()).
498+ Warn ("task PID not known from task, using containing shell PGID" )
490499 }
491500 }
492501
493502 _ = t .rpc .Close ()
494503 t .rpc = nil
495504
496505 if reachedState == "DONE" {
497- log .Debug ("task exited correctly" )
506+ log .WithField ("taskId" , t .ti .TaskID .Value ).
507+ Debug ("task exited correctly" )
498508 t .pendingFinalTaskStateCh <- mesos .TASK_FINISHED
499509 } else { // something went wrong
500- log .Debug ("task killed" )
510+ log .WithField ("taskId" , t .ti .TaskID .Value ).
511+ Debug ("task killed" )
501512 t .pendingFinalTaskStateCh <- mesos .TASK_KILLED
502513 }
503514
@@ -507,7 +518,7 @@ func (t *ControllableTask) Kill() error {
507518 if err != nil {
508519 log .WithError (err ).
509520 WithField ("taskId" , t .ti .GetTaskID ()).
510- Warning ("could not gracefully kill task " )
521+ Warning ("task SIGTERM failed " )
511522 }
512523 killErrCh <- err
513524 }()
@@ -517,30 +528,30 @@ func (t *ControllableTask) Kill() error {
517528 select {
518529 case killErr := <- killErrCh :
519530 if killErr == nil {
520- time .Sleep (KILL_TIMEOUT )
531+ time .Sleep (SIGTERM_TIMEOUT ) // Waiting for the SIGTERM to kick in
521532 if pidExists (pid ) {
522533 // SIGINT for the "Waiting for graceful device shutdown.
523534 // Hit Ctrl-C again to abort immediately" message.
524535 killErr = syscall .Kill (pid , syscall .SIGINT )
525536 if killErr != nil {
526537 log .WithError (killErr ).
527538 WithField ("taskId" , t .ti .GetTaskID ()).
528- Warning ("could not gracefully kill task " )
539+ Warning ("task SIGINT failed " )
529540 }
530- time .Sleep (KILL_TIMEOUT )
541+ time .Sleep (SIGINT_TIMEOUT )
531542 }
532543 if ! pidExists (pid ) {
533544 return killErr
534545 }
535546 }
536- case <- time .After (KILL_TRANSITION_TIMEOUT ):
547+ case <- time .After (SIGTERM_TIMEOUT + SIGINT_TIMEOUT ):
537548 }
538549
539550 killErr := syscall .Kill (pid , syscall .SIGKILL )
540551 if killErr != nil {
541552 log .WithError (killErr ).
542553 WithField ("taskId" , t .ti .GetTaskID ()).
543- Warning ("could not kill task " )
554+ Warning ("task SIGKILL failed " )
544555 }
545556
546557 return killErr
0 commit comments