@@ -444,6 +444,7 @@ func (m *Manager) acquireTasks(envId uid.ID, taskDescriptors Descriptors) (err e
444444 // - awaiting Task deployment in tasksToRun
445445 deploymentSuccess := true // hopefully
446446 undeployedDescriptors := make (Descriptors , 0 )
447+ undeployedNonCriticalDescriptors := make (Descriptors , 0 )
447448 undeployedCriticalDescriptors := make (Descriptors , 0 )
448449
449450 deployedTasks := make (DeploymentMap )
@@ -496,9 +497,6 @@ func (m *Manager) acquireTasks(envId uid.ID, taskDescriptors Descriptors) (err e
496497 log .WithField ("partition" , envId ).
497498 Errorf ("environment deployment failure: %d tasks requested for deployment, but %d deployed" , len (tasksToRun ), len (deployedTasks ))
498499
499- ////////////////
500- // CHECK HERE //
501- ////////////////
502500 for _ , t := range undeployedDescriptors {
503501 if t .TaskRole .GetTaskTraits ().Critical == true {
504502 deploymentSuccess = false
@@ -507,6 +505,7 @@ func (m *Manager) acquireTasks(envId uid.ID, taskDescriptors Descriptors) (err e
507505 log .WithField ("partition" , envId ).
508506 Errorf ("critical task deployment failure: %s" , printname )
509507 } else {
508+ undeployedNonCriticalDescriptors = append (undeployedNonCriticalDescriptors , t )
510509 printname := fmt .Sprintf ("%s->%s" , t .TaskRole .GetPath (), t .TaskClassName )
511510 log .WithField ("partition" , envId ).
512511 Warnf ("non-critical task deployment failure: %s" , printname )
@@ -538,9 +537,9 @@ func (m *Manager) acquireTasks(envId uid.ID, taskDescriptors Descriptors) (err e
538537 }
539538
540539 err = TasksDeploymentError {
541- tasksErrorBase : tasksErrorBase {taskIds : deployedTaskIds },
542- failedDescriptors : undeployedDescriptors ,
543- failedCriticalDescriptors : undeployedCriticalDescriptors ,
540+ tasksErrorBase : tasksErrorBase {taskIds : deployedTaskIds },
541+ failedNonCriticalDescriptors : undeployedNonCriticalDescriptors ,
542+ failedCriticalDescriptors : undeployedCriticalDescriptors ,
544543 }
545544 }
546545
@@ -664,7 +663,8 @@ func (m *Manager) configureTasks(envId uid.ID, tasks Tasks) error {
664663 }
665664
666665 if response .IsMultiResponse () {
667- taskErrors := make ([]string , len (response .Errors ()))
666+ taskCriticalErrors := make ([]string , 0 )
667+ taskNonCriticalErrors := make ([]string , 0 )
668668 i := 0
669669 for k , v := range response .Errors () {
670670 task := m .GetTask (k .TaskId .Value )
@@ -680,12 +680,20 @@ func (m *Manager) configureTasks(envId uid.ID, tasks Tasks) error {
680680 } else {
681681 taskDescription = fmt .Sprintf ("unknown task (id %s) failed with error: %s" , k .TaskId .Value , v .Error ())
682682 }
683- taskErrors [i ] = taskDescription
683+ if task .GetTraits ().Critical == true || task .parent .GetTaskTraits ().Critical == true {
684+ taskCriticalErrors [i ] = taskDescription
685+ } else {
686+ taskNonCriticalErrors [i ] = taskDescription
687+ }
684688 i ++
685689 }
686690
687- if len (taskErrors ) > 0 {
688- return fmt .Errorf ("CONFIGURE could not complete, errors: %s" , strings .Join (taskErrors , "; " ))
691+ if len (taskNonCriticalErrors ) > 0 {
692+ log .WithField ("partition" , envId ).
693+ Warnf ("non-critical task configuration failure, errors: %s" , strings .Join (taskNonCriticalErrors , "; " ))
694+ }
695+ if len (taskCriticalErrors ) > 0 {
696+ return fmt .Errorf ("CONFIGURE could not complete, errors: %s" , strings .Join (taskCriticalErrors , "; " ))
689697 }
690698 return nil
691699 } else {
@@ -732,13 +740,49 @@ func (m *Manager) transitionTasks(envId uid.ID, tasks Tasks, src string, event s
732740 return errors .New ("unknown MesosCommand error: nil response received" )
733741 }
734742
735- respError := response .Err ()
736- if respError != nil {
737- errText := respError .Error ()
738- if len (strings .TrimSpace (errText )) != 0 {
739- return errors .New (response .Err ().Error ())
743+ if response .IsMultiResponse () {
744+ taskCriticalErrors := make ([]string , 0 )
745+ taskNonCriticalErrors := make ([]string , 0 )
746+ i := 0
747+ for k , v := range response .Errors () {
748+ task := m .GetTask (k .TaskId .Value )
749+ var taskDescription string
750+ if task != nil {
751+ tci := task .GetTaskCommandInfo ()
752+ tciValue := "unknown command"
753+ if tci .Value != nil {
754+ tciValue = * tci .Value
755+ }
756+
757+ taskDescription = fmt .Sprintf ("task '%s' on %s (id %s) failed with error: %s" , tciValue , task .GetHostname (), task .GetTaskId (), v .Error ())
758+ } else {
759+ taskDescription = fmt .Sprintf ("unknown task (id %s) failed with error: %s" , k .TaskId .Value , v .Error ())
760+ }
761+ if task .GetTraits ().Critical == true || task .parent .GetTaskTraits ().Critical == true {
762+ taskCriticalErrors [i ] = taskDescription
763+ } else {
764+ taskNonCriticalErrors [i ] = taskDescription
765+ }
766+ i ++
767+ }
768+
769+ if len (taskNonCriticalErrors ) > 0 {
770+ log .WithField ("partition" , envId ).
771+ Warnf ("non-critical task transition failure, errors: %s" , strings .Join (taskNonCriticalErrors , "; " ))
772+ }
773+ if len (taskCriticalErrors ) > 0 {
774+ return fmt .Errorf ("transition could not complete, errors: %s" , strings .Join (taskCriticalErrors , "; " ))
775+ }
776+ return nil
777+ } else {
778+ respError := response .Err ()
779+ if respError != nil {
780+ errText := respError .Error ()
781+ if len (strings .TrimSpace (errText )) != 0 {
782+ return errors .New (response .Err ().Error ())
783+ }
784+ // FIXME: improve error handling ↑
740785 }
741- // FIXME: improve error handling ↑
742786 }
743787
744788 return nil
0 commit comments