From f339629fbf1a86cdce86b3b584d9d1fb06793113 Mon Sep 17 00:00:00 2001
From: Adam Avilla
Date: Fri, 24 Jun 2016 14:12:02 -0700
Subject: [PATCH] Don't exit when slave lost because it may not be our task's
 slave.

The exit was added because it was thought the program would otherwise hang
and consume resources in Mesos. That is not the case: later commits show
that printLogs() was actually causing the process to block, and
ResourceOffers() was consuming resources for reasons that remain unclear.

This change has been tested to be safe (it does not consume all Mesos
resources) when:

* Random slaves are lost in the cluster.
* The slave the task is running on is lost.
* The slave the task is running on becomes "unhealthy" (simulated with
  iptables).
---
 main.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/main.go b/main.go
index 0666d26..2174312 100644
--- a/main.go
+++ b/main.go
@@ -71,7 +71,7 @@ func (sched *MesosRunonceScheduler) FrameworkMessage(_ sched.SchedulerDriver, ei
 	log.Errorf("framework message from executor %q slave %q: %q", eid, sid, msg)
 }
 func (sched *MesosRunonceScheduler) SlaveLost(_ sched.SchedulerDriver, sid *mesos.SlaveID) {
-	log.Exitf("slave lost: %v", sid)
+	log.V(1).Infof("slave lost: %v", sid)
 }
 func (sched *MesosRunonceScheduler) ExecutorLost(_ sched.SchedulerDriver, eid *mesos.ExecutorID, sid *mesos.SlaveID, code int) {
 	log.Errorf("executor %q lost on slave %q code %d", eid, sid, code)
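
Note: if one later wants to react only when the lost slave actually hosts
our task, a minimal sketch follows. It assumes the scheduler records the
task's SlaveID at launch time; the taskSlaveID field and the comparison are
hypothetical and not part of this patch, and the struct is pared down from
the real one. Import paths follow the 2016-era mesos-go layout seen in this
code.

package main

import (
	log "github.com/golang/glog"
	mesos "github.com/mesos/mesos-go/mesosproto"
	sched "github.com/mesos/mesos-go/scheduler"
)

// taskSlaveID is a hypothetical field that would be set in ResourceOffers
// when the task is launched on a slave.
type MesosRunonceScheduler struct {
	taskSlaveID *mesos.SlaveID
}

func (s *MesosRunonceScheduler) SlaveLost(_ sched.SchedulerDriver, sid *mesos.SlaveID) {
	if s.taskSlaveID != nil && s.taskSlaveID.GetValue() == sid.GetValue() {
		// The slave hosting our task is gone. A TASK_LOST status update
		// should also arrive via StatusUpdate, so log loudly here and let
		// that path drive shutdown instead of exiting immediately.
		log.Errorf("slave running our task lost: %v", sid)
		return
	}
	// An unrelated slave in the cluster was lost; nothing of ours changed.
	log.V(1).Infof("slave lost: %v", sid)
}

func main() {} // stub so the sketch compiles standalone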