Commit 582fbff2 authored by Daniele Venzano

Remove executions from the queue in case of fatal errors

parent 683e5749
......@@ -83,7 +83,7 @@ class DockerEngineBackend(zoe_master.backends.base.BaseBackend):
if service.backend_id is not None:
engine.terminate_container(service.backend_id, delete=True)
else:
log.error('Cannot terminate service {}, since it has not backend ID'.format(service.name))
log.error('Cannot terminate service {}, since it has no backend ID'.format(service.name))
service.set_backend_status(service.BACKEND_DESTROY_STATUS)
def platform_state(self) -> ClusterStats:
......@@ -213,4 +213,4 @@ class DockerEngineBackend(zoe_master.backends.base.BaseBackend):
cpu_quota = int(cores * 100000)
engine.update(service.backend_id, cpu_quota=cpu_quota, mem_reservation=memory)
else:
log.error('Cannot update service {} ({}), since it has no backend ID'.format(service.name, service.id))
log.error('Cannot update reservations for service {} ({}), since it has no backend ID'.format(service.name, service.id))
......@@ -261,6 +261,8 @@ class ZoeElasticScheduler:
ret = start_essential(job, placements)
if ret == "fatal":
jobs_to_attempt_scheduling.remove(job)
self.queue.remove(job)
job.termination_lock.release()
continue # throw away the execution
elif ret == "requeue":
self._requeue(job)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment