Commit 59a86198 authored by Daniele Venzano
Browse files

Use core information to fit services in nodes

parent 3fdfe4fa
Pipeline #4570 passed with stages
in 2 minutes and 55 seconds
This diff is collapsed.
......@@ -194,34 +194,6 @@ div.status_line {
font-size: larger;
}
div.node_detail div.memory_total,
div.node_detail div.cores_total {
border: 1px solid black;
background-color: green;
width: 100%;
position: relative;
z-index: 0;
}
div.node_detail div.memory_reserved,
div.node_detail div.cores_reserved {
background-color: red;
display: inline-block;
left: 0;
z-index: 2;
position: relative;
float: left;
}
div.node_detail div.memory_total span,
div.node_detail div.cores_total span {
text-align: end;
width: 100%;
position: relative;
left: 0;
display: block;
z-index: 1;
}
div.scheduler_queue {
display: flex;
overflow: auto;
......@@ -245,3 +217,16 @@ div.essential {
div.running {
border-color: green;
}
div.pie-plots {
width: 20em;
height: 22em;
}
div.plot-container {
float: left;
}
div.plot-container p {
text-align: center;
}
{% extends "base_user.html" %}
{% block title %}Zoe system status{% endblock %}
{% block custom_head %}
<script src="/static/Chart.min.js" type="application/javascript"></script>
{% endblock %}
{% block content %}
<h2>Zoe system status</h2>
......@@ -88,14 +92,107 @@
<div class="platform_node_detail">
{% for node in stats.platform_stats.nodes %}
<div class="node_detail">
<div class="node_name">{{ node['name'] }}</div>
<div class="node_name">
{{ node['name'] }}
{% if node['status'] == 'offline' %}
(node is offline/unreachable)
{% endif %}
</div>
<div class="container_count">{{ node['container_count'] }} containers</div>
<div class="memory_total">
<div class="memory_reserved" style="width: {{ node['memory_reserved'] * 100 / node['memory_total'] }}%;">&nbsp;</div><span>{{ '%0.2f' % (node['memory_reserved'] * 100 / node['memory_total'],) }}% memory reserved</span>
<div class="plot-container">
<p>Memory</p>
<div class="pie-plots">
<canvas class="node_status_canvas" id="{{ node.name }}-mem-res"></canvas>
<canvas class="node_status_canvas" id="{{ node.name }}-mem-use"></canvas>
</div>
</div>
<div class="cores_total">
<div class="cores_reserved" style="width: {{ node['cores_reserved'] * 100 / node['cores_total'] }}%;">&nbsp;</div><span>{{ '%0.2f' % (node['cores_reserved'] * 100 / node['cores_total'],) }}% cores reserved</span>
<script>
// Draws the two memory pie charts for the node rendered by the current
// iteration of the template loop. The numbers are substituted server-side
// by the template engine before the browser runs this script.
// Note: data, ctx and myPieChart are assigned without a declaration, so
// they are implicit globals that get overwritten for every node.

// First chart: reserved memory versus memory that is not reserved.
data = {
datasets: [{
label: 'Reserved memory',
data: [{{ node['memory_reserved'] }}, {{ node['memory_total'] - node['memory_reserved'] }}],
backgroundColor: ['rgba(0, 169, 225, 1.0)', 'rgba(145, 192, 46, 1.0)']
}],
'labels': ['Reserved', 'Free']
};
// The canvas id is built from the node name plus the "-mem-res" suffix.
ctx = document.getElementById("{{ node.name }}-mem-res").getContext('2d');
new Chart(ctx,{
type: 'pie',
data: data,
options: {
animation: {
// Rotation animation is switched off for this chart.
animateRotate: false
}
}
});
// Second chart: memory in use (total minus free) versus free memory.
data = {
datasets: [{
label: 'Used memory',
data: [{{ node['memory_total'] - node['memory_free'] }}, {{ node['memory_free'] }}],
backgroundColor: ['rgba(0, 169, 225, 1.0)', 'rgba(145, 192, 46, 1.0)']
}],
'labels': ['In-use', 'Free']
};
// The canvas id is built from the node name plus the "-mem-use" suffix.
ctx = document.getElementById("{{ node.name }}-mem-use").getContext('2d');
myPieChart = new Chart(ctx,{
type: 'pie',
data: data,
options: {
animation: {
animateRotate: false
}
}
});
</script>
<div class="plot-container">
<p>Cores</p>
<div class="pie-plots">
<canvas class="node_status_canvas" id="{{ node.name }}-cpu-res"></canvas>
<canvas class="node_status_canvas" id="{{ node.name }}-cpu-use"></canvas>
</div>
</div>
<script>
    // Draws the two CPU-core pie charts for the node rendered by the current
    // iteration of the template loop. The numbers are substituted server-side
    // by the template engine before the browser runs this script.

    // First chart: reserved cores versus cores that are not reserved.
    data = {
        datasets: [{
            label: 'Reserved cores',
            data: [{{ node['cores_reserved'] }}, {{ node['cores_total'] - node['cores_reserved'] }}],
            backgroundColor: ['rgba(0, 169, 225, 1.0)', 'rgba(145, 192, 46, 1.0)']
        }],
        'labels': ['Reserved', 'Free']
    };
    ctx = document.getElementById("{{ node.name }}-cpu-res").getContext('2d');
    new Chart(ctx,{
        type: 'pie',
        data: data,
        options: {
            animation: {
                animateRotate: false
            }
        }
    });

    // Second chart: cores in use (total minus free) versus free cores.
    data = {
        datasets: [{
            // This dataset plots cores, so it must not carry the memory label.
            label: 'Used cores',
            data: [{{ node['cores_total'] - node['cores_free'] }}, {{ node['cores_free'] }}],
            backgroundColor: ['rgba(0, 169, 225, 1.0)', 'rgba(145, 192, 46, 1.0)']
        }],
        'labels': ['In-use', 'Free']
    };
    ctx = document.getElementById("{{ node.name }}-cpu-use").getContext('2d');
    myPieChart = new Chart(ctx,{
        type: 'pie',
        data: data,
        options: {
            animation: {
                animateRotate: false
            }
        }
    });
</script>
</div>
{% endfor %}
</div>
......
......@@ -104,7 +104,7 @@ class DockerClient:
run_args['mem_reservation'] -= 1
if service_instance.core_limit is not None:
run_args['cpu_quota'] = 100000 * service_instance.core_limit.max
run_args['cpu_quota'] = int(100000 * service_instance.core_limit.max)
if get_conf().gelf_address != '':
run_args['log_config'] = {
......@@ -184,6 +184,9 @@ class DockerClient:
else:
info['ports'][port] = None
info['cpu_period'] = container.attrs['HostConfig']['CpuPeriod']
info['cpu_quota'] = container.attrs['HostConfig']['CpuQuota']
return info
def inspect_container(self, docker_id: str) -> Dict[str, Any]:
......
......@@ -19,6 +19,7 @@ import logging
import threading
import time
from copy import deepcopy
from datetime import datetime
from zoe_lib.config import get_conf
from zoe_lib.state import SQLManager, Service
......@@ -71,6 +72,7 @@ class DockerStateSynchronizer(threading.Thread):
node_stats.status = 'offline'
time.sleep(CHECK_INTERVAL)
continue
node_stats.status = 'online'
service_list = self.state.service_list(backend_host=host_config.name)
try:
......@@ -118,6 +120,20 @@ class DockerStateSynchronizer(threading.Thread):
memory_in_use = sum([stat['memory_stats']['usage'] for stat in stats.values() if 'usage' in stat['memory_stats']])
node_stats.memory_free = node_stats.memory_total - memory_in_use
node_stats.cores_reserved = sum([cont['cpu_quota'] / cont['cpu_period'] for cont in container_list if cont['cpu_period'] != 0])
node_stats.cores_free = node_stats.cores_total - sum([self._get_core_usage(stat) for stat in stats.values()])
def _get_core_usage(self, stat):
    """Return the number of cores a container used between two stat samples.

    The value is the CPU time consumed between the previous and the current
    sample (``precpu_stats`` / ``cpu_stats`` ``total_usage``) divided by the
    wall-clock time elapsed between ``preread`` and ``read``, expressed in
    nanoseconds.

    :param stat: one stats dictionary holding the ``read``, ``preread``,
        ``cpu_stats`` and ``precpu_stats`` entries
    :return: the fraction of cores in use, or 0 when either timestamp cannot
        be parsed or no time elapsed between the two samples
    """
    def _parse_ts(raw):
        # Timestamps may carry more than six fractional digits and a trailing
        # 'Z'; strptime's %f accepts at most six digits, so trim the rest.
        head, dot, fraction = raw.rstrip('Z').partition('.')
        if dot:
            raw = head + dot + fraction[:6]
        return datetime.strptime(raw, '%Y-%m-%dT%H:%M:%S.%f')

    try:
        this_read_ts = _parse_ts(stat['read'])
        pre_read_ts = _parse_ts(stat['preread'])
    except ValueError:
        # No usable pair of samples, report no usage.
        return 0

    elapsed_ns = (this_read_ts - pre_read_ts).total_seconds() * 1000000000
    if elapsed_ns <= 0:
        # Identical or out-of-order samples would divide by zero or yield a
        # meaningless negative usage.
        return 0

    cpu_time_now = stat['cpu_stats']['cpu_usage']['total_usage']
    cpu_time_pre = stat['precpu_stats']['cpu_usage']['total_usage']
    return (cpu_time_now - cpu_time_pre) / elapsed_ns
def _update_service_status(self, service: Service, container):
"""Update the service status."""
if service.backend_status != container['state']:
......
......@@ -12,10 +12,12 @@ class SimulatedNode:
"""A simulated node where containers can be run"""
def __init__(self, real_node: NodeStats):
self.real_reservations = {
"memory": real_node.memory_reserved
"memory": real_node.memory_reserved,
"cores": real_node.cores_reserved
}
self.real_free_resources = {
"memory": real_node.memory_free
"memory": real_node.memory_free,
"cores": real_node.cores_free
}
self.real_active_containers = real_node.container_count
self.services = []
......@@ -23,7 +25,14 @@ class SimulatedNode:
def service_fits(self, service: 'Service') -> bool:
    """Checks whether a service can fit in this node.

    A service fits when its minimum memory reservation is strictly smaller
    than the free memory of the node and its minimum core reservation does
    not exceed the free cores of the node.
    """
    reservation = service.resource_reservation
    # Both resources must be checked; a memory-only early return would leave
    # the cores test unreachable.
    if not reservation.memory.min < self.node_free_memory():
        return False
    return reservation.cores.min <= self.node_free_cores()
def service_why_unfit(self, service) -> str:
    """Generate an explanation of why the service does not fit this node.

    Memory is reported first, then cores. A resource is reported when the
    minimum reservation of the service is not covered by what is free on the
    node, and the message carries the missing amount (reservation minus free).

    :param service: the service whose ``resource_reservation`` is examined
    :return: the explanation, or an empty string when neither resource is
        short
    """
    memory_needed = service.resource_reservation.memory.min
    free_memory = self.node_free_memory()
    # Memory must be strictly below the free amount for the service to fit.
    if memory_needed >= free_memory:
        return 'needs {} bytes of memory'.format(memory_needed - free_memory)

    cores_needed = service.resource_reservation.cores.min
    free_cores = self.node_free_cores()
    if cores_needed > free_cores:
        return 'needs {} more cores'.format(cores_needed - free_cores)

    return ''
def service_add(self, service):
"""Add a service in this node."""
......@@ -52,19 +61,31 @@ class SimulatedNode:
simulated_reservation = 0
for service in self.services: # type: Service
simulated_reservation += service.resource_reservation.memory.min
assert (self.real_free_resources['memory'] - simulated_reservation) >= 0
return self.real_free_resources['memory'] - simulated_reservation
free = self.real_free_resources['memory'] - simulated_reservation
if free < 0:
log.warning('More memory reserved than there is free on node {}: {}'.format(self.name, free))
return free
def node_free_cores(self):
    """Return the amount of free cores available in this node.

    The result is the real free cores of the node minus the minimum core
    reservation of every service placed on it by the simulation. A negative
    result is logged as a warning and still returned.
    """
    reserved_by_simulation = sum(placed.resource_reservation.cores.min for placed in self.services)
    remaining = self.real_free_resources['cores'] - reserved_by_simulation
    if remaining < 0:
        log.warning('More cores reserved than there are free on node {}: {}'.format(self.name, remaining))
    return remaining
def __repr__(self):
    """Return a short description with the node name, free memory and free cores."""
    # Built in a single step: an earlier memory-only assignment would be
    # overwritten right away and only trigger a second free-memory evaluation.
    return 'SN {} | m {} | c {}'.format(self.name, self.node_free_memory(), self.node_free_cores())
class SimulatedPlatform:
"""A simulated cluster, composed by simulated nodes"""
def __init__(self, plastform_status: ClusterStats):
def __init__(self, platform_status: ClusterStats):
self.nodes = {}
for node in plastform_status.nodes:
for node in platform_status.nodes:
if node.status == 'online':
self.nodes[node.name] = SimulatedNode(node)
......@@ -75,9 +96,11 @@ class SimulatedPlatform:
for node_id_, node in self.nodes.items():
if node.service_fits(service):
candidate_nodes.append(node)
else:
log.debug('Cannot fit service {} on node {}: {}'.format(service.id, node.name, node.service_why_unfit(service)))
if len(candidate_nodes) == 0: # this service does not fit anywhere
self.deallocate_essential(execution)
log.debug('Cannot fit essential service {}, bailing out'.format(service.id))
log.debug('Cannot fit essential service {} anywhere, bailing out'.format(service.id))
return False
candidate_nodes.sort(key=lambda n: n.container_count) # smallest first
candidate_nodes[0].service_add(service)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment