Commit 59a86198 authored by Daniele Venzano

Use core information to fit services in nodes

parent 3fdfe4fa
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -194,34 +194,6 @@ div.status_line {
font-size: larger;
}
div.node_detail div.memory_total,
div.node_detail div.cores_total {
border: 1px solid black;
background-color: green;
width: 100%;
position: relative;
z-index: 0;
}
div.node_detail div.memory_reserved,
div.node_detail div.cores_reserved {
background-color: red;
display: inline-block;
left: 0;
z-index: 2;
position: relative;
float: left;
}
div.node_detail div.memory_total span,
div.node_detail div.cores_total span {
text-align: end;
width: 100%;
position: relative;
left: 0;
display: block;
z-index: 1;
}
div.scheduler_queue {
display: flex;
overflow: auto;
......@@ -245,3 +217,16 @@ div.essential {
div.running {
border-color: green;
}
/* Fixed-size box that wraps the pie-chart canvases of one resource. */
div.pie-plots {
width: 20em;
height: 22em;
}
/* Float the per-resource containers left so they line up side by side. */
div.plot-container {
float: left;
}
/* Centre the caption paragraph placed inside each plot container. */
div.plot-container p {
text-align: center;
}
{% extends "base_user.html" %}
{% block title %}Zoe system status{% endblock %}
{% block custom_head %}
<script src="/static/Chart.min.js" type="application/javascript"></script>
{% endblock %}
{% block content %}
<h2>Zoe system status</h2>
......@@ -88,14 +92,107 @@
<div class="platform_node_detail">
{% for node in stats.platform_stats.nodes %}
<div class="node_detail">
<div class="node_name">{{ node['name'] }}</div>
<div class="node_name">
{{ node['name'] }}
{% if node['status'] == 'offline' %}
(node is offline/unreachable)
{% endif %}
</div>
<div class="container_count">{{ node['container_count'] }} containers</div>
<div class="memory_total">
<div class="memory_reserved" style="width: {{ node['memory_reserved'] * 100 / node['memory_total'] }}%;">&nbsp;</div><span>{{ '%0.2f' % (node['memory_reserved'] * 100 / node['memory_total'],) }}% memory reserved</span>
<div class="plot-container">
<p>Memory</p>
<div class="pie-plots">
<canvas class="node_status_canvas" id="{{ node.name }}-mem-res"></canvas>
<canvas class="node_status_canvas" id="{{ node.name }}-mem-use"></canvas>
</div>
</div>
<div class="cores_total">
<div class="cores_reserved" style="width: {{ node['cores_reserved'] * 100 / node['cores_total'] }}%;">&nbsp;</div><span>{{ '%0.2f' % (node['cores_reserved'] * 100 / node['cores_total'],) }}% cores reserved</span>
<script>
// Pie chart 1: memory reserved on this node versus the remainder of its total memory.
// NOTE: `data` and `ctx` are assigned without a declaration, so they are implicit
// globals that every per-node script block rendered by the template loop overwrites.
data = {
datasets: [{
label: 'Reserved memory',
data: [{{ node['memory_reserved'] }}, {{ node['memory_total'] - node['memory_reserved'] }}],
backgroundColor: ['rgba(0, 169, 225, 1.0)', 'rgba(145, 192, 46, 1.0)']
}],
'labels': ['Reserved', 'Free']
};
// The canvas id is built from the node name plus a "-mem-res" suffix.
ctx = document.getElementById("{{ node.name }}-mem-res").getContext('2d');
new Chart(ctx,{
type: 'pie',
data: data,
options: {
animation: {
// Draw the pie without the rotation animation.
animateRotate: false
}
}
});
// Pie chart 2: memory in use (total minus free) versus free memory.
data = {
datasets: [{
label: 'Used memory',
data: [{{ node['memory_total'] - node['memory_free'] }}, {{ node['memory_free'] }}],
backgroundColor: ['rgba(0, 169, 225, 1.0)', 'rgba(145, 192, 46, 1.0)']
}],
'labels': ['In-use', 'Free']
};
ctx = document.getElementById("{{ node.name }}-mem-use").getContext('2d');
// `myPieChart` is also an implicit global; nothing in this block reads it afterwards.
myPieChart = new Chart(ctx,{
type: 'pie',
data: data,
options: {
animation: {
animateRotate: false
}
}
});
</script>
<div class="plot-container">
<p>Cores</p>
<div class="pie-plots">
<canvas class="node_status_canvas" id="{{ node.name }}-cpu-res"></canvas>
<canvas class="node_status_canvas" id="{{ node.name }}-cpu-use"></canvas>
</div>
</div>
<script>
data = {
datasets: [{
label: 'Reserved cores',
data: [{{ node['cores_reserved'] }}, {{ node['cores_total'] - node['cores_reserved'] }}],
backgroundColor: ['rgba(0, 169, 225, 1.0)', 'rgba(145, 192, 46, 1.0)']
}],
'labels': ['Reserved', 'Free']
};
ctx = document.getElementById("{{ node.name }}-cpu-res").getContext('2d');
new Chart(ctx,{
type: 'pie',
data: data,
options: {
animation: {
animateRotate: false
}
}
});
data = {
datasets: [{
label: 'Used memory',
data: [{{ node['cores_total'] - node['cores_free'] }}, {{ node['cores_free'] }}],
backgroundColor: ['rgba(0, 169, 225, 1.0)', 'rgba(145, 192, 46, 1.0)']
}],
'labels': ['In-use', 'Free']
};
ctx = document.getElementById("{{ node.name }}-cpu-use").getContext('2d');
myPieChart = new Chart(ctx,{
type: 'pie',
data: data,
options: {
animation: {
animateRotate: false
}
}
});
</script>
</div>
{% endfor %}
</div>
......
......@@ -104,7 +104,7 @@ class DockerClient:
run_args['mem_reservation'] -= 1
if service_instance.core_limit is not None:
run_args['cpu_quota'] = 100000 * service_instance.core_limit.max
run_args['cpu_quota'] = int(100000 * service_instance.core_limit.max)
if get_conf().gelf_address != '':
run_args['log_config'] = {
......@@ -184,6 +184,9 @@ class DockerClient:
else:
info['ports'][port] = None
info['cpu_period'] = container.attrs['HostConfig']['CpuPeriod']
info['cpu_quota'] = container.attrs['HostConfig']['CpuQuota']
return info
def inspect_container(self, docker_id: str) -> Dict[str, Any]:
......
......@@ -19,6 +19,7 @@ import logging
import threading
import time
from copy import deepcopy
from datetime import datetime
from zoe_lib.config import get_conf
from zoe_lib.state import SQLManager, Service
......@@ -71,6 +72,7 @@ class DockerStateSynchronizer(threading.Thread):
node_stats.status = 'offline'
time.sleep(CHECK_INTERVAL)
continue
node_stats.status = 'online'
service_list = self.state.service_list(backend_host=host_config.name)
try:
......@@ -118,6 +120,20 @@ class DockerStateSynchronizer(threading.Thread):
memory_in_use = sum([stat['memory_stats']['usage'] for stat in stats.values() if 'usage' in stat['memory_stats']])
node_stats.memory_free = node_stats.memory_total - memory_in_use
node_stats.cores_reserved = sum([cont['cpu_quota'] / cont['cpu_period'] for cont in container_list if cont['cpu_period'] != 0])
node_stats.cores_free = node_stats.cores_total - sum([self._get_core_usage(stat) for stat in stats.values()])
def _get_core_usage(self, stat):
try:
this_read_ts = datetime.strptime(stat['read'], '%Y-%m-%dT%H:%M:%S.%f')
except ValueError:
return 0
pre_read_ts = datetime.strptime(stat['preread'], '%Y-%m-%dT%H:%M:%S.%f')
cpu_time_now = stat['cpu_stats']['cpu_usage']['total_usage']
cpu_time_pre = stat['precpu_stats']['cpu_usage']['total_usage']
return (cpu_time_now - cpu_time_pre) / ((this_read_ts - pre_read_ts).total_seconds() * 1000000000)
def _update_service_status(self, service: Service, container):
"""Update the service status."""
if service.backend_status != container['state']:
......
......@@ -12,10 +12,12 @@ class SimulatedNode:
"""A simulated node where containers can be run"""
def __init__(self, real_node: NodeStats):
self.real_reservations = {
"memory": real_node.memory_reserved
"memory": real_node.memory_reserved,
"cores": real_node.cores_reserved
}
self.real_free_resources = {
"memory": real_node.memory_free
"memory": real_node.memory_free,
"cores": real_node.cores_free
}
self.real_active_containers = real_node.container_count
self.services = []
......@@ -23,7 +25,14 @@ class SimulatedNode:
def service_fits(self, service: Service) -> bool:
    """Check whether a service can fit in this node.

    A service fits when its minimum memory reservation is strictly smaller
    than the node's free memory and its minimum core reservation does not
    exceed the node's free cores.

    :param service: the service to place
    :return: True if both the memory and the core reservation fit
    """
    reservation = service.resource_reservation
    # Single return: a memory-only return placed ahead of this one would make
    # the core check unreachable.
    return reservation.memory.min < self.node_free_memory() and reservation.cores.min <= self.node_free_cores()
def service_why_unfit(self, service) -> str:
    """Generate an explanation of why the service does not fit this node.

    Memory is considered unfit when the minimum reservation is greater than
    or equal to the free memory, cores when the minimum reservation is
    greater than the free cores. The reported figure is the shortfall
    (reservation minus what is free).

    :param service: the service that could not be placed
    :return: a short human-readable reason; a generic message if the service
        actually fits
    """
    reservation = service.resource_reservation
    free_memory = self.node_free_memory()
    if reservation.memory.min >= free_memory:
        return 'needs {} bytes of memory'.format(reservation.memory.min - free_memory)
    free_cores = self.node_free_cores()
    if reservation.cores.min > free_cores:
        return 'needs {} more cores'.format(reservation.cores.min - free_cores)
    # Always hand back a string so callers can format it into log messages.
    return 'service fits'
def service_add(self, service):
"""Add a service in this node."""
......@@ -52,19 +61,31 @@ class SimulatedNode:
simulated_reservation = 0
for service in self.services: # type: Service
simulated_reservation += service.resource_reservation.memory.min
assert (self.real_free_resources['memory'] - simulated_reservation) >= 0
return self.real_free_resources['memory'] - simulated_reservation
free = self.real_free_resources['memory'] - simulated_reservation
if free < 0:
log.warning('More memory reserved than there is free on node {}: {}'.format(self.name, free))
return free
def node_free_cores(self):
    """Compute how many cores are still unreserved on this node.

    The minimum core reservations of the services held in ``self.services``
    are subtracted from the node's real free cores. A negative result is
    logged as a warning and returned unchanged.
    """
    claimed = sum(svc.resource_reservation.cores.min for svc in self.services)
    remaining = self.real_free_resources['cores'] - claimed
    if remaining < 0:
        log.warning('More cores reserved than there are free on node {}: {}'.format(self.name, remaining))
    return remaining
def __repr__(self):
out = 'SN {} | f {}'.format(self.name, self.node_free_memory())
out = 'SN {} | m {} | c {}'.format(self.name, self.node_free_memory(), self.node_free_cores())
return out
class SimulatedPlatform:
"""A simulated cluster, composed by simulated nodes"""
def __init__(self, plastform_status: ClusterStats):
def __init__(self, platform_status: ClusterStats):
self.nodes = {}
for node in plastform_status.nodes:
for node in platform_status.nodes:
if node.status == 'online':
self.nodes[node.name] = SimulatedNode(node)
......@@ -75,9 +96,11 @@ class SimulatedPlatform:
for node_id_, node in self.nodes.items():
if node.service_fits(service):
candidate_nodes.append(node)
else:
log.debug('Cannot fit service {} on node {}: {}'.format(service.id, node.name, node.service_why_unfit(service)))
if len(candidate_nodes) == 0: # this service does not fit anywhere
self.deallocate_essential(execution)
log.debug('Cannot fit essential service {}, bailing out'.format(service.id))
log.debug('Cannot fit essential service {} anywhere, bailing out'.format(service.id))
return False
candidate_nodes.sort(key=lambda n: n.container_count) # smallest first
candidate_nodes[0].service_add(service)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment