Commit 6a75696a authored by Daniele Venzano

Implement execution inspection

parent 70879462
......@@ -73,3 +73,141 @@ class PlatformStats(Stats):
def __str__(self):
return "Swarm:\n{}\nScheduler:\n{}\n".format(self.swarm, self.scheduler)
class ContainerStats(Stats):
def __init__(self, docker_stats):
super().__init__()
# self.docker_stats = docker_stats
# self.blkio_serviced_ops_read = sum([x['value'] for x in docker_stats['blkio_stats']['io_serviced_recursive'] if x['op'] == 'Read'])
# self.blkio_serviced_ops_write = sum([x['value'] for x in docker_stats['blkio_stats']['io_serviced_recursive'] if x['op'] == 'Write'])
# self.blkio_serviced_ops_async = sum([x['value'] for x in docker_stats['blkio_stats']['io_serviced_recursive'] if x['op'] == 'Async'])
# self.blkio_serviced_ops_sync = sum([x['value'] for x in docker_stats['blkio_stats']['io_serviced_recursive'] if x['op'] == 'Sync'])
# self.blkio_serviced_ops_total = sum([x['value'] for x in docker_stats['blkio_stats']['io_serviced_recursive'] if x['op'] == 'Total'])
self.io_bytes_read = sum([x['value'] for x in docker_stats['blkio_stats']['io_service_bytes_recursive'] if x['op'] == 'Read'])
self.io_bytes_write = sum([x['value'] for x in docker_stats['blkio_stats']['io_service_bytes_recursive'] if x['op'] == 'Write'])
# self.blkio_serviced_bytes_async = sum([x['value'] for x in docker_stats['blkio_stats']['io_service_bytes_recursive'] if x['op'] == 'Async'])
# self.blkio_serviced_bytes_sync = sum([x['value'] for x in docker_stats['blkio_stats']['io_service_bytes_recursive'] if x['op'] == 'Sync'])
# self.blkio_serviced_bytes_total = sum([x['value'] for x in docker_stats['blkio_stats']['io_service_bytes_recursive'] if x['op'] == 'Total'])
self.memory_used = docker_stats['memory_stats']['usage']
self.memory_total = docker_stats['memory_stats']['limit']
self.net_bytes_rx = docker_stats['network']['rx_bytes']
self.net_bytes_tx = docker_stats['network']['tx_bytes']
def to_dict(self) -> dict:
ret = {}
ret.update(vars(self))
return ret
documentation_sample = {
'blkio_stats': {
'io_time_recursive': [],
'io_wait_time_recursive': [],
'io_merged_recursive': [],
'io_service_time_recursive': [],
'io_serviced_recursive': [
{'minor': 0, 'op': 'Read', 'major': 8, 'value': 0},
{'minor': 0, 'op': 'Write', 'major': 8, 'value': 1},
{'minor': 0, 'op': 'Sync', 'major': 8, 'value': 0},
{'minor': 0, 'op': 'Async', 'major': 8, 'value': 1},
{'minor': 0, 'op': 'Total', 'major': 8, 'value': 1},
{'minor': 0, 'op': 'Read', 'major': 252, 'value': 0},
{'minor': 0, 'op': 'Write', 'major': 252, 'value': 1},
{'minor': 0, 'op': 'Sync', 'major': 252, 'value': 0},
{'minor': 0, 'op': 'Async', 'major': 252, 'value': 1},
{'minor': 0, 'op': 'Total', 'major': 252, 'value': 1}
],
'io_service_bytes_recursive': [
{'minor': 0, 'op': 'Read', 'major': 8, 'value': 0},
{'minor': 0, 'op': 'Write', 'major': 8, 'value': 32768},
{'minor': 0, 'op': 'Sync', 'major': 8, 'value': 0},
{'minor': 0, 'op': 'Async', 'major': 8, 'value': 32768},
{'minor': 0, 'op': 'Total', 'major': 8, 'value': 32768},
{'minor': 0, 'op': 'Read', 'major': 252, 'value': 0},
{'minor': 0, 'op': 'Write', 'major': 252, 'value': 32768},
{'minor': 0, 'op': 'Sync', 'major': 252, 'value': 0},
{'minor': 0, 'op': 'Async', 'major': 252, 'value': 32768},
{'minor': 0, 'op': 'Total', 'major': 252, 'value': 32768}
],
'io_queue_recursive': [],
'sectors_recursive': []
},
'cpu_stats': {
'cpu_usage': {
'usage_in_usermode': 8380000000,
'usage_in_kernelmode': 2630000000,
'total_usage': 34451274609,
'percpu_usage': [931702517, 2764976848, 928621564, 2669799012, 1117103491, 2797807324, 1278365416, 2919322388, 1195818284, 2794439644, 1105212782, 2628238214, 1018437691, 2713559369, 913142014, 2966544077, 555254965, 73830222, 129362189, 120696574, 232636452, 54415721, 71511012, 111871561, 261233403, 736167553, 61198008, 713285344, 41359796, 287955073, 78816569, 178589532]},
'throttling_data': {
'throttled_periods': 0,
'throttled_time': 0,
'periods': 0
}, 'system_cpu_usage': 4257821208713451
},
'memory_stats': {
'usage': 249561088,
'limit': 2147483648,
'stats': {
'total_inactive_anon': 12288,
'pgfault': 75899,
'inactive_file': 32768,
'total_rss': 249479168,
'total_writeback': 0,
'total_inactive_file': 32768,
'writeback': 0,
'total_pgmajfault': 0,
'active_file': 0,
'total_pgfault': 75899,
'hierarchical_memory_limit': 2147483648,
'total_active_file': 0,
'total_pgpgout': 34070,
'pgpgout': 34070,
'total_rss_huge': 195035136,
'total_cache': 81920,
'total_mapped_file': 32768,
'total_pgpgin': 47475,
'rss_huge': 195035136,
'unevictable': 0,
'total_unevictable': 0,
'rss': 249479168,
'total_active_anon': 249499648,
'cache': 81920,
'active_anon': 249499648,
'inactive_anon': 12288,
'pgpgin': 47475,
'mapped_file': 32768,
'pgmajfault': 0
},
'max_usage': 266846208,
'failcnt': 0
},
'network': {
'rx_packets': 1214,
'rx_bytes': 308646,
'tx_dropped': 0,
'rx_dropped': 0,
'tx_errors': 0,
'tx_bytes': 61784,
'rx_errors': 0,
'tx_packets': 1019
},
'precpu_stats': {
'cpu_usage': {
'usage_in_usermode': 0,
'usage_in_kernelmode': 0,
'total_usage': 0,
'percpu_usage': None
},
'throttling_data': {
'throttled_periods': 0,
'throttled_time': 0,
'periods': 0
},
'system_cpu_usage': 0
},
'read': '2015-09-09T14:52:19.254587126+02:00'
}
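A minimal sketch of how the new class could be exercised against the sample payload above, assuming documentation_sample remains at module scope in common.stats; it simply feeds the documented dict through __init__ and flattens it with to_dict():

from common.stats import ContainerStats, documentation_sample

# Wrap the documented sample payload and flatten it to a plain dict.
stats = ContainerStats(documentation_sample)
print(stats.to_dict())
# With the sample above this yields io_bytes_read == 0, io_bytes_write == 65536,
# memory_used == 249561088, memory_total == 2147483648, net_bytes_rx == 308646
# and net_bytes_tx == 61784, plus whatever fields the Stats base class sets.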
......@@ -141,6 +141,14 @@ class ZoeClient:
self.state.commit()
return app.id
# Containers
def container_stats(self, container_id):
try:
self.state.query(ContainerState).filter_by(id=container_id).one()
except NoResultFound:
return None
return self.server.container_stats(container_id)
# Executions
def execution_delete(self, execution_id: int) -> None:
try:
......
......@@ -229,3 +229,8 @@ class PlatformManager:
notify_execution_finished(container.cluster.execution)
else:
log.warning("Container {} (ID: {}) died unexpectedly")
def container_stats(self, container_id):
state = AlchemySession()
container = state.query(ContainerState).filter_by(id=container_id).one()
return self.swarm.stats(container.docker_id)
......@@ -3,7 +3,7 @@ from sqlalchemy.orm.exc import NoResultFound
from zoe_scheduler.scheduler import zoe_sched
from common.stats import PlatformStats
from common.stats import PlatformStats, ContainerStats
from common.state import AlchemySession, ContainerState
from common.state.execution import ExecutionState
......@@ -17,6 +17,9 @@ class ZoeSchedulerRPCService(rpyc.Service):
def on_disconnect(self):
pass
def exposed_container_stats(self, container_id: int) -> ContainerStats:
return self.sched.platform.container_stats(container_id)
def exposed_execution_schedule(self, execution_id: int) -> bool:
state = AlchemySession()
execution = state.query(ExecutionState).filter_by(id=execution_id).one()
......
......@@ -6,7 +6,7 @@ import docker.utils
import docker.errors
from common.configuration import zoeconf
from common.stats import SwarmStats, SwarmNodeStats
from common.stats import SwarmStats, SwarmNodeStats, ContainerStats
log = logging.getLogger(__name__)
......@@ -108,10 +108,10 @@ class SwarmClient:
logdata = self.cli.logs(container=docker_id, stdout=True, stderr=True, stream=False, timestamps=False, tail="all")
return logdata.decode("utf-8")
def stats(self, docker_id) -> dict:
def stats(self, docker_id) -> ContainerStats:
stats_stream = self.cli.stats(docker_id, decode=True)
for s in stats_stream:
return s
return ContainerStats(s)
class ContainerOptions:
......
......@@ -121,6 +121,18 @@ def execution_logs(container_id: int):
return jsonify(status="ok", log=log)
@api_bp.route('/executions/stats/container/<int:container_id>')
def execution_stats(container_id: int):
client = get_zoe_client()
_api_check_user(client)
stats = client.container_stats(container_id)
if stats is None:
return jsonify(status="error", msg="no stats found")
else:
return jsonify(status="ok", **stats.to_dict())
@api_bp.route('/executions/terminate/<int:exec_id>')
def execution_terminate(exec_id: int):
client = get_zoe_client()
......
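A hedged sketch of polling the new stats endpoint from outside the web UI; the host and port are placeholders, the container id is illustrative, and the user check performed by _api_check_user is glossed over here:

import requests

# Fetch a point-in-time stats snapshot for a container (id 42 is illustrative).
r = requests.get("http://localhost:5000/api/executions/stats/container/42")
data = r.json()
if data.get("status") == "ok":
    # Field names come straight from ContainerStats.to_dict().
    print("memory:", data["memory_used"], "/", data["memory_total"])
    print("network:", data["net_bytes_rx"], "rx,", data["net_bytes_tx"], "tx")
else:
    print("error:", data.get("msg"))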
......@@ -63,11 +63,11 @@ def application_delete(app_id):
def execution_inspect(execution_id):
client = get_zoe_client()
user = web_utils.check_user(client)
execution_status = client.execution_status(execution_id)
execution = client.execution_get(execution_id)
template_vars = {
"user_id": user.id,
"email": user.email,
'execution': execution_status
'execution': execution
}
return render_template('execution_inspect.html', **template_vars)
body {
font-family: sans-serif;
max-width: 800px;
max-width: 90%;
margin-left: 20px;
}
......@@ -52,4 +52,15 @@ span#status {
div.copyright {
float: left;
padding-left: 3em;
}
textarea#log {
width: 90%;
height: 40em;
}
span.fakelink {
color: rgba(79, 140, 30, 1);
text-decoration: none;
cursor: pointer;
}
\ No newline at end of file
......@@ -2,13 +2,76 @@
{% block title %}Inspect execution {{ execution.name }}{% endblock %}
{% block content %}
<h2>Detailed information for execution {{ execution.name }}</h2>
<div id="contents">
<div id="container_list">
<ul>
{% for c in execution.cluster.containers %}
<li class="containerr_name" id="{{ c.id }}">{{ c.readable_name }}</li>
{% for c in execution.containers %}
<li class="container_name" id="{{ c.id }}"><span onclick="get_container({{ c.id }})" class="fakelink">{{ c.readable_name }}</span></li>
{% endfor %}
</ul>
</div>
{% endfor %}
<div id="stats">
<ul>
<li>Memory used <span id="ram_used">0</span> of <span id="ram_total">0</span></li>
<li>Network: <span id="net_rx">0</span> rx, <span id="net_tx">0</span> tx</li>
<li>Disk I/O: <span id="io_bytes_read">0</span> read, <span id="io_bytes_write">0</span> written</li>
</ul>
</div>
<div id="log_area">
<textarea id="log" disabled></textarea>
</div>
</div>
<script type="application/javascript">
function humanFileSize(bytes) {
var si = true;
var thresh = si ? 1000 : 1024;
if(Math.abs(bytes) < thresh) {
return bytes + ' B';
}
var units = si
? ['kB','MB','GB','TB','PB','EB','ZB','YB']
: ['KiB','MiB','GiB','TiB','PiB','EiB','ZiB','YiB'];
var u = -1;
do {
bytes /= thresh;
++u;
} while(Math.abs(bytes) >= thresh && u < units.length - 1);
return bytes.toFixed(1)+' '+units[u];
}
function get_log(container_id) {
$.getJSON('/api/executions/logs/container/' + container_id)
.done(function( data ) {
$("#log").val(data.log);
}).error(function( data ) {
$("#log").val("error fetching log");
});
}
function get_stats(container_id) {
$.getJSON('/api/executions/stats/container/' + container_id)
.done(function( data ) {
$("#ram_used").text(humanFileSize(data.memory_used));
$("#ram_total").text(humanFileSize(data.memory_total));
$("#net_rx").text(humanFileSize(data.net_bytes_rx));
$("#net_tx").text(humanFileSize(data.net_bytes_tx));
$("#io_bytes_read").text(humanFileSize(data.io_bytes_read));
$("#io_bytes_write").text(humanFileSize(data.io_bytes_write));
}).error(function( data ) {
$("#ram_used").text("N/A");
$("#ram_total").text("N/A");
$("#net_rx").text("N/A");
$("#net_tx").text("N/A");
});
}
function get_container(container_id) {
get_log(container_id);
get_stats(container_id);
}
get_log({{ (execution.containers|first).id }});
get_stats({{ (execution.containers|first).id }});
</script>
{% endblock %}
\ No newline at end of file
......@@ -62,7 +62,7 @@
<td>{{ e[1].name }}</td>
<td>{{ e[1].status }}</td>
<td><script>format_timestamp({{ e[1].time_scheduled.timestamp() }})</script></td>
{% if e[1].time_started == 'None' %}
{% if e[1].time_started == None %}
<td>not yet</td>
{% else %}
<td><script>format_timestamp({{ e[1].time_started.timestamp() }})</script></td>
......@@ -71,6 +71,7 @@
{% if e[1].time_started != 'None' %}
<td><a href="{{ e[2] }}">Open</a></td>
{% endif %}
<td><a href="{{ url_for('web.execution_inspect', execution_id=e[1].id) }}">Inspect</a></td>
<td><a href="{{ url_for('web.execution_terminate', exec_id=e[1].id) }}">Terminate</a></td>
</tr>
{% endfor %}
......
......@@ -125,6 +125,12 @@ def gen_config_cmd(args):
zoeconf.write(open(args.output_file, "w"))
def container_stats_cmd(args):
client = get_zoe_client()
stats = client.container_stats(args.container_id)
print(stats)
def process_arguments() -> Namespace:
global argparser
argparser = ArgumentParser(description="Zoe - Container Analytics as a Service command-line client")
......@@ -204,6 +210,10 @@ def process_arguments() -> Namespace:
argparser_log_get.add_argument('output_file', help="Filename to create with default configuration")
argparser_log_get.set_defaults(func=gen_config_cmd)
argparser_container_stats = subparser.add_parser('container-stats', help="Retrieve statistics on a running container")
argparser_container_stats.add_argument('container_id', help="ID of the container")
argparser_container_stats.set_defaults(func=container_stats_cmd)
return argparser.parse_args()
......