Commit eca85cf3 authored by Daniele Venzano's avatar Daniele Venzano
Browse files

Add a web interface for the AML course

Users are authenticated via a text file. If they have the guest role, they are redirected to a page that starts the AML cluster via AJAX. The AJAX call is repeated periodically to refresh the status and to restart the cluster if needed.
parent e3a3d6b8
......@@ -15,6 +15,7 @@
"""The real API, exposed as web pages or REST API."""
from datetime import datetime, timedelta
import logging
import re
......@@ -57,7 +58,7 @@ class APIEndpoint:
ret = [e for e in execs if e.user_id == uid or role == 'admin']
return ret
def execution_start(self, uid, role_, exec_name, application_description):
def execution_start(self, uid, role_, exec_name, application_description) -> int:
"""Start an execution."""
try:
zoe_lib.applications.app_validate(application_description)
......@@ -159,9 +160,33 @@ class APIEndpoint:
all_execs = self.sql.execution_list()
for execution in all_execs:
if execution.status == execution.RUNNING_STATUS:
terminated = False
for service in execution.services:
if service.description['monitor'] and service.docker_status == service.DOCKER_DIE_STATUS or service.docker_status == service.DOCKER_DESTROY_STATUS:
log.info("Service {} of execution {} died, terminating execution".format(service.name, execution.id))
self.master.execution_terminate(execution.id)
terminated = True
break
if not terminated and execution.name == "aml-lab":
log.debug('Looking at AML execution {}...'.format(execution.id))
if datetime.now() - execution.time_start > timedelta(hours=get_conf().aml_ttl):
log.info('Terminating AML-LAB execution for user {}, timer expired'.format(execution.user_id))
self.master.execution_terminate(execution.id)
log.debug('Cleanup task finished')
def execution_endpoints(self, uid, role, execution: zoe_lib.sql_manager.Execution):
    """Collect service details and reachable endpoints for an execution.

    Returns a ``(services_info, endpoints)`` tuple. ``services_info`` holds the
    result of ``service_by_id`` for every service of the execution;
    ``endpoints`` holds one ``(name, url)`` tuple for each port that is flagged
    as exposed in the service description and has an entry in the service's
    port mappings.
    """
    collected_services = []
    collected_endpoints = []
    for svc in execution.services:
        collected_services.append(self.service_by_id(uid, role, svc.id))
        mappings = svc.ports
        for port_descr in svc.description['ports']:
            # Ports that are not explicitly exposed never produce an endpoint.
            if 'expose' not in port_descr or not port_descr['expose']:
                continue
            mapping_key = str(port_descr['port_number']) + "/tcp"
            # Skip ports for which no mapping is known.
            if mapping_key not in mappings:
                continue
            mapped = mappings[mapping_key]
            url = "".join([port_descr['protocol'], "://", mapped[0], ":", mapped[1], port_descr['path']])
            collected_endpoints.append((port_descr['name'], url))
    return collected_services, collected_endpoints
......@@ -20,6 +20,7 @@ from typing import List
import tornado.web
import zoe_api.web.start
import zoe_api.web.ajax
import zoe_api.web.executions
from zoe_lib.version import ZOE_API_VERSION, ZOE_VERSION
......@@ -40,7 +41,9 @@ def web_init(api_endpoint) -> List[tornado.web.URLSpec]:
tornado.web.url(r'/executions/restart/([0-9]+)', zoe_api.web.executions.ExecutionRestartWeb, route_args, name='execution_restart'),
tornado.web.url(r'/executions/terminate/([0-9]+)', zoe_api.web.executions.ExecutionTerminateWeb, route_args, name='execution_terminate'),
tornado.web.url(r'/executions/delete/([0-9]+)', zoe_api.web.executions.ExecutionDeleteWeb, route_args, name='execution_delete'),
tornado.web.url(r'/executions/inspect/([0-9]+)', zoe_api.web.executions.ExecutionInspectWeb, route_args, name='execution_inspect')
tornado.web.url(r'/executions/inspect/([0-9]+)', zoe_api.web.executions.ExecutionInspectWeb, route_args, name='execution_inspect'),
tornado.web.url(r'/ajax', zoe_api.web.ajax.AjaxEndpointWeb, route_args, name='ajax')
]
return web_routes
......
# Copyright (c) 2017, Daniele Venzano
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Ajax API for the Zoe web interface."""
import datetime
import json
from tornado.escape import json_decode
from zoe_lib.config import get_conf
import zoe_api.exceptions
from zoe_api.api_endpoint import APIEndpoint # pylint: disable=unused-import
from zoe_api.web.utils import get_auth, catch_exceptions
from zoe_api.web.custom_request_handler import ZoeRequestHandler
class AjaxEndpointWeb(ZoeRequestHandler):
    """Request handler for the AJAX calls issued by the AML guest home page.

    The request body is a JSON object whose ``type`` field selects the operation:

    * ``start``: make sure an execution named 'aml-lab' exists for the
      authenticated user and return its ID.
    * ``query_status``: report the status of the execution identified by
      ``exec_id``.

    Any other type yields an error response.
    """

    # Application description submitted when a new AML lab execution is started.
    AML_APP_DESCRIPTION_PATH = 'contrib/zoeapps/eurecom_aml_lab.json'
    # Name used both to start and to look up the AML lab execution.
    AML_EXECUTION_NAME = 'aml-lab'

    def initialize(self, **kwargs):
        """Initializes the request handler."""
        super().initialize(**kwargs)
        self.api_endpoint = kwargs['api_endpoint']  # type: APIEndpoint

    @catch_exceptions
    def post(self):
        """AJAX POST requests.

        Authenticates the caller, decodes the JSON body, dispatches on its
        ``type`` field and writes the resulting dictionary as the response.
        """
        uid, role = get_auth(self)
        request = json_decode(self.request.body)

        if request['type'] == 'start':
            response = self._start_aml_execution(uid, role)
        elif request['type'] == 'query_status':
            response = self._query_status(uid, role, request['exec_id'])
        else:
            response = {
                'status': 'error',
                'message': 'unknown request type'
            }

        self.write(response)

    def _start_aml_execution(self, uid, role):
        """Return the response for a 'start' request, starting the execution if none is listed."""
        executions = self.api_endpoint.execution_list(uid, role, name=self.AML_EXECUTION_NAME)
        if executions:
            exec_id = executions[0].id
        else:
            # Read the description only when it is needed, and close the file
            # deterministically instead of leaking the handle.
            with open(self.AML_APP_DESCRIPTION_PATH, 'r') as app_file:
                app_descr = json.load(app_file)
            exec_id = self.api_endpoint.execution_start(uid, role, self.AML_EXECUTION_NAME, app_descr)

        return {
            'status': 'ok',
            'execution_id': exec_id
        }

    def _query_status(self, uid, role, exec_id):
        """Return the response for a 'query_status' request.

        An unknown execution is reported with ``exec_status`` set to 'none'.
        For a running execution the response also carries ``ttl`` (seconds left
        before the configured AML time-to-live expires) and ``endpoints``.
        An execution in error or terminated state is deleted.
        """
        try:
            execution = self.api_endpoint.execution_by_id(uid, role, exec_id)
        except zoe_api.exceptions.ZoeNotFoundException:
            return {
                'status': 'ok',
                'exec_status': 'none'
            }

        response = {
            'status': 'ok',
            'exec_status': execution.status
        }
        if execution.status == execution.RUNNING_STATUS:
            expiry = execution.time_start + datetime.timedelta(hours=get_conf().aml_ttl)
            response['ttl'] = (expiry - datetime.datetime.now()).total_seconds()
            _, endpoints = self.api_endpoint.execution_endpoints(uid, role, execution)
            response['endpoints'] = endpoints
        elif execution.status in (execution.ERROR_STATUS, execution.TERMINATED_STATUS):
            # Remove the finished execution; the client treats these statuses
            # as a signal to go back to its initial state.
            self.api_endpoint.execution_delete(uid, role, execution.id)
        return response
......@@ -129,17 +129,7 @@ class ExecutionInspectWeb(ZoeRequestHandler):
e = self.api_endpoint.execution_by_id(uid, role, execution_id)
services_info = []
endpoints = []
for service in e.services:
services_info.append(self.api_endpoint.service_by_id(uid, role, service.id))
port_mappings = service.ports
for port in service.description['ports']:
if 'expose' in port and port['expose']:
port_number = str(port['port_number']) + "/tcp"
if port_number in port_mappings:
endpoint = port['protocol'] + "://" + port_mappings[port_number][0] + ":" + port_mappings[port_number][1] + port['path']
endpoints.append((port['name'], endpoint))
services_info, endpoints = self.api_endpoint.execution_endpoints(uid, role, e)
template_vars = {
"e": e,
......
......@@ -48,6 +48,9 @@ class HomeWeb(ZoeRequestHandler):
"""Home page with authentication."""
uid, role = get_auth(self)
if role == 'guest':
return self._aml_homepage(uid)
executions = self.api_endpoint.execution_list(uid, role)
template_vars = {
......@@ -56,14 +59,10 @@ class HomeWeb(ZoeRequestHandler):
}
self.render('home_user.html', **template_vars)
class HomeGuestWeb(ZoeRequestHandler):
"""Handler for guest accesses."""
@catch_exceptions
def get(self):
"""Home page with demo/code access."""
def _aml_homepage(self, uid):
"""Home page for students of the AML course."""
template_vars = {
'kind': 'normal'
'uid': uid
}
return self.render('home_guest.html', **template_vars)
......
This diff is collapsed.
......@@ -62,84 +62,84 @@ span.fakelink {
}
#wrapper {
width: 800px;
width: 800px;
}
#navigation {
background-color: #fff;
border: #ddd 1px solid;
border-radius: 10px;
margin: 10px;
padding: 10px;
background-color: #fff;
border: #ddd 1px solid;
border-radius: 10px;
margin: 10px;
padding: 10px;
}
#navigation li {
margin: 2px 0;
margin: 2px 0;
}
label.error {
color: #ff0000;
margin-left: 10px;
position: relative;
color: #ff0000;
margin-left: 10px;
position: relative;
}
.wizard {
background-color: #fff;
border: #ddd 1px solid;
border-radius: 10px;
margin: 10px;
padding: 10px;
background-color: #fff;
border: #ddd 1px solid;
border-radius: 10px;
margin: 10px;
padding: 10px;
}
.wizard .wizard-header {
background-color: #f4f4f4;
border-bottom: #ddd 1px solid;
border-top-left-radius: 10px;
border-top-right-radius: 10px;
padding: 5px 10px;
margin: 0 0 10px 0;
background-color: #f4f4f4;
border-bottom: #ddd 1px solid;
border-top-left-radius: 10px;
border-top-right-radius: 10px;
padding: 5px 10px;
margin: 0 0 10px 0;
}
.wizard .wizard-step {
margin: 10px 0;
margin: 10px 0;
}
.wizard .wizard-step p {
padding: 5px;
padding: 5px;
}
.navigation {
border-top: #ddd 1px solid;
margin-top: 10px;
padding-top: 10px;
border-top: #ddd 1px solid;
margin-top: 10px;
padding-top: 10px;
}
.navigation ul {
margin: 0;
padding: 0;
list-style: none;
margin: 0;
padding: 0;
list-style: none;
}
.navigation li {
float: left;
margin-right: 10px;
float: left;
margin-right: 10px;
}
.clearfix:before, .clearfix:after {
content: "\0020";
display: block;
height: 0;
visibility: hidden;
content: "\0020";
display: block;
height: 0;
visibility: hidden;
}
.clearfix:after {
clear: both;
clear: both;
}
input {
margin-top: 5px;
margin-top: 5px;
}
section {
padding-bottom: 10px;
}
\ No newline at end of file
}
......@@ -3,44 +3,143 @@
{% block title %}Home{% endblock %}
{% block custom_head %}
{% if refresh > 0 %}
<meta http-equiv="refresh" content="{{ refresh }}">
{% endif %}
<script type="application/javascript">
// URL the AJAX requests are POSTed to; filled in when the template is rendered.
const AJAX_URL = "{{ reverse_url('ajax') }}";
// Polling period (milliseconds) used once the state machine is in the "started" state.
const SLOW_UPDATE = 60000;
// Polling period (milliseconds) set when the state machine enters the "init" state.
const FAST_UPDATE = 1000;
// Handle returned by setInterval() for the current status-polling timer.
let update_interval = null;
/**
 * POST a JSON-encoded request to the Zoe AJAX endpoint.
 *
 * @param {Object} data - Request payload; serialized with JSON.stringify.
 * @param {Function} success_cb - Called with the parsed JSON response on success.
 * @param {Function} [error_cb] - Optional handler for communication failures.
 *     When omitted (or not a function) a generic error message is shown, as before.
 */
function ajax(data, success_cb, error_cb) {
    // Callers such as state_machine() pass their own error handler; honour it
    // instead of silently dropping the extra argument.
    if (typeof error_cb !== 'function') {
        error_cb = function () {
            show_error('AJAX communication error, the operation will be retried');
        };
    }
    $.ajax({
        url: AJAX_URL,
        type: 'POST',
        data: JSON.stringify(data),
        contentType: 'application/json; charset=utf-8',
        dataType: 'json',
        async: true,
        success: success_cb,
        error: error_cb
    });
}
/**
 * Display an error message in the #ajax-error element and make it visible.
 *
 * @param {string} msg - Text appended to the "Error: " prefix.
 */
function show_error(msg) {
    $("#ajax-error").text("Error: " + msg).show();
}
// Current state of the page state machine: "init", "starting" or "started".
let state = "init";
// ID of the execution being tracked; a negative value means none is known yet.
let execution_id = -1;
/**
 * Apply the side effects of the current value of `state`:
 *  - "init": poll at FAST_UPDATE, show the init box and send a 'start' request;
 *  - "starting": show the starting box;
 *  - "started": poll at SLOW_UPDATE and show the started box.
 */
function state_machine() {
    const boxes = ["#state-init", "#state-starting", "#state-started"];

    // Show the requested box and hide the other two, in document order.
    function show_only(selected) {
        for (const box of boxes) {
            $(box).toggle(box === selected);
        }
    }

    // Replace the current polling timer with one firing every `period` ms.
    function restart_polling(period) {
        clearInterval(update_interval);
        update_interval = setInterval(() => { update_zoe_status(); }, period);
    }

    switch (state) {
        case "init":
            restart_polling(FAST_UPDATE);
            show_only("#state-init");
            ajax({'type': 'start'},
                function (reply) {
                    if (reply['status'] == 'ok') {
                        $("#ajax-error").hide();
                        state = "starting";
                        execution_id = reply['execution_id'];
                    } else {
                        show_error(reply.message);
                    }
                },
                function () {
                    show_error('AJAX communication error, the operation will be retried');
                }
            );
            break;
        case "starting":
            show_only("#state-starting");
            break;
        case "started":
            restart_polling(SLOW_UPDATE);
            show_only("#state-started");
            break;
    }
}
/**
 * Refresh the execution status and then run the state machine.
 *
 * With no known execution (negative execution_id) the state is reset to
 * "init". Otherwise a 'query_status' request is sent; when the reply arrives:
 *  - 'running' switches to "started" and updates the remaining time and the
 *    endpoint list;
 *  - 'terminated', 'none' or 'error' switches back to "init".
 * state_machine() is called right after the request is issued, i.e. before
 * the asynchronous reply is processed.
 */
function update_zoe_status() {
    if (execution_id < 0) {
        $("#zoe-status").text('off');
        state = "init";
    } else {
        ajax({'type': 'query_status', 'exec_id': execution_id},
            function (data) {
                if (data['status'] != 'ok') {
                    show_error(data.message);
                    return;
                }
                $("#ajax-error").hide();
                $("#zoe-status").text(data['exec_status']);
                if (data['exec_status'] == 'running') {
                    state = "started";
                    $('#time_remaining').text(moment.duration(data['ttl'] * 1000).humanize());
                    // Build the list from DOM nodes rather than concatenated
                    // HTML, so endpoint names and URLs are inserted as text and
                    // attribute values and can never be parsed as markup.
                    let endpoint_list = $("#endpoints").empty();
                    for (let ep of (data['endpoints'] || [])) {
                        $("<li>")
                            .append($("<a>").attr("href", ep[1]).text(ep[0]))
                            .appendTo(endpoint_list);
                    }
                } else if (data['exec_status'] == 'terminated' || data['exec_status'] == 'none' || data['exec_status'] == 'error') {
                    state = "init";
                }
            }
        );
    }
    state_machine();
}
// Run the state machine once when the script is evaluated; `state` is "init" at this point.
state_machine();
</script>
<style>
body {
width: 80%;
}
.state-box {
border: 1px solid black;
margin-top: 2em;
margin-bottom: 2em;
width: 40%;
padding-left: 10px;
padding-right: 10px;
}
</style>
{% endblock %}
{% block content %}
<h2>Guest cluster access page</h2>
<h2>Algorithmic Machine Learning cluster management</h2>
{% if request.remote_addr != gateway_ip %}
<p>You are logged in as {{ uid }}.</p>
<p>To access your Jupyter Notebook and your Spark cluster, you need to open a browser with a socks proxy configured. To do that, use the following commands.</p>
<p>Through this page you will be able to access the Jupyter notebook web interface, which you will use to upload and work on the notebooks provided on the <a href="https://github.com/DistributedSystemsGroup/Algorithmic-Machine-Learning">Algorithmic Machine Learning course GitHub page</a>.</p>
<p>On <b>Linux</b> copy and paste this command in a terminal window:</p>
<pre>google-chrome --proxy-server={{ user_gateway }} --user-data-dir=/tmp/chrome-zoe-$USER {{ request.url }}</pre>
<p>The work environment contains also an Apache Spark cluster and is created dynamically when you first access this page. After a fixed amount of time, the resources are freed and the Notebook and Spark are terminated. The files you saved in your workspace will be available for your next session.</p>
<p>On <b>Windows</b> copy and paste this command in a cmd window (Start & cmd):</p>
<pre>cd +chrome install directory+
chrome.exe --proxy-server={{ user_gateway }} --user-data-dir=%TEMP%\chrome-zoe {{ request.url }}</pre>
<span style="color: darkred; display: none;" id="ajax-error">AJAX communication error, retrying...</span>
{% else %}
<div class="state-box">
<div id="state-init">
<p>Checking your cluster status...</p>
</div>
{% if execution_urls|length == 0 %}
<p><b>Please wait</b>, this page will refresh automatically.<br>
When this message disappears your cluster will be ready to use.</p>
{% endif %}
<div id="state-starting" style="display: none;">
<p>Please wait, your cluster is <span id="zoe-status">...</span></p>
</div>
{% if execution_status == "running" and execution_urls|length > 0 %}
<ul>
{% for url in execution_urls|sort(0) %}
{% if url[0] == "Jupyter Notebook interface" %}
<li><span style="font-size: larger; padding-bottom: 10px; display: inline-block"><a href="{{ url[1] }}">{{ url[0] }}</a> &larr; Start here</span></li>
{% else %}
<li><span style="font-size: smaller"><a href="{{ url[1] }}">{{ url[0] }}</a></span></li>
{% endif %}
{% endfor %}
</ul>
{% endif %}
<div id="state-started" style="display: none;">
<p>Your cluster is running, it will be destroyed in about <span id="time_remaining"></span></p>
<ul id="endpoints"></ul>
</div>
</div>
<p>Useful resources:</p>
<ul>
......@@ -48,5 +147,4 @@ chrome.exe --proxy-server={{ user_gateway }} --user-data-dir=%TEMP%\chrome-zoe {
<li><a href="https://spark.apache.org/docs/1.5.2/api/python/index.html">Spark Python API</a></li>
</ul>
{% endif %}
{% endblock %}
......@@ -5,7 +5,6 @@
<p>Welcome to Zoe</p>
<ul>
<li><a href="{{ reverse_url('home_user') }}">Guest start page</a></li>
<li><a href="{{ reverse_url('home_user') }}">User start page</a></li>
</ul>
......
......@@ -70,9 +70,14 @@ def get_auth(handler: ZoeRequestHandler):
auth_decoded = base64.decodebytes(bytes(auth_header[6:], 'ascii')).decode('utf-8')
username, password = auth_decoded.split(':', 2)
if get_conf().auth_type == 'text':
authenticator = PlainTextAuthenticator() # type: BaseAuthenticator
elif get_conf().auth_type == 'ldap':
# First of all try to authenticate against a fixed list of users in a text file
authenticator = PlainTextAuthenticator() # type: BaseAuthenticator
uid, role = authenticator.auth(username, password)
if uid is not None:
return uid, role
# If it fails, continue with the normal authentication
if get_conf().auth_type == 'ldap':
authenticator = LDAPAuthenticator()
else:
raise zoe_api.exceptions.ZoeException('Configuration error, unknown authentication method: {}'.format(get_conf().auth_type))
......
......@@ -92,6 +92,8 @@ def load_configuration(test_conf=None):
argparser.add_argument('--docker-tls-key', help='Docker TLS private key file', default='key.pem')
argparser.add_argument('--docker-tls-ca', help='Docker TLS CA certificate file', default='ca.pem')
argparser.add_argument('--aml-ttl', help='TimeToLive in hours for AML executions', type=int, default=4)
opts = argparser.parse_args()
if opts.debug:
argparser.print_values()
......
......@@ -137,7 +137,7 @@ class SwarmClient:
manager = url
else:
raise ZoeLibException('Unsupported URL scheme for Swarm')
log.debug('Connecting to Swarm at {}'.format(manager))
# log.debug('Connecting to Swarm at {}'.format(manager))
self.cli = docker.Client(base_url=manager, version="auto", tls=tls)
def info(self) -> SwarmStats:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment