...
 
Commits (19)
variables:
# Configure postgres service (https://hub.docker.com/_/postgres/)
POSTGRES_DB: zoe
POSTGRES_USER: zoeuser
POSTGRES_PASSWORD: zoepass
static_test:
image: python:3.4
script:
- pip install -r requirements.txt
- pip install -r requirements_tests.txt
- chmod 755 run_tests.sh && ./run_tests.sh
SonarQube:
image: zoerepo/sonar-scanner
variables:
SONAR_PROJECT_KEY: "zoe.$CI_BUILD_REF_NAME"
SONAR_BRANCH: "$CI_BUILD_REF_NAME"
SONAR_PROJECT_NAME: "$CI_PROJECT_NAME"
SONAR_PROJECT_VERSION: "$CI_BUILD_REF"
SONARQUBE_SERVER_URL: "$SONARQUBE_SERVER_URL"
script:
- /usr/bin/sonar-scanner-run.sh
#!groovy
node('docker-trusty') {
stage('Cleanup') {
step([$class: 'WsCleanup'])
}
stage('Checkout') {
checkout scm
sshagent(['67aa0a6c-64d5-40bd-ae9f-4ff66de97411']) {
sh "git checkout ${BRANCH_NAME}"
}
}
stage('Install prerequisites') {
sh "virtualenv venv"
sh """
. venv/bin/activate
pip install -r requirements.txt
"""
sh """
. venv/bin/activate
pip install -r requirements_tests.txt
"""
}
stage('Test') {
try {
sh """
. venv/bin/activate
bash ./run_tests.sh
"""
currentBuild.result = 'SUCCESS'
} catch (Exception err) {
currentBuild.result = 'FAILURE'
}
}
stage('Push') {
try {
if (currentBuild.result == 'SUCCESS') {
echo "Test successful, pushing to github"
sshagent(['67aa0a6c-64d5-40bd-ae9f-4ff66de97411']) {
sh "git remote add github git@github.com:DistributedSystemsGroup/zoe.git"
sh "git branch -a"
sh "git push github HEAD"
sh "git push github --tags"
}
} else {
echo "Build failed, no push"
}
} catch (Exception err) {
currentBuild.result = 'FAILURE'
}
}
stage('Notification and cleanup') {
echo "Sending notifications"
step([$class: 'Mailer', notifyEveryUnstableBuild: true, recipients: 'daniele.venzano@eurecom.fr', sendToIndividuals: true])
step([$class: 'GitHubCommitStatusSetter', errorHandlers: [[$class: 'ChangingBuildStatusErrorHandler', result: 'FAILURE']], reposSource: [$class: 'ManuallyEnteredRepositorySource', url: 'https://github.com/DistributedSystemsGroup/zoe.git']])
}
}
Zoe - Container-based Analytics as a Service
============================================
Note: this branch contains Zoe 0.10.2 with changes related to the AML course for the spring semester.
Zoe provides a simple way to provision data analytics applications using Docker Swarm.
Resources:
......
# This Dockerfile is used to build an image for testing Zoe with Jenkins and the Docker slave plugin
FROM ubuntu:trusty
MAINTAINER Daniele Venzano <venza@brownhat.org>
RUN locale-gen en_US.UTF-8 && \
apt-get -q update && \
DEBIAN_FRONTEND="noninteractive" apt-get -q upgrade -y --no-install-recommends && \
DEBIAN_FRONTEND="noninteractive" apt-get -q install -y --no-install-recommends openssh-server openjdk-7-jre-headless python3 python3-pip git python3-dev build-essential libpq-dev && \
apt-get -q autoremove && \
apt-get -q clean -y && rm -rf /var/lib/apt/lists/* && rm -f /var/cache/apt/*.bin && \
sed -i 's|session required pam_loginuid.so|session optional pam_loginuid.so|g' /etc/pam.d/sshd && \
mkdir -p /var/run/sshd
ENV LANG en_US.UTF-8
ENV LANGUAGE en_US:en
ENV LC_ALL en_US.UTF-8
# Set user jenkins to the image
RUN useradd -m -d /home/jenkins -s /bin/sh jenkins && \
echo "jenkins:jenkins" | chpasswd
RUN pip3 install --upgrade pip virtualenv
RUN printf "\nStrictHostKeyChecking no\n" >> /etc/ssh/ssh_config
# Standard SSH port
EXPOSE 22
# Default command
CMD ["/usr/sbin/sshd", "-D"]
FROM java:alpine
ENV SONAR_SCANNER_VERSION 2.8
RUN apk add --no-cache wget && \
wget https://sonarsource.bintray.com/Distribution/sonar-scanner-cli/sonar-scanner-${SONAR_SCANNER_VERSION}.zip && \
unzip sonar-scanner-${SONAR_SCANNER_VERSION} && \
cd /usr/bin && ln -s /sonar-scanner-${SONAR_SCANNER_VERSION}/bin/sonar-scanner sonar-scanner && \
apk del wget
RUN apk add --no-cache python3 curl && rm -f /var/cache/apk/*
RUN curl https://bootstrap.pypa.io/get-pip.py | python3
RUN pip install pylint
COPY sonar-scanner-run.sh /usr/bin
RUN chmod 755 /usr/bin/sonar-scanner-run.sh
#!/bin/sh
if [ -z "${SONAR_PROJECT_KEY}" ]; then
echo "Undefined \"projectKey\"" && exit 1
else
COMMAND="sonar-scanner -Dsonar.sourceEncoding=UTF-8 -Dsonar.sources=. -Dsonar.exclusions=\"zoe_api/web/static/**\" -Dsonar.host.url=\"$SONARQUBE_SERVER_URL\" -Dsonar.login=\"$SONARQUBE_USER\" -Dsonar.password=\"$SONARQUBE_PASSWORD\" -Dsonar.projectKey=\"${SONAR_PROJECT_KEY}\""
if [ ! -z "${SONAR_PROJECT_VERSION}" ]; then
COMMAND="$COMMAND -Dsonar.projectVersion=\"${SONAR_PROJECT_VERSION}\""
fi
if [ ! -z "${SONAR_PROJECT_NAME}" ]; then
COMMAND="$COMMAND -Dsonar.projectName=\"${SONAR_PROJECT_NAME}\""
fi
if [ ! -z ${CI_BUILD_REF} ]; then
COMMAND="$COMMAND -Dsonar.gitlab.commit_sha=\"${CI_BUILD_REF}\""
fi
if [ ! -z ${CI_BUILD_REF_NAME} ]; then
COMMAND="$COMMAND -Dsonar.gitlab.ref_name=\"${CI_BUILD_REF_NAME}\""
fi
if [ ! -z ${SONAR_BRANCH} ]; then
COMMAND="$COMMAND -Dsonar.branch=\"${SONAR_BRANCH}\""
fi
eval ${COMMAND}
fi
{
"name": "aml-lab",
"priority": 512,
"requires_binary": false,
"requires_binary": true,
"services": [
{
"docker_image": "192.168.45.252:5000/zoerepo/spark-master",
"docker_image": "docker-registry:5000/zapps/spark2-master",
"environment": [
[
"SPARK_MASTER_IP",
"spark-master-{execution_name}-{user_name}-{deployment_name}-zoe.{user_name}-{deployment_name}-zoe"
"{dns_name#self}"
],
[
"HADOOP_USER_NAME",
"{user_name}"
],
[
"PYTHONHASHSEED",
"42"
]
],
"essential_count": 1,
"monitor": false,
"name": "spark-master",
"networks": [],
"ports": [
{
"is_main_endpoint": false,
"expose": true,
"is_main_endpoint": true,
"name": "Spark master web interface",
"path": "/",
"port_number": 8080,
......@@ -24,11 +34,13 @@
}
],
"required_resources": {
"memory": 536870912
}
"memory": 2147483648
},
"startup_order": 0,
"total_count": 1
},
{
"docker_image": "192.168.45.252:5000/zoerepo/spark-worker",
"docker_image": "docker-registry:5000/zapps/spark2-worker",
"environment": [
[
"SPARK_WORKER_CORES",
......@@ -36,19 +48,28 @@
],
[
"SPARK_WORKER_RAM",
"11273240064"
"16106127360"
],
[
"SPARK_MASTER_IP",
"spark-master-{execution_name}-{user_name}-{deployment_name}-zoe.{user_name}-{deployment_name}-zoe"
"{dns_name#spark-master0}"
],
[
"SPARK_LOCAL_IP",
"spark-worker0-{execution_name}-{user_name}-{deployment_name}-zoe.{user_name}-{deployment_name}-zoe"
"{dns_name#self}"
],
[
"PYTHONHASHSEED",
"42"
],
[
"HADOOP_USER_NAME",
"{user_name}"
]
],
"essential_count": 1,
"monitor": false,
"name": "spark-worker0",
"name": "spark-worker",
"networks": [],
"ports": [
{
......@@ -60,33 +81,44 @@
}
],
"required_resources": {
"memory": 12884901888
}
"memory": 17179869184
},
"startup_order": 1,
"total_count": 2
},
{
"docker_image": "192.168.45.252:5000/zoerepo/spark-jupyter-notebook",
"docker_image": "docker-registry:5000/zapps/spark2-jupyter-notebook",
"environment": [
[
"SPARK_MASTER",
"spark://spark-master-{execution_name}-{user_name}-{deployment_name}-zoe.{user_name}-{deployment_name}-zoe:7077"
"spark://{dns_name#spark-master0}:7077"
],
[
"SPARK_EXECUTOR_RAM",
"11273240064"
"16106127360"
],
[
"SPARK_DRIVER_RAM",
"2147483648"
"7516192768"
],
[
"HADOOP_USER_NAME",
"{user_name}"
],
[
"NB_USER",
"{user_name}"
],
[
"PYTHONHASHSEED",
"42"
],
[
"NAMENODE_HOST",
"hdfs-namenode.hdfs"
"hdfs-namenode.zoe"
]
],
"essential_count": 1,
"monitor": true,
"name": "spark-jupyter",
"networks": [],
......@@ -99,6 +131,7 @@
"protocol": "http"
},
{
"expose": true,
"is_main_endpoint": true,
"name": "Jupyter Notebook interface",
"path": "/",
......@@ -107,46 +140,13 @@
}
],
"required_resources": {
"memory": 4294967296
}
},
{
"docker_image": "192.168.45.252:5000/zoerepo/spark-worker",
"environment": [
[
"SPARK_WORKER_CORES",
"6"
],
[
"SPARK_WORKER_RAM",
"11273240064"
],
[
"SPARK_MASTER_IP",
"spark-master-{execution_name}-{user_name}-{deployment_name}-zoe.{user_name}-{deployment_name}-zoe"
],
[
"SPARK_LOCAL_IP",
"spark-worker1-{execution_name}-{user_name}-{deployment_name}-zoe.{user_name}-{deployment_name}-zoe"
]
],
"monitor": false,
"name": "spark-worker1",
"networks": [],
"ports": [
{
"is_main_endpoint": false,
"name": "Spark worker web interface",
"path": "/",
"port_number": 8081,
"protocol": "http"
}
],
"required_resources": {
"memory": 12884901888
}
"memory": 8589934592
},
"startup_order": 0,
"total_count": 1
}
],
"version": 1,
"version": 2,
"will_end": false
}
{
"name": "aml-lab-safran",
"priority": 512,
"requires_binary": true,
"services": [
{
"docker_image": "docker-registry:5000/zapps/jupyter-notebook:2609",
"environment": [
[
"NB_USER",
"{user_name}"
]
],
"essential_count": 1,
"monitor": true,
"name": "jupyter-notebook",
"networks": [],
"ports": [
{
"expose": true,
"is_main_endpoint": true,
"name": "Jupyter Notebook interface",
"path": "/",
"port_number": 8888,
"protocol": "http"
}
],
"required_resources": {
"memory": 8589934592
},
"startup_order": 0,
"total_count": 1,
"volumes": [
["/mnt/cephfs/docker-volumes/safran_data", "/mnt/safran", true]
]
}
],
"version": 2,
"will_end": false
}
......@@ -15,6 +15,7 @@
"""The real API, exposed as web pages or REST API."""
from datetime import datetime, timedelta
import logging
import re
......@@ -57,7 +58,7 @@ class APIEndpoint:
ret = [e for e in execs if e.user_id == uid or role == 'admin']
return ret
def execution_start(self, uid, role_, exec_name, application_description):
def execution_start(self, uid, role_, exec_name, application_description) -> int:
"""Start an execution."""
try:
zoe_lib.applications.app_validate(application_description)
......@@ -159,9 +160,33 @@ class APIEndpoint:
all_execs = self.sql.execution_list()
for execution in all_execs:
if execution.status == execution.RUNNING_STATUS:
terminated = False
for service in execution.services:
if service.description['monitor'] and service.docker_status == service.DOCKER_DIE_STATUS or service.docker_status == service.DOCKER_DESTROY_STATUS:
log.info("Service {} of execution {} died, terminating execution".format(service.name, execution.id))
self.master.execution_terminate(execution.id)
terminated = True
break
if not terminated and execution.name == "aml-lab":
log.debug('Looking at AML execution {}...'.format(execution.id))
if datetime.now() - execution.time_start > timedelta(hours=get_conf().aml_ttl):
log.info('Terminating AML-LAB execution for user {}, timer expired'.format(execution.user_id))
self.master.execution_terminate(execution.id)
log.debug('Cleanup task finished')
def execution_endpoints(self, uid: str, role: str, execution: zoe_lib.sql_manager.Execution):
"""Return a list of the services and public endpoints available for a certain execution."""
services_info = []
endpoints = []
for service in execution.services:
services_info.append(self.service_by_id(uid, role, service.id))
port_mappings = service.ports
for port in service.description['ports']:
if 'expose' in port and port['expose']:
port_number = str(port['port_number']) + "/tcp"
if port_number in port_mappings:
endpoint = port['protocol'] + "://" + port_mappings[port_number][0] + ":" + port_mappings[port_number][1] + port['path']
endpoints.append((port['name'], endpoint))
return services_info, endpoints
......@@ -57,7 +57,7 @@ def zoe_web_main() -> int:
app_settings = {
'static_path': os.path.join(os.path.dirname(__file__), "web", "static"),
'template_path': os.path.join(os.path.dirname(__file__), "web", "templates"),
# 'debug': args.debug
'debug': args.debug
}
app = Application(zoe_api.web.web_init(api_endpoint) + zoe_api.rest_api.api_init(api_endpoint), **app_settings)
JinjaApp.init_app(app)
......
......@@ -19,7 +19,7 @@ from typing import List
import tornado.web
from zoe_api.rest_api.execution import ExecutionAPI, ExecutionCollectionAPI, ExecutionDeleteAPI
from zoe_api.rest_api.execution import ExecutionAPI, ExecutionCollectionAPI, ExecutionDeleteAPI, ExecutionEndpointsAPI
from zoe_api.rest_api.info import InfoAPI
from zoe_api.rest_api.service import ServiceAPI, ServiceLogsAPI
from zoe_api.rest_api.discovery import DiscoveryAPI
......@@ -41,6 +41,7 @@ def api_init(api_endpoint) -> List[tornado.web.URLSpec]:
tornado.web.url(API_PATH + r'/execution/([0-9]+)', ExecutionAPI, route_args),
tornado.web.url(API_PATH + r'/execution/delete/([0-9]+)', ExecutionDeleteAPI, route_args),
tornado.web.url(API_PATH + r'/execution/endpoints/([0-9]+)', ExecutionEndpointsAPI, route_args),
tornado.web.url(API_PATH + r'/execution', ExecutionCollectionAPI, route_args),
tornado.web.url(API_PATH + r'/service/([0-9]+)', ServiceAPI, route_args),
......
......@@ -130,3 +130,29 @@ class ExecutionCollectionAPI(RequestHandler):
def data_received(self, chunk):
"""Not implemented as we do not use stream uploads"""
pass
class ExecutionEndpointsAPI(RequestHandler):
"""The ExecutionEndpoints API endpoint."""
def initialize(self, **kwargs):
"""Initializes the request handler."""
self.api_endpoint = kwargs['api_endpoint'] # type: APIEndpoint
@catch_exceptions
def get(self, execution_id: int):
"""
Get a list of execution endpoints.
:param execution_id: the execution to be deleted
"""
uid, role = get_auth(self)
execution = self.api_endpoint.execution_by_id(uid, role, execution_id)
services_, endpoints = self.api_endpoint.execution_endpoints(uid, role, execution)
self.write({'endpoints': endpoints})
def data_received(self, chunk):
"""Not implemented as we do not use stream uploads"""
pass
......@@ -20,18 +20,15 @@ from typing import List
import tornado.web
import zoe_api.web.start
import zoe_api.web.ajax
import zoe_api.web.executions
from zoe_lib.version import ZOE_API_VERSION, ZOE_VERSION
def web_init(api_endpoint) -> List[tornado.web.URLSpec]:
"""Flask init for the web interface."""
# def before_request():
# """Use the Flask global to hold the api endpoint reference."""
# g.api_endpoint = api_endpoint
"""Tornado init for the web interface."""
# web_bp = Blueprint('web', __name__, template_folder='templates', static_folder='static')
route_args = {
'api_endpoint': api_endpoint
}
......@@ -44,7 +41,9 @@ def web_init(api_endpoint) -> List[tornado.web.URLSpec]:
tornado.web.url(r'/executions/restart/([0-9]+)', zoe_api.web.executions.ExecutionRestartWeb, route_args, name='execution_restart'),
tornado.web.url(r'/executions/terminate/([0-9]+)', zoe_api.web.executions.ExecutionTerminateWeb, route_args, name='execution_terminate'),
tornado.web.url(r'/executions/delete/([0-9]+)', zoe_api.web.executions.ExecutionDeleteWeb, route_args, name='execution_delete'),
tornado.web.url(r'/executions/inspect/([0-9]+)', zoe_api.web.executions.ExecutionInspectWeb, route_args, name='execution_inspect')
tornado.web.url(r'/executions/inspect/([0-9]+)', zoe_api.web.executions.ExecutionInspectWeb, route_args, name='execution_inspect'),
tornado.web.url(r'/ajax', zoe_api.web.ajax.AjaxEndpointWeb, route_args, name='ajax')
]
return web_routes
......
# Copyright (c) 2017, Daniele Venzano
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Ajax API for the Zoe web interface."""
import datetime
import json
from tornado.escape import json_decode
from zoe_lib.config import get_conf
import zoe_api.exceptions
from zoe_api.api_endpoint import APIEndpoint # pylint: disable=unused-import
from zoe_api.web.utils import get_auth, catch_exceptions
from zoe_api.web.custom_request_handler import ZoeRequestHandler
class AjaxEndpointWeb(ZoeRequestHandler):
"""Handler class"""
def initialize(self, **kwargs):
"""Initializes the request handler."""
super().initialize(**kwargs)
self.api_endpoint = kwargs['api_endpoint'] # type: APIEndpoint
@catch_exceptions
def post(self):
"""AJAX POST requests."""
uid, role = get_auth(self)
request = json_decode(self.request.body)
if request['type'] == 'start':
app_descr = json.load(open('contrib/zoeapps/eurecom_aml_lab.json', 'r'))
execution = self.api_endpoint.execution_list(uid, role, name='aml-lab')
if len(execution) == 0:
exec_id = self.api_endpoint.execution_start(uid, role, 'aml-lab', app_descr)
pass
else:
execution = execution[0]
exec_id = execution.id
response = {
'status': 'ok',
'execution_id': exec_id
}
elif request['type'] == 'query_status':
try:
execution = self.api_endpoint.execution_by_id(uid, role, request['exec_id'])
except zoe_api.exceptions.ZoeNotFoundException:
response = {
'status': 'ok',
'exec_status': 'none'
}
else:
response = {
'status': 'ok',
'exec_status': execution.status
}
if execution.status == execution.RUNNING_STATUS:
response['ttl'] = ((execution.time_start + datetime.timedelta(hours=get_conf().aml_ttl)) - datetime.datetime.now()).total_seconds()
services_info_, endpoints = self.api_endpoint.execution_endpoints(uid, role, execution)
response['endpoints'] = endpoints
elif execution.status == execution.ERROR_STATUS or execution.status == execution.TERMINATED_STATUS:
self.api_endpoint.execution_delete(uid, role, execution.id)
else:
response = {
'status': 'error',
'message': 'unknown request type'
}
self.write(response)
......@@ -21,6 +21,7 @@
import json
import datetime
import logging
from jinja2 import Environment, FileSystemLoader, Markup
......@@ -28,9 +29,10 @@ from tornado.escape import squeeze, linkify, url_escape, xhtml_escape
import tornado.web
import zoe_lib.version
import zoe_api.web.utils
log = logging.getLogger(__name__)
class JinjaApp(object):
"""A Jinja2-capable Tornado application."""
......@@ -122,6 +124,7 @@ class ZoeRequestHandler(tornado.web.RequestHandler):
try:
html = self._render(template, **kwargs)
except Exception:
log.exception('Jinja2 template exception')
zoe_api.web.utils.error_page(self, 'Jinja2 template exception', 500)
return
self.finish(html)
......
......@@ -129,12 +129,11 @@ class ExecutionInspectWeb(ZoeRequestHandler):
e = self.api_endpoint.execution_by_id(uid, role, execution_id)
services_info = []
for service in e.services:
services_info.append(self.api_endpoint.service_by_id(uid, role, service.id))
services_info, endpoints = self.api_endpoint.execution_endpoints(uid, role, e)
template_vars = {
"e": e,
"endpoints": endpoints,
"services_info": services_info
}
self.render('execution_inspect.html', **template_vars)
......@@ -48,38 +48,41 @@ class HomeWeb(ZoeRequestHandler):
"""Home page with authentication."""
uid, role = get_auth(self)
if role == 'user' or role == 'admin':
executions = self.api_endpoint.execution_list(uid, role)
if role == 'guest':
return self._aml_homepage(uid)
template_vars = {
'executions': sorted(executions, key=lambda e: e.id),
'is_admin': role == 'admin',
}
self.render('home_user.html', **template_vars)
else:
template_vars = {
'refresh': randint(2, 8),
'execution_status': 'Please wait...',
'execution_urls': [],
}
executions = self.api_endpoint.execution_list(uid, role)
template_vars = {
'executions': sorted(executions, key=lambda e: e.id),
'is_admin': role == 'admin',
}
self.render('home_user.html', **template_vars)
def _aml_homepage(self, uid):
"""Home page for students of the AML course."""
template_vars = {
'uid': uid
}
return self.render('home_guest.html', **template_vars)
app_descr = json.load(open('contrib/zoeapps/eurecom_aml_lab.json', 'r'))
execution = self.api_endpoint.execution_list(uid, role, name='aml-lab')
if len(execution) == 0 or execution[0]['status'] == 'terminated' or execution[0]['status'] == 'finished':
self.api_endpoint.execution_start(uid, role, 'aml-lab', app_descr)
template_vars['execution_status'] = 'submitted'
app_descr = json.load(open('contrib/zoeapps/eurecom_aml_lab.json', 'r'))
execution = self.api_endpoint.execution_list(uid, role, name='aml-lab')
if len(execution) == 0 or execution[0]['status'] == 'terminated' or execution[0]['status'] == 'finished':
self.api_endpoint.execution_start(uid, role, 'aml-lab', app_descr)
template_vars['execution_status'] = 'submitted'
return self.render('home_guest.html', **template_vars)
else:
execution = execution[0]
if execution['status'] != 'running':
template_vars['execution_status'] = execution['status']
return self.render('home_guest.html', **template_vars)
else:
execution = execution[0]
if execution['status'] != 'running':
template_vars['execution_status'] = execution['status']
return self.render('home_guest.html', **template_vars)
else:
template_vars['refresh'] = -1
template_vars['execution_status'] = execution['status']
# for c_id in execution['services']:
# c = cont_api.get(c_id)
# ip = list(c['ip_address'].values())[0] # FIXME how to decide which network is the right one?
# for p in c['ports']:
# template_vars['execution_urls'].append(('{}'.format(p['name']), '{}://{}:{}{}'.format(p['protocol'], ip, p['port_number'], p['path'])))
return self.render('home_guest.html', **template_vars)
template_vars['refresh'] = -1
template_vars['execution_status'] = execution['status']
# for c_id in execution['services']:
# c = cont_api.get(c_id)
# ip = list(c['ip_address'].values())[0] # FIXME how to decide which network is the right one?
# for p in c['ports']:
# template_vars['execution_urls'].append(('{}'.format(p['name']), '{}://{}:{}{}'.format(p['protocol'], ip, p['port_number'], p['path'])))
return self.render('home_guest.html', **template_vars)
This diff is collapsed.
......@@ -62,84 +62,84 @@ span.fakelink {
}
#wrapper {
width: 800px;
width: 800px;
}
#navigation {
background-color: #fff;
border: #ddd 1px solid;
border-radius: 10px;
margin: 10px;
padding: 10px;
background-color: #fff;
border: #ddd 1px solid;
border-radius: 10px;
margin: 10px;
padding: 10px;
}
#navigation li {
margin: 2px 0;
margin: 2px 0;
}
label.error {
color: #ff0000;
margin-left: 10px;
position: relative;
color: #ff0000;
margin-left: 10px;
position: relative;
}
.wizard {
background-color: #fff;
border: #ddd 1px solid;
border-radius: 10px;
margin: 10px;
padding: 10px;
background-color: #fff;
border: #ddd 1px solid;
border-radius: 10px;
margin: 10px;
padding: 10px;
}
.wizard .wizard-header {
background-color: #f4f4f4;
border-bottom: #ddd 1px solid;
border-top-left-radius: 10px;
border-top-right-radius: 10px;
padding: 5px 10px;
margin: 0 0 10px 0;
background-color: #f4f4f4;
border-bottom: #ddd 1px solid;
border-top-left-radius: 10px;
border-top-right-radius: 10px;
padding: 5px 10px;
margin: 0 0 10px 0;
}
.wizard .wizard-step {
margin: 10px 0;
margin: 10px 0;
}
.wizard .wizard-step p {
padding: 5px;
padding: 5px;
}
.navigation {
border-top: #ddd 1px solid;
margin-top: 10px;
padding-top: 10px;
border-top: #ddd 1px solid;
margin-top: 10px;
padding-top: 10px;
}
.navigation ul {
margin: 0;
padding: 0;
list-style: none;
margin: 0;
padding: 0;
list-style: none;
}
.navigation li {
float: left;
margin-right: 10px;
float: left;
margin-right: 10px;
}
.clearfix:before, .clearfix:after {
content: "\0020";
display: block;
height: 0;
visibility: hidden;
content: "\0020";
display: block;
height: 0;
visibility: hidden;
}
.clearfix:after {
clear: both;
clear: both;
}
input {
margin-top: 5px;
margin-top: 5px;
}
section {
padding-bottom: 10px;
}
\ No newline at end of file
}
......@@ -23,6 +23,17 @@
<p>Error message: <code>{{ e.error_message }}</code></p>
{% endif %}
<div id="endpoints">
{% if endpoints|length > 0 %}
<h3>Endpoints:</h3>
{% endif %}
<ul>
{% for e in endpoints %}
<li><a href="{{ e[1] }}">{{ e[0] }}</a></li>
{% endfor %}
</ul>
</div>
<div id="container_list">
{% if services_info|length > 0 %}
<h3>Services:</h3>
......@@ -36,11 +47,6 @@
{% if s['error_message'] is not none %}
<li>Error: {{ s['error_message'] }}</li>
{% endif %}
{% if s['docker_status'] == 'started' %}
{% for p in s['description']['ports'] %}
<li><a href="{{ p['protocol'] }}://{{ s['ip_address'] }}:{{ p['port_number'] }}{{ p['path'] }}">{{ p['name'] }}</a></li>
{% endfor %}
{% endif %}
</ul>
{% endfor %}
</ul>
......
......@@ -3,44 +3,144 @@
{% block title %}Home{% endblock %}
{% block custom_head %}
{% if refresh > 0 %}
<meta http-equiv="refresh" content="{{ refresh }}">
{% endif %}
<script type="application/javascript">
const AJAX_URL = "{{ reverse_url('ajax') }}";
const SLOW_UPDATE = 60000;
const FAST_UPDATE = 1000;
let update_interval = null;
function ajax(data, success_cb) {
$.ajax({
url: AJAX_URL,
type: 'POST',
data: JSON.stringify(data),
contentType: 'application/json; charset=utf-8',
dataType: 'json',
async: true,
success: success_cb,
error: function () {
show_error('AJAX communication error, the operation will be retried');
}
});
}
function show_error(msg) {
let error_box = $("#ajax-error");
error_box.text("Error: " + msg);
error_box.show();
}
let state = "init";
let execution_id = -1;
function state_machine() {
if (state == "init") {
clearInterval(update_interval);
update_interval = setInterval(function(){update_zoe_status();}, FAST_UPDATE);
$("#state-init").show();
$("#state-starting").hide();
$("#state-started").hide();
ajax({'type': 'start'},
function (data) {
if (data['status'] == 'ok') {
$("#ajax-error").hide();
state = "starting";
execution_id = data['execution_id'];
} else {
show_error(data.message);
}
},
function () {
show_error('AJAX communication error, the operation will be retried');
}
);
} else if (state == "starting") {
$("#state-init").hide();
$("#state-starting").show();
$("#state-started").hide();
} else if (state == "started") {
clearInterval(update_interval);
update_interval = setInterval(function(){update_zoe_status();}, SLOW_UPDATE);
$("#state-init").hide();
$("#state-starting").hide();
$("#state-started").show();
}
}
function update_zoe_status() {
if (execution_id < 0) {
$("#zoe-status").text('off');
state = "init";
} else {
ajax({'type': 'query_status', 'exec_id': execution_id},
function (data) {
if (data['status'] == 'ok') {
$("#ajax-error").hide();
$("#zoe-status").text(data['exec_status']);
if (data['exec_status'] == 'running') {
state = "started";
$('#time_remaining').text(moment.duration(data['ttl'] * 1000).humanize());
let s = "";
for (let ep of data['endpoints']) {
s += "<li><a href=\"" + ep[1] + "\">" + ep[0] + "</a></li>\n";
}
$("#endpoints").html(s);
} else if (data['exec_status'] == 'terminated' || data['exec_status'] == 'none' || data['exec_status'] == 'error') {
state = "init";
update_interval = setInterval(function(){update_zoe_status();}, FAST_UPDATE);
}
} else {
show_error(data.message);
}
}
);
}
state_machine();
}
state_machine();
</script>
<style>
body {
width: 80%;
}
.state-box {
border: 1px solid black;
margin-top: 2em;
margin-bottom: 2em;
width: 40%;
padding-left: 10px;
padding-right: 10px;
}
</style>
{% endblock %}
{% block content %}
<h2>Guest cluster access page</h2>
<h2>Algorithmic Machine Learning cluster management</h2>
{% if request.remote_addr != gateway_ip %}
<p>You are logged in as {{ uid }}.</p>
<p>To access your Jupyter Notebook and your Spark cluster, you need to open a browser with a socks proxy configured. To do that, use the following commands.</p>
<p>Through this page you will be able to access the Jupyter notebook web interface, which you will use to upload and work on the notebooks provided on the <a href="https://github.com/DistributedSystemsGroup/Algorithmic-Machine-Learning">Algorithmic Machine Learning course GitHub page</a>.</p>
<p>On <b>Linux</b> copy and paste this command in a terminal window:</p>
<pre>google-chrome --proxy-server={{ user_gateway }} --user-data-dir=/tmp/chrome-zoe-$USER {{ request.url }}</pre>
<p>The work environment contains also an Apache Spark cluster and is created dynamically when you first access this page. After a fixed amount of time, the resources are freed and the Notebook and Spark are terminated. The files you saved in your workspace will be available for your next session.</p>
<p>On <b>Windows</b> copy and paste this command in a cmd window (Start & cmd):</p>
<pre>cd +chrome install directory+
chrome.exe --proxy-server={{ user_gateway }} --user-data-dir=%TEMP%\chrome-zoe {{ request.url }}</pre>
<span style="color: darkred; display: none;" id="ajax-error">AJAX communication error, retrying...</span>
{% else %}
<div class="state-box">
<div id="state-init">
<p>Checking your cluster status...</p>
</div>
{% if execution_urls|length == 0 %}
<p><b>Please wait</b>, this page will refresh automatically.<br>
When this message disappears your cluster will be ready to use.</p>
{% endif %}
<div id="state-starting" style="display: none;">
<p>Please wait, your cluster is <span id="zoe-status">...</span></p>
</div>
{% if execution_status == "running" and execution_urls|length > 0 %}
<ul>
{% for url in execution_urls|sort(0) %}
{% if url[0] == "Jupyter Notebook interface" %}
<li><span style="font-size: larger; padding-bottom: 10px; display: inline-block"><a href="{{ url[1] }}">{{ url[0] }}</a> &larr; Start here</span></li>
{% else %}
<li><span style="font-size: smaller"><a href="{{ url[1] }}">{{ url[0] }}</a></span></li>
{% endif %}
{% endfor %}
</ul>
{% endif %}
<div id="state-started" style="display: none;">
<p>Your cluster is running, it will be destroyed in about <span id="time_remaining"></span></p>
<ul id="endpoints"></ul>
</div>
</div>
<p>Useful resources:</p>
<ul>
......@@ -48,5 +148,4 @@ chrome.exe --proxy-server={{ user_gateway }} --user-data-dir=%TEMP%\chrome-zoe {
<li><a href="https://spark.apache.org/docs/1.5.2/api/python/index.html">Spark Python API</a></li>
</ul>
{% endif %}
{% endblock %}
......@@ -5,7 +5,6 @@
<p>Welcome to Zoe</p>
<ul>
<li><a href="{{ reverse_url('home_user') }}">Guest start page</a></li>
<li><a href="{{ reverse_url('home_user') }}">User start page</a></li>
</ul>
......
......@@ -48,7 +48,7 @@ def catch_exceptions(func):
return error_page(self, str(e), 400)
except Exception as e:
log.exception(str(e))
return {'message': str(e)}, 500
return error_page(self, str(e), 500)
return func_wrapper
......@@ -70,9 +70,16 @@ def get_auth(handler: ZoeRequestHandler):
auth_decoded = base64.decodebytes(bytes(auth_header[6:], 'ascii')).decode('utf-8')
username, password = auth_decoded.split(':', 2)
if get_conf().auth_type == 'text':
authenticator = PlainTextAuthenticator() # type: BaseAuthenticator
elif get_conf().auth_type == 'ldap':
# First of all try to authenticate against a fixed list of users in a text file
authenticator = PlainTextAuthenticator() # type: BaseAuthenticator
try:
uid, role = authenticator.auth(username, password)
return uid, role
except zoe_api.exceptions.ZoeAuthException:
pass
# It it fails, continue with the normal authentication
if get_conf().auth_type == 'ldap':
authenticator = LDAPAuthenticator()
else:
raise zoe_api.exceptions.ZoeException('Configuration error, unknown authentication method: {}'.format(get_conf().auth_type))
......
......@@ -136,9 +136,11 @@ def exec_get_cmd(args):
print('Execution not found')
else:
print('Execution {} (ID: {})'.format(execution['name'], execution['id']))
print('Application name: {}'.format(execution['description']['name']))
print('Status: {}'.format(execution['status']))
if execution['status'] == 'error':
print('Last error: {}'.format(execution['error_message']))
print()
print('Time submit: {}'.format(datetime.datetime.fromtimestamp(execution['time_submit'])))
if execution['time_start'] is None:
......@@ -150,9 +152,17 @@ def exec_get_cmd(args):
print('Time end: {}'.format('not yet'))
else:
print('Time end: {}'.format(datetime.datetime.fromtimestamp(execution['time_end'])))
print()
app = execution['description']
print('Application name: {}'.format(app['name']))
endpoints = exec_api.endpoints(execution['id'])
if len(endpoints) > 0:
print('Exposed endpoints:')
else:
print('This ZApp does not expose any endpoint')
for ep in endpoints:
print(' - {}: {}'.format(ep[0], ep[1]))
print()
for c_id in execution['services']:
service = cont_api.get(c_id)
print('Service {} (ID: {})'.format(service['name'], service['id']))
......@@ -160,10 +170,6 @@ def exec_get_cmd(args):
print(' - docker status: {}'.format(service['docker_status']))
if service['error_message'] is not None:
print(' - error: {}'.format(service['error_message']))
if service['docker_status'] == 'started':
ip = service['ip_address']
for port in service['description']['ports']:
print(' - {}: {}://{}:{}{}'.format(port['name'], port['protocol'], ip, port['port_number'], port['path']))
def exec_kill_cmd(args):
......
......@@ -88,6 +88,12 @@ def load_configuration(test_conf=None):
argparser.add_argument('--scheduler-class', help='Scheduler class to use for scheduling ZApps', default='ZoeSimpleScheduler')
argparser.add_argument('--docker-tls-cert', help='Docker TLS certificate file', default='cert.pem')
argparser.add_argument('--docker-tls-key', help='Docker TLS private key file', default='key.pem')
argparser.add_argument('--docker-tls-ca', help='Docker TLS CA certificate file', default='ca.pem')
argparser.add_argument('--aml-ttl', help='TimeToLive in hours for AML executions', type=int, default=4)
opts = argparser.parse_args()
if opts.debug:
argparser.print_values()
......
......@@ -110,3 +110,16 @@ class ZoeExecutionsAPI(ZoeAPIBase):
raise ZoeAPIException(data['message'])
else:
return data['execution_id']
def endpoints(self, execution_id):
"""
Retrieve the public endpoints exposed by a running execution.
:param execution_id: the execution to inspect
:return:
"""
data, status_code = self._rest_get('/execution/endpoints/' + str(execution_id))
if status_code == 200:
return data['endpoints']
else:
return None
......@@ -407,6 +407,15 @@ class Service(Base):
s_info = swarm.inspect_container(self.docker_id)
return s_info['ip_address'][get_conf().overlay_network_name]
@property
def ports(self):
"""Getter for the port mappings created by Swarm."""
if self.docker_status != self.DOCKER_START_STATUS:
return {}
swarm = SwarmClient(get_conf())
s_info = swarm.inspect_container(self.docker_id)
return s_info['ports']
@property
def user_id(self):
"""Getter for the user_id, that is actually taken form the parent execution."""
......
......@@ -126,15 +126,19 @@ class SwarmClient:
def __init__(self, opts: Namespace) -> None:
self.opts = opts
url = opts.swarm
tls = False
if 'zk://' in url:
url = url[len('zk://'):]
manager = zookeeper_swarm(url)
elif 'http://' or 'https://' in url:
elif 'http://' in url:
manager = url
elif 'https://' in url:
tls = docker.tls.TLSConfig(client_cert=(opts.docker_tls_cert, opts.docker_tls_key), verify=opts.docker_tls_ca)
manager = url
else:
raise ZoeLibException('Unsupported URL scheme for Swarm')
log.debug('Connecting to Swarm at {}'.format(manager))
self.cli = docker.Client(base_url=manager, version="auto")
# log.debug('Connecting to Swarm at {}'.format(manager))
self.cli = docker.Client(base_url=manager, version="auto", tls=tls)
def info(self) -> SwarmStats:
"""Retrieve Swarm statistics. The Docker API returns a mess difficult to parse."""
......@@ -204,13 +208,20 @@ class SwarmClient:
log_config = docker.utils.LogConfig(type="json-file")
try:
host_config = self.cli.create_host_config(network_mode=options.network_name,
binds=options.get_volume_binds(),
mem_limit=options.get_memory_limit(),
memswap_limit=options.get_memory_limit(),
restart_policy=options.restart_policy,
port_bindings=port_bindings,
log_config=log_config)
if options.get_memory_limit() == -1:
host_config = self.cli.create_host_config(network_mode=options.network_name,
binds=options.get_volume_binds(),
restart_policy=options.restart_policy,
port_bindings=port_bindings,
log_config=log_config)
else:
host_config = self.cli.create_host_config(network_mode=options.network_name,
binds=options.get_volume_binds(),
mem_limit=options.get_memory_limit(),
memswap_limit=options.get_memory_limit(),
restart_policy=options.restart_policy,
port_bindings=port_bindings,
log_config=log_config)
cont = self.cli.create_container(image=image,
environment=options.environment,
network_disabled=False,
......
......@@ -15,6 +15,6 @@
"""Versions."""
ZOE_VERSION = '0.20.0-beta'
ZOE_VERSION = '0.10.3-aml'
ZOE_API_VERSION = '0.6'
ZOE_APPLICATION_FORMAT_VERSION = 2
......@@ -25,7 +25,7 @@ from zoe_lib.sql_manager import SQLManager, Service
log = logging.getLogger(__name__)
CHECK_INTERVAL = 300
CHECK_INTERVAL = 30
class ZoeSwarmChecker(threading.Thread):
......@@ -59,6 +59,9 @@ class ZoeSwarmChecker(threading.Thread):
if container['status'] == 'exited':
log.info('resetting status of service {}, died with no event'.format(service.name))
service.set_docker_status(service.DOCKER_DIE_STATUS)
elif container['status'] == 'running' and service.docker_status != service.DOCKER_START_STATUS:
log.info('resetting status of service {}, started with no event'.format(service.name))
service.set_docker_status(service.DOCKER_START_STATUS)
if not found:
service.set_docker_status(service.DOCKER_DESTROY_STATUS)
......