Commit ba50b881 authored by Daniele Venzano's avatar Daniele Venzano

Merge wi9.1-wi9.2 branch

parents a4343a5c 016bc2d2
......@@ -12,10 +12,10 @@ All Zoe processes use one single configuration file, called zoe.conf. It is sear
zoe.conf
--------
Common options:
* ``debug = <true|false>`` : enable or disable debug log output
* ``swarm = zk://zk1:2181,zk2:2181,zk3:2181`` : connection string to the Swarm API endpoint. Can be expressed by a plain http URL or as a zookeeper node list in case Swarm is configured for HA.
* ``api-listen-uri = tcp://*:4850`` : ZeroMQ server connection string, used for the master listening endpoint
* ``deployment-name = devel`` : name of this Zoe deployment. Can be used to have multiple Zoe deployments using the same Swarm (devel and prod, for example)
* ``workspace-deployment-path`` : path appended to the workspace path to distinguish this deployment. If unspecified is equal to the deployment name
......@@ -25,6 +25,7 @@ Common options:
* ``gelf-address = udp://1.2.3.4:1234`` : Enable Docker GELF log output to this destination
* ``workspace-base-path = /mnt/zoe-workspaces`` : Base directory where user workspaces will be created. This directory should reside on a shared filesystem visible by all Docker hosts.
* ``overlay-network-name = zoe`` : name of the pre-configured Docker overlay network Zoe should use
* ``backend = Swarm`` : ' Name of the backend to enable and use
Database options:
......@@ -39,11 +40,23 @@ API options:
* ``listen-address`` : address Zoe will use to listen for incoming connections to the web interface
* ``listen-port`` : port Zoe will use to listen for incoming connections to the web interface
* ``master-url = tcp://127.0.0.1:4850`` : address of the Zoe Master ZeroMQ API
* ``cookie-secret = changeme``: secret used to encrypt cookies
* ``ldap-server-uri = ldap://localhost`` : LDAP server to use for user authentication
* ``ldap-base-dn = ou=something,dc=any,dc=local`` : LDAP base DN for users
* ``ldap-bind-user = cn=guest,ou=something,dc=any,dc=local`` : LDAP user to bind as for user lookup
* ``ldap-bind-password = notsosecret`` : LDAP bind user password
* ``ldap-admin-gid = 5000`` : LDAP group ID for admins
* ``ldap-user-gid = 5001`` : LDAP group ID for users
* ``ldap-guest-gid = 5002`` : LDAP group ID for guests
Scheduler options:
* ``scheduler-class = <**ZoeSimpleScheduler** | ZoeElasticScheduler>`` : Scheduler class to use for scheduling ZApps
* ``scheduler-policy = <**FIFO** | SIZE>`` : Scheduler policy to use for scheduling ZApps
Swarm backend options:
* ``backend-swarm-url = zk://zk1:2181,zk2:2181,zk3:2181`` : connection string to the Swarm API endpoint. Can be expressed by a plain http URL or as a zookeeper node list in case Swarm is configured for HA.
* ``backend-swarm-zk-path = /docker`` : ZooKeeper path used by Docker Swarm
* ``backend-swarm-tls-cert = cert.pem`` : Docker TLS certificate file
* ``backend-swarm-tls-key = key.pem`` : Docker TLS private key file
* ``backend-swarm-tls-ca = ca.pem`` : Docker TLS CA certificate file
......@@ -58,6 +58,16 @@ class APIEndpoint:
ret = [e for e in execs if e.user_id == uid or role == 'admin']
return ret
def _get_proxy(self):
if get_conf().proxy_type == 'apache':
proxy = ApacheProxy(self)
# elif get_conf().proxy_type == 'nginx':
# proxy = NginxProxy(self)
else:
log.info('Proxy disabled')
proxy = None
return proxy
def execution_start(self, uid, role, exec_name, application_description):
"""Start an execution."""
try:
......@@ -75,11 +85,8 @@ class APIEndpoint:
if not success:
raise zoe_api.exceptions.ZoeException('The Zoe master is unavailable, execution will be submitted automatically when the master is back up ({}).'.format(message))
if get_conf().deployment_name != 'test':
if get_conf().proxy_type == 'apache':
proxy = ApacheProxy(self)
#else:
# proxy = NginxProxy(self)
proxy = self._get_proxy()
if proxy is not None:
threading.Thread(target=proxy.proxify, args=(uid, role, new_id)).start()
return new_id
......@@ -95,11 +102,8 @@ class APIEndpoint:
raise zoe_api.exceptions.ZoeAuthException()
if e.is_active:
if get_conf().deployment_name != 'test':
if get_conf().proxy_type == 'apache':
proxy = ApacheProxy(self)
#else:
# proxy = NginxProxy(self)
proxy = self._get_proxy()
if proxy is not None:
proxy.unproxify(uid, role, exec_id)
return self.master.execution_terminate(exec_id)
else:
......
......@@ -22,21 +22,22 @@ import docker
import zoe_api.proxy.base
import zoe_api.api_endpoint
from zoe_master.backends.old_swarm.api_client import SwarmClient
from zoe_master.backends.swarm.api_client import SwarmClient
from zoe_master.backends.kubernetes.api_client import KubernetesClient
from zoe_lib.config import get_conf
log = logging.getLogger(__name__)
class ApacheProxy(zoe_api.proxy.base.BaseProxy):
"""Apache proxy class."""
def __init__(self, apiEndpoint):
self.api_endpoint = apiEndpoint
def __init__(self, api_endpoint):
self.api_endpoint = api_endpoint
def proxify(self, uid, role, execution_id): #pylint: disable=too-many-locals
def proxify(self, uid, role, execution_id): # pylint: disable=too-many-locals
"""Proxify function."""
try:
#Wait until all the services get created and started to be able to get the backend_id
# Wait until all the services get created and started to be able to get the backend_id
while self.api_endpoint.execution_by_id(uid, role, execution_id).status != 'running':
log.info('Waiting for all services get started...')
time.sleep(1)
......@@ -51,20 +52,20 @@ class ApacheProxy(zoe_api.proxy.base.BaseProxy):
if srv.backend_id is None:
time.sleep(1)
else:
lth = lth - 1
lth -= 1
#Start proxifying by adding entry to use proxypass and proxypassreverse in apache2 config file
# Start proxifying by adding entry to use proxypass and proxypassreverse in apache2 config file
for srv in exe.services:
ip, port = None, None
if get_conf().backend == 'OldSwarm':
swarm = SwarmClient(get_conf())
swarm = SwarmClient()
s_info = swarm.inspect_container(srv.backend_id)
port_list = s_info['ports']
for key, val in port_list.items():
exposed_port = key.split('/tcp')[0]
if val != None:
if val is not None:
ip = val[0]
port = val[1]
......@@ -128,11 +129,11 @@ class ApacheProxy(zoe_api.proxy.base.BaseProxy):
reload_id = docker_client.exec_create(get_conf().proxy_container, reload_command)
docker_client.exec_start(reload_id)
#Simply remove the added entries at the apache2 config file when terminating applcations
# Simply remove the added entries at the apache2 config file when terminating applications
def unproxify(self, uid, role, execution_id):
log.info('Unproxifying for user %s - execution %s', uid, str(execution_id))
pattern = '/zoe\/' + uid + '\/' + str(execution_id) + '/d' #pylint: disable=anomalous-backslash-in-string
pattern = '/zoe\/' + uid + '\/' + str(execution_id) + '/d' # pylint: disable=anomalous-backslash-in-string
docker_client = docker.Client(base_url=get_conf().proxy_docker_sock)
del_command = 'sed -i "' + pattern + '" ' + get_conf().proxy_config_file # /etc/apache2/sites-available/all.conf'
del_command = 'sed -i "' + pattern + '" ' + get_conf().proxy_config_file # /etc/apache2/sites-available/all.conf'
del_id = docker_client.exec_create(get_conf().proxy_container, del_command)
docker_client.exec_start(del_id)
......@@ -25,6 +25,7 @@ import tornado.web
from zoe_lib.config import get_conf
from zoe_api.exceptions import ZoeRestAPIException, ZoeNotFoundException, ZoeAuthException, ZoeException
from zoe_api.auth.base import BaseAuthenticator
from zoe_api.auth.ldap import LDAPAuthenticator
from zoe_api.auth.file import PlainTextAuthenticator
from zoe_api.auth.ldapsasl import LDAPSASLAuthenticator
......@@ -79,7 +80,7 @@ def get_auth(handler: tornado.web.RequestHandler):
if auth_header is None or not (auth_header.startswith('Basic ') or auth_header.startswith('Bearer ')):
raise ZoeRestAPIException('missing or wrong authentication information', 401, {'WWW-Authenticate': 'Basic realm="Login Required"'})
#Process for authentication with token
# Process for authentication with token
if "Bearer" in auth_header:
token = auth_header[7:]
......@@ -99,7 +100,7 @@ def get_auth(handler: tornado.web.RequestHandler):
return uid, role
#Process for authentication with username, password
# Process for authentication with username, password
else:
auth_decoded = base64.decodebytes(bytes(auth_header[6:], 'ascii')).decode('utf-8')
username, password = auth_decoded.split(':', 2)
......@@ -107,9 +108,9 @@ def get_auth(handler: tornado.web.RequestHandler):
if get_conf().auth_type == 'text':
authenticator = PlainTextAuthenticator() # type: BaseAuthenticator
elif get_conf().auth_type == 'ldap':
authenticator = LDAPAuthenticator()
authenticator = LDAPAuthenticator() # type: BaseAuthenticator
elif get_conf().auth_type == 'ldapsasl':
authenticator = LDAPSASLAuthenticator()
authenticator = LDAPSASLAuthenticator() # type: BaseAuthenticator
else:
raise ZoeException('Configuration error, unknown authentication method: {}'.format(get_conf().auth_type))
uid, role = authenticator.auth(username, password)
......
......@@ -151,18 +151,12 @@ def _service_check(data):
if not isinstance(volume[2], bool):
raise InvalidApplicationDescription(msg='readonly volume item (third) must be a boolean: {}'.format(volume[2]))
if 'constraints' in data and not hasattr(data['constraints'], '__iter__'):
raise InvalidApplicationDescription(msg='networks should be an iterable')
if get_conf().backend == 'Kubernetes':
if 'replicas' not in data:
data['replicas'] = 1
try:
int(data['replicas'])
except ValueError:
raise InvalidApplicationDescription(msg="replicas field should be an int")
if 'replicas' not in data:
data['replicas'] = 1
try:
int(data['replicas'])
except ValueError:
raise InvalidApplicationDescription(msg="replicas field should be an int")
def _port_check(data):
......
......@@ -49,7 +49,6 @@ def load_configuration(test_conf=None):
# Common options
argparser.add_argument('--debug', action='store_true', help='Enable debug output')
argparser.add_argument('--swarm', help='Swarm/Docker API endpoint (ex.: zk://zk1:2181,zk2:2181 or http://swarm:2380)', default='http://localhost:2375')
argparser.add_argument('--deployment-name', help='name of this Zoe deployment', default='prod')
argparser.add_argument('--dbname', help='DB name', default='zoe')
......@@ -85,7 +84,7 @@ def load_configuration(test_conf=None):
argparser.add_argument('--ldap-guest-gid', type=int, help='LDAP group ID for guests', default=5002)
# Proxy options
argparser.add_argument('--proxy-type', help='Proxy type (apache or nginx)', default='apache')
argparser.add_argument('--proxy-type', help='Proxy type (apache or nginx)', default='none')
argparser.add_argument('--proxy-container', help='Proxy container name', default='apache2')
argparser.add_argument('--proxy-config-file', help='Config file path of apache/nginx proxy container', default='/etc/apache2/sites-available/config.conf')
argparser.add_argument('--proxy-path', help='Proxy base path', default='127.0.0.1')
......@@ -94,7 +93,16 @@ def load_configuration(test_conf=None):
argparser.add_argument('--scheduler-class', help='Scheduler class to use for scheduling ZApps', choices=['ZoeSimpleScheduler', 'ZoeElasticScheduler'], default='ZoeSimpleScheduler')
argparser.add_argument('--scheduler-policy', help='Scheduler policy to use for scheduling ZApps', choices=['FIFO', 'SIZE'], default='FIFO')
argparser.add_argument('--backend', choices=['OldSwarm', 'Kubernetes'], default='OldSwarm')
argparser.add_argument('--backend', choices=['Swarm', 'Kubernetes'], default='Swarm')
# Docker Swarm backend options
argparser.add_argument('--backend-swarm-url', help='Swarm/Docker API endpoint (ex.: zk://zk1:2181,zk2:2181 or http://swarm:2380)', default='http://localhost:2375')
argparser.add_argument('--backend-swarm-zk-path', help='Swarm/Docker optional ZooKeeper path for Swarm Znodes', default='/docker')
argparser.add_argument('--backend-swarm-tls-cert', help='Docker TLS certificate file', default='cert.pem')
argparser.add_argument('--backend-swarm-tls-key', help='Docker TLS private key file', default='key.pem')
argparser.add_argument('--backend-swarm-tls-ca', help='Docker TLS CA certificate file', default='ca.pem')
# Kubernetes backend
argparser.add_argument('--kube-config-file', help='Kubernetes configuration file', default='/opt/zoe/kube.conf')
argparser.add_argument('--cookie-secret', help='secret used to encrypt cookies', default='changeme')
......
......@@ -46,8 +46,3 @@ class InvalidApplicationDescription(ZoeAPIException):
"""
def __init__(self, msg):
super().__init__("Error: " + msg)
class ZoeNotEnoughResourcesException(ZoeLibException):
"""Service failed to start due to not enough available resources."""
pass
......@@ -18,4 +18,4 @@
from zoe_lib.state.base import Base
from zoe_lib.state.execution import Execution
from zoe_lib.state.sql_manager import SQLManager
from zoe_lib.state.service import Service
from zoe_lib.state.service import Service, VolumeDescription
......@@ -196,3 +196,8 @@ class Service:
def is_dead(self):
"""Returns True if this service is not running."""
return self.backend_status == self.BACKEND_DESTROY_STATUS or self.backend_status == self.BACKEND_OOM_STATUS or self.backend_status == self.BACKEND_DIE_STATUS
@property
def unique_name(self):
"""Returns a name for this service that is unique across multiple Zoe instances running on the same backend."""
return self.name + '-' + str(self.execution_id) + '-' + get_conf().deployment_name
......@@ -15,10 +15,9 @@
"""The base class that all backends should implement."""
from typing import Dict
from zoe_lib.state import Execution, Service
from zoe_lib.state import Service
from zoe_master.stats import ClusterStats
from zoe_master.backends.service_instance import ServiceInstance
class BaseBackend:
......@@ -34,8 +33,17 @@ class BaseBackend:
"""Performs a clean shutdown of the resources used by Swarm backend. Any threads that where started in the init() method should be terminated here. This method will be called when Zoe shuts down."""
raise NotImplementedError
def spawn_service(self, execution: Execution, service: Service, env_subst_dict: Dict):
"""Create a container for a service."""
def spawn_service(self, service_instance: ServiceInstance):
"""Create a container for a service.
The backend translates all the configuration parameters given in the ServiceInstance object into backend-specific container options and starts the container.
This function should either:
* raise ``ZoeStartExecutionRetryException`` in case a temporary error is generated
* raise ``ZoeStartExecutionFatalException`` in case a fatal error is generated
* return a backend-specific ID that will be used later by Zoe to interact with the running container
"""
raise NotImplementedError
def terminate_service(self, service: Service) -> None:
......
......@@ -21,6 +21,8 @@ from zoe_lib.config import get_conf
from zoe_lib.state import Service, Execution
from zoe_master.backends.proxy import gen_proxypath, JUPYTER_NOTEBOOK, MONGO_EXPRESS, JUPYTER_PORT, MONGO_PORT
from zoe_master.exceptions import ZoeStartExecutionFatalException
from zoe_master.workspace.filesystem import ZoeFSWorkspace
def gen_environment(execution: Execution, service: Service, env_subst_dict: Dict):
""" Generate a dictionary containing the current cluster status (before the new container is spawned)
......@@ -36,6 +38,7 @@ def gen_environment(execution: Execution, service: Service, env_subst_dict: Dict
raise ZoeStartExecutionFatalException("Service {} has wrong environment expression")
env_list.append((env_name, env_value))
# FIXME this code needs to be removed/changed to be generic
#if 'jupyter' in service.image_name:
env_list.append((JUPYTER_NOTEBOOK, gen_proxypath(execution, service) + '/' + JUPYTER_PORT))
#elif 'mongo-express' in service.image_name:
......@@ -47,4 +50,30 @@ def gen_environment(execution: Execution, service: Service, env_subst_dict: Dict
env_list.append(('SERVICE_NAME', service.name))
env_list.append(('PROXY_PATH', get_conf().proxy_path))
fswk = ZoeFSWorkspace()
env_list.append(('ZOE_WORKSPACE', fswk.get_mountpoint()))
return env_list
def gen_volumes(service_: Service, execution: Execution):
"""Return the list of default volumes to be added to all containers."""
vol_list = []
fswk = ZoeFSWorkspace()
wk_vol = fswk.get(execution.user_id)
vol_list.append(wk_vol)
return vol_list
def gen_labels(service: Service, execution: Execution):
"""Generate container labels, useful for identifying containers in monitoring systems."""
return {
'zoe_execution_name': execution.name,
'zoe_execution_id': str(execution.id),
'zoe_service_name': service.name,
'zoe_service_id': str(service.id),
'zoe_owner': execution.user_id,
'zoe_deployment_name': get_conf().deployment_name,
'zoe_type': 'app_service'
}
......@@ -22,9 +22,18 @@ from zoe_lib.config import get_conf
from zoe_lib.state import Execution, Service
from zoe_master.backends.base import BaseBackend
from zoe_master.backends.old_swarm.backend import OldSwarmBackend
from zoe_master.backends.kubernetes.backend import KubernetesBackend
from zoe_master.exceptions import ZoeStartExecutionFatalException, ZoeStartExecutionRetryException
from zoe_master.backends.service_instance import ServiceInstance
from zoe_master.exceptions import ZoeStartExecutionFatalException, ZoeStartExecutionRetryException, ZoeException
try:
from zoe_master.backends.swarm.backend import SwarmBackend
except ImportError as ex:
SwarmBackend = None
try:
from zoe_master.backends.kubernetes.backend import KubernetesBackend
except ImportError:
KubernetesBackend = None
log = logging.getLogger(__name__)
......@@ -32,10 +41,14 @@ log = logging.getLogger(__name__)
def _get_backend() -> BaseBackend:
"""Return the right backend instance by reading the global configuration."""
backend_name = get_conf().backend
if backend_name == 'OldSwarm':
return OldSwarmBackend(get_conf())
elif backend_name == 'Kubernetes':
if backend_name == 'Kubernetes':
if KubernetesBackend is None:
raise ZoeException('The Kubernetes backend requires the pykube module')
return KubernetesBackend(get_conf())
elif backend_name == 'Swarm':
if SwarmBackend is None:
raise ZoeException('The Swarm backend requires docker python version >= 2.0.2')
return SwarmBackend(get_conf())
else:
log.error('Unknown backend selected')
assert False
......@@ -72,10 +85,12 @@ def service_list_to_containers(execution: Execution, service_list: List[Service]
for service in ordered_service_list:
env_subst_dict['dns_name#self'] = service.dns_name
service.set_starting()
instance = ServiceInstance(execution, service, env_subst_dict)
try:
backend.spawn_service(execution, service, env_subst_dict)
backend_id, ip_address = backend.spawn_service(instance)
except ZoeStartExecutionRetryException as ex:
log.warning('Temporary failure starting service {} of execution {}: {}'.format(service.id, execution.id, ex.message))
service.set_error(ex.message)
execution.set_error_message(ex.message)
terminate_execution(execution)
execution.set_scheduled()
......@@ -94,7 +109,8 @@ def service_list_to_containers(execution: Execution, service_list: List[Service]
execution.set_error()
return "fatal"
else:
execution.set_running()
log.debug('Service {} started'.format(instance.name))
service.set_active(backend_id, ip_address)
return "ok"
......
......@@ -16,15 +16,11 @@
"""Zoe backend implementation for Kubernetes with docker."""
import logging
from typing import Dict
from zoe_lib.config import get_conf
from zoe_lib.exceptions import ZoeLibException, ZoeNotEnoughResourcesException
from zoe_lib.state import Execution, Service
from zoe_master.backends.kubernetes.api_client import DockerContainerOptions, KubernetesClient
from zoe_master.exceptions import ZoeStartExecutionRetryException, ZoeStartExecutionFatalException, ZoeException
from zoe_master.workspace.filesystem import ZoeFSWorkspace
import zoe_master.backends.common
from zoe_lib.state import Service
from zoe_master.backends.kubernetes.api_client import KubernetesClient
from zoe_master.exceptions import ZoeStartExecutionRetryException, ZoeStartExecutionFatalException, ZoeException, ZoeNotEnoughResourcesException
from zoe_master.backends.service_instance import ServiceInstance
import zoe_master.backends.base
from zoe_master.backends.kubernetes.threads import KubernetesMonitor, KubernetesStateSynchronizer
from zoe_master.stats import NodeStats, ClusterStats # pylint: disable=unused-import
......@@ -55,74 +51,17 @@ class KubernetesBackend(zoe_master.backends.base.BaseBackend):
_monitor.quit()
_checker.quit()
def spawn_service(self, execution: Execution, service: Service, env_subst_dict: Dict):
def spawn_service(self, service_instance: ServiceInstance):
"""Spawn a service, translating a Zoe Service into a Docker container."""
copts = DockerContainerOptions()
copts.gelf_log_address = get_conf().gelf_address
copts.name = service.dns_name
copts.set_memory_limit(service.resource_reservation.memory)
copts.set_cores_limit(service.resource_reservation.cores)
copts.network_name = get_conf().overlay_network_name
copts.labels = {
'zoe.execution.name': execution.name,
'zoe.execution.id': str(execution.id),
'zoe.service.name': service.name,
'zoe.service.id': str(service.id),
'zoe.owner': execution.user_id,
'zoe.deployment_name': get_conf().deployment_name,
'zoe.type': 'app_service'
}
if service.is_monitor:
copts.labels['kubernetes.monitor'] = 'true'
else:
copts.labels['kubernetes.monitor'] = 'false'
# Always disable autorestart
# if 'disable_autorestart' in execution.description and execution.description['disable_autorestart']:
# log.debug("Autorestart disabled for service {}".format(service.id))
# copts.restart = False
# else:
# copts.restart = not service.is_monitor # Monitor containers should not restart
copts.restart = False
env_vars = zoe_master.backends.common.gen_environment(execution, service, env_subst_dict)
for name, value in env_vars:
copts.add_env_variable(name, value)
for port in service.ports:
if port.expose:
copts.ports.append(port.number)
for volume in service.volumes:
if volume.type == "host_directory":
copts.add_volume_bind(volume.path, volume.mount_point, volume.readonly)
else:
log.warning('Kubernetes backend does not support volume type {}'.format(volume.type))
# if 'constraints' in service.description:
# for constraint in service.description['constraints']:
# copts.add_constraint(constraint)
fswk = ZoeFSWorkspace()
if fswk.can_be_attached():
copts.add_volume_bind(fswk.get_path(execution.user_id), fswk.get_mountpoint(), False)
copts.add_env_variable('ZOE_WORKSPACE', fswk.get_mountpoint())
# The same dictionary is used for templates in the command
copts.set_command(service.command.format(**env_subst_dict))
copts.set_replicas(service.replicas)
try:
self.kube.spawn_service(copts)
rc_info = self.kube.spawn_replication_controller(service.description['docker_image'], copts)
self.kube.spawn_service(service_instance)
rc_info = self.kube.spawn_replication_controller(service_instance)
except ZoeNotEnoughResourcesException:
service.set_error('Not enough free resources to satisfy reservation request')
raise ZoeStartExecutionRetryException('Not enough free resources to satisfy reservation request for service {}'.format(service.name))
except (ZoeException, ZoeLibException) as e:
raise ZoeStartExecutionRetryException('Not enough free resources to satisfy reservation request for service {}'.format(service_instance.name))
except ZoeException as e:
raise ZoeStartExecutionFatalException(str(e))
service.set_active(rc_info["backend_id"], rc_info['ip_address'])
return rc_info["backend_id"], rc_info['ip_address']
def terminate_service(self, service: Service) -> None:
"""Terminate and delete a container."""
......
......@@ -22,8 +22,10 @@ import time
from zoe_lib.config import get_conf
from zoe_lib.state import SQLManager, Service
from zoe_master.backends.kubernetes.api_client import KubernetesClient
log = logging.getLogger(__name__)
class KubernetesMonitor(threading.Thread):
"""The monitor."""
......@@ -40,7 +42,7 @@ class KubernetesMonitor(threading.Thread):
def run(self):
"""An infinite loop that listens for events from Kubernetes."""
log.info("Monitor thread started")
while True: # pylint: disable=too-many-nested-blocks
while True: # pylint: disable=too-many-nested-blocks
for event in self.kube.replication_controller_event():
log.debug('%s: %s', event.object.name, event.type)
if event.type != 'DELETED' and event.type != 'ADDED':
......@@ -79,8 +81,10 @@ class KubernetesMonitor(threading.Thread):
"""Stops the thread."""
self.stop = True
CHECK_INTERVAL = 300
class KubernetesStateSynchronizer(threading.Thread):
"""The Kubernetes Checker."""
......
This diff is collapsed.
# Copyright (c) 2017, Daniele Venzano
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Zoe backend implementation for old-style stand-alone Docker Swarm."""
import logging
from typing import Dict
from zoe_lib.config import get_conf
from zoe_lib.exceptions import ZoeLibException, ZoeNotEnoughResourcesException
from zoe_lib.state import Execution, Service
from zoe_master.backends.old_swarm.api_client import DockerContainerOptions, SwarmClient
from zoe_master.exceptions import ZoeStartExecutionRetryException, ZoeStartExecutionFatalException, ZoeException
from zoe_master.workspace.filesystem import ZoeFSWorkspace
import zoe_master.backends.common
import zoe_master.backends.base
from zoe_master.backends.old_swarm.threads import SwarmMonitor, SwarmStateSynchronizer
from zoe_master.stats import NodeStats, ClusterStats # pylint: disable=unused-import
log = logging.getLogger(__name__)
# These two module-level variables hold the references to the monitor and checker threads
_monitor = None
_checker = None
class OldSwarmBackend(zoe_master.backends.base.BaseBackend):
"""Zoe backend implementation for old-style stand-alone Docker Swarm."""
def __init__(self, opts):
super().__init__(opts)
self.swarm = SwarmClient(opts)
@classmethod
def init(cls, state):
"""Initializes Swarm backend starting the event monitoring thread."""
global _monitor, _checker
_monitor = SwarmMonitor(state)
_checker = SwarmStateSynchronizer(state)
@classmethod
def shutdown(cls):
"""Performs a clean shutdown of the resources used by Swarm backend."""
_monitor.quit()
_checker.quit()
def spawn_service(self, execution: Execution, service: Service, env_subst_dict: Dict):
"""Spawn a service, translating a Zoe Service into a Docker container."""
copts = DockerContainerOptions()
copts.gelf_log_address = get_conf().gelf_address
copts.name = service.dns_name
copts.set_memory_limit(service.resource_reservation.memory)
copts.network_name = get_conf().overlay_network_name
copts.labels = {
'zoe.execution.name': execution.name,
'zoe.execution.id': str(execution.id),
'zoe.service.name': service.name,
'zoe.service.id': str(service.id),
'zoe.owner': execution.user_id,
'zoe.deployment_name': get_conf().deployment_name,
'zoe.type': 'app_service'
}
if service.is_monitor:
copts.labels['zoe.monitor'] = 'true'
else:
copts.labels['zoe.monitor'] = 'false'
# Always disable autorestart
# if 'disable_autorestart' in execution.description and execution.description['disable_autorestart']:
# log.debug("Autorestart disabled for service {}".format(service.id))
# copts.restart = False
# else:
# copts.restart = not service.is_monitor # Monitor containers should not restart
copts.restart = False
env_vars = zoe_master.backends.common.gen_environment(execution, service, env_subst_dict)
for name, value in env_vars:
copts.add_env_variable(name, value)
for port in service.ports:
if port.expose:
copts.ports.append(port.number)
for volume in service.volumes:
if volume.type == "host_directory":
copts.add_volume_bind(volume.path, volume.mount_point, volume.readonly)
else:
log.warning('Docker Swarm backend does not support volume type {}'.format(volume.type))
# if 'constraints' in service.description:
# for constraint in service.description['constraints']:
# copts.add_constraint(constraint)
fswk = ZoeFSWorkspace()
if fswk.can_be_attached():
copts.add_volume_bind(fswk.get_path(execution.user_id), fswk.get_mountpoint(), False)
copts.add_env_variable('ZOE_WORKSPACE', fswk.get_mountpoint())
# The same dictionary is used for templates in the command
copts.set_command(service.command.format(**env_subst_dict))
try:
cont_info = self.swarm.spawn_container(service.image_name, copts)
except ZoeNotEnoughResourcesException:
service.set_error('Not enough free resources to satisfy reservation request')
raise ZoeStartExecutionRetryException('Not enough free resources to satisfy reservation request for service {}'.format(service.name))
except (ZoeException, ZoeLibException) as e:
raise ZoeStartExecutionFatalException(str(e))
service.set_active(cont_info["backend_id"], cont_info['ip_address'][get_conf().overlay_network_name])
def terminate_service(self, service: Service) -> None:
"""Terminate and delete a container."""
self.swarm.terminate_container(service.backend_id, delete=True)
def platform_state(self) -> ClusterStats:
"""Get the platform state."""
info = self.swarm.info()
for node in info.nodes: # type: NodeStats
node.memory_free = node.memory_total - node.memory_reserved
node.cores_free = node.cores_total - node.cores_reserved
return info
# Copyright (c) 2016, Daniele Venzano
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at