Commit f1742405 authored by Daniele Venzano

Remove the old swarm backend

parent 89f8c0c4
......@@ -93,7 +93,7 @@ def load_configuration(test_conf=None):
argparser.add_argument('--scheduler-class', help='Scheduler class to use for scheduling ZApps', choices=['ZoeSimpleScheduler', 'ZoeElasticScheduler'], default='ZoeSimpleScheduler')
argparser.add_argument('--scheduler-policy', help='Scheduler policy to use for scheduling ZApps', choices=['FIFO', 'SIZE'], default='FIFO')
argparser.add_argument('--backend', choices=['Swarm', 'OldSwarm', 'Kubernetes'], default='Swarm')
argparser.add_argument('--backend', choices=['Swarm', 'Kubernetes'], default='Swarm')
# Docker Swarm backend options
argparser.add_argument('--backend-swarm-url', help='Swarm/Docker API endpoint (ex.: zk://zk1:2181,zk2:2181 or http://swarm:2380)', default='http://localhost:2375')
......
......@@ -25,11 +25,6 @@ from zoe_master.backends.base import BaseBackend
from zoe_master.backends.service_instance import ServiceInstance
from zoe_master.exceptions import ZoeStartExecutionFatalException, ZoeStartExecutionRetryException, ZoeException
try:
from zoe_master.backends.old_swarm.backend import OldSwarmBackend
except ImportError as ex:
OldSwarmBackend = None
try:
from zoe_master.backends.swarm.backend import SwarmBackend
except ImportError as ex:
......@@ -46,11 +41,7 @@ log = logging.getLogger(__name__)
def _get_backend() -> BaseBackend:
"""Return the right backend instance by reading the global configuration."""
backend_name = get_conf().backend
if backend_name == 'OldSwarm':
if OldSwarmBackend is None:
raise ZoeException('The OldSwarm backend requires docker python version < 2')
return OldSwarmBackend(get_conf())
elif backend_name == 'Kubernetes':
if backend_name == 'Kubernetes':
if KubernetesBackend is None:
raise ZoeException('The Kubernetes backend requires the pykube module')
return KubernetesBackend(get_conf())
......
This diff is collapsed.
# Copyright (c) 2017, Daniele Venzano
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Zoe backend implementation for old-style stand-alone Docker Swarm."""
import logging
from typing import Dict
from zoe_lib.config import get_conf
from zoe_lib.exceptions import ZoeLibException, ZoeNotEnoughResourcesException
from zoe_lib.state import Execution, Service
from zoe_master.backends.old_swarm.api_client import DockerContainerOptions, SwarmClient
from zoe_master.exceptions import ZoeStartExecutionRetryException, ZoeStartExecutionFatalException, ZoeException
from zoe_master.workspace.filesystem import ZoeFSWorkspace
import zoe_master.backends.common
import zoe_master.backends.base
from zoe_master.backends.old_swarm.threads import SwarmMonitor, SwarmStateSynchronizer
from zoe_master.stats import NodeStats, ClusterStats # pylint: disable=unused-import
log = logging.getLogger(__name__)

# These two module-level variables hold the references to the monitor and checker threads.
# They stay None until OldSwarmBackend.init() has been called.
_monitor = None
_checker = None


class OldSwarmBackend(zoe_master.backends.base.BaseBackend):
    """Zoe backend implementation for old-style stand-alone Docker Swarm."""

    def __init__(self, opts):
        """Create the backend and its Swarm API client from the configuration object ``opts``."""
        super().__init__(opts)
        self.swarm = SwarmClient(opts)

    @classmethod
    def init(cls, state):
        """Initializes Swarm backend starting the event monitoring thread.

        Both the monitor and the state synchronizer start themselves from
        their constructors; the references are kept at module level so that
        shutdown() can stop them.
        """
        global _monitor, _checker
        _monitor = SwarmMonitor(state)
        _checker = SwarmStateSynchronizer(state)

    @classmethod
    def shutdown(cls):
        """Performs a clean shutdown of the resources used by Swarm backend.

        Safe to call even if init() was never invoked: threads that were not
        created are simply skipped instead of raising AttributeError on None.
        """
        for thread in (_monitor, _checker):
            if thread is not None:
                thread.quit()

    def spawn_service(self, execution: Execution, service: Service, env_subst_dict: Dict):
        """Spawn a service, translating a Zoe Service into a Docker container.

        :param execution: the execution the service belongs to
        :param service: the service to start
        :param env_subst_dict: substitutions used both for the environment and for the command template
        :raises ZoeStartExecutionRetryException: the Swarm client reported not enough free resources
        :raises ZoeStartExecutionFatalException: any other Zoe error raised while spawning the container
        """
        conf = get_conf()

        copts = DockerContainerOptions()
        copts.gelf_log_address = conf.gelf_address
        copts.name = service.dns_name
        copts.set_memory_limit(service.resource_reservation.memory)
        copts.network_name = conf.overlay_network_name
        copts.labels = {
            'zoe.execution.name': execution.name,
            'zoe.execution.id': str(execution.id),
            'zoe.service.name': service.name,
            'zoe.service.id': str(service.id),
            'zoe.owner': execution.user_id,
            'zoe.deployment_name': conf.deployment_name,
            'zoe.type': 'app_service',
            'zoe.monitor': 'true' if service.is_monitor else 'false',
        }

        # Autorestart is always disabled, regardless of the 'disable_autorestart'
        # flag in the execution description or of the monitor status of the service.
        copts.restart = False

        env_vars = zoe_master.backends.common.gen_environment(execution, service, env_subst_dict)
        for name, value in env_vars:
            copts.add_env_variable(name, value)

        # Only ports explicitly marked as exposed are published
        for port in service.ports:
            if port.expose:
                copts.ports.append(port.number)

        for volume in service.volumes:
            if volume.type == "host_directory":
                copts.add_volume_bind(volume.path, volume.mount_point, volume.readonly)
            else:
                log.warning('Docker Swarm backend does not support volume type %s', volume.type)

        # NOTE: service placement constraints (service.description['constraints'])
        # are not passed to this backend.

        fswk = ZoeFSWorkspace()
        if fswk.can_be_attached():
            # Mount the user workspace read-write and advertise its location to the container
            copts.add_volume_bind(fswk.get_path(execution.user_id), fswk.get_mountpoint(), False)
            copts.add_env_variable('ZOE_WORKSPACE', fswk.get_mountpoint())

        # The same dictionary is used for templates in the command
        copts.set_command(service.command.format(**env_subst_dict))

        try:
            cont_info = self.swarm.spawn_container(service.image_name, copts)
        except ZoeNotEnoughResourcesException as exc:
            service.set_error('Not enough free resources to satisfy reservation request')
            raise ZoeStartExecutionRetryException('Not enough free resources to satisfy reservation request for service {}'.format(service.name)) from exc
        except (ZoeException, ZoeLibException) as exc:
            # Keep the original exception as the explicit cause for easier debugging
            raise ZoeStartExecutionFatalException(str(exc)) from exc

        # The container IP is looked up on the overlay network the container was attached to
        service.set_active(cont_info["backend_id"], cont_info['ip_address'][conf.overlay_network_name])

    def terminate_service(self, service: Service) -> None:
        """Terminate and delete a container."""
        self.swarm.terminate_container(service.backend_id, delete=True)

    def platform_state(self) -> ClusterStats:
        """Get the platform state.

        The free memory and cores of each node are derived here as total
        minus reserved before the statistics object is returned.
        """
        info = self.swarm.info()
        for node in info.nodes:  # type: NodeStats
            node.memory_free = node.memory_total - node.memory_reserved
            node.cores_free = node.cores_total - node.cores_reserved
        return info
# Copyright (c) 2016, Daniele Venzano
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Monitor for the Swarm event stream."""
import logging
import threading
import time
from zoe_lib.config import get_conf
from zoe_lib.state import SQLManager, Service
from zoe_master.backends.old_swarm.api_client import SwarmClient
log = logging.getLogger(__name__)


class SwarmMonitor(threading.Thread):
    """Daemon thread that listens to the Swarm event stream and mirrors
    container events into the backend status of Zoe services."""

    def __init__(self, state: SQLManager) -> None:
        """Configure the thread and start it immediately.

        :param state: state manager used to look up the services touched by events
        """
        super().__init__()
        # The name/daemon attributes replace the deprecated setName()/setDaemon() calls
        self.name = 'monitor'
        self.daemon = True
        self.stop = False
        self.state = state
        self.start()

    def run(self):
        """The thread loop.

        Keeps (re)connecting to the event stream until quit() is called.
        Any exception raised while listening is logged and the connection
        is retried after a short pause.
        """
        log.info("Monitor thread started")
        swarm = SwarmClient(get_conf())
        while not self.stop:
            try:
                # NOTE(review): event_listener presumably returns once the
                # callback returns False - confirm against SwarmClient.
                swarm.event_listener(self._event_cb)
            except Exception:
                log.exception('Exception in monitor thread')
            if not self.stop:
                time.sleep(1)  # wait a bit before retrying the connection

    def _event_cb(self, event: dict) -> bool:
        """Handle one event from the stream.

        :return: False once quit() has been requested, True otherwise
        """
        if event['Type'] == 'container':
            self._container_event(event)
        else:
            log.debug('Unmanaged event type: %s', event['Type'])
            log.debug(str(event))

        return not self.stop

    def _container_event(self, event: dict):
        """Translate a container event into a service backend status update.

        Events for containers without Zoe labels, belonging to another
        deployment, or referring to an unknown service are ignored.
        """
        attributes = event['Actor']['Attributes']
        if 'zoe.deployment_name' not in attributes:
            return
        if attributes['zoe.deployment_name'] != get_conf().deployment_name:
            return

        # NOTE(review): this is the raw label value from the event, most
        # likely a string rather than an int - confirm service_list accepts it.
        service_id = attributes['zoe.service.id']
        service = self.state.service_list(only_one=True, id=service_id)
        if service is None:
            return

        # Actions are matched by substring and the order of the checks matters
        action = event['Action']
        if 'create' in action:
            service.set_backend_status(service.BACKEND_CREATE_STATUS)
        elif 'start' in action:
            service.set_backend_status(service.BACKEND_START_STATUS)
        elif 'die' in action or 'kill' in action or 'stop' in action:
            service.set_backend_status(service.BACKEND_DIE_STATUS)
        elif 'oom' in action:
            service.set_backend_status(service.BACKEND_OOM_STATUS)
            log.warning('Service %s got killed by an OOM condition', service.id)
        elif 'destroy' in action:
            service.set_backend_status(service.BACKEND_DESTROY_STATUS)
        else:
            log.debug('Unmanaged container action: %s', action)

    def quit(self):
        """Stops the thread."""
        self.stop = True
CHECK_INTERVAL = 300  # seconds to sleep between two consecutive checks


class SwarmStateSynchronizer(threading.Thread):
    """The Swarm Checker.

    Daemon thread that periodically compares the services known to Zoe
    with the containers listed by Swarm and fixes the backend status of
    services whose containers exited or disappeared.
    """

    def __init__(self, state: SQLManager) -> None:
        """Configure the thread and start it immediately.

        :param state: state manager used to list the services to check
        """
        super().__init__()
        # The name/daemon attributes replace the deprecated setName()/setDaemon() calls
        self.name = 'checker'
        self.daemon = True
        self.stop = False
        self.state = state
        self.start()

    def _find_dead_service(self, container_list, service: Service):
        """Loop through the containers and try to update the service status.

        A container matching the service backend ID in 'exited' status marks
        the service as dead; if no container matches at all, the service is
        marked as destroyed.
        """
        for container in container_list:
            if container['id'] != service.backend_id:
                continue
            if container['status'] == 'exited':
                log.info('resetting status of service %s, died with no event', service.name)
                service.set_backend_status(service.BACKEND_DIE_STATUS)
            return  # container found, no need to scan the rest of the list

        service.set_backend_status(service.BACKEND_DESTROY_STATUS)

    def _check_once(self, swarm):
        """Run a single pass over all the services that are not already dead or destroyed."""
        service_list = self.state.service_list()
        container_list = swarm.list(only_label={'zoe.deployment_name': get_conf().deployment_name})

        for service in service_list:
            assert isinstance(service, Service)
            if service.backend_status in (service.BACKEND_DESTROY_STATUS, service.BACKEND_DIE_STATUS):
                continue
            self._find_dead_service(container_list, service)

    def run(self):
        """The thread loop.

        A failure in one pass is logged and the check is retried at the next
        interval, instead of terminating the thread silently.
        """
        log.info("Checker thread started")
        swarm = SwarmClient(get_conf())
        while not self.stop:
            try:
                self._check_once(swarm)
            except Exception:
                log.exception('Exception in checker thread')

            time.sleep(CHECK_INTERVAL)

    def quit(self):
        """Stops the thread."""
        self.stop = True
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment