Commit 92783187 authored by Daniele Venzano's avatar Daniele Venzano

Move out all source in other repositories, update README

parent d1fbc62d
......@@ -58,5 +58,4 @@ docs/_build/
# PyBuilder
target/
.idea/
zoe.conf
rndc.key
language: python
python:
- "3.4"
install:
- pip install -r requirements.txt
- pip install pytest
before_script:
- bash tests/resources/create_db.sh
script:
- PYTHONPATH=. py.test --test-environment travis --cov=zoe_scheduler --cov=zoe_client --cov=zoe_web
- PYTHONPATH=. sphinx-build -nW -b html -d docs/_build/doctrees docs/ docs/_build/html
Zoe - Container-based Analytics as a Service
============================================
Zoe provides a simple way to provision data analytics clusters and
workflows using container-based (Docker) virtualization. The guiding
principles are:
- ease of use: data scientists know about data and applications,
systems and resource constraints should be kept out of the way
- ease of administration: we have a strong background in systems and
network administration, so we put all effort possible to make Zoe
easy to install and maintain
- use well-known technologies: we try hard not to reinvent the wheel,
we use Python, ZeroMQ, Docker and DNS
- a clear roadmap: our short and long-term objectives should always be
clear and well defined
- openness: the source code is open: clone, modify, discuss, test and
contribute, you are welcome!
Zoe provides a simple way to provision data analytics applications using Docker Swarm.
This is the main repository, it contains the documentation and a number of scripts, useful to install and develop Zoe.
We are in the process of updating documentation and scripts. For now you can refer to the version tagged 0.8.92 in this repository,
when all components where still together.
Resources:
- Documentation: http://zoe-analytics.readthedocs.org/
- Docker images:
https://github.com/DistributedSystemsGroup/zoe-docker-images
- Main website: http://zoe-analytics.eu
- Documentation: http://docs.zoe-analytics.eu
- How to install: http://zoe-analytics.readthedocs.org/en/latest/install.html
Zoe is a distributed application and each component is developed in a separate Git repository.
- Zoe clients: https://github.com/DistributedSystemsGroup/zoe-client
- Zoe scheduler: https://github.com/DistributedSystemsGroup/zoe-scheduler
- Zoe object storage: https://github.com/DistributedSystemsGroup/zoe-object-storage
Zoe can use any Docker image, but we provide some for the preconfigured applications available in the web interface:
- Docker images: https://github.com/DistributedSystemsGroup/zoe-docker-images
|Pypi version| |Python version| |Documentation Status| |Requirements Status|
|Documentation Status|
Zoe is licensed under the terms of the Apache 2.0 license.
.. |Pypi version| image:: https://img.shields.io/pypi/v/zoe-analytics.svg
:target: https://pypi.python.org/pypi/zoe-analytics
.. |Python version| image:: https://img.shields.io/pypi/pyversions/Zoe.svg
:target: https://pypi.python.org/pypi/zoe-analytics
.. |Documentation Status| image:: https://readthedocs.org/projects/zoe-analytics/badge/?version=latest
:target: https://readthedocs.org/projects/zoe-analytics/?badge=latest
.. |Requirements Status| image:: https://requires.io/github/DistributedSystemsGroup/zoe/requirements.svg?branch=master
:target: https://requires.io/github/DistributedSystemsGroup/zoe/requirements/?branch=master
import logging
from common.exceptions import InvalidApplicationDescription
log = logging.getLogger(__name__)
class ZoeApplication:
def __init__(self):
self.name = ''
self.version = 0
self.will_end = True
self.priority = 512
self.requires_binary = False
self.processes = []
@classmethod
def from_dict(cls, data):
ret = cls()
try:
ret.version = int(data["version"])
except ValueError:
raise InvalidApplicationDescription(msg="version field should be an int")
except KeyError:
raise InvalidApplicationDescription(msg="Missing required key: version")
required_keys = ['name', 'will_end', 'priority', 'requires_binary']
for k in required_keys:
try:
setattr(ret, k, data[k])
except KeyError:
raise InvalidApplicationDescription(msg="Missing required key: %s" % k)
try:
ret.will_end = bool(ret.will_end)
except ValueError:
raise InvalidApplicationDescription(msg="will_end field must be a boolean")
try:
ret.requires_binary = bool(ret.requires_binary)
except ValueError:
raise InvalidApplicationDescription(msg="requires_binary field must be a boolean")
try:
ret.priority = int(ret.priority)
except ValueError:
raise InvalidApplicationDescription(msg="priority field must be an int")
if ret.priority < 0 or ret.priority > 1024:
raise InvalidApplicationDescription(msg="priority must be between 0 and 1024")
for p in data['processes']:
ret.processes.append(ZoeApplicationProcess.from_dict(p))
found_monitor = False
for p in ret.processes:
if p.monitor:
found_monitor = True
break
if not found_monitor:
raise InvalidApplicationDescription(msg="at least one process should have monitor set to True")
return ret
def to_dict(self) -> dict:
ret = {
'name': self.name,
'version': self.version,
'will_end': self.will_end,
'priority': self.priority,
'requires_binary': self.requires_binary,
'processes': []
}
for p in self.processes:
ret['processes'].append(p.to_dict())
return ret
def total_memory(self) -> int:
memory = 0
for p in self.processes:
memory += p.required_resources['memory']
return memory
def container_count(self) -> int:
return len(self.processes)
class ZoeProcessEndpoint:
def __init__(self):
self.name = ''
self.protocol = ''
self.port_number = 0
self.path = ''
self.is_main_endpoint = False
def to_dict(self) -> dict:
return {
'name': self.name,
'protocol': self.protocol,
'port_number': self.port_number,
'path': self.path,
'is_main_endpoint': self.is_main_endpoint
}
@classmethod
def from_dict(cls, data):
ret = cls()
required_keys = ['name', 'protocol', 'port_number', 'is_main_endpoint']
for k in required_keys:
try:
setattr(ret, k, data[k])
except KeyError:
raise InvalidApplicationDescription(msg="Missing required key: %s" % k)
try:
ret.port_number = int(ret.port_number)
except ValueError:
raise InvalidApplicationDescription(msg="port_number field should be an integer")
try:
ret.is_main_endpoint = bool(ret.is_main_endpoint)
except ValueError:
raise InvalidApplicationDescription(msg="is_main_endpoint field should be a boolean")
if 'path' in data:
ret.path = data['path']
return ret
def get_url(self, address):
return self.protocol + "://" + address + ":{}".format(self.port_number) + self.path
class ZoeApplicationProcess:
def __init__(self):
self.name = ''
self.version = 0
self.docker_image = ''
self.monitor = False # if this process dies, the whole application is considered as complete and the execution is terminated
self.ports = [] # A list of ZoeProcessEndpoint
self.required_resources = {}
self.environment = [] # Environment variables to pass to Docker
self.command = None # Commandline to pass to the Docker container
def to_dict(self) -> dict:
ret = {
'name': self.name,
'version': self.version,
'docker_image': self.docker_image,
'monitor': self.monitor,
'ports': [p.to_dict() for p in self.ports],
'required_resources': self.required_resources.copy(),
'environment': self.environment.copy(),
'command': self.command
}
return ret
@classmethod
def from_dict(cls, data):
ret = cls()
try:
ret.version = int(data["version"])
except ValueError:
raise InvalidApplicationDescription(msg="version field should be an int")
except KeyError:
raise InvalidApplicationDescription(msg="Missing required key: version")
required_keys = ['name', 'docker_image', 'monitor']
for k in required_keys:
try:
setattr(ret, k, data[k])
except KeyError:
raise InvalidApplicationDescription(msg="Missing required key: %s" % k)
try:
ret.monitor = bool(ret.monitor)
except ValueError:
raise InvalidApplicationDescription(msg="monitor field should be a boolean")
if 'ports' not in data:
raise InvalidApplicationDescription(msg="Missing required key: ports")
if not hasattr(data['ports'], '__iter__'):
raise InvalidApplicationDescription(msg='ports should be an iterable')
for pp in data['ports']:
ret.ports.append(ZoeProcessEndpoint.from_dict(pp))
if 'required_resources' not in data:
raise InvalidApplicationDescription(msg="Missing required key: required_resources")
if not isinstance(data['required_resources'], dict):
raise InvalidApplicationDescription(msg="required_resources should be a dictionary")
if 'memory' not in data['required_resources']:
raise InvalidApplicationDescription(msg="Missing required key: required_resources -> memory")
ret.required_resources = data['required_resources'].copy()
try:
ret.required_resources['memory'] = int(ret.required_resources['memory'])
except ValueError:
raise InvalidApplicationDescription(msg="required_resources -> memory field should be an int")
if 'environment' in data:
if not hasattr(data['environment'], '__iter__'):
raise InvalidApplicationDescription(msg='environment should be an iterable')
ret.environment = data['environment'].copy()
for e in ret.environment:
if len(e) != 2:
raise InvalidApplicationDescription(msg='environment variable should have a name and a value')
if not isinstance(e[0], str):
raise InvalidApplicationDescription(msg='environment variable names must be strings: {}'.format(e[0]))
if not isinstance(e[1], str):
raise InvalidApplicationDescription(msg='environment variable values must be strings: {}'.format(e[1]))
if 'command' in data:
ret.command = data['command']
return ret
def exposed_endpoint(self) -> ZoeProcessEndpoint:
for p in self.ports:
assert isinstance(p, ZoeProcessEndpoint)
if p.is_main_endpoint:
return p
return None
# Taken from https://github.com/bw2/ConfigArgParse
import argparse
from collections import OrderedDict
import os
import re
import sys
import types
from io import StringIO
__version__ = "0.9.4"
ACTION_TYPES_THAT_DONT_NEED_A_VALUE = {argparse._StoreTrueAction,
argparse._StoreFalseAction, argparse._CountAction,
argparse._StoreConstAction, argparse._AppendConstAction}
# global ArgumentParser instances
_parsers = {}
def initArgumentParser(name=None, **kwargs):
"""Creates a global ArgumentParser instance with the given name,
passing any args other than "name" to the ArgumentParser constructor.
This instance can then be retrieved using getArgumentParser(..)
"""
if name is None:
name = "default"
if name in _parsers:
raise ValueError("kwargs besides 'name' can only be passed in the first time. '%s' ArgumentParser already exists: %s" % (name, _parsers[name]))
kwargs.setdefault('formatter_class', argparse.ArgumentDefaultsHelpFormatter)
kwargs.setdefault('conflict_handler', 'resolve')
_parsers[name] = ArgumentParser(**kwargs)
def getArgumentParser(name=None, **kwargs):
"""Returns the global ArgumentParser instance with the given name. The 1st
time this function is called, a new ArgumentParser instance will be created
for the given name, and any args other than "name" will be passed on to the
ArgumentParser constructor.
"""
if name is None:
name = "default"
if len(kwargs) > 0 or name not in _parsers:
initArgumentParser(name, **kwargs)
return _parsers[name]
class ArgumentDefaultsRawHelpFormatter(argparse.ArgumentDefaultsHelpFormatter, argparse.RawTextHelpFormatter, argparse.RawDescriptionHelpFormatter):
"""HelpFormatter that adds default values AND doesn't do line-wrapping"""
pass
# used while parsing args to keep track of where they came from
_COMMAND_LINE_SOURCE_KEY = "command_line"
_ENV_VAR_SOURCE_KEY = "environment_variables"
_CONFIG_FILE_SOURCE_KEY = "config_file"
_DEFAULTS_SOURCE_KEY = "defaults"
class ArgumentParser(argparse.ArgumentParser):
"""Drop-in replacement for argparse.ArgumentParser that adds support for
environment variables and .ini or .yaml-style config files.
"""
def __init__(self,
prog=None,
usage=None,
description=None,
epilog=None,
version=None,
parents=[],
formatter_class=argparse.HelpFormatter,
prefix_chars='-',
fromfile_prefix_chars=None,
argument_default=None,
conflict_handler='error',
add_help=True,
add_config_file_help=True,
add_env_var_help=True,
auto_env_var_prefix=None,
default_config_files=[],
ignore_unknown_config_file_keys=False,
allow_unknown_config_file_keys=False, # deprecated
args_for_setting_config_path=[],
config_arg_is_required=False,
config_arg_help_message="config file path",
args_for_writing_out_config_file=[],
write_out_config_file_arg_help_message="takes the current command line "
"args and writes them out to a config file at the given path, then "
"exits"
):
"""Supports all the same args as the argparse.ArgumentParser
constructor, as well as the following additional args.
Additional Args:
add_config_file_help: Whether to add a description of config file
syntax to the help message.
add_env_var_help: Whether to add something to the help message for
args that can be set through environment variables.
auto_env_var_prefix: If set to a string instead of None, all config-
file-settable options will become also settable via environment
variables whose names are this prefix followed by the config
file key, all in upper case. (eg. setting this to "foo_" will
allow an arg like "--arg1" to also be set via env. var FOO_ARG1)
default_config_files: When specified, this list of config files will
be parsed in order, with the values from each config file
taking precedence over pervious ones. This allows an application
to look for config files in multiple standard locations such as
the install directory, home directory, and current directory:
["<install dir>/app_config.ini",
"~/.my_app_config.ini",
"./app_config.txt"]
ignore_unknown_config_file_keys: If true, settings that are found
in a config file but don't correspond to any defined
configargparse args will be ignored. If false, they will be
processed and appended to the commandline like other args, and
can be retrieved using parse_known_args() instead of parse_args()
allow_unknown_config_file_keys:
@deprecated
Use ignore_unknown_config_file_keys instead.
If true, settings that are found in a config file but don't
correspond to any defined configargparse args, will still be
processed and appended to the command line (eg. for
parsing with parse_known_args()). If false, they will be ignored.
args_for_setting_config_path: A list of one or more command line
args to be used for specifying the config file path
(eg. ["-c", "--config-file"]). Default: []
config_arg_is_required: When args_for_setting_config_path is set,
set this to True to always require users to provide a config path.
config_arg_help_message: the help message to use for the
args listed in args_for_setting_config_path.
args_for_writing_out_config_file: A list of one or more command line
args to use for specifying a config file output path. If
provided, these args cause configargparse to write out a config
file with settings based on the other provided commandline args,
environment variants and defaults, and then to exit.
(eg. ["-w", "--write-out-config-file"]). Default: []
write_out_config_file_arg_help_message: The help message to use for
the args in args_for_writing_out_config_file.
"""
self._add_config_file_help = add_config_file_help
self._add_env_var_help = add_env_var_help
self._auto_env_var_prefix = auto_env_var_prefix
# extract kwargs that can be passed to the super constructor
kwargs_for_super = {k: v for k, v in locals().items() if k in [
"prog", "usage", "description", "epilog", "version", "parents",
"formatter_class", "prefix_chars", "fromfile_prefix_chars",
"argument_default", "conflict_handler", "add_help"]}
if sys.version_info >= (3, 3) and "version" in kwargs_for_super:
del kwargs_for_super["version"] # version arg deprecated in v3.3
argparse.ArgumentParser.__init__(self, **kwargs_for_super)
# parse the additional args
self._default_config_files = default_config_files
self._ignore_unknown_config_file_keys = ignore_unknown_config_file_keys \
or allow_unknown_config_file_keys
if args_for_setting_config_path:
self.add_argument(*args_for_setting_config_path, dest="config_file",
required=config_arg_is_required, help=config_arg_help_message,
is_config_file_arg=True)
if args_for_writing_out_config_file:
self.add_argument(*args_for_writing_out_config_file,
dest="write_out_config_file_to_this_path",
metavar="CONFIG_OUTPUT_PATH",
help=write_out_config_file_arg_help_message,
is_write_out_config_file_arg=True)
def parse_args(self, args=None, namespace=None,
config_file_contents=None, env_vars=os.environ):
"""Supports all the same args as the ArgumentParser.parse_args(..),
as well as the following additional args.
Additional Args:
args: a list of args as in argparse, or a string (eg. "-x -y bla")
config_file_contents: String. Used for testing.
env_vars: Dictionary. Used for testing.
"""
args, argv = self.parse_known_args(args=args,
namespace=namespace,
config_file_contents=config_file_contents,
env_vars=env_vars)
if argv:
self.error('unrecognized arguments: %s' % ' '.join(argv))
return args
def parse_known_args(self, args=None, namespace=None,
config_file_contents=None, env_vars=os.environ):
"""Supports all the same args as the ArgumentParser.parse_args(..),
as well as the following additional args.
Additional Args:
args: a list of args as in argparse, or a string (eg. "-x -y bla")
config_file_contents: String. Used for testing.
env_vars: Dictionary. Used for testing.
"""
if args is None:
args = sys.argv[1:]
elif type(args) == str:
args = args.split()
else:
args = list(args)
for a in self._actions:
a.is_positional_arg = not a.option_strings
# maps string describing the source (eg. env var) to a settings dict
# to keep track of where values came from (used by print_values())
self._source_to_settings = OrderedDict()
if args:
a_v_pair = (None, list(args)) # copy args list to isolate changes
self._source_to_settings[_COMMAND_LINE_SOURCE_KEY] = {'': a_v_pair}
# handle auto_env_var_prefix __init__ arg by setting a.env_var as needed
if self._auto_env_var_prefix is not None:
for a in self._actions:
config_file_keys = self.get_possible_config_keys(a)
if config_file_keys and not (a.env_var or a.is_positional_arg
or a.is_config_file_arg or a.is_write_out_config_file_arg):
stripped_config_file_key = config_file_keys[0].strip(
self.prefix_chars)
a.env_var = (self._auto_env_var_prefix +
stripped_config_file_key).upper()
# add env var settings to the commandline that aren't there already
env_var_args = []
actions_with_env_var_values = [a for a in self._actions
if not a.is_positional_arg and a.env_var and a.env_var in env_vars
and not already_on_command_line(args, a.option_strings)]
for action in actions_with_env_var_values:
key = action.env_var
value = env_vars[key]
env_var_args += self.convert_setting_to_command_line_arg(
action, key, value)
args += env_var_args
if env_var_args:
self._source_to_settings[_ENV_VAR_SOURCE_KEY] = OrderedDict(
[(a.env_var, (a, env_vars[a.env_var]))
for a in actions_with_env_var_values])
# prepare for reading config file(s)
known_config_keys = {config_key: action for action in self._actions
for config_key in self.get_possible_config_keys(action)}
# open the config file(s)
if config_file_contents:
stream = StringIO(config_file_contents)
stream.name = "method arg"
config_streams = [stream]
else:
config_streams = self._open_config_files(args)
# parse each config file
for stream in config_streams[::-1]:
try:
config_settings = self.parse_config_file(stream)
finally:
if hasattr(stream, "close"):
stream.close()
# add each config setting to the commandline unless it's there already
config_args = []
for key, value in config_settings.items():
if key in known_config_keys:
action = known_config_keys[key]
discard_this_key = already_on_command_line(
args, action.option_strings)
else:
action = None
discard_this_key = self._ignore_unknown_config_file_keys or \
already_on_command_line(
args,
self.get_command_line_key_for_unknown_config_file_setting(key))
if not discard_this_key:
config_args += self.convert_setting_to_command_line_arg(
action, key, value)
source_key = "%s|%s" % (_CONFIG_FILE_SOURCE_KEY, stream.name)
if source_key not in self._source_to_settings:
self._source_to_settings[source_key] = OrderedDict()
self._source_to_settings[source_key][key] = (action, value)
args += config_args
# save default settings for use by print_values()
default_settings = OrderedDict()
for action in self._actions:
cares_about_default_value = (not action.is_positional_arg or
action.nargs in [OPTIONAL, ZERO_OR_MORE])
if (already_on_command_line(args, action.option_strings) or
not cares_about_default_value or
action.default is None or
action.default == SUPPRESS or
type(action) in ACTION_TYPES_THAT_DONT_NEED_A_VALUE):
continue
else:
if action.option_strings:
key = action.option_strings[-1]
else:
key = action.dest
default_settings[key] = (action, str(action.default))
if default_settings:
self._source_to_settings[_DEFAULTS_SOURCE_KEY] = default_settings
# parse all args (including commandline, config file, and env var)
namespace, unknown_args = argparse.ArgumentParser.parse_known_args(
self, args=args, namespace=namespace)
# handle any args that have is_write_out_config_file_arg set to true
user_write_out_config_file_arg_actions = [a for a in self._actions
if getattr(a, "is_write_out_config_file_arg", False)]
if user_write_out_config_file_arg_actions:
output_file_paths = []
for action in user_write_out_config_file_arg_actions:
# check if the user specified this arg on the commandline
output_file_path = getattr(namespace, action.dest, None)
if output_file_path:
# validate the output file path
try:
with open(output_file_path, "w") as output_file:
output_file_paths.append(output_file_path)
except IOError as e:
raise ValueError("Couldn't open %s for writing: %s" % (
output_file_path, e))
if output_file_paths:
# generate the config file contents
contents = self.convert_parsed_args_to_config_file_contents(
self._source_to_settings, namespace)
for output_file_path in output_file_paths:
with open(output_file_path, "w") as output_file:
output_file.write(contents)
if len(output_file_paths) == 1:
output_file_paths = output_file_paths[0]
self.exit(0, "Wrote config file to " + str(output_file_paths))
return namespace, unknown_args
def parse_config_file(self, stream):
"""Parses a config file and return a dictionary of settings"""
settings = OrderedDict()
for i, line in enumerate(stream):
line = line.strip()
if not line or line[0] in ["#", ";", "["] or line.startswith("---"):