Commit 497650cb authored by Daniele Venzano's avatar Daniele Venzano

Move everything back to one repository for simpler deployment and easier version management

parent ccfed885
......@@ -59,3 +59,4 @@ docs/_build/
target/
.idea/
state.zoe
*.conf
language: python
python:
- "3.4"
- "3.5"
install:
- pip install -r requirements.txt
before_script:
script:
- PYTHONPATH=. py.test --cov=zoe_cmd --cov=zoe_lib --cov=zoe_web --cov=zoe_scheduler --cov=zoe_observer
......@@ -3,28 +3,36 @@ Zoe - Container-based Analytics as a Service
Zoe provides a simple way to provision data analytics applications using Docker Swarm.
This is the main repository, it contains the documentation and a number of scripts, useful to install and develop Zoe.
We are in the process of doing a major refactoring of the entire codebase and the HEAD version is not fully tested.
Resources:
- Main website: http://zoe-analytics.eu
- Documentation: http://docs.zoe-analytics.eu
- How to install: http://zoe-analytics.readthedocs.org/en/latest/install.html (refers to the 0.8.92 version)
Zoe is a distributed application and each component is developed in a separate Git repository.
- Zoe clients: https://github.com/DistributedSystemsGroup/zoe-client
- Zoe scheduler: https://github.com/DistributedSystemsGroup/zoe-scheduler
- How to install: http://zoe-analytics.readthedocs.org/en/latest/install.html
Zoe can use any Docker image, but we provide some for the pre-configured applications available in the client (Spark and HDFS):
- Docker images: https://github.com/DistributedSystemsGroup/zoe-docker-images
|Documentation Status|
Repository contents
-------------------
- `contrib`: supervisord config files
- `docs`: Sphinx documentation
- `scripts`: Scripts used to test Zoe images outside of Zoe
- `zoe_cmd`: Command-line client
- `zoe_lib`: Client-side library, contains also some modules needed by the observer and the scheduler processes
- `zoe_observer`: The Observer process that monitors Swarm and informs the scheduler of various events
- `zoe_scheduler`: The core of Zoe, the server process that listens for client requests and creates the containers on Swarm
- `zoe_web`: The web client interface
|Travis build| |Documentation Status| |Requirements Status|
Zoe is licensed under the terms of the Apache 2.0 license.
.. |Documentation Status| image:: https://readthedocs.org/projects/zoe-analytics/badge/?version=latest
:target: https://readthedocs.org/projects/zoe-analytics/?badge=latest
.. |Requirements Status| image:: https://requires.io/github/DistributedSystemsGroup/zoe/requirements.svg?branch=master
:target: https://requires.io/github/DistributedSystemsGroup/zoe/requirements/?branch=master
:alt: Requirements Status
.. |Travis build| image:: https://travis-ci.org/DistributedSystemsGroup/zoe.svg
:target: https://travis-ci.org/DistributedSystemsGroup/zoe
\ No newline at end of file
#!/usr/bin/env bash
set -e
echo "Get statistics"
./zoe.py stats
echo "Get superuser"
./zoe.py user-get zoeadmin
echo "Create a new user"
./zoe.py user-new --name test --password test --role guest
echo "Delete a user"
./zoe.py user-rm test
echo "Export an application template"
./zoe.py pre-app-export hdfs > /tmp/zoe-hdfs.json
echo "Upload the template as a new application"
./zoe.py app-new /tmp/zoe-hdfs.json
echo "Get the application back from Zoe"
./zoe.py app-get hdfs
echo "Delete the app"
./zoe.py app-rm hdfs
#!/usr/bin/python3
# Copyright (c) 2016, Daniele Venzano
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import multiprocessing
import time
from zoe_lib.exceptions import ZoeException
log = logging.getLogger('ZoeAIO')
def start_sched():
from zoe_scheduler.entrypoint import main
main()
def start_observer():
from zoe_observer.entrypoint import main
main()
sched = multiprocessing.Process(target=start_sched, name='scheduler')
obs = multiprocessing.Process(target=start_observer, name='observer')
sched.start()
obs.start()
try:
while True:
procs = multiprocessing.active_children()
if len(procs) == 0:
raise ZoeException('All Zoe processes crashed, exiting')
elif len(procs) == 1:
raise ZoeException('A process crashed, terminating {} that is still alive'.format(procs[0].name))
else:
time.sleep(1)
except ZoeException as e:
log.error(str(e))
except KeyboardInterrupt:
log.info('CTRL-C detected, exiting...')
finally:
sched.terminate()
obs.terminate()
#!/usr/bin/python3
# Copyright (c) 2016, Daniele Venzano
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from zoe_observer.entrypoint import main
if __name__ == '__main__':
main()
#!/usr/bin/python3
# Copyright (c) 2016, Daniele Venzano
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from zoe_scheduler.entrypoint import main
if __name__ == '__main__':
main()
#!/usr/bin/env python3
# Copyright (c) 2016, Daniele Venzano
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from zoe_web.entrypoint import zoe_web_main
if __name__ == "__main__":
zoe_web_main()
#!/usr/bin/env python3
# Copyright (c) 2016, Daniele Venzano
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from zoe_cmd.entrypoint import zoe
if __name__ == "__main__":
zoe()
This diff is collapsed.
import os
def zoe_url():
return os.environ['ZOE_URL']
def zoe_user():
return os.environ['ZOE_USER']
def zoe_pass():
return os.environ['ZOE_PASS']
"""
This package contains all the Zoe client API.
"""
import requests
import requests.exceptions
from zoe_lib.version import ZOE_API_VERSION
from zoe_lib.exceptions import ZoeAPIException
class ZoeAPIBase:
def __init__(self, url, user, password):
self.url = url
self.user = user
self.password = password
def _rest_get(self, path: str):
url = self.url + '/api/' + ZOE_API_VERSION + path
try:
r = requests.get(url, auth=(self.user, self.password))
except requests.exceptions.Timeout:
raise ZoeAPIException('HTTP connection timeout')
except requests.exceptions.HTTPError:
raise ZoeAPIException('Invalid HTTP response')
except requests.exceptions.ConnectionError as e:
raise ZoeAPIException('Connection error: {}'.format(e))
try:
data = r.json()
except ValueError:
data = None
return data, r.status_code
def _rest_post(self, path, payload):
url = self.url + '/api/' + ZOE_API_VERSION + path
try:
r = requests.post(url, auth=(self.user, self.password), json=payload)
except requests.exceptions.Timeout:
raise ZoeAPIException('HTTP connection timeout')
except requests.exceptions.HTTPError:
raise ZoeAPIException('Invalid HTTP response')
except requests.exceptions.ConnectionError as e:
raise ZoeAPIException('Connection error: {}'.format(e))
try:
data = r.json()
except ValueError:
data = None
return data, r.status_code
def _rest_delete(self, path):
url = self.url + '/api/' + ZOE_API_VERSION + path
try:
r = requests.delete(url, auth=(self.user, self.password))
except requests.exceptions.Timeout:
raise ZoeAPIException('HTTP connection timeout')
except requests.exceptions.HTTPError:
raise ZoeAPIException('Invalid HTTP response')
except requests.exceptions.ConnectionError as e:
raise ZoeAPIException('Connection error: {}'.format(e))
try:
data = r.json()
except ValueError:
data = None
return data, r.status_code
# Copyright (c) 2015, Daniele Venzano
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This module all application-related API calls for Zoe clients.
Applications are tracked by client code, in the client database. The Zoe scheduler component
will receive a valid application description and has no need to access or modify the
application state.
"""
import logging
from zoe_lib import ZoeAPIBase
from zoe_lib.exceptions import ZoeAPIException, InvalidApplicationDescription
log = logging.getLogger(__name__)
class ZoeApplicationAPI(ZoeAPIBase):
def get(self, application_id: int) -> dict:
"""
Return an Application object
:param application_id: the identifier of the application
:return: the application dict
"""
data, status_code = self._rest_get('/application/' + str(application_id))
if status_code == 200:
return data
else:
raise ZoeAPIException(data['message'])
def create(self, description: dict) -> int:
"""
Create a new application and commit it to the database.
:param description: the application description
:return: the new application ID
"""
self._app_check(description)
data, status_code = self._rest_post('/application', description)
if status_code != 201:
raise ZoeAPIException(data['message'])
return data['application_id']
def delete(self, application_id: int):
"""
If the application does not exists an error will be logged.
:param application_id: the application to delete
"""
data, status_code = self._rest_delete('/application/' + str(application_id))
if status_code != 204:
raise ZoeAPIException(data['message'])
def _app_check(self, data):
required_keys = ['name', 'will_end', 'priority', 'requires_binary', 'version']
for k in required_keys:
if k not in data:
raise InvalidApplicationDescription(msg="Missing required key: %s" % k)
try:
int(data["version"])
except ValueError:
raise InvalidApplicationDescription(msg="version field should be an int")
try:
bool(data['will_end'])
except ValueError:
raise InvalidApplicationDescription(msg="will_end field must be a boolean")
try:
bool(data['requires_binary'])
except ValueError:
raise InvalidApplicationDescription(msg="requires_binary field must be a boolean")
try:
priority = int(data['priority'])
except ValueError:
raise InvalidApplicationDescription(msg="priority field must be an int")
if priority < 0 or priority > 1024:
raise InvalidApplicationDescription(msg="priority must be between 0 and 1024")
for p in data['processes']:
self._process_check(p)
found_monitor = False
for p in data['processes']:
if p['monitor']:
found_monitor = True
break
if not found_monitor:
raise InvalidApplicationDescription(msg="at least one process should have monitor set to True")
def _process_check(self, data):
required_keys = ['name', 'docker_image', 'monitor', 'ports', 'required_resources']
for k in required_keys:
if k not in data:
raise InvalidApplicationDescription(msg="Missing required key: %s" % k)
try:
bool(data['monitor'])
except ValueError:
raise InvalidApplicationDescription(msg="monitor field should be a boolean")
if not hasattr(data['ports'], '__iter__'):
raise InvalidApplicationDescription(msg='ports should be a list')
for pp in data['ports']:
self._port_check(pp)
if not isinstance(data['required_resources'], dict):
raise InvalidApplicationDescription(msg="required_resources should be a dictionary")
if 'memory' not in data['required_resources']:
raise InvalidApplicationDescription(msg="Missing required key: required_resources -> memory")
try:
int(data['required_resources']['memory'])
except ValueError:
raise InvalidApplicationDescription(msg="required_resources -> memory field should be an int")
if 'environment' in data:
if not hasattr(data['environment'], '__iter__'):
raise InvalidApplicationDescription(msg='environment should be an iterable')
for e in data['environment']:
if len(e) != 2:
raise InvalidApplicationDescription(msg='environment variable should have a name and a value')
if not isinstance(e[0], str):
raise InvalidApplicationDescription(msg='environment variable names must be strings: {}'.format(e[0]))
if not isinstance(e[1], str):
raise InvalidApplicationDescription(msg='environment variable values must be strings: {}'.format(e[1]))
def _port_check(self, data):
required_keys = ['name', 'protocol', 'port_number', 'is_main_endpoint']
for k in required_keys:
if k not in data:
raise InvalidApplicationDescription(msg="Missing required key: %s" % k)
try:
int(data['port_number'])
except ValueError:
raise InvalidApplicationDescription(msg="port_number field should be an integer")
try:
bool(data['is_main_endpoint'])
except ValueError:
raise InvalidApplicationDescription(msg="is_main_endpoint field should be a boolean")
This diff is collapsed.
# Copyright (c) 2016, Daniele Venzano
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This module contains all container-related API calls that a Zoe client can use.
"""
import logging
from zoe_lib import ZoeAPIBase
from zoe_lib.exceptions import ZoeAPIException
log = logging.getLogger(__name__)
class ZoeContainerAPI(ZoeAPIBase):
def get(self, container_id: int) -> dict:
"""
Retrieve container state.
:param container_id: the container to query
:return:
"""
c, status_code = self._rest_get('/container/' + str(container_id))
if status_code == 200:
return c
elif status_code == 404:
raise ZoeAPIException('container "{}" not found'.format(container_id))
else:
raise ZoeAPIException('error retrieving container {}'.format(container_id))
def log(self, container_id: int) -> str:
"""
Get the standard output/error of the processes running in the given container.
:param container_id: the container to examine
:return: a string containing the log
"""
q = {
'what': 'container logs',
'filters': {'id': container_id}
}
data, status_code = self._rest_post('/query', q)
if status_code != 200:
raise ZoeAPIException(data['message'])
else:
return data[0]
def stats(self, container_id: int) -> dict:
"""
Get low-level statistics about a container. These come directly from Docker.
:param container_id: The container to examine
:return: the statistics. The format of this dictionary is not set in stone and could change.
"""
q = {
'what': 'container stats',
'filters': {'id': container_id}
}
data, status_code = self._rest_post('/query', q)
if status_code != 200:
raise ZoeAPIException(data['message'])
else:
return data
class ZoeException(Exception):
def __init__(self, value='Something happened'):
self.value = value
def __str__(self):
return self.value
class ZoeAPIException(Exception):
def __init__(self, message):
self.message = message
def __str__(self):
return repr(self.message)
class InvalidApplicationDescription(ZoeAPIException):
def __init__(self, msg):
self.message = msg
class ZoeRestAPIException(ZoeException):
def __init__(self, message, status_code=400, headers=None):
super().__init__(value=message)
self.status_code = status_code
self.headers = headers
# Copyright (c) 2016, Daniele Venzano
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This module contains all application execution-related API calls for Zoe clients.
Executions are the domain of the Zoe Scheduler, so all calls in this module require
IPC to the Zoe Scheduler.
"""
import logging
from zoe_lib import ZoeAPIBase
from zoe_lib.exceptions import ZoeAPIException
from zoe_lib.query import ZoeQueryAPI
log = logging.getLogger(__name__)
class ZoeExecutionsAPI(ZoeAPIBase):
def terminate(self, execution_id: int) -> bool:
"""
Terminates an execution.
:param execution_id: the execution to delete
:return: True is the operation was successful, False otherwise
"""
data, status_code = self._rest_delete('/execution/' + str(execution_id))
if status_code == 204:
return
else:
raise ZoeAPIException(data['message'])
def list(self):
"""
Returns a list of all executions for the calling user, all of them if the user is admin.
:return:
"""
data, status_code = self._rest_get('/execution')
if status_code == 200:
return data
else:
raise ZoeAPIException(data['message'])
def execution_get(self, execution_id: int) -> dict:
"""
Retrieve the Execution object for an existing execution.
:param execution_id: the execution to load from the scheduler
:return: the Execution object, or None
"""
data, status_code = self._rest_get('/execution/' + str(execution_id))
if status_code == 200:
return data
else:
return None
def execution_start(self, name: str, application_name: str) -> int:
"""
Submit an application to the scheduler to start a new execution.
:param name: user-provided name of the execution
:param application_name: the application to start
:return: the new Execution object, or None in case of error
"""
api_query = ZoeQueryAPI(self.url, self.user, self.password)
data = api_query.query('application', name=application_name)
if len(data) == 0:
raise ZoeAPIException('No such application')
app = data[0]
execution = {
"application_id": app['id'],
'name': name
}
data, status_code = self._rest_post('/execution', execution)
if status_code != 201:
raise ZoeAPIException(data['message'])
else:
return data['execution_id']
# def execution_exposed_url(execution: Execution) -> str:
# """
# Get the first main endpoint for a given application execution formatted as a URL.
#
# :param execution: the execution to use to build the URL