Commit 5b48b761 authored by Daniele Venzano

Merge branch 'master' into new-scheduler

Conflicts:
	zoe_lib/exec_logs.py
	zoe_lib/swarm_client.py
	zoe_master/execution_manager.py
parents 3b1f53b8 016d9d21
......@@ -2,11 +2,12 @@
## Version 0.10.1
* Service discovery API endpoint: simple, read-only, unauthenticated access to a list DNS names of services. Needed for frameworks that need a list of hosts for configuration, can be used by scripts in the images.
* Add `start -s` option to the Zoe commandline client to have it wait for execution termination and stream the log meanwhile.
* Service discovery API endpoint: simple, read-only, unauthenticated access to a list of DNS names of services. Needed for frameworks that need a list of hosts for configuration, can be used by scripts in the images.
* Add `-s` option to the `start` command of the commandline client to have it wait for execution termination and stream the log meanwhile.
* Add the `workspace-deployment-path` option in the configuration file. It should be used when the workspace path must be built from something other than the deployment name.
* All errors generated by Docker while creating containers are now considered fatal, except the "not enough free resources" one. When a fatal error is generated, Zoe will not try to start the execution again.
* Error messages generated by Docker are now exposed to the user
* (experimental) If the configuration option `service-log-path` is set, container output is saved from Docker into the specified directory. If the logs are big, this can have a significant impact on execution termination time.
## Version 0.10.0
......
#!/usr/bin/env bash
set -e
export ZOE_USER=admin
export ZOE_PASS=changeme
export ZOE_URL=http://localhost:4850
echo "Get statistics"
./zoe.py stats
echo "Get superuser"
./zoe.py user-get zoeadmin
echo "Create a new user"
./zoe.py user-new --name test --password test --role guest
echo "Delete a user"
./zoe.py user-rm test
echo "Export an application template"
./zoe.py pre-app-export hdfs > /tmp/zoe-hdfs.json
#!/usr/bin/env bash
PYTHONPATH=. sphinx-build -nW -b html -d docs/_build/doctrees docs/ docs/_build/html
......@@ -21,6 +21,8 @@ import os
# documentation root, use os.path.abspath to make it absolute, like shown here.
sys.path.insert(0, os.path.abspath(os.path.join(os.path.basename(__file__), "..", "..")))
from zoe_lib.version import ZOE_VERSION
# -- General configuration ------------------------------------------------
# If your documentation needs a minimal Sphinx version, state it here.
......@@ -58,7 +60,7 @@ author = 'Daniele Venzano'
# built documents.
#
# The short X.Y version.
version = '0.8.92'
version = ZOE_VERSION
# The full version, including alpha/beta/rc tags.
release = version
......
.. _contributing:
How to contribute
=================
Contributing to Zoe
===================
Zoe applications
Zoe is an open source project: we welcome any kind of contribution to the code base, the documentation, and the general architecture. Bug reports and feature requests are also accepted and treasured.
To better work together we have established some rules on how to contribute.
Bug reports and feature requests
--------------------------------
Bug reports and feature requests are handled through the GitHub issue system.
For Zoe itself, issues should be reported here: `https://github.com/DistributedSystemsGroup/zoe/issues <https://github.com/DistributedSystemsGroup/zoe/issues>`_
For the sample Zoe applications we provide, reports should be sent here: `https://github.com/DistributedSystemsGroup/zoe-applications/issues <https://github.com/DistributedSystemsGroup/zoe-applications/issues>`_
The issue system should be used only for bug reports or feature requests. If you have more general questions, or need help setting up Zoe or running some application, please use the mailing list.
The mailing list
----------------
Zoe applications are maintained in the `zoe-applications <https://github.com/DistributedSystemsGroup/zoe-applications>`_ repository, feel free to fork it and generate pull requests for new applications, frameworks and services.
The first step is to subscribe to the mailing list: `http://www.freelists.org/list/zoe <http://www.freelists.org/list/zoe>`_
Check also the :ref:`howto_zapp` document for help on building ZApps from the already-available services and the :ref:`zapp_format`.
Use the mailing list to stay up-to-date with what other developers are working on, to discuss and propose your ideas. We prefer small and incremental contributions, so it is important to keep in touch with the rest of the community to receive feedback. This way your contribution will be much more easily accepted.
Developer documentation
-----------------------
Code and documentation contributions
------------------------------------
:ref:`modindex`
To contribute code and/or documentation you should follow this workflow:
.. toctree::
:maxdepth: 2
1. announce your idea on the mailing list, to prevent duplicated work
2. fork the Zoe repository via GitHub (if you don't already have a fork)
3. create a branch that will hold your changes
4. ... develop and debug ...
5. generate a pull request via GitHub
developer/introduction
developer/zapp_format
developer/rest-api
developer/auth
developer/api-endpoint
developer/master-api
developer/scheduler
Zoe maintainers will review pull requests, give constructive feedback and eventually merge them.
Zoe - Container-based Analytics as a Service
============================================
Zoe uses `Docker Swarm <https://docs.docker.com/swarm/>`_ to run Analytics as a Service applications.
Zoe is user-facing software that hides the complexities of managing resources, configuring and deploying complex distributed applications on private clouds. The main focus is data analysis applications, such as `Spark <http://spark.apache.org/>`_, but Zoe has a very flexible application description format that lets you easily describe any kind of application.
Zoe is fast: it can create a fully-functional Spark cluster of 20 nodes in less than five seconds.
Zoe uses containerization technology to provide fast startup times and process isolation. A smart scheduler is able to prioritize executions according to several policies, maximising the use of the available capacity and maintaining a queue of executions that are ready to run.
Zoe is easy to use: just a few clicks on a web interface is all that is needed to configure and start a variety of data-intensive applications. Applications are flexible compositions of Frameworks: for example Jupyter and Spark can be composed to form a Zoe application (a ZApp!).
Zoe is open: applications can be described by a JSON file, anything that can run in a Docker container can be run within Zoe (but we concentrate on data intensive applications).
Zoe is smart: not everyone has infinite resources like Amazon or Google, Zoe is built for small clouds, physical or virtual, and is built to maximize the use of available capacity.
Zoe can use a Docker Swarm located anywhere, on Amazon or in your own private cloud, and does not need exclusive access to it, meaning your Swarm could also be running other services: Zoe will not interfere with them. Zoe is meant as a private service, adding data-analytics capabilities to existing, or new, Docker clusters.
Zoe currently supports Docker Swarm as the container backend. It can be located anywhere, on Amazon or in your own private cloud, and Zoe does not need exclusive access to it, meaning your Swarm could also be running other services: Zoe will not interfere with them. Zoe is meant as a private service, adding data-analytics capabilities to new or existing clusters.
The core components of Zoe are application-independent and users are free to create and execute application descriptions for any kind of service combination. Zoe targets analytics services in particular: we offer a number of tested sample ZApps and Frameworks that can be used as starting examples.
......@@ -28,7 +22,18 @@ A number of predefined applications for testing and customization can be found a
Have a look at the :ref:`vision` and at the :ref:`roadmap` to see what we are currently planning and feel free to `contact us <daniele.venzano@eurecom.fr>`_ via email or through the `GitHub issue tracker <https://github.com/DistributedSystemsGroup/zoe/issues>`_ to pose questions or suggest ideas and new features.
Contents:
A note on terminology (needs to be updated)
-------------------------------------------
We put a lot of effort into using consistent naming throughout the documentation, the software, the website and all the other resources associated with Zoe. Check the :ref:`architecture` document for the details, but here is a quick reference:
* Zoe Components: the Zoe processes, the Master, the API and the service monitor
* Zoe Applications: a composition of Zoe Frameworks, the highest-level entry in the application descriptions that the user submits to Zoe; can be abbreviated as ZApp(s).
* Zoe Frameworks: a composition of Zoe Services, is used to describe re-usable pieces of Zoe Applications, like a Spark cluster
* Zoe Services: one to one with a Docker container, describes a single service/process tree running in an isolated container
Contents
--------
.. toctree::
:maxdepth: 2
......@@ -38,30 +43,50 @@ Contents:
logging
monitoring
architecture
howto_zapp
rest-api
vision
motivations
roadmap
contributing
Zoe applications
----------------
A note on terminology
---------------------
:ref:`modindex`
We are spending a lot of effort to use consistent naming throughout the documentation, the software, the website and all the other resources associated with Zoe. Check the :ref:`architecture` document for the details, but here is a quick reference:
.. toctree::
:maxdepth: 2
* Zoe Components: the Zoe processes, the Master, the API and the service monitor
* Zoe Applications: a composition of Zoe Frameworks, the highest-level entry in the application descriptions that the user submits to Zoe; can be abbreviated as ZApp(s).
* Zoe Frameworks: a composition of Zoe Services, is used to describe re-usable pieces of Zoe Applications, like a Spark cluster
* Zoe Services: one to one with a Docker container, describes a single service/process tree running in an isolated container
zapps/classification
zapps/howto_zapp
zapps/zapp_format
zapps/contributing
Developer documentation
-----------------------
:ref:`modindex`
.. toctree::
:maxdepth: 2
developer/introduction
developer/rest-api
developer/auth
developer/api-endpoint
developer/master-api
developer/scheduler
Contacts
========
`Zoe website <http://zoe-analytics.eu>`_
Zoe is developed as part of the research activities of the `Distributed Systems Group <http://distsysgroup.wordpress.com>`_ at `Eurecom <http://www.eurecom.fr>`_, in
Sophia Antipolis, France.
`Zoe mailing list <http://www.freelists.org/list/zoe>`_
About us
========
Zoe is developed as part of the research activities of the `Distributed Systems Group <http://distsysgroup.wordpress.com>`_ at `Eurecom <http://www.eurecom.fr>`_, in Sophia Antipolis, France.
The main discussion area for issues, questions and feature requests is the `GitHub issue tracker <https://github.com/DistributedSystemsGroup/zoe/issues>`_.
......@@ -3,11 +3,15 @@
Container logs
==============
By default Zoe does not involve itself with the output from container processes. The logs can be retrieved with the usual Docker command ``docker logs`` while a container is alive and then they are lost forever when the container is deleted. This solution however does not scale very well: users need to have access to the docker commandline tools and when containers produce lots of output Docker will fill-up the disk of whatever Docker host the container is running in.
By default Zoe does not involve itself with the output from container processes. The logs can be retrieved with the usual Docker command ``docker logs`` while a container is alive; they are lost forever when the container is deleted. This solution however does not scale very well: to examine logs, users need to have access to the Docker command-line tools and to the Swarm they are running in.
Using the ``gelf-address`` option of the Zoe Master process, Zoe can configure Docker to send the container outputs to an external destination in GELF format. GELF is the richest format supported by Docker and can be ingested by a number of tools such as Graylog and Logstash. When that option is set all containers created by Zoe will send their output (standard output and standard error) to the destination specified.
To set up a more convenient logging solution, Zoe provides two alternatives:
Docker is instructed to add all Zoe-defined tags to the GELF messages, so that they can be aggregated by Zoe execution, Zoe user, etc.
1. Using the ``gelf-address`` option, Zoe can configure Docker to send the container outputs to an external destination in GELF format. GELF is the richest format supported by Docker and can be ingested by a number of tools such as Graylog and Logstash. When that option is set all containers created by Zoe will send their output (standard output and standard error) to the destination specified. Docker is instructed to add all Zoe-defined tags to the GELF messages, so that they can be aggregated by Zoe execution, Zoe user, etc. A popular logging stack that supports GELF is `ELK <https://www.elastic.co/products>`_.
2. Using the ``service-log-path`` option: logs will be stored in the specified directory when the execution terminates. The directory can be exposed via HTTP or NFS to give users access. On the other hand, if the logs are too big, Zoe will spend a long time saving the data and resources will not be freed until the copying process has finished.
In our experience, web interfaces like Kibana or Graylog are not useful to Zoe users: they want to quickly dig through the logs of their executions to find an error or an interesting number to correlate with some other number in some other log. The web interfaces (option 1) are slow and cluttered compared to using grep on a text file (option 2).
Which alternative is good for you depends on the usage pattern of your users, your log auditing requirements, etc.
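As a minimal sketch, and assuming an INI-style Zoe Master configuration file (only the two option names come from this section; the file layout, addresses and paths below are illustrative), the two alternatives could be enabled like this:

# Alternative 1: ship container output in GELF format to an external collector
# (for example a Logstash or Graylog instance listening on UDP port 12201)
gelf-address = udp://gelf-collector.example.com:12201

# Alternative 2: copy container output to a local directory when the execution
# terminates; the directory can then be exported to users via HTTP or NFS
service-log-path = /var/lib/zoe/service-logs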
Optional Kafka support
----------------------
......@@ -17,3 +21,5 @@ Zoe also provides a Zoe Logger process, in case you prefer to use Kafka in your
The logger process is very small and simple; you can modify it to suit your needs and convert logs in any format to any destination you prefer. It lives in its own repository, here: https://github.com/DistributedSystemsGroup/zoe-logger
If you are interested in sending container output to Kafka, please make your voice heard at `this Docker issue <https://github.com/docker/docker/issues/21271>`_ for a more production-friendly Docker-Kafka integration.
Please note that the ``zoe-logger`` is more or less a toy and can be used as a starting point to develop a more robust and scalable solution. Also, it is currently unmaintained.
.. _motivation:
The motivation behind Zoe
=========================
The fundamental idea of Zoe is that a user who wants to run data analytics applications should not be bothered by system details, such as how to configure the amount of RAM a Spark Executor should use, how many cores are available in the system or even how many worker nodes should be used to meet an execution deadline.
Moreover, final users require a lot of flexibility: they want to test new analytics systems and algorithms as soon as possible, without having to wait for some approval procedure to go through the IT department. Zoe proposes a flexible model for application descriptions: their management can be left entirely to the final user, but they can also be prepared very quickly, in whole or in part, by an IT department, which is sometimes more knowledgeable about resource limits and environment variables. We also plan to offer a number of building blocks (Zoe Frameworks) that can be composed to make Zoe Applications.
Finally we feel that there is a lack of solutions in the field of private clouds, where resources are not infinite and data layers (data-sets) may be shared between different users. All the current Open Source solutions we are aware of target the public cloud use case and try, more or less, to mimic what Amazon and other big names are doing in their data-centers.
Zoe strives to satisfy the following requirements:
* easy to use for the end-user
* easy to manage for the system administrator, easy to integrate in existing data-centers/clouds/VM deployments
* short (a few seconds) reaction times to user requests or other system events
* smart queuing and scheduling of applications when resources are critical
Kubernetes, OpenStack Sahara, Mesos and YARN are the projects that, each in its own way, come closest to Zoe, without addressing the needs we have.
Kubernetes (Borg)
-----------------
Kubernetes is a very complex system, both to deploy and to use. It takes some of the architectural principles from Google Borg and targets data centers with vast amounts of resources. We feel that while Kubernetes can certainly run analytic services in containers, it does so at a very high complexity cost for smaller setups. Moreover, in our opinion, certain scheduler choices in how preemption is managed do not apply well to environments with a limited set of users and compute resources, causing a less than optimal resource usage.
OpenStack Sahara
----------------
We know well `OpenStack Sahara <https://wiki.openstack.org/wiki/Sahara>`_, as we have been using it since 2013 and we contributed the Spark plugin. We feel that Sahara has limitations in:
* software support: Sahara plugins support a limited set of data-intensive frameworks; adding a new one means writing a new Sahara plugin, and even adding support for a new version requires going through a one- to two-week (on average) review process.
* lack of scheduling: Sahara makes the assumption that you have infinite resources. When you try to launch a new cluster and there are not enough resources available, the request fails and the user is left doing application and resources scheduling by hand.
* usability: setting up everything that is needed to run an EDP job is cumbersome and error-prone. The user has to provide too many details in too many different places.
Moreover, changes to Sahara need to go through a lengthy review process that on one side tries to ensure high quality, but on the other slows down development, especially of major architectural changes, like the ones needed to address the concerns listed above.
Mesos
-----
Mesos is marketing itself as a data-center operating system. Zoe has no such high profile objective: while Zoe schedules distributed applications, it has no knowledge of the applications it is scheduling and, even more importantly, does not require any change in the applications themselves to be run in Zoe.
Mesos requires that each application provides two Mesos-specific components: a scheduler and an executor. Zoe has no such requirements and runs applications unmodified.
YARN
----
YARN, from our point of view, has many similarities with Mesos. It requires application support. Moreover it is integrated in the Hadoop distribution and, while recent efforts are pushing toward making YARN stand up on its own, it is currently tailored for Hadoop applications.
.. _vision:
The motivation behind Zoe
=========================
Vision for Zoe
==============
The fundamental idea of Zoe is that a user who wants run data analytics applications should not be bothered by systems details, such as how to configure the amount of RAM a Spark Executor should use, how many cores are available in the system or even how many worker nodes should be used to meet an execution deadline.
Zoe's focus is data analytics. This focus helps define a clear set of objectives and priorities for the project and avoids the risk of competing directly with generic infrastructure managers like Kubernetes or Swarm. Zoe instead sits on top of these "cloud managers" to provide a simpler interface to end users who have no interest in the intricacies of container infrastructures.
Moreover final users require a lot of flexibility, they want ot test new analytics systems and algorithms as soon as possible, without having to wait for some approval procedure to go through the IT department. Zoe proposes a flexible model for applications descriptions: their management can be left entirely to the final user, but they can also prepared very quickly in all or in part by an IT department, who sometimes is more knowledgeable of resource limits and environment variables. We also plan to offer a number of building blocks (Zoe Frameworks) that can be composed to make Zoe Applications.
Data analytic applications do not work in isolation. They need data, which may be stored or streamed, and they generate logs, which may have to be analyzed for debugging or stored for auditing. Data layers, in turn, need health monitoring. All these tools, frameworks, distributed filesystems and object stores form a galaxy that revolves around analytic applications. For simplicity we will call these "support applications".
Finally we feel that there is a lack of solutions in the field of private clouds, where resources are not infinite and data layers (data-sets) may be shared between different users. All the current Open Source solutions we are aware of target the public cloud use case and try, more or less, to mimic what Amazon and other big names are doing in their data-centers.
To offer an integrated, modern solution to data analysis Zoe must support both analytic and support applications, even if they have very different users and requirements.
Zoe strives to satisfy the following requirements:
The users of Zoe
----------------
* easy to use for the end-user
* easy to manage for the system administrator, easy to integrate in existing data-centers/clouds/VM deployments
* short (a few seconds) reaction times to user requests or other system events
* smart queuing and scheduling of applications when resources are critical
- users: the real end-users, who submit non-interactive analytic jobs or use interactive applications
- admins: systems administrators who maintain Zoe, the data layers, monitoring and logging for the infrastructure and the apps being run
Kubernetes, OpenStack Sahara, Mesos and YARN are the projects that, each in its own way, come near Zoe, without solving the needs we have.
Deviation from the current ZApp terminology
-------------------------------------------
Kubernetes (Borg)
-----------------
Kubernetes is a very complex system, both to deploy and to use. It takes some of the architectural principles from Google Borg and targets data centers with vast amounts of resources. We feel that while Kubernetes can certainly run analytic services in containers, it does so at a very high complexity cost for smaller setups. Moreover, in our opinion, certain scheduler choices in how preemption is managed do not apply well to environments with a limited set of users and compute resources, causing a less than optimal resource usage.
In the current Zoe implementation (0.10.x) ZApps are self-contained descriptions of a set of cooperating processes. They get submitted once, to request the start-up of an execution. This fits well with the model of a single Spark job or of a throw-away Jupyter notebook.
OpenStack Sahara
----------------
We know well `OpenStack Sahara <https://wiki.openstack.org/wiki/Sahara>`_, as we have been using it since 2013 and we contributed the Spark plugin. We feel that Sahara has limitations in:
We need to revise this terminology a bit: ZApps remain the top-level, user-visible entity. The ZApp building blocks, analytic engines or support tools, are called frameworks. A framework, by itself, cannot be run: it lacks configuration or a user-provided binary to execute, for example. Each framework is composed of one or more processes, which come with a container image and some metadata describing the needed configuration.
A few examples:
- A Jupyter notebook, by itself, is a framework in Zoe terminology. It lacks the configuration that tells it which kernels to enable or on which port to listen. It is a framework composed of just one process.
- A Spark cluster is another framework. By itself it does nothing. It can be connected to a notebook, or it can be given a jar file and some data to process.
- A "Jupyter with R listening on port 8080" ZApp is a Jupyter framework that has been configured with certain options and made runnable
To create a ZApp you need to put together one or more frameworks and add some configuration (framework-dependent) that tells them how to behave.
A ZApp shop could contain both frameworks (that the user must combine together) and full-featured ZApps.
Nothing prevents certain ZApp attributes from being "templated", like resource requests or elastic process counts, for example. A sketch of the idea follows.
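A purely hypothetical sketch of how a framework plus user-supplied configuration could yield a runnable ZApp under this proposal (none of these field names exist in the current 0.10.x format; they only illustrate the framework-plus-configuration idea, including a templated resource request):

{
    "zapp_name": "jupyter-r-8080",
    "framework": "jupyter-notebook",
    "configuration": {
        "kernels": ["R"],
        "listen_port": 8080
    },
    "memory_request": "{{ memory_request }}"
}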
Kinds of applications
---------------------
See :ref:`zapp_classification`.
Architecture
------------
.. image:: /figures/extended_arch.png
This architecture extends the current one by adding a number of pluggable modules that implement additional (and optional) features. The additional modules, in principle, are created as more Zoe frameworks, that will be instantiated in ZApps and run through the scheduler together with all the other ZApps.
The core of Zoe remains very simple to understand, while opening the door to more capabilities. The additional modules will be able to communicate with the back-end, submit, modify and terminate executions via the scheduler, and report information back to the user. Their actions will be driven by additional fields written in the ZApp descriptions.
In the figure there are three examples of such modules:
* software support: Sahara plugins support a limited set of data-intensive frameworks, adding a new one means writing a new Sahara plugin and even adding support for a new version requires going through a one-two week (on average) review process.
* lack of scheduling: Sahara makes the assumption that you have infinite resources. When you try to launch a new cluster and there are not enough resources available, the request fails and the user is left doing application and resources scheduling by hand.
* usability: setting up everything that is needed to run an EDP job is cumbersome and error-prone. The user has to provide too many details in too many different places.
- Zoe-HA: monitors and performs tasks related to high availability, both for Zoe components and for running user executions. This module could take care of maintaining a certain replication factor, making sure a certain service is restarted in case of failure, or updating a load balancer or a DNS entry.
- Zoe rolling upgrades: performs rolling upgrades of long-running ZApps
- Workflows (see the section below)
Moreover changes to Sahara needs to go through a lengthy review process, that on one side tries to ensure high quality, but on the other side slows down development, especially of major architectural changes, like the ones needed to address the concerns listed above.
Other examples could be:
Mesos
-----
- Zoe-storage: for managing volumes and associated constraints
Mesos is marketing itself as a data-center operating system. Zoe has no such high profile objective: while Zoe schedules distributed applications, it has no knowledge of the applications it is scheduling and, even more importantly, does not require any change in the applications themselves to be run in Zoe.
The modules should try to re-use as much as possible of the functionality already available in the back-ends. A simple Zoe installation could run on a single Docker engine available locally and provide a reduced set of features, while a full-fledged install could run on top of Kubernetes and provide all the large-scale deployment features that such a platform already implements.
Mesos requires that each application provides two Mesos-specific components: a scheduler and an executor. Zoe has no such requirements and runs applications unmodified.
Workflows
^^^^^^^^^
A few examples of workflows:
YARN
----
- run a job every hour (triggered by time)
- run a set of jobs in series or in parallel (triggered by the state of other jobs)
- run a job whenever the size of a file/directory/bucket on a certain data layer reaches 5GB (more complex triggers)
- combinations of all the above
YARN, from our point of view, has many similarities with Mesos. It requires application support. Moreover it is integrated in the Hadoop distribution and, while recent efforts are pushing toward making YARN stand up on its own, it is currently tailored for Hadoop applications.
A complete workflow system for data analytics is very complex. There is a lot of theory on this topic and zoe-workflow is a project of the same size as Zoe itself. An example of a full-featured workflow system for Hadoop is http://oozie.apache.org/
.. _zapp_classification:
Classification
==============
Zoe runs processes inside containers and the Zoe application description is very generic, allowing any kind of application to be described in Zoe and submitted for execution. While the main focus of Zoe is so-called "analytic applications", there are many other tools that can be run on the same cluster, for monitoring, storage, log management, history servers, etc. These applications can be described in Zoe and executed, but they have quite different scheduling constraints.
Please note that in this context an "elastic" service is a service that "can be automatically resized". HDFS can be resized, but it is done as an administrative operation that requires setting up partitions and managing the network and disk load caused by rebalancing. For this reason we do not consider it as "elastic".
- Long running: potentially will never terminate
- Non elastic
- Storage: need to have access to non-container storage (volumes or disk partitions)
- HDFS
- Cassandra
- ElasticSearch
- Interactive: need to expose web interfaces to the end user
- Jupyter
- Spark, Hadoop, Tensorflow, etc history servers
- Kibana
- Graylog (web interface only)
- Streaming:
- Logstash
- User access
- Proxies and SSH gateways
- Elastic (can be automatically resized)
- Streaming:
- Spark streaming user jobs
- Storm
- Flink streaming
- Kafka
- Ephemeral: will eventually finish by themselves
- Elastic:
- Spark classic batch jobs
- Hadoop MapReduce
- Flink
- Non elastic:
- MPI
- Tensorflow
All the applications in the **long-running** category need to be deployed, managed, upgraded and monitored since they are part of the cluster infrastructure. The Jupyter notebook at first glance may seem out of place, but in fact it is an interface to access different computing systems and languages, sometimes integrated in Jupyter itself, but also distributed to other nodes, with Spark or Tensorflow back-ends. As an interface, the user may expect it to always be there, making it part of the infrastructure.
The **elastic, long-running** applications have one more degree of flexibility that Zoe can take into account. They all have the same needs as the non-elastic applications, but they can also be scaled according to many criteria (priority, latency, data volume).
The applications in the **ephemeral** category, instead, will eventually terminate by themselves: a batch job is a good example of such applications.
.. _zapps_contributing:
Contributing ZApps
------------------
Zoe applications are maintained in the `zoe-applications <https://github.com/DistributedSystemsGroup/zoe-applications>`_ repository, feel free to fork it and generate pull requests for new applications, frameworks and services.
Check also the :ref:`howto_zapp` document for help on building ZApps from the already-available services and the :ref:`zapp_format`.
......@@ -24,7 +24,7 @@ In this guide we are going to use Python because it is a very easy language to u
We are planning graphical tools and a packaging system for ZApps, so stay tuned for updates! In the `Zoe Applications repository <https://github.com/DistributedSystemsGroup/zoe-applications>`_ there is already a very simple web interface we use internally for our users.
.. image:: figures/zapp_structure.png
.. image:: /figures/zapp_structure.png
A ZApp is a tree of nested dictionaries (other languages call them maps or hashmaps). The actual JSON tree is flattened because Zoe does not need to know about Frameworks: they are a logical subdivision that helps the user.
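As a minimal sketch of the flattened shape described above (the field names below are illustrative, not the authoritative schema; see :ref:`zapp_format` for the real field list):

{
    "name": "jupyter-example",
    "version": 2,
    "will_end": false,
    "services": [
        {
            "name": "jupyter",
            "docker_image": "docker-registry.example.com/zapps/jupyter",
            "monitor": true,
            "required_resources": {"memory": 4294967296},
            "ports": [{"name": "Notebook web interface", "port_number": 8888}]
        }
    ]
}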
......
#!/bin/bash
echo "The images for this script are not yet available on the Docker Hub"
exit 1
if [ ! `which tmux` ]; then
echo "This script uses the tmux terminal multiplexer, but it is not available in PATH"
exit 1
fi
# Address for the Swarm API endpoint
SWARM_ADDRESS="swarm:2380"
sudo docker -H ${SWARM_ADDRESS} run -i -t --rm=true -e ZOE_MASTER_SWARM=${SWARM_ADDRESS} zoerepo/zoe-master
sudo docker -H ${SWARM_ADDRESS} run -i -t --rm=true zoerepo/zoe-client ./zoe-web.py
......@@ -101,9 +101,7 @@ class APIEndpoint:
raise zoe_api.exceptions.ZoeAuthException()
if e.is_active():
status, message = self.execution_terminate(uid, role, exec_id)
if not status:
raise zoe_api.exceptions.ZoeException(message)
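# Active executions are no longer terminated implicitly: the delete request is
# rejected and the caller must terminate the execution first.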
raise zoe_api.exceptions.ZoeException('Cannot delete an active execution')
status, message = self.master.execution_delete(exec_id)
if status:
......
......@@ -27,7 +27,7 @@ log = logging.getLogger(__name__)
def _path_from_execution(execution: Execution):
return os.path.join(get_conf().service_log_path, get_conf().deployment_name, str(execution.id))
return os.path.join(get_conf().service_log_path, get_conf().deployment_name, str(execution.id))
def _init(execution: Execution):
......@@ -60,11 +60,14 @@ def save(execution: Execution):
swarm = SwarmClient(get_conf())
log_gen = swarm.logs(service.docker_id, stream=True, follow=False)
if log_gen is None:
continue
with open(fpath, 'wb') as fp:
for line in log_gen:
fp.write(line)
_shutdown()
return
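# Opening the file raises FileNotFoundError if the destination directory under
# service-log-path is missing; log the error instead of aborting with an
# unhandled exception.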
try:
with open(fpath, 'wb') as out_fp:
for line in log_gen:
out_fp.write(line)
except FileNotFoundError:
log.error("Could not create file {}".format(fpath))
_shutdown()
......
......@@ -134,7 +134,7 @@ class SwarmClient:
else:
raise ZoeLibException('Unsupported URL scheme for Swarm')
log.debug('Connecting to Swarm at {}'.format(manager))
self.cli = docker.Client(base_url=manager)
self.cli = docker.Client(base_url=manager, version="auto")
def info(self) -> SwarmStats:
"""Retrieve Swarm statistics. The Docker API returns a mess difficult to parse."""
......@@ -298,16 +298,32 @@ class SwarmClient:
:type delete: bool
:return: None
"""
if delete:
try:
self.cli.remove_container(docker_id, force=True)
except docker.errors.NotFound:
log.warning("cannot remove a non-existent service")
else:
try:
self.cli.kill(docker_id)
except docker.errors.NotFound:
log.warning("cannot remove a non-existent service")
retries = 5
while retries > 0:
if delete:
try:
self.cli.remove_container(docker_id, force=True)
break
except docker.errors.NotFound:
log.warning("cannot remove a non-existent service")
break
except requests.exceptions.ReadTimeout:
log.error("Read timeout trying to delete a container")
retries -= 1
continue
else:
try:
self.cli.kill(docker_id)
break
except docker.errors.NotFound:
log.warning("cannot remove a non-existent service")
break
except requests.exceptions.ReadTimeout:
log.error("Read timeout trying to delete a container")
retries -= 1
continue
if retries == 0:
log.error("Giving up trying to terminate container {}".format(docker_id))
def event_listener(self, callback: Callable[[str], bool]) -> None:
"""An infinite loop that listens for events from Swarm."""
......
......@@ -15,6 +15,6 @@
"""Versions."""
ZOE_VERSION = '0.10.1-beta'
ZOE_VERSION = '0.10.2-beta'
ZOE_API_VERSION = '0.6'
ZOE_APPLICATION_FORMAT_VERSION = 2