Commit cb7a1bcd authored by Daniele Venzano

Put the ZApp shop sample files directly in the repository in the contrib directory

parent 0a275427
# BOINC ZApp
Maintainer: Daniele Venzano <daniele.venzano@eurecom.fr>
URL: [https://gitlab.eurecom.fr/zoe-apps/zapp-boinc](https://gitlab.eurecom.fr/zoe-apps/zapp-boinc)
A ZApp for running a single [BOINC](https://boinc.berkeley.edu/) task. Run as many as you want, in parallel!
{
"name": "boinc",
"services": [
{
"command": null,
"environment": [
[
"PROJECT_URL",
"www.worldcommunitygrid.org"
],
[
"PROJECT_KEY",
"Your key here"
]
],
"essential_count": 1,
"image": "docker-registry:5000/zapps/boinc:4503",
"monitor": true,
"name": "boinc-client",
"ports": [],
"replicas": 1,
"resources": {
"cores": {
"max": 1,
"min": 1
},
"memory": {
"max": 536870912,
"min": 536870912
}
},
"startup_order": 0,
"total_count": 1,
"volumes": []
}
],
"size": 128,
"version": 3,
"will_end": false
}
{
"version": 1,
"zapps": [
{
"category": "Non-interactive",
"name": "Boinc client",
"description": "boinc.json",
"readable_descr": "README.md",
"parameters": [
{
"kind": "environment",
"name": "PROJECT_URL",
"readable_name": "Project URL",
"description": "The BOINC project URL",
"type": "string",
"default": "www.worldcommunitygrid.org"
},
{
"kind": "environment",
"name": "PROJECT_KEY",
"readable_name": "Project key",
"description": "The BOINC project key, needed to upload results and statistics to the BOINC server",
"type": "string",
"default": null
}
]
}
]
}
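The `parameters` list above tells the ZApp shop which values a user can override before starting the application. Below is a minimal sketch of how an `environment` parameter such as `PROJECT_KEY` could be applied to the ZApp description prior to submission; the `apply_environment_parameter` helper is illustrative and not part of Zoe.

```python
# Illustrative sketch only: apply a user-supplied value for an "environment"
# parameter (e.g. PROJECT_KEY) to a ZApp description such as boinc.json.
import json

def apply_environment_parameter(zapp: dict, name: str, value: str) -> None:
    """Override the variable if a service already defines it, else add it."""
    for service in zapp["services"]:
        env = service["environment"]
        for pair in env:
            if pair[0] == name:
                pair[1] = value
                break
        else:
            env.append([name, value])

with open("boinc.json") as f:
    zapp = json.load(f)

apply_environment_parameter(zapp, "PROJECT_KEY", "your-project-key-here")
print(json.dumps(zapp, indent=2))
```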
# Jupyter Notebook Data Science Stack
URL: [https://hub.docker.com/r/jupyter/datascience-notebook/](https://hub.docker.com/r/jupyter/datascience-notebook/)
* Jupyter Notebook 5.0.x
* Conda Python 3.x environment
* pandas, matplotlib, scipy, seaborn, scikit-learn, scikit-image, sympy, cython, patsy, statsmodels, cloudpickle, dill, numba, bokeh pre-installed
* Conda R v3.3.x and the r conda channel
* plyr, devtools, shiny, rmarkdown, forecast, rsqlite, reshape2, nycflights13, caret, rcurl, and randomforest pre-installed
* The tidyverse R packages are also installed, including ggplot2, dplyr, tidyr, readr, purrr, tibble, stringr, lubridate, and broom
* Julia v0.5.x with Gadfly, RDatasets and HDF5 pre-installed
Please note that you need to retrieve the secret key from the service logs to be able to access the notebooks.
# Jupyter Notebook R Stack
URL: [https://hub.docker.com/r/jupyter/r-notebook/](https://hub.docker.com/r/jupyter/r-notebook/)
* Jupyter Notebook 5.0.x
* Conda R v3.3.x and the r conda channel
* plyr, devtools, shiny, rmarkdown, forecast, rsqlite, reshape2, nycflights13, caret, rcurl, and randomforest pre-installed
* The tidyverse R packages are also installed, including ggplot2, dplyr, tidyr, readr, purrr, tibble, stringr, lubridate, and broom
Please note that you need to retrieve the secret key from the service logs to be able to access the notebooks.
# Jupyter Notebook Scientific Python Stack
URL: [https://hub.docker.com/r/jupyter/scipy-notebook/](https://hub.docker.com/r/jupyter/scipy-notebook/)
* Jupyter Notebook 5.0.x
* Conda Python 3.x environment
* pandas, matplotlib, scipy, seaborn, scikit-learn, scikit-image, sympy, cython, patsy, statsmodels, cloudpickle, dill, numba, bokeh, vincent, beautifulsoup, xlrd pre-installed
Please note that you need to retrieve the secret key from the service logs to be able to access the notebooks.
# Jupyter Notebook Scientific Python Stack + Tensorflow
Maintainer: Daniele Venzano <daniele.venzano@eurecom.fr>
URL: [https://hub.docker.com/r/jupyter/tensorflow-notebook/](https://hub.docker.com/r/jupyter/tensorflow-notebook/)
* Everything in [Scipy](https://github.com/jupyter/docker-stacks/tree/master/scipy-notebook) Notebook
* Tensorflow and Keras for Python 3.x (without GPU support)
Please note that you need to retrieve the secret key from the service logs to be able to access the notebooks.
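The notebook services start with token authentication enabled, which is why the secret key must be read from the service logs. A minimal sketch of extracting it from a copied log excerpt, assuming the standard Jupyter startup line of the form `http://<host>:8888/?token=<hex>`:

```python
# Minimal sketch: pull the access token out of a Jupyter service log excerpt.
import re
from typing import Optional

def extract_token(log_text: str) -> Optional[str]:
    match = re.search(r"\?token=([0-9a-f]+)", log_text)
    return match.group(1) if match else None

# Example with a line copied from the service log page:
sample = "    http://0.0.0.0:8888/?token=3f2acd90b1..."
print(extract_token(sample))  # -> 3f2acd90b1
```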
{
"name": "datasci-notebook",
"services": [
{
"command": null,
"environment": [],
"essential_count": 1,
"image": "jupyter/datascience-notebook",
"monitor": true,
"name": "jupyter",
"ports": [
{
"name": "Jupyter Notebook interface",
"port_number": 8888,
"protocol": "tcp",
"url_template": "http://{ip_port}/"
}
],
"replicas": 1,
"resources": {
"cores": {
"max": 4,
"min": 4
},
"memory": {
"max": 4294967296,
"min": 4294967296
}
},
"startup_order": 0,
"total_count": 1,
"volumes": []
}
],
"size": 512,
"version": 3,
"will_end": false
}
{
"version": 1,
"zapps": [
{
"category": "Jupyter notebooks",
"readable_descr": "README-datascience.md",
"name": "Data science notebook",
"description": "datasci-notebook.json",
"parameters": []
},
{
"category": "Jupyter notebooks",
"readable_descr": "README-r.md",
"name": "R notebook",
"description": "r-notebook.json",
"parameters": []
},
{
"category": "Jupyter notebooks",
"readable_descr": "README-scipy.md",
"name": "SciPy notebook",
"description": "scipy-notebook.json",
"parameters": []
},
{
"category": "Jupyter notebooks",
"readable_descr": "README-tensorflow.md",
"name": "TensorFlow notebook",
"description": "tf-notebook.json",
"parameters": []
}
]
}
{
"name": "r-notebook",
"services": [
{
"command": null,
"environment": [],
"essential_count": 1,
"image": "jupyter/r-notebook",
"monitor": true,
"name": "jupyter",
"ports": [
{
"name": "Jupyter Notebook interface",
"port_number": 8888,
"protocol": "tcp",
"url_template": "http://{ip_port}/"
}
],
"replicas": 1,
"resources": {
"cores": {
"max": 4,
"min": 4
},
"memory": {
"max": 4294967296,
"min": 4294967296
}
},
"startup_order": 0,
"total_count": 1,
"volumes": []
}
],
"size": 512,
"version": 3,
"will_end": false
}
{
"name": "scipy-notebook",
"services": [
{
"command": null,
"environment": [],
"essential_count": 1,
"image": "jupyter/scipy-notebook",
"monitor": true,
"name": "jupyter",
"ports": [
{
"name": "Jupyter Notebook interface",
"port_number": 8888,
"protocol": "tcp",
"url_template": "http://{ip_port}/"
}
],
"replicas": 1,
"resources": {
"cores": {
"max": 4,
"min": 4
},
"memory": {
"max": 4294967296,
"min": 4294967296
}
},
"startup_order": 0,
"total_count": 1,
"volumes": []
}
],
"size": 512,
"version": 3,
"will_end": false
}
{
"name": "tf-notebook",
"services": [
{
"command": null,
"environment": [],
"essential_count": 1,
"image": "jupyter/tensorflow-notebook",
"monitor": true,
"name": "jupyter",
"ports": [
{
"name": "Jupyter Notebook interface",
"port_number": 8888,
"protocol": "tcp",
"url_template": "http://{ip_port}/"
}
],
"replicas": 1,
"resources": {
"cores": {
"max": 4,
"min": 4
},
"memory": {
"max": 4294967296,
"min": 4294967296
}
},
"startup_order": 0,
"total_count": 1,
"volumes": []
}
],
"size": 512,
"version": 3,
"will_end": false
}
# Jupyter Notebook image
This image contains the Jupyter Notebook configured with Python and a Spark client. It is used by Zoe, the Container Analytics as a Service system, to create on-demand notebooks connected to containerized Spark clusters.
Zoe can be found at: https://github.com/DistributedSystemsGroup/zoe
## Setup
The image runs a start script that configures the Notebook using these environment variables (a usage sketch follows the list):
* SPARK\_MASTER\_IP: IP address of the Spark master this notebook should use for its kernel
* PROXY\_ID: string to use as a prefix for URL paths, for reverse proxying
* SPARK\_EXECUTOR\_RAM: How much RAM to use for each executor spawned by the notebook
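For illustration only, a sketch of how a start script could consume these variables; this is not the actual script shipped in the image:

```python
# Illustrative only: reading the configuration variables documented above.
import os

spark_master_ip = os.environ.get("SPARK_MASTER_IP", "127.0.0.1")
proxy_id = os.environ.get("PROXY_ID", "")             # URL path prefix for reverse proxying
executor_ram = os.environ.get("SPARK_EXECUTOR_RAM", "4g")

print(f"Kernel Spark master: spark://{spark_master_ip}:7077 "
      f"(base path '/{proxy_id}', executor memory {executor_ram})")
```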
# Spark Scala master image
This image contains the Spark master process. It is used by Zoe, the Container Analytics as a Service system, to create on-demand Spark clusters in Spark standalone mode.
Zoe can be found at: https://github.com/DistributedSystemsGroup/zoe
## Setup
The image automatically starts the Spark master process when the container is run.
# Spark worker image
This image contains the Spark worker process. It is used by Zoe, the Container Analytics as a Service system, to create on-demand Spark clusters in standalone mode.
Zoe can be found at: https://github.com/DistributedSystemsGroup/zoe
## Setup
The image starts the Spark worker process when the container is run. The following options can be passed via environment variables (a launch sketch follows the list):
* SPARK\_MASTER\_IP: IP address of the Spark master this worker should connect to
* SPARK\_WORKER\_RAM: How much RAM the worker can use (default is 4g)
* SPARK\_WORKER\_CORES: How many cores can be used by the worker process (default is 4)
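A minimal sketch of launching the worker image by hand with these variables; the image name is taken from the ZApp descriptions in this commit and all values are illustrative:

```python
# Illustrative only: run the Spark worker container with the documented variables.
import subprocess

subprocess.run([
    "docker", "run", "-d",
    "-e", "SPARK_MASTER_IP=10.0.0.2",   # address of a running Spark master
    "-e", "SPARK_WORKER_RAM=4g",        # default is 4g
    "-e", "SPARK_WORKER_CORES=4",       # default is 4
    "docker-registry:5000/zapps/spark2-worker:4508",
], check=True)
```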
# Spark ZApp
URL: [https://gitlab.eurecom.fr/zoe-apps/zapp-spark](https://gitlab.eurecom.fr/zoe-apps/zapp-spark)
Combine the full power of a distributed [Apache Spark](http://spark.apache.org) cluster with Python Jupyter Notebooks.
The Spark shell can be used from the built-in terminal in the notebook ZApp.
Spark is configured in standalone, distributed mode. This ZApp contains Spark version 2.1.0.
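From a notebook cell, a Spark session can be attached to the standalone master. A minimal sketch, assuming the `SPARK_MASTER` environment variable set by this ZApp (see spark-jupyter.json below) points at the master's `spark://` URL:

```python
# Minimal sketch: attach a PySpark session to the standalone master.
import os
from pyspark.sql import SparkSession

spark = (
    SparkSession.builder
    .master(os.environ.get("SPARK_MASTER", "spark://spark-master0:7077"))  # fallback is illustrative
    .appName("notebook-sanity-check")
    .getOrCreate()
)

print(spark.range(1000).count())  # should print 1000 if executors are reachable
```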
# Spark ZApp
URL: [https://gitlab.eurecom.fr/zoe-apps/zapp-spark](https://gitlab.eurecom.fr/zoe-apps/zapp-spark)
Traditional Spark submit jobs. Use the command-line parameter to specify which Python or JAR file to execute from your workspace.
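The default command line in the manifest below runs `wordcount.py` with an HDFS input URI and an HDFS output URI. A minimal sketch of such a script, for illustration; it is not necessarily the exact file referenced by the default:

```python
# Minimal wordcount sketch matching the shape of the default command line:
#   wordcount.py <input-uri> <output-uri>
import sys
from pyspark.sql import SparkSession

if __name__ == "__main__":
    in_path, out_path = sys.argv[1], sys.argv[2]
    spark = SparkSession.builder.appName("wordcount").getOrCreate()
    counts = (
        spark.sparkContext.textFile(in_path)
        .flatMap(lambda line: line.split())
        .map(lambda word: (word, 1))
        .reduceByKey(lambda a, b: a + b)
    )
    counts.saveAsTextFile(out_path)
    spark.stop()
```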
{
"version": 1,
"zapps": [
{
"category": "Spark",
"name": "Distributed PySpark notebook",
"description": "spark-jupyter.json",
"readable_descr": "README-jupyter.md",
"parameters": [
{
"kind": "environment",
"name": "NAMENODE_HOST",
"readable_name": "NameNode host",
"description": "Hostname of the HDFS NameNode",
"type": "string",
"default": "hdfs-namenode.zoe"
}
]
},
{
"category": "Spark",
"name": "Spark submit",
"description": "spark-submit.json",
"readable_descr": "README-submit.md",
"parameters": [
{
"kind": "environment",
"name": "NAMENODE_HOST",
"readable_name": "NameNode host",
"description": "Hostname of the HDFS NameNode",
"type": "string",
"default": "hdfs-namenode.zoe"
},
{
"kind": "command",
"name": "spark-submit",
"readable_name": "Spark submit commandline",
"description": "The Spark submit command line that tells Spark what to execute. Some options are already passed (like --master)",
"type": "string",
"default": "wordcount.py hdfs://192.168.45.157/datasets/gutenberg_big_2x.txt hdfs://192.168.45.157/tmp/wcount-out"
}
]
}
]
}
{
"name": "spark-jupyter",
"services": [
{
"command": null,
"environment": [
[
"SPARK_MASTER_IP",
"{dns_name#self}"
],
[
"HADOOP_USER_NAME",
"{user_name}"
],
[
"PYTHONHASHSEED",
"42"
]
],
"essential_count": 1,
"image": "docker-registry:5000/zapps/spark2-master:4508",
"monitor": false,
"name": "spark-master",
"ports": [
{
"name": "Spark master web interface",
"port_number": 8080,
"protocol": "tcp",
"url_template": "http://HOST:PORT/"
}
],
"replicas": 1,
"resources": {
"cores": {
"max": null,
"min": null
},
"memory": {
"max": 536870912,
"min": 536870912
}
},
"startup_order": 0,
"total_count": 1,
"volumes": []
},
{
"command": null,
"environment": [
[
"SPARK_WORKER_CORES",
"6"
],
[
"SPARK_WORKER_RAM",
"11273240064"
],
[
"SPARK_MASTER_IP",
"{dns_name#spark-master0}"
],
[
"SPARK_LOCAL_IP",
"{dns_name#self}"
],
[
"PYTHONHASHSEED",
"42"
],
[
"HADOOP_USER_NAME",
"{user_name}"
]
],
"essential_count": 1,
"image": "docker-registry:5000/zapps/spark2-worker:4508",
"monitor": false,
"name": "spark-worker",
"ports": [],
"replicas": 1,
"resources": {
"cores": {
"max": null,
"min": null
},
"memory": {
"max": 12884901888,
"min": 12884901888
}
},
"startup_order": 1,
"total_count": 2,
"volumes": []
},
{
"command": null,
"environment": [
[
"SPARK_MASTER",
"spark://{dns_name#spark-master0}:7077"
],
[
"SPARK_EXECUTOR_RAM",
"11273240064"
],
[
"SPARK_DRIVER_RAM",
"2147483648"
],
[
"HADOOP_USER_NAME",
"{user_name}"
],
[
"NB_USER",
"{user_name}"
],
[
"PYTHONHASHSEED",
"42"
],
[
"NAMENODE_HOST",
"hdfs-namenode.zoe"
]
],
"essential_count": 1,
"image": "docker-registry:5000/zapps/spark2-jupyter-notebook:4508",
"monitor": true,
"name": "spark-jupyter",
"ports": [
{
"name": "Jupyter Notebook interface",
"port_number": 8888,
"protocol": "tcp",
"url_template": "http://HOST:PORT/"
}
],
"replicas": 1,
"resources": {
"cores": {
"max": null,
"min": null
},
"memory": {
"max": 12884901888,
"min": 12884901888
}
},
"startup_order": 0,
"total_count": 1,
"volumes": []
}
],
"size": 512,
"version": 3,
"will_end": false
}
{
"name": "spark-jupyter",
"services": [
{
"command": null,
"environment": [
[
"SPARK_MASTER_IP",
"{dns_name#self}"
],
[
"HADOOP_USER_NAME",
"{user_name}"
],
[
"PYTHONHASHSEED",
"42"
]
],
"essential_count": 1,
"image": "docker-registry:5000/zapps/spark2-master:4508",
"monitor": false,
"name": "spark-master",
"ports": [
{
"name": "Spark master web interface",
"port_number": 8080,
"protocol": "tcp",
"url_template": "http://HOST:PORT/"
}
],
"replicas": 1,
"resources": {
"cores": {
"max": null,
"min": null
},
"memory": {
"max": 536870912,
"min": 536870912
}
},
"startup_order": 0,
"total_count": 1,
"volumes": []
},
{
"command": null,
"environment": [
[
"SPARK_WORKER_CORES",
"6"
],
[
"SPARK_WORKER_RAM",
"11273240064"
],
[
"SPARK_MASTER_IP",
"{dns_name#spark-master0}"
],
[
"SPARK_LOCAL_IP",
"{dns_name#self}"
],
[
"PYTHONHASHSEED",
"42"
],
[
"HADOOP_USER_NAME",
"{user_name}"
]
],
"essential_count": 1,
"image": "docker-registry:5000/zapps/spark2-worker:4508",
"monitor": false,
"name": "spark-worker",
"ports": [],
"replicas": 1,
"resources": {
"cores": {
"max": null,
"min": null
},
"memory": {
"max": 12884901888,
"min": 12884901888
}
},
"startup_order": 1,
"total_count": 2,
"volumes": []
},
{
"command": "wordcount.py hdfs://192.168.45.157/datasets/gutenberg_big_2x.txt hdfs://192.168.45.157/tmp/wcount-out",
"environment": [
[
"SPARK_MASTER",
"spark://{dns_na