Commit 73197082 authored by Daniele Venzano's avatar Daniele Venzano
Browse files

Fix Spark json and add the manifest

parent 430287bb
Pipeline #4495 passed with stage
in 20 seconds
# Spark ZApp
URL: [https://gitlab.eurecom.fr/zoe-apps/zapp-spark](https://gitlab.eurecom.fr/zoe-apps/zapp-spark)
Combine the full power of a distributed [Apache Spark](http://spark.apache.org) cluster with Python Jupyter Notebooks.
The Spark shell can be used from the built-in terminal in the notebook ZApp.
Spark is configured in stand-alone, distributed mode. This ZApp contains Spark version 2.1.0.
# Spark ZApp
URL: [https://gitlab.eurecom.fr/zoe-apps/zapp-spark](https://gitlab.eurecom.fr/zoe-apps/zapp-spark)
Traditional Spark submit jobs. Use the command-line parameter to specify which Python or JAR file to execute from your workspace.
# Spark ZApp
PySpark Jupyter Notebook and traditional Spark submit jobs. The Spark shell can be used from the built-in terminal in the notebook ZApp.
......@@ -166,7 +166,7 @@ def spark_jupyter_notebook_service(mem_limit, worker_mem_limit, hdfs_namenode):
'name': "spark-jupyter",
'image': NOTEBOOK_IMAGE,
'monitor': True,
'required_resources': {
'resources': {
"memory": {
"min": mem_limit,
"max": mem_limit
......@@ -215,7 +215,7 @@ def spark_submit_service(mem_limit, worker_mem_limit, hdfs_namenode, command):
'name': "spark-submit",
'image': SUBMIT_IMAGE,
'monitor': True,
'required_resources': {
'resources': {
"memory": {
"min": mem_limit,
"max": mem_limit
......
{
"version": 1,
"zapps": [
{
"category": "Jupyter notebooks",
"name": "PySpark notebook",
"description": "spark-jupyter.json",
"readable_descr": "README-jupyter.md",
"parameters": [
{
"kind": "environment",
"name": "NAMENODE_HOST",
"readable_name": "NameNode host",
"description": "Hostname of the HDFS NameNode",
"type": "string",
"default": "hdfs-namenode.zoe"
}
]
},
{
"category": "Non-interactive",
"name": "Spark submit",
"description": "spark-submit.json",
"readable_descr": "README-submit.md",
"parameters": [
{
"kind": "environment",
"name": "NAMENODE_HOST",
"readable_name": "NameNode host",
"description": "Hostname of the HDFS NameNode",
"type": "string",
"default": "hdfs-namenode.zoe"
},
{
"kind": "command",
"name": "spark-submit",
"readable_name": "Spark submit commandline",
"description": "The Spark submit command line that tells Spark what to execute. Some options are already passed (like --master)",
"type": "string",
"default": "wordcount.py hdfs://192.168.45.157/datasets/gutenberg_big_2x.txt hdfs://192.168.45.157/tmp/wcount-out"
}
]
}
]
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment