Commit 2815cf62 authored by Daniele Venzano's avatar Daniele Venzano
Browse files

Move images scripts to a separate repository

parent 0519e434
#!/bin/sh
# Build, push and pre-pull the Zoe Spark images for every component.
# Requires: python (for gen_dockerfiles.py), docker, and network access to
# the registry and remote Docker host at 10.0.0.2.
set -e

SPARK_VER=1.4.1
HADOOP_VER=hadoop2.4
IMAGE_VER=1.2

REGISTRY=10.0.0.2:5000
REMOTE_DOCKER=10.0.0.2:2380

# Regenerate the per-component Dockerfiles from the Jinja templates.
python ./gen_dockerfiles.py "${SPARK_VER}" "${HADOOP_VER}"

for d in master worker shell submit notebook; do
    image="${REGISTRY}/zoe/spark-${d}-${SPARK_VER}:${IMAGE_VER}"
    # Build inside a subshell so a failed cd cannot leak into the next
    # iteration or build from the wrong directory.
    ( cd "$d" && docker build -t "$image" . )
    docker push "$image"
    # Pre-pull the image on the remote Docker host so containers start fast.
    docker -H "${REMOTE_DOCKER}" pull "$image"
done
#!/usr/bin/env bash
# Drop the first line of /etc/hosts (the container hostname alias added by
# Docker); works around https://issues.apache.org/jira/browse/SPARK-6680.
# /etc/hosts is a bind-mount inside a container, so it is rewritten in place
# through a temporary copy rather than replaced with mv.
set -e
tmp=$(mktemp)
trap 'rm -f "$tmp"' EXIT
sed '1d' /etc/hosts > "$tmp"
cat "$tmp" > /etc/hosts
# spark-defaults.conf: pin every Spark service to a fixed, known port so the
# container images can expose/firewall a static port list (the Dockerfiles
# carry a matching commented-out EXPOSE line for 7001-7006 and 4040).
spark.driver.port 7001
spark.fileserver.port 7002
spark.broadcast.port 7003
spark.replClassServer.port 7004
spark.blockManager.port 7005
spark.executor.port 7006
spark.ui.port 4040
# NOTE(review): presumably chosen so broadcast traffic uses the fixed
# spark.broadcast.port above instead of random ports -- confirm.
spark.broadcast.factory org.apache.spark.broadcast.HttpBroadcastFactory
#!/usr/bin/env bash
# Launch the Spark standalone master, bound to the container's own address.
set -e

# The first field of the first /etc/hosts line is this container's IP.
SPARK_MASTER_IP=$(awk 'NR==1 {print $1}' /etc/hosts)
export SPARK_MASTER_IP
export SPARK_LOCAL_IP="${SPARK_MASTER_IP}"
export SPARK_MASTER_PORT=7077
export SPARK_MASTER_WEBUI_PORT=8080

cd /opt/spark

# exec so the master replaces this shell and receives signals directly;
# extra arguments are forwarded verbatim.
exec ./bin/spark-class org.apache.spark.deploy.master.Master \
    --host "${SPARK_MASTER_IP}" --port "${SPARK_MASTER_PORT}" \
    --webui-port "${SPARK_MASTER_WEBUI_PORT}" \
    "$@"
#!/usr/bin/env bash
# Start an interactive Spark shell against the standalone master.
# Requires SPARK_MASTER_IP and SPARK_EXECUTOR_RAM in the environment.
set -e
: "${SPARK_MASTER_IP:?SPARK_MASTER_IP must be set}"
: "${SPARK_EXECUTOR_RAM:?SPARK_EXECUTOR_RAM must be set}"
cd /opt/spark
exec ./bin/spark-shell --master "spark://${SPARK_MASTER_IP}:7077" \
    --executor-memory "${SPARK_EXECUTOR_RAM}" "$@"
#!/usr/bin/env bash
# Launch a Spark standalone worker and register it with the master.
# SPARK_MASTER_IP must be set; cores and RAM default to 4 / 4g.
set -e
: "${SPARK_MASTER_IP:?SPARK_MASTER_IP must be set}"

cd /opt/spark

# Bind to the container's own address (first field of the first /etc/hosts
# line).
SPARK_LOCAL_IP=$(awk 'NR==1 {print $1}' /etc/hosts)
export SPARK_LOCAL_IP

exec ./bin/spark-class org.apache.spark.deploy.worker.Worker \
    "spark://${SPARK_MASTER_IP}:7077" \
    --cores "${SPARK_WORKER_CORES:-4}" --memory "${SPARK_WORKER_RAM:-4g}" \
    -h "${SPARK_LOCAL_IP}" \
    "$@"
#!/usr/bin/env bash
# Fetch a zipped Spark application from Redis (key app-<APPLICATION_ID>),
# unpack it into a private work directory and run spark-submit on it.
# Requires APPLICATION_ID, SPARK_MASTER_IP and SPARK_EXECUTOR_RAM.
set -e

if [ -z "${APPLICATION_ID}" ]; then
    echo "No application ID provided, cannot continue" >&2
    exit 1
fi

workdir="/tmp/${APPLICATION_ID}"
mkdir -p "${workdir}"
cd "${workdir}"

# The application archive is stored as a blob in Redis.
# REDIS_CLI_OPTIONS is intentionally unquoted: it may hold several options.
redis-cli ${REDIS_CLI_OPTIONS} get "app-${APPLICATION_ID}" > app.zip
# -o: never prompt if the directory was left over from a previous run.
unzip -o app.zip

# SPARK_OPTIONS is intentionally unquoted for the same reason.
exec /opt/spark/bin/spark-submit --master "spark://${SPARK_MASTER_IP}:7077" \
    --executor-memory="${SPARK_EXECUTOR_RAM}" ${SPARK_OPTIONS} "$@"
#!/usr/bin/python
"""Generate the Dockerfile for each Spark image from Jinja2 templates.

Usage: gen_dockerfiles.py <spark version> <hadoop version>

Each image directory (master, worker, shell, submit) gets a Dockerfile made
of the shared 'common' template followed by its own template, plus copies of
the helper scripts it needs. The notebook image is not generated yet.
"""
import sys
import shutil
from jinja2 import Environment, FileSystemLoader

# Per-image: (directory/template name, helper scripts to ship with it).
IMAGES = [
    ("master", ["remove_alias.sh", "start-master.sh"]),
    ("worker", ["remove_alias.sh", "start-worker.sh"]),
    ("shell", ["remove_alias.sh", "start-shell.sh"]),
    ("submit", ["remove_alias.sh", "submit.sh"]),
]


def copyfile(fname, dest):
    """Copy a helper script from files/ into <dest>/files/."""
    shutil.copy("files/" + fname, dest + "/files/" + fname)


def main():
    if len(sys.argv) < 3:
        print("Usage: {} <spark version> <hadoop version>".format(sys.argv[0]))
        sys.exit(1)

    ji_env = Environment(loader=FileSystemLoader('templates'))
    common_tmpl = ji_env.get_template('common.tmpl')
    common = common_tmpl.render(spark_version=sys.argv[1],
                                hadoop_version=sys.argv[2])

    for name, scripts in IMAGES:
        tmpl = ji_env.get_template(name + '.tmpl')
        # Context manager closes the file promptly instead of relying on GC.
        with open(name + "/Dockerfile", "w") as dockerfile:
            dockerfile.write(common + "\n" + tmpl.render())
        for script in scripts:
            copyfile(script, name)

    # Notebook image is not template-generated yet.
    # nb_tmpl = ji_env.get_template('notebook.tmpl')
    # nb = nb_tmpl.render()
    # open("notebook/Dockerfile", "w").write(nb)


if __name__ == '__main__':
    main()
# Spark standalone master image.
FROM ubuntu:14.04
MAINTAINER Daniele Venzano <venza@brownhat.org>

ENV SPARK_VERSION 1.4.1
ENV HADOOP_VERSION hadoop2.4

# Oracle Java 7 from the webupd8team PPA; the debconf pre-seed accepts the
# license non-interactively.
RUN apt-get update && apt-get install -y --force-yes software-properties-common python-software-properties
RUN apt-add-repository -y ppa:webupd8team/java
RUN /bin/echo debconf shared/accepted-oracle-license-v1-1 select true | /usr/bin/debconf-set-selections
RUN apt-get update && apt-get -y install oracle-java7-installer oracle-java7-set-default curl

# Fetch the pre-built Spark distribution straight into /opt.
RUN curl -s http://mirrors.ircam.fr/pub/apache/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-${HADOOP_VERSION}.tgz | tar -xz -C /opt/
WORKDIR /opt
# Version-independent path used by the start scripts.
RUN ln -s spark-${SPARK_VERSION}-bin-${HADOOP_VERSION} spark
ENV SPARK_HOME /opt/spark
ENV PATH /opt/spark/bin:/opt/spark/sbin:${PATH}

COPY files/* /opt/
# Make the entry-point scripts executable regardless of the permissions they
# had in the build context (the notebook image already does this).
RUN chmod +x /opt/*.sh

# 8080: master web UI, 7077: master RPC port.
EXPOSE 8080 7077
CMD /opt/start-master.sh

#ENV SPARK_MASTER_OPTS="-Dspark.driver.port=7001 -Dspark.fileserver.port=7002 -Dspark.broadcast.port=7003 -Dspark.replClassServer.port=7004 -Dspark.blockManager.port=7005 -Dspark.executor.port=7006 -Dspark.ui.port=4040 -Dspark.broadcast.factory=org.apache.spark.broadcast.HttpBroadcastFactory"
#ENV SPARK_WORKER_OPTS="-Dspark.driver.port=7001 -Dspark.fileserver.port=7002 -Dspark.broadcast.port=7003 -Dspark.replClassServer.port=7004 -Dspark.blockManager.port=7005 -Dspark.executor.port=7006 -Dspark.ui.port=4040 -Dspark.broadcast.factory=org.apache.spark.broadcast.HttpBroadcastFactory"
#ENV SPARK_MASTER_PORT 7077
#ENV SPARK_MASTER_WEBUI_PORT 8080
#ENV SPARK_WORKER_PORT 8888
#ENV SPARK_WORKER_WEBUI_PORT 8081
#EXPOSE 8080 7077 8888 8081 4040 7001 7002 7003 7004 7005 7006
\ No newline at end of file
#!/usr/bin/env bash
# Drop the first line of /etc/hosts (the container hostname alias added by
# Docker); works around https://issues.apache.org/jira/browse/SPARK-6680.
# /etc/hosts is a bind-mount inside a container, so it is rewritten in place
# through a temporary copy rather than replaced with mv.
set -e
tmp=$(mktemp)
trap 'rm -f "$tmp"' EXIT
sed '1d' /etc/hosts > "$tmp"
cat "$tmp" > /etc/hosts
#!/usr/bin/env bash
# Start an interactive Spark shell (shell image variant).
set -e

# Bind to the container's own address (first field of the first /etc/hosts
# line).
SPARK_LOCAL_IP=$(awk 'NR==1 {print $1}' /etc/hosts)
export SPARK_LOCAL_IP

# Drop the hostname alias from /etc/hosts; see
# https://issues.apache.org/jira/browse/SPARK-6680. The script is installed
# under /opt by the Dockerfile (COPY files/* /opt/), not at /.
/opt/remove_alias.sh

# The Spark distribution lives under /opt/spark (symlink created in the
# Dockerfile); the previous cd to /opt would not find ./bin/spark-shell.
cd /opt/spark

# NOTE(review): '-i' is a REPL option for loading a file; passing the local
# IP here looks suspicious -- confirm the intended flag.
exec ./bin/spark-shell \
    --master "spark://${SPARK_MASTER_PORT_7077_TCP_ADDR}:7077" \
    -i "${SPARK_LOCAL_IP}" \
    "$@"
#!/usr/bin/env bash
# Launch the Spark standalone master, bound to the container's own address.
set -e

# The first field of the first /etc/hosts line is this container's IP.
SPARK_MASTER_IP=$(awk 'NR==1 {print $1}' /etc/hosts)
export SPARK_MASTER_IP
export SPARK_LOCAL_IP="${SPARK_MASTER_IP}"
export SPARK_MASTER_PORT=7077
export SPARK_MASTER_WEBUI_PORT=8080

cd /opt/spark

# exec so the master replaces this shell and receives signals directly;
# extra arguments are forwarded verbatim.
exec ./bin/spark-class org.apache.spark.deploy.master.Master \
    --host "${SPARK_MASTER_IP}" --port "${SPARK_MASTER_PORT}" \
    --webui-port "${SPARK_MASTER_WEBUI_PORT}" "$@"
# Spark Notebook image.
FROM ubuntu:14.04
MAINTAINER Daniele Venzano <venza@brownhat.org>
# Versions composing the spark-notebook download URL below.
ENV SCALA_VERSION 2.10.4
ENV NOTEBOOK_VERSION 0.6.0
ENV SPARK_VERSION 1.4.1
ENV HADOOP_VERSION hadoop-2.5.0-cdh5.3.0
# Oracle Java 7 from the webupd8team PPA; the debconf pre-seed accepts the
# license non-interactively.
RUN apt-get update && apt-get install -y --force-yes software-properties-common python-software-properties
RUN apt-add-repository -y ppa:webupd8team/java
RUN /bin/echo debconf shared/accepted-oracle-license-v1-1 select true | /usr/bin/debconf-set-selections
RUN apt-get update && apt-get -y install oracle-java7-installer oracle-java7-set-default curl
# Fetch the pre-built spark-notebook distribution straight into /opt.
RUN curl -s https://s3.eu-central-1.amazonaws.com/spark-notebook/tgz/spark-notebook-${NOTEBOOK_VERSION}-scala-${SCALA_VERSION}-spark-${SPARK_VERSION}-${HADOOP_VERSION}.tgz | tar -xz -C /opt/
WORKDIR /opt
# Version-independent path used by start-notebook.sh.
RUN ln -s spark-notebook-${NOTEBOOK_VERSION}-scala-${SCALA_VERSION}-spark-${SPARK_VERSION}-${HADOOP_VERSION} spark-notebook
ENV PATH /opt/spark-notebook/bin:${PATH}
COPY files/* /opt/
# Make the entry-point scripts executable regardless of the permissions they
# had in the build context.
RUN chmod +x /opt/*.sh
# Notebook web UI.
EXPOSE 9000
CMD /opt/start-notebook.sh
# spark-notebook / Play configuration template. start-notebook.sh replaces
# the literal SPARK_MASTER_IP and SPARK_EXEC_RAM placeholders with values
# from the environment before launching the notebook.

# NOTE(review): the application secret is hard-coded and baked into the
# image -- consider injecting it at container start-up instead.
application.secret = "nTnOIy6^yFM5o[Z_T6jBriIYm7id43TGeLJC1U?bxt?PhfMJeCYX@s;RcNqX]xeA"
application.langs = "en"
logger.root = ERROR
logger.play = INFO
logger.application = DEBUG
manager {
notebooks {
dir = ./notebooks
custom {
# Default Spark configuration applied to every notebook kernel; the
# placeholders below are substituted by start-notebook.sh.
sparkConf = {
spark.master: "spark://SPARK_MASTER_IP:7077"
spark.executor.memory: SPARK_EXEC_RAM
}
}
}
name = "Spark Notebook"
maxBytesInFlight = 5M
kernel {
permGen = 1024m
compilerArgs=[-deprecation]
}
clusters {
}
tachyon {
enabled = true #false if commented out / not present
}
}
notebook-server {
akka {
loggers = ["akka.event.slf4j.Slf4jLogger"]
loglevel = "DEBUG"
stdout-loglevel = "DEBUG"
log-config-on-start = off
daemonic = false
debug {
}
actor {
provider = "akka.remote.RemoteActorRefProvider"
default-stash-dispatcher {
mailbox-type = "akka.dispatch.UnboundedDequeBasedMailbox"
}
}
remote {
enabled-transports = ["akka.remote.netty.tcp"]
netty.tcp {
hostname = "127.0.0.1"
# Port 0: let the kernel's remote actor system pick a free port.
port = 0
maximum-frame-size = "1 GiB"
}
}
}
}
#!/usr/bin/env bash
# Render the notebook configuration from its template and start
# spark-notebook behind the Zoe proxy.
set -e
cd /opt/spark-notebook
# Substitute the SPARK_MASTER_IP / SPARK_EXEC_RAM placeholders. Only the
# first occurrence per line is replaced, matching the original behaviour;
# sed reads the template directly instead of going through a useless cat.
sed -e "s/SPARK_MASTER_IP/$SPARK_MASTER_IP/" \
    -e "s/SPARK_EXEC_RAM/$SPARK_EXECUTOR_RAM/" \
    ../application.conf > conf/application.conf
exec ./bin/spark-notebook -Dconfig.file=./conf/application.conf \
    -Dapplication.context="/proxy/$PROXY_ID"
# Spark interactive shell image.
FROM ubuntu:14.04
MAINTAINER Daniele Venzano <venza@brownhat.org>

ENV SPARK_VERSION 1.4.1
ENV HADOOP_VERSION hadoop2.4

# Oracle Java 7 from the webupd8team PPA; the debconf pre-seed accepts the
# license non-interactively.
RUN apt-get update && apt-get install -y --force-yes software-properties-common python-software-properties
RUN apt-add-repository -y ppa:webupd8team/java
RUN /bin/echo debconf shared/accepted-oracle-license-v1-1 select true | /usr/bin/debconf-set-selections
RUN apt-get update && apt-get -y install oracle-java7-installer oracle-java7-set-default curl

# Fetch the pre-built Spark distribution straight into /opt.
RUN curl -s http://mirrors.ircam.fr/pub/apache/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-${HADOOP_VERSION}.tgz | tar -xz -C /opt/
WORKDIR /opt
# Version-independent path used by the start scripts.
RUN ln -s spark-${SPARK_VERSION}-bin-${HADOOP_VERSION} spark
ENV SPARK_HOME /opt/spark
ENV PATH /opt/spark/bin:/opt/spark/sbin:${PATH}

COPY files/* /opt/
# Make the entry-point scripts executable regardless of the permissions they
# had in the build context (the notebook image already does this).
RUN chmod +x /opt/*.sh

CMD /opt/start-shell.sh

#ADD start-master.sh /start-master.sh
#ADD scripts/start-worker /start-worker.sh
#ADD scripts/spark-shell.sh /spark-shell.sh
#ADD spark-defaults.conf /spark-defaults.conf
#ADD remove_alias.sh /remove_alias.sh
#ENV SPARK_MASTER_OPTS="-Dspark.driver.port=7001 -Dspark.fileserver.port=7002 -Dspark.broadcast.port=7003 -Dspark.replClassServer.port=7004 -Dspark.blockManager.port=7005 -Dspark.executor.port=7006 -Dspark.ui.port=4040 -Dspark.broadcast.factory=org.apache.spark.broadcast.HttpBroadcastFactory"
#ENV SPARK_WORKER_OPTS="-Dspark.driver.port=7001 -Dspark.fileserver.port=7002 -Dspark.broadcast.port=7003 -Dspark.replClassServer.port=7004 -Dspark.blockManager.port=7005 -Dspark.executor.port=7006 -Dspark.ui.port=4040 -Dspark.broadcast.factory=org.apache.spark.broadcast.HttpBroadcastFactory"
#ENV SPARK_MASTER_PORT 7077
#ENV SPARK_MASTER_WEBUI_PORT 8080
#ENV SPARK_WORKER_PORT 8888
#ENV SPARK_WORKER_WEBUI_PORT 8081
#EXPOSE 8080 7077 8888 8081 4040 7001 7002 7003 7004 7005 7006
\ No newline at end of file
#!/usr/bin/env bash
# Drop the first line of /etc/hosts (the container hostname alias added by
# Docker); works around https://issues.apache.org/jira/browse/SPARK-6680.
# /etc/hosts is a bind-mount inside a container, so it is rewritten in place
# through a temporary copy rather than replaced with mv.
set -e
tmp=$(mktemp)
trap 'rm -f "$tmp"' EXIT
sed '1d' /etc/hosts > "$tmp"
cat "$tmp" > /etc/hosts
#!/usr/bin/env bash
# Start an interactive Spark shell against the standalone master.
# Requires SPARK_MASTER_IP and SPARK_EXECUTOR_RAM in the environment.
set -e
: "${SPARK_MASTER_IP:?SPARK_MASTER_IP must be set}"
: "${SPARK_EXECUTOR_RAM:?SPARK_EXECUTOR_RAM must be set}"
cd /opt/spark
exec ./bin/spark-shell --master "spark://${SPARK_MASTER_IP}:7077" \
    --executor-memory "${SPARK_EXECUTOR_RAM}" "$@"
# Spark application submitter image: pulls an app archive from Redis and
# runs spark-submit on it (see submit.sh).
FROM ubuntu:14.04
MAINTAINER Daniele Venzano <venza@brownhat.org>

ENV SPARK_VERSION 1.4.1
ENV HADOOP_VERSION hadoop2.4

# Oracle Java 7 from the webupd8team PPA; the debconf pre-seed accepts the
# license non-interactively. unzip/redis-tools are needed by submit.sh.
RUN apt-get update && apt-get install -y --force-yes software-properties-common python-software-properties
RUN apt-add-repository -y ppa:webupd8team/java
RUN /bin/echo debconf shared/accepted-oracle-license-v1-1 select true | /usr/bin/debconf-set-selections
RUN apt-get update && apt-get -y install oracle-java7-installer oracle-java7-set-default curl unzip redis-tools

# Fetch the pre-built Spark distribution straight into /opt.
RUN curl -s http://mirrors.ircam.fr/pub/apache/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-${HADOOP_VERSION}.tgz | tar -xz -C /opt/
WORKDIR /opt
# Version-independent path used by the start scripts.
RUN ln -s spark-${SPARK_VERSION}-bin-${HADOOP_VERSION} spark
ENV SPARK_HOME /opt/spark
ENV PATH /opt/spark/bin:/opt/spark/sbin:${PATH}

COPY files/* /opt/
# Make the entry-point scripts executable regardless of the permissions they
# had in the build context (the notebook image already does this).
RUN chmod +x /opt/*.sh

CMD /opt/submit.sh

#ADD start-master.sh /start-master.sh
#ADD scripts/start-worker /start-worker.sh
#ADD scripts/spark-shell.sh /spark-shell.sh
#ADD spark-defaults.conf /spark-defaults.conf
#ADD remove_alias.sh /remove_alias.sh
#ENV SPARK_MASTER_OPTS="-Dspark.driver.port=7001 -Dspark.fileserver.port=7002 -Dspark.broadcast.port=7003 -Dspark.replClassServer.port=7004 -Dspark.blockManager.port=7005 -Dspark.executor.port=7006 -Dspark.ui.port=4040 -Dspark.broadcast.factory=org.apache.spark.broadcast.HttpBroadcastFactory"
#ENV SPARK_WORKER_OPTS="-Dspark.driver.port=7001 -Dspark.fileserver.port=7002 -Dspark.broadcast.port=7003 -Dspark.replClassServer.port=7004 -Dspark.blockManager.port=7005 -Dspark.executor.port=7006 -Dspark.ui.port=4040 -Dspark.broadcast.factory=org.apache.spark.broadcast.HttpBroadcastFactory"
#ENV SPARK_MASTER_PORT 7077
#ENV SPARK_MASTER_WEBUI_PORT 8080
#ENV SPARK_WORKER_PORT 8888
#ENV SPARK_WORKER_WEBUI_PORT 8081
#EXPOSE 8080 7077 8888 8081 4040 7001 7002 7003 7004 7005 7006
\ No newline at end of file
#!/usr/bin/env bash
# Drop the first line of /etc/hosts (the container hostname alias added by
# Docker); works around https://issues.apache.org/jira/browse/SPARK-6680.
# /etc/hosts is a bind-mount inside a container, so it is rewritten in place
# through a temporary copy rather than replaced with mv.
set -e
tmp=$(mktemp)
trap 'rm -f "$tmp"' EXIT
sed '1d' /etc/hosts > "$tmp"
cat "$tmp" > /etc/hosts
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment