Commit a417e08c authored by Daniele Venzano's avatar Daniele Venzano

Jupyter notebook ZApp

parent a237180c
# Created by .ignore support plugin (hsz.mobi)
### Python template
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
# Translations
*.mo
*.pot
# Django stuff:
*.log
# Sphinx documentation
docs/_build/
# PyBuilder
target/
.idea/
state.zoe
/zoe*.conf
zoepass.csv
# GitLab CI pipeline: a single "deploy" stage that builds and pushes the
# Docker images, then generates the ZApp JSON description(s).
stages:
  - deploy

variables:
  # Tag images with the CI build id.
  VERSION: $CI_BUILD_ID

images:
  image: docker:latest
  stage: deploy
  before_script:
    # Install the registry CA certificate where the Docker client looks for it.
    - mkdir -p /etc/docker/certs.d/$DOCKER_REGISTRY
    - cp /registry-ca.crt /etc/docker/certs.d/$DOCKER_REGISTRY/ca.crt
    - mkdir -p $HOME/.docker
    # Quoted so the JSON is written verbatim (no word splitting or globbing).
    - echo "$DOCKER_AUTH_CONFIG" > $HOME/.docker/config.json
    - apk update
    - apk add python findutils bash
  script:
    - bash build_all.sh
    - python gen_json.py
  artifacts:
    paths:
      - "*.json"
  only:
    - master
#!/usr/bin/env bash
# Build and push one Docker image for every directory directly under ./docker.
#
# Optional environment variables:
#   REPOSITORY       repository part of the image name (default: zapps)
#   DOCKER_REGISTRY  registry the images are pushed to (default: docker-registry:5000)
#   VERSION          image tag (default: current timestamp, YYYYmmddHHMMSS)
set -e

# Nothing to do when there is no docker/ directory.
if [ ! -d docker ]; then
    exit
fi

REPOSITORY=${REPOSITORY:-zapps}
DOCKER_REGISTRY=${DOCKER_REGISTRY:-docker-registry:5000}
VERSION=${VERSION:-$(date +%Y%m%d%H%M%S)}

# Accumulates the full image names, each followed by a literal "\n" that is
# expanded when the summary is printed.
built_images=''

for d in $(find docker -mindepth 1 -maxdepth 1 -type d -printf '%f '); do
    image="${DOCKER_REGISTRY}/${REPOSITORY}/${d}:${VERSION}"
    pushd "docker/${d}"
    docker build -t "${image}" .
    docker push "${image}"
    popd
    built_images+="${image}\n"
done

echo "-------------END SCRIPT-----------------"
echo "Images built:"
# %b expands the "\n" separators while keeping the image names out of the
# format string, so a '%' in a name cannot be misread as a directive.
printf '%b' "${built_images}"
echo
FROM ubuntu:14.04
MAINTAINER Daniele Venzano <venza@brownhat.org>

# Tooling that provides apt-add-repository, needed for the PPA below.
RUN apt-get update && apt-get install -y --force-yes software-properties-common python-software-properties

# Oracle Java 7 from the webupd8team PPA; the license is pre-accepted through
# debconf so the installer can run unattended.
RUN apt-add-repository -y ppa:webupd8team/java
RUN /bin/echo debconf shared/accepted-oracle-license-v1-1 select true | /usr/bin/debconf-set-selections
RUN apt-get update && apt-get -y install oracle-java7-installer oracle-java7-set-default curl
ENV JAVA_HOME /usr/lib/jvm/java-7-oracle/

# CRAN apt repository and its signing key. No USER has been set, so this runs
# as root and needs no sudo (sudo is only installed by a later apt-get step).
RUN echo "deb http://cran.rstudio.com/bin/linux/ubuntu trusty/" >> /etc/apt/sources.list
RUN apt-key adv --recv-keys --keyserver keyserver.ubuntu.com E084DAB9
WORKDIR /opt
RUN apt-get update && apt-get install -y --force-yes --no-install-recommends \
git \
vim \
wget \
build-essential \
python-dev \
ca-certificates \
bzip2 \
unzip \
libsm6 \
pandoc \
texlive-latex-base \
texlive-latex-extra \
texlive-fonts-extra \
texlive-fonts-recommended \
texlive-generic-recommended \
sudo \
locales \
libxrender1 \
libopenblas-dev \
r-base \
libcurl4-openssl-dev \
libssl-dev \
&& apt-get clean
RUN locale-gen en_US.UTF-8
# Install Tini
RUN wget --quiet https://github.com/krallin/tini/releases/download/v0.6.0/tini && \
echo "d5ed732199c36a1189320e6c4859f0169e950692f451c03e7854243b95f4234b *tini" | sha256sum -c - && \
mv tini /usr/local/bin/tini && \
chmod +x /usr/local/bin/tini
# Configure environment
ENV CONDA_DIR /opt/conda
ENV PATH $CONDA_DIR/bin:$PATH
ENV SHELL /bin/bash
ENV LC_ALL en_US.UTF-8
ENV LANG en_US.UTF-8
ENV LANGUAGE en_US.UTF-8
RUN mkdir /root/work && \
mkdir /root/.jupyter && \
mkdir /root/.local
RUN cd /tmp && \
mkdir -p $CONDA_DIR && \
wget http://repo.continuum.io/miniconda/Miniconda3-3.9.1-Linux-x86_64.sh && \
echo "6c6b44acdd0bc4229377ee10d52c8ac6160c336d9cdd669db7371aa9344e1ac3 *Miniconda3-3.9.1-Linux-x86_64.sh" | sha256sum -c - && \
/bin/bash Miniconda3-3.9.1-Linux-x86_64.sh -f -b -p $CONDA_DIR && \
rm Miniconda3-3.9.1-Linux-x86_64.sh && \
$CONDA_DIR/bin/conda install --yes conda==3.14.1
RUN conda install --yes \
'notebook=4.1*' \
terminado \
'ipywidgets=4.1*' \
'pandas=0.18*' \
'matplotlib=1.5*' \
'scipy=0.17*' \
'seaborn=0.6*' \
'scikit-learn=0.17*' \
'statsmodels=0.6.1' \
'ipyparallel' \
'basemap' \
'pillow' \
&& conda clean -yt
RUN /opt/conda/bin/pip install thunder-python showit
RUN wget https://oss.sonatype.org/content/repositories/snapshots/com/github/alexarchambault/jupyter/jupyter-scala-cli_2.11.6/0.2.0-SNAPSHOT/jupyter-scala_2.11.6-0.2.0-SNAPSHOT.tar.xz
RUN tar xvfJ jupyter-scala_2.11.6-0.2.0-SNAPSHOT.tar.xz && rm jupyter-scala_2.11.6-0.2.0-SNAPSHOT.tar.xz
RUN jupyter-scala_2.11.6-0.2.0-SNAPSHOT/bin/jupyter-scala
RUN R -e "install.packages(c('pbdZMQ', 'repr', 'devtools'), repos = 'http://cran.rstudio.com/')"
RUN R -e "devtools::install_github('irkernel/IRdisplay')"
RUN R -e "devtools::install_github('irkernel/IRkernel')"
RUN R -e "IRkernel::installspec(user = FALSE)"
# Configure container startup as root
EXPOSE 8888
WORKDIR /mnt/workspace
ENTRYPOINT ["tini", "--"]
CMD ["start-notebook.sh"]
# Add local files as late as possible to avoid cache busting
COPY files/start-notebook.sh /usr/local/bin/
RUN chmod 755 /usr/local/bin/start-notebook.sh
COPY files/jupyter_notebook_config.py /root/.jupyter/
RUN mkdir -p /root/.ipython/profile_default/startup/
# Jupyter Notebook image
This image contains the Jupyter notebook with a number of useful kernels. It is used by Zoe, the Container Analytics as a
Service system, to create on-demand notebooks connected to containerized Spark clusters.
Zoe can be found at: https://github.com/DistributedSystemsGroup/zoe
## Setup
The Dockerfile runs a start script that configures the Notebook using these environment variables:
* SPARK\_MASTER\_IP: IP address of the Spark master this notebook should use for its kernel
* PROXY\_ID: string to use as a prefix for URL paths, for reverse proxying
* SPARK\_EXECUTOR\_RAM: How much RAM to use for each executor spawned by the notebook
# Copyright (c) Jupyter Development Team.
from jupyter_core.paths import jupyter_data_dir
import subprocess
import os
import errno
import stat
# Combined certificate/key file, kept inside Jupyter's data directory.
PEM_FILE = os.path.join(jupyter_data_dir(), 'notebook.pem')

c = get_config()
c.NotebookApp.ip = '*'
c.NotebookApp.port = 8888
c.NotebookApp.open_browser = False

# Serve over HTTPS whenever USE_HTTPS is present in the environment,
# whatever its value.
if 'USE_HTTPS' in os.environ:
    if not os.path.isfile(PEM_FILE):
        # The directory that will hold PEM_FILE has to exist first.
        dir_name = os.path.dirname(PEM_FILE)
        try:
            os.makedirs(dir_name)
        except OSError as err:
            # An already-existing directory is fine; re-raise anything else.
            if err.errno != errno.EEXIST or not os.path.isdir(dir_name):
                raise
        # No certificate on disk yet: create a self-signed one, writing the
        # key and the certificate into the same file.
        subprocess.check_call([
            'openssl', 'req', '-new',
            '-newkey', 'rsa:2048', '-days', '365', '-nodes', '-x509',
            '-subj', '/C=XX/ST=XX/L=XX/O=generated/CN=generated',
            '-keyout', PEM_FILE, '-out', PEM_FILE,
        ])
        # Owner read/write only.
        os.chmod(PEM_FILE, stat.S_IRUSR | stat.S_IWUSR)
    c.NotebookApp.certfile = PEM_FILE

# Protect the notebook with a password when PASSWORD is set, then drop the
# variable from this process's environment.
if 'PASSWORD' in os.environ:
    from IPython.lib import passwd
    c.NotebookApp.password = passwd(os.environ['PASSWORD'])
    del os.environ['PASSWORD']
#!/usr/bin/env bash
# Container start script: registers the R kernel, enables the ipcluster
# notebook extension, then replaces this shell with the Jupyter notebook
# server, forwarding any arguments given to the script.

# Trace every command for easier debugging from the container log.
set -x

R -e "IRkernel::installspec()"
ipcluster nbextension enable

# Remove any jupyter_notebook_config.json in the current directory.
rm -f jupyter_notebook_config.json

# "$@" keeps each argument intact; unquoted $* would re-split arguments
# containing whitespace and expand glob characters.
exec jupyter notebook "$@"
# Copyright (c) 2016, Daniele Venzano
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Spark-Jupyter Zoe application description generator."""
import json
import sys
import os
APP_NAME = 'jupyter-notebook'
ZOE_APPLICATION_DESCRIPTION_VERSION = 3

# Resource limits applied to the notebook service; each entry carries the
# value plus a human-readable description.
options = dict(
    core_limit=dict(
        value=4,
        description='Notebook core limit',
    ),
    memory_limit=dict(
        value=4 * (1024 ** 3),
        description='Notebook memory limit (bytes)',
    ),
)

# Image coordinates, overridable through the environment.
# NOTE(review): build_all.sh defaults DOCKER_REGISTRY to
# "docker-registry:5000" while this default is "docker-engine:5000" --
# confirm which one is intended.
REGISTRY = os.environ.get("DOCKER_REGISTRY", "docker-engine:5000")
REPOSITORY = os.environ.get("REPOSITORY", "zapps")
VERSION = os.environ.get("VERSION", "latest")
IMAGE = '{}/{}/jupyter-notebook:{}'.format(REGISTRY, REPOSITORY, VERSION)
def boinc_service(memory_limit, core_limit):
    """Build the description of the single Jupyter notebook service.

    :param memory_limit: used as both "min" and "max" of the memory resource
    :param core_limit: used as both "min" and "max" of the cores resource
    :return: the service description, using the module-level IMAGE
    :rtype: dict
    """
    resources = {
        "memory": {"min": memory_limit, "max": memory_limit},
        "cores": {"min": core_limit, "max": core_limit},
    }
    notebook_port = {
        'name': 'Jupyter Notebook interface',
        'protocol': 'tcp',
        'port_number': 8888,
        'url_template': 'http://{ip_port}/',
    }
    return {
        'name': "jupyter",
        'image': IMAGE,
        'monitor': True,
        'resources': resources,
        'ports': [notebook_port],
        'environment': [
            ["NB_USER", "{user_name}"],
        ],
        'volumes': [],
        'command': None,
        'total_count': 1,
        'essential_count': 1,
        'startup_order': 0,
    }
if __name__ == '__main__':
    # Assemble the application description around the single notebook
    # service, sized from the module-level ``options``.
    app = {
        'name': APP_NAME,
        'version': ZOE_APPLICATION_DESCRIPTION_VERSION,
        'will_end': False,
        'size': 512,
        'services': [
            boinc_service(options["memory_limit"]["value"], options["core_limit"]["value"])
        ]
    }
    # The context manager guarantees the file is flushed and closed, even if
    # serialization fails part-way through.
    with open("jupyter.json", "w") as out_file:
        json.dump(app, out_file, sort_keys=True, indent=4)
    print("ZApp written")
logo.png

12.5 KB

Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment