From 8dcc37ce18d9dcb5cf4726662e12bd01c63392fb Mon Sep 17 00:00:00 2001 From: Daniele Venzano Date: Thu, 7 Sep 2017 10:47:23 +0200 Subject: [PATCH] Document the new logging system and fix pylint issues --- README.rst | 2 +- docs/config_file.rst | 19 ++++++++++++++---- docs/logging.rst | 39 +++++++++++++++++++++++++++++++------ zoe_api/api_endpoint.py | 1 - zoe_api/rest_api/service.py | 3 +++ zoe_api/web/websockets.py | 8 ++++++++ zoe_master/entrypoint.py | 1 - zoe_master/gelf_listener.py | 1 - 8 files changed, 60 insertions(+), 14 deletions(-) diff --git a/README.rst b/README.rst index 0847b93..5e3079c 100644 --- a/README.rst +++ b/README.rst @@ -11,7 +11,7 @@ Resources: - Documentation: http://docs.zoe-analytics.eu - Roadmap: https://gitlab.eurecom.fr/zoe/main/wikis/home - Mailing list: http://www.freelists.org/list/zoe -- Issue tracker: https://gitlab.eurecom.fr/zoe/main/issues +- Issue tracker: https://github.com/DistributedSystemsGroup/zoe/issues - Stable releases: https://github.com/DistributedSystemsGroup/zoe Zoe applications (ZApps): diff --git a/docs/config_file.rst b/docs/config_file.rst index 44cd7c5..f907fce 100644 --- a/docs/config_file.rst +++ b/docs/config_file.rst @@ -17,14 +17,24 @@ Common options: * ``debug = `` : enable or disable debug log output * ``api-listen-uri = tcp://*:4850`` : ZeroMQ server connection string, used for the master listening endpoint -* ``deployment-name = devel`` : name of this Zoe deployment. Can be used to have multiple Zoe deployments using the same Swarm (devel and prod, for example) +* ``deployment-name = devel`` : name of this Zoe deployment. Can be used to have multiple Zoe deployments using the same back-end (devel and prod, for example) + +Workspaces: + * ``workspace-deployment-path`` : path appended to the ``workspace-base-path`` to distinguish this deployment. 
If left unspecified it is equal to the deployment name +* ``workspace-base-path = /mnt/zoe-workspaces`` : Base directory where user workspaces will be created. This directory should reside on a shared filesystem visible by all Docker hosts. + +Metrics: + * ``influxdb-dbname = zoe`` : Name of the InfluxDB database to use for storing metrics * ``influxdb-url = http://localhost:8086`` : URL of the InfluxDB service (ex. ) * ``influxdb-enable = False`` : Enable metric output toward influxDB -* ``workspace-base-path = /mnt/zoe-workspaces`` : Base directory where user workspaces will be created. This directory should reside on a shared filesystem visible by all Docker hosts. -* ``overlay-network-name = zoe`` : name of the pre-configured Docker overlay network Zoe should use (Swarm backend) -* ``backend = Swarm`` : ' Name of the backend to enable and use + +Service logs: + +* ``gelf-address`` : Enable Docker GELF log output to this destination (ex. udp://1.2.3.4:7896). Empty by default +* ``gelf-listener = 7896`` : Enable the internal GELF log listener on this port, set to 0 to disable +* ``service-logs-base-path`` : Base path of the directory hierarchy where the internal GELF listener stores the service logs (see the logging documentation) PostgresQL database options: @@ -65,6 +75,7 @@ Swarm backend options: * ``backend-swarm-tls-cert = cert.pem`` : Docker TLS certificate file * ``backend-swarm-tls-key = key.pem`` : Docker TLS private key file * ``backend-swarm-tls-ca = ca.pem`` : Docker TLS CA certificate file +* ``overlay-network-name = zoe`` : name of the pre-configured Docker overlay network Zoe should use (Swarm backend) Kubernetes backend: diff --git a/docs/logging.rst b/docs/logging.rst index 61212fa..a1cd33f 100644 --- a/docs/logging.rst +++ b/docs/logging.rst @@ -1,11 +1,38 @@ .. _logging: -Container logs -============== +Zoe logs and service output +=========================== -By design Zoe does not involve itself with the output from container processes. 
The logs can be retrieved with the usual Docker command ``docker logs`` while a container is alive, they are lost forever when the container is deleted. This solution however does not scale very well: to examine logs, users need to have access to the docker commandline tools and to the Swarm they are running in. +Zoe daemons output log information on the standard error stream. More verbose output can be enabled by setting the option ``debug`` to ``true``. -In production we recommend to configure your backend to manage the logs according to your policies. Docker Engines, for example, can be configured to send standard output and error to a remote destination in GELF format (others are supported), as soon as they are generated. +The command-line option ``--log-file`` can be used to specify a file where the output should be written. -A popular logging stack that supports GELF is `ELK `_. However, in our experience, web interfaces like Kibana or Graylog are not useful to the Zoe users: they want to quickly dig through logs of their executions to find an error or an interesting number to correlate to some other number in some other log. The web interfaces are slow and cluttered compared to using grep on a text file. -Which alternative is good for you depends on the usage pattern of your users, your log auditing requirements, etc. +Service logs +------------ + +In this section we focus on the output produced by the ZApps and their services. + +Companies and users have a wide variety of requirements for this kind of output: + + * It may need to be stored for auditing or research + * Users need to access it for debugging or to check progress of their executions + * ZApps may generate a lot of output in a very short time: it may become a lot of data moving around + +Because of this, in Zoe we decided to leave the maximum freedom to administrators deploying Zoe. 
By default Zoe does not configure the container back-ends to do anything special with the output of containers, so whatever is configured there is respected by Zoe. + +In this case the logs command line, API and web interface will not be operational. + +Swarm-only integrated log management +------------------------------------ + +When using the Swarm back-end, however, Zoe can configure the containers to produce the output in UDP GELF format and send it to a configured destination, via the ``gelf-address`` option. Each message is enriched with labels to help match each log line to the ZApp and service that produced it. + +GELF is understood by many tools, like Graylog or the `ELK `_ and it is possible to store the service output in Elasticsearch and make it searchable via Kibana, for example. + +Additionally the Zoe master can itself be configured to act as a log collector. This is enabled by setting the option ``gelf-listener`` to the port number specified in ``gelf-address``. In this case the Zoe Master will activate a thread that listens on that UDP port. Logs will be stored in files, in a directory hierarchy built as follows:: + + ///.txt + +In this case the logs command line, API and web interface will work normally. + +Please note that the GELF listener implemented in the Zoe Master process is not built to manage high loads of incoming log messages. If the incoming rate is too high, UDP packets (and hence log lines) may be dropped and lost. 
diff --git a/zoe_api/api_endpoint.py b/zoe_api/api_endpoint.py index f34474f..1d343fc 100644 --- a/zoe_api/api_endpoint.py +++ b/zoe_api/api_endpoint.py @@ -26,7 +26,6 @@ import zoe_lib.applications import zoe_lib.exceptions import zoe_lib.state from zoe_lib.config import get_conf -from zoe_master.backends.swarm.api_client import SwarmClient log = logging.getLogger(__name__) diff --git a/zoe_api/rest_api/service.py b/zoe_api/rest_api/service.py index cd6096c..ce5d92a 100644 --- a/zoe_api/rest_api/service.py +++ b/zoe_api/rest_api/service.py @@ -64,6 +64,9 @@ class ServiceLogsAPI(RequestHandler): """Initializes the request handler.""" self.api_endpoint = kwargs['api_endpoint'] # type: APIEndpoint self.connection_closed = False + self.service_id = None + self.stream = None + self.log_obj = None def set_default_headers(self): """Set up the headers for enabling CORS.""" diff --git a/zoe_api/web/websockets.py b/zoe_api/web/websockets.py index fe3f7ae..b4009dc 100644 --- a/zoe_api/web/websockets.py +++ b/zoe_api/web/websockets.py @@ -38,6 +38,10 @@ class WebSocketEndpointWeb(tornado.websocket.WebSocketHandler): """Initializes the request handler.""" super().initialize() self.api_endpoint = kwargs['api_endpoint'] # type: APIEndpoint + self.uid = None + self.role = None + self.log_obj = None + self.stream = None @catch_exceptions def open(self, *args, **kwargs): @@ -111,3 +115,7 @@ class WebSocketEndpointWeb(tornado.websocket.WebSocketHandler): def on_close(self): """Invoked when the WebSocket is closed.""" log.debug("WebSocket closed") + + def data_received(self, chunk): + """Not implemented as we do not use stream uploads""" + pass diff --git a/zoe_master/entrypoint.py b/zoe_master/entrypoint.py index ff73aa9..fea67de 100644 --- a/zoe_master/entrypoint.py +++ b/zoe_master/entrypoint.py @@ -102,4 +102,3 @@ def main(): metrics.quit() if gelf_listener is not None: gelf_listener.quit() - diff --git a/zoe_master/gelf_listener.py b/zoe_master/gelf_listener.py index 
94fb17c..2796be5 100644 --- a/zoe_master/gelf_listener.py +++ b/zoe_master/gelf_listener.py @@ -82,4 +82,3 @@ class GELFListener: """Stops the GELF server.""" self.server.shutdown() self.th.join(0.1) - -- GitLab