diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..b5f5a08 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,5 @@ +.DS_Store +**/.DS_Store +examples +_env* +**/_env* diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..08ec465 --- /dev/null +++ b/.gitignore @@ -0,0 +1,7 @@ +# credentials +_env* +manta* + +# macos frustration +.DS_Store + diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..6e4f7a2 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,63 @@ +FROM telegraf:1.4-alpine + +# Reset to root user to do some installs +USER root + +# Install packages +RUN apk add --no-cache \ + bash \ + curl \ + jq \ + unzip + +# Add ContainerPilot and its configuration +# Releases at https://github.com/joyent/containerpilot/releases +ENV CONTAINERPILOT_VER=2.3.0 +ENV CONTAINERPILOT=file:///etc/containerpilot.json + +RUN export CONTAINERPILOT_CHECKSUM=ec9dbedaca9f4a7a50762f50768cbc42879c7208 \ + && curl --retry 7 --fail -Lso /tmp/containerpilot.tar.gz \ + "https://github.com/joyent/containerpilot/releases/download/${CONTAINERPILOT_VER}/containerpilot-${CONTAINERPILOT_VER}.tar.gz" \ + && echo "${CONTAINERPILOT_CHECKSUM} /tmp/containerpilot.tar.gz" | sha1sum -c \ + && tar zxf /tmp/containerpilot.tar.gz -C /usr/local/bin \ + && rm /tmp/containerpilot.tar.gz + +# Add our helper/glue scripts and configuration for this specific app +COPY bin /usr/local/bin +COPY etc /etc + +# Install Consul +# Releases at https://releases.hashicorp.com/consul +RUN export CONSUL_VERSION=0.6.4 \ + && export CONSUL_CHECKSUM=abdf0e1856292468e2c9971420d73b805e93888e006c76324ae39416edcf0627 \ + && curl --retry 7 --fail -Lso /tmp/consul.zip "https://releases.hashicorp.com/consul/${CONSUL_VERSION}/consul_${CONSUL_VERSION}_linux_amd64.zip" \ + && echo "${CONSUL_CHECKSUM} /tmp/consul.zip" | sha256sum -c \ + && unzip /tmp/consul.zip -d /usr/local/bin \ + && rm /tmp/consul.zip \ + && mkdir /config + +# Create empty directories for Consul config and data +RUN mkdir 
-p /etc/consul \ + && chown -R root /etc/consul \ + && mkdir -p /var/lib/consul \ + && chown -R root /var/lib/consul + +# Install Consul template +# Releases at https://releases.hashicorp.com/consul-template/ +RUN export CONSUL_TEMPLATE_VERSION=0.14.0 \ + && export CONSUL_TEMPLATE_CHECKSUM=7c70ea5f230a70c809333e75fdcff2f6f1e838f29cfb872e1420a63cdf7f3a78 \ + && curl --retry 7 --fail -Lso /tmp/consul-template.zip "https://releases.hashicorp.com/consul-template/${CONSUL_TEMPLATE_VERSION}/consul-template_${CONSUL_TEMPLATE_VERSION}_linux_amd64.zip" \ + && echo "${CONSUL_TEMPLATE_CHECKSUM} /tmp/consul-template.zip" | sha256sum -c \ + && unzip /tmp/consul-template.zip -d /usr/local/bin \ + && rm /tmp/consul-template.zip + +# Reset entrypoint from base image +ENTRYPOINT [] + +# Run telegraf +USER root +CMD ["/usr/local/bin/containerpilot", \ + "/entrypoint.sh", \ + "telegraf", \ + "-config", \ + "/etc/telegraf.conf"] diff --git a/README.md b/README.md index fddc0ec..2cdad8b 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,94 @@ # telegraf Work in progress, not stable, expect force pushes of this repo + +Containerized Telegraf server, based on the official telegraf:1.4-alpine Docker image, adding [ContainerPilot](https://www.joyent.com/containerpilot) to announce this container's telegraf service to a Service Discovery layer, such as Consul or etcd. + +### Usage +Include this image in your Docker Compose project, query Consul for its IP address and use it in your configurations, easily done via [Consul-Template](https://github.com/hashicorp/consul-template). The default ContainerPilot configuration talks to Consul and assumes the IP address to access consul is passed to the container in an environment variable, $CONSUL (or via docker link consul). + +Configuration of telegraf is managed via ContainerPilot `preStart` or `onChange` handlers. + +Telegraf output is configured with the InfluxDB output plugin. 
By default Telegraf looks for an InfluxDB container started in the same cluster, but it's possible to point Telegraf to a remote InfluxDB server by uncommenting and setting the INFLUXDB_HOST variable in the _env file. + +Telegraf input sources are configured with the Prometheus input plugin and are a list of URLs pointing to ContainerPilot telemetry endpoints (http://container-ip:9090/metrics). Input sources are reloaded automatically by the `onChange` event handler. + +### Configuration + +Please run setup.sh to generate the required _env file and configure the CONSUL env variable. + +You can also check the _env file. By setting the INFLUXDB_HOST variable there you can point Telegraf to an already running instance of InfluxDB (you have to remove the influxdb section from docker-compose.yml in this case). + +### Hello world example + +1. [Get a Joyent account](https://my.joyent.com/landing/signup/) and [add your SSH key](https://docs.joyent.com/public-cloud/getting-started). +1. Install the [Docker Toolbox](https://docs.docker.com/installation/mac/) (including `docker` and `docker-compose`) on your laptop or other environment, as well as the [Joyent Triton CLI](https://www.joyent.com/blog/introducing-the-triton-command-line-tool) (`triton` replaces our old `sdc-*` CLI tools). +1. [Configure Docker and Docker Compose for use with Joyent.](https://docs.joyent.com/public-cloud/api-access/docker) + +Check that everything is configured correctly by running `./setup.sh`. This will check that your environment is set up correctly and will create an `_env` file that includes injecting an environment variable for the Consul hostname into the Telegraf and Nginx containers so we can take advantage of [Triton Container Name Service (CNS)](https://www.joyent.com/blog/introducing-triton-container-name-service). 
+ +Start everything: + +```bash +docker-compose build +docker-compose up -d +``` +As a result we'll have 4 containers running: +- consul +- telegraf_nginx_1 - nginx web server, used just for demo purposes to scale and provide telemetry +- influxdb - currently running locally, but it's possible to connect to an existing InfluxDB server +- telegraf + +To verify the telegraf container status you can check the container log (there should be a list of records that indicate attempts to join new input sources): +```bash +docker logs telegraf 2>&1 | grep EventMemberJoin +``` +It should display a list of members (input sources) recently added. + +You can also check the list of input source URLs for telemetry currently used by telegraf with the following command: +```bash +docker exec -i -t telegraf /bin/grep :9090 /etc/telegraf.conf +``` +The list of URLs includes the consul container (first one in the output), the telegraf container (localhost), and all other URLs are nginx container URLs. +So you can count the URLs in the output, subtract 2, and get the number of nginx containers. + + +Let's scale up the number of nginx containers to 3, wait for 15 seconds (to give telegraf some time to reconfigure itself) and check the number of input URLs (or EventMemberJoin events in the logs): +```bash +docker-compose scale nginx=3 +sleep 15 + +# check source urls +docker exec -i -t telegraf /bin/grep :9090 /etc/telegraf.conf + +# check logs +docker logs telegraf 2>&1 | grep EventMemberJoin +``` + +Let's scale down the number of nginx containers to 1, wait for 15 seconds and check the number of input URLs again: +```bash +docker-compose scale nginx=1 +sleep 15 + +# check source urls +docker exec -i -t telegraf /bin/grep :9090 /etc/telegraf.conf +``` + +Finally you can check the actual result of telemetry aggregation (via telegraf) on the InfluxDB server. 
+You have to open InfluxDB UI with the following command: +```bash +open "http://$(triton ip influxdb):8083/" +``` +choose 'telegraf' database in dropdown located on the top-right corner, type and execute a query +``` +SHOW MEASUREMENTS +``` +there should be a record like 'nginx_connections_load' which represents data coming from nginx telemetry. +And the following query should display a list of nginx specific telemetry records collected during last 5 minutes: +``` +SELECT * FROM nginx_connections_load WHERE time > now() - 5m +``` + +At the end of test you can shutdown containers with the following command: +``` +docker-compose kill +``` diff --git a/bin/reload.sh b/bin/reload.sh new file mode 100755 index 0000000..4c3dc66 --- /dev/null +++ b/bin/reload.sh @@ -0,0 +1,83 @@ +#!/bin/bash + +# Render Telegraf configuration template using values from Consul, +# but do not reload because Telegraf hasn't started yet +preStart() { + # Do we have env vars for Triton discovery? + # Copy creds from env vars to files on disk ('#' in each value stands in for a newline) + if [ -n "${TRITON_CREDS_PATH}" ] \ + && [ -n "${TRITON_CA}" ] \ + && [ -n "${TRITON_CERT}" ] \ + && [ -n "${TRITON_KEY}" ] + then + mkdir -p "${TRITON_CREDS_PATH}" + echo -e "${TRITON_CA}" | tr '#' '\n' > "${TRITON_CREDS_PATH}/ca.pem" + echo -e "${TRITON_CERT}" | tr '#' '\n' > "${TRITON_CREDS_PATH}/cert.pem" + echo -e "${TRITON_KEY}" | tr '#' '\n' > "${TRITON_CREDS_PATH}/key.pem" + fi + + # Are we on Triton? Do we _not_ have a user-defined DC? 
+ # Set the DC automatically from mdata + if [ -z "${TRITON_DC}" ] \ + && [ -f "/native/usr/sbin/mdata-get" ] + then + export TRITON_DC=$(/native/usr/sbin/mdata-get sdc:datacenter_name) + fi + + # Create Telegraf config + consul-template \ + -once \ + -dedup \ + -consul-addr "${CONSUL}:8500" \ + -template "/etc/telegraf.ctmpl:/etc/telegraf.conf" +} + + + +# Render Telegraf configuration template using values from Consul, +# then gracefully reload Telegraf +onChange() { + consul-template \ + -once \ + -dedup \ + -consul-addr "${CONSUL}:8500" \ + -template "/etc/telegraf.ctmpl:/etc/telegraf.conf:/usr/local/bin/reload.sh reloadConfig" +} + + + +# SIGHUP to reload the Telegraf config +# However: if we fire the SIGHUP to Telegraf before it has a chance to +# register the signal handler, then it will immediately exit. +# This checks that Telegraf is listening on port 8094, which should only +# be true after the signal handler is loaded. +reloadConfig() { + while : + do + netstat -ln | grep -q 8094 \ + && pkill -SIGHUP telegraf \ + && break + sleep 1 # pause between attempts instead of spinning the CPU + done +} + + + +help() { + echo "Usage: ./reload.sh preStart => first-run configuration for Telegraf" + echo " ./reload.sh onChange => [default] update Telegraf config on upstream changes" + echo " ./reload.sh reloadConfig => reload Telegraf config on upstream changes" +} + +until + cmd=$1 + if [ -z "$cmd" ]; then + onChange + fi + shift 1 + $cmd "$@" + [ "$?" -ne 127 ] +do + onChange + exit +done diff --git a/bin/sensor.sh b/bin/sensor.sh new file mode 100755 index 0000000..6c1d595 --- /dev/null +++ b/bin/sensor.sh @@ -0,0 +1,36 @@ +#!/bin/bash +set -e + +help() { + echo 'Uses cli tools free and top to determine current CPU and memory usage' + echo 'for the telemetry service.' 
+} + +# memory usage in percent +sys_memory() { + # awk oneliner to get memory usage + # free -m | awk 'NR==2{printf "Memory Usage: %s/%sMB (%.2f%%)\n", $3,$2,$3*100/$2 }' + # output: + # Memory Usage: 15804/15959MB (99.03%) + (>&2 echo "sys memory check fired") + local memory=$(free -m | awk 'NR==2{printf "%.2f", $3*100/$2 }') + echo ${memory} +} + +# cpu load +sys_cpu() { + # oneliner to display cpu load + # top -bn1 | grep load | awk '{printf "CPU Load: %.2f\n", $(NF-2)}' + (>&2 echo "sys cpu check fired") + local cpuload=$(top -bn1 | grep load | awk '{printf "%.2f", $(NF-2)}') + echo ${cpuload} +} + +cmd=$1 +if [ ! -z "$cmd" ]; then + shift 1 + $cmd "$@" + exit +fi + +help diff --git a/etc/containerpilot.json b/etc/containerpilot.json new file mode 100644 index 0000000..154cbde --- /dev/null +++ b/etc/containerpilot.json @@ -0,0 +1,50 @@ +{ + "consul": "{{ if .CONSUL_AGENT }}localhost{{ else }}{{ .CONSUL }}{{ end }}:8500", + "preStart": "/usr/local/bin/reload.sh preStart", + "services": [ + { + "name": "telegraf", + "port": 8094, + "health": "nc -vz localhost 8094", + "poll": 10, + "ttl": 25 + } + ], + "backends": [ + { + "name": "nginx", + "poll": 7, + "onChange": "/usr/local/bin/reload.sh onChange" + } + ], + "coprocesses": [{{ if .CONSUL_AGENT }} + { + "command": ["/usr/local/bin/consul", "agent", + "-data-dir=/var/lib/consul", + "-config-dir=/etc/consul", + "-rejoin", + "-retry-join", "{{ .CONSUL }}", + "-retry-max", "10", + "-retry-interval", "10s"], + "restarts": "unlimited" + }{{ end }}], + "telemetry": { + "port": 9090, + "sensors": [ + { + "name": "telegraf_sys_memory_percent", + "help": "percentage of memory used", + "type": "gauge", + "poll": 5, + "check": ["/usr/local/bin/sensor.sh", "sys_memory"] + }, + { + "name": "telegraf_sys_cpu_load", + "help": "cpu load", + "type": "gauge", + "poll": 5, + "check": ["/usr/local/bin/sensor.sh", "sys_cpu"] + } + ] + } +} diff --git a/etc/telegraf.conf b/etc/telegraf.conf new file mode 100644 index 
0000000..52a5815 --- /dev/null +++ b/etc/telegraf.conf @@ -0,0 +1,215 @@ +# Telegraf Configuration + +# Global tags can be specified here in key="value" format. +[global_tags] + # dc = "us-east-1" # will tag all metrics with dc=us-east-1 + # rack = "1a" + ## Environment variables can be used as tags, and throughout the config file + # user = "$USER" + + +# Configuration for telegraf agent +[agent] + ## Default data collection interval for all inputs + interval = "10s" + ## Rounds collection interval to 'interval' + ## ie, if interval="10s" then always collect on :00, :10, :20, etc. + round_interval = true + + ## Telegraf will send metrics to outputs in batches of at + ## most metric_batch_size metrics. + metric_batch_size = 1000 + ## For failed writes, telegraf will cache metric_buffer_limit metrics for each + ## output, and will flush this buffer on a successful write. Oldest metrics + ## are dropped first when this buffer fills. + metric_buffer_limit = 10000 + + ## Collection jitter is used to jitter the collection by a random amount. + ## Each plugin will sleep for a random time within jitter before collecting. + ## This can be used to avoid many plugins querying things like sysfs at the + ## same time, which can have a measurable effect on the system. + collection_jitter = "0s" + + ## Default flushing interval for all outputs. You shouldn't set this below + ## interval. Maximum flush_interval will be flush_interval + flush_jitter + flush_interval = "10s" + ## Jitter the flush interval by a random amount. This is primarily to avoid + ## large write spikes for users running a large number of telegraf instances. + ## ie, a jitter of 5s and interval 10s means flushes will happen every 10-15s + flush_jitter = "0s" + + ## By default, precision will be set to the same timestamp order as the + ## collection interval, with the maximum being 1s. + ## Precision will NOT be used for service inputs, such as logparser and statsd. 
+ ## Valid values are "ns", "us" (or "µs"), "ms", "s". + precision = "" + ## Run telegraf in debug mode + debug = false + ## Run telegraf in quiet mode + quiet = false + ## Override default hostname, if empty use os.Hostname() + hostname = "" + ## If set to true, do no set the "host" tag in the telegraf agent. + omit_hostname = false + +############################################################################### +# OUTPUT PLUGINS # +############################################################################### + +# Configuration for influxdb server to send metrics to +[[outputs.influxdb]] + ## The full HTTP or UDP endpoint URL for your InfluxDB instance. + ## Multiple urls can be specified as part of the same cluster, + ## this means that only ONE of the urls will be written to each interval. + # urls = ["udp://localhost:8089"] # UDP endpoint example + urls = ["http://influxdb:8086"] # required (default) + + ## The target database for metrics (telegraf will create it if not exists). + database = "telegraf" # required + + ## Retention policy to write to. Empty string writes to the default rp. + retention_policy = "" + ## Write consistency (clusters only), can be: "any", "one", "quorum", "all" + write_consistency = "any" + + ## Write timeout (for the InfluxDB client), formatted as a string. + ## If not provided, will default to 5s. 0s means no timeout (not recommended). 
+ timeout = "5s" + # username = "telegraf" + # password = "metricsmetricsmetricsmetrics" + ## Set the user agent for HTTP POSTs (can be useful for log differentiation) + # user_agent = "telegraf" + ## Set UDP payload size, defaults to InfluxDB UDP Client default (512 bytes) + # udp_payload = 512 + + ## Optional SSL Config + # ssl_ca = "/etc/telegraf/ca.pem" + # ssl_cert = "/etc/telegraf/cert.pem" + # ssl_key = "/etc/telegraf/key.pem" + ## Use SSL but skip chain & host verification + # insecure_skip_verify = false + +############################################################################### +# INPUT PLUGINS # +############################################################################### + +# Read metrics about cpu usage +[[inputs.cpu]] + ## Whether to report per-cpu stats or not + percpu = true + ## Whether to report total system cpu stats or not + totalcpu = true + ## Comment this line if you want the raw CPU time metrics + fielddrop = ["time_*"] + + +# Read metrics about disk usage by mount point +[[inputs.disk]] + ## By default, telegraf gather stats for all mountpoints. + ## Setting mountpoints will restrict the stats to the specified mountpoints. + # mount_points = ["/"] + + ## Ignore some mountpoints by filesystem type. For example (dev)tmpfs (usually + ## present on /run, /var/run, /dev/shm or /dev). + ignore_fs = ["tmpfs", "devtmpfs"] + + +# Read metrics about disk IO by device +[[inputs.diskio]] + ## By default, telegraf will gather stats for all devices including + ## disk partitions. + ## Setting devices will restrict the stats to the specified devices. + # devices = ["sda", "sdb"] + ## Uncomment the following line if you need disk serial numbers. 
+ # skip_serial_number = false + + +# Get kernel statistics from /proc/stat +[[inputs.kernel]] + # no configuration + + +# Read metrics about memory usage +[[inputs.mem]] + # no configuration + + +# Get the number of processes and group them by status +[[inputs.processes]] + # no configuration + + +# Read metrics about swap memory usage +[[inputs.swap]] + # no configuration + + +# Read metrics about system load & uptime +[[inputs.system]] + # no configuration + +# # Generic TCP listener +[[inputs.tcp_listener]] + ## Address and port to host TCP listener on + service_address = ":8094" +# +# ## Number of TCP messages allowed to queue up. Once filled, the +# ## TCP listener will start dropping packets. + allowed_pending_messages = 10000 +# +# ## Maximum number of concurrent TCP connections to allow + max_tcp_connections = 250 +# +# ## Data format to consume. +# ## Each data format has it's own unique set of configuration options, read +# ## more about them here: +# ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md + data_format = "influx" + + +# # Generic UDP listener +[[inputs.udp_listener]] + ## Address and port to host UDP listener on + service_address = ":8092" +# +# ## Number of UDP messages allowed to queue up. Once filled, the +# ## UDP listener will start dropping packets. + allowed_pending_messages = 10000 +# +# ## Data format to consume. +# ## Each data format has it's own unique set of configuration options, read +# ## more about them here: +# ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md + data_format = "influx" + + +# # Gather health check statuses from services registered in Consul +# [[inputs.consul]] +# ## Most of these values defaults to the one configured on a Consul's agent level. 
+# ## Optional Consul server address (default: "localhost") +# # address = "localhost" +# ## Optional URI scheme for the Consul server (default: "http") +# # scheme = "http" +# ## Optional ACL token used in every request (default: "") +# # token = "" +# ## Optional username used for request HTTP Basic Authentication (default: "") +# # username = "" +# ## Optional password used for HTTP Basic Authentication (default: "") +# # password = "" +# ## Optional data centre to query the health checks from (default: "") +# # datacentre = "" + +# # Read metrics from one or many prometheus clients +[[inputs.prometheus]] + ## An array of urls to scrape metrics from. + urls = ["http://localhost:9090/metrics"] # default + +# ## Use bearer token for authorization +# # bearer_token = /path/to/bearer/token +# +# ## Optional SSL Config +# # ssl_ca = /path/to/cafile +# # ssl_cert = /path/to/certfile +# # ssl_key = /path/to/keyfile +# ## Use SSL but skip chain & host verification +# # insecure_skip_verify = false diff --git a/etc/telegraf.ctmpl b/etc/telegraf.ctmpl new file mode 100644 index 0000000..68deeee --- /dev/null +++ b/etc/telegraf.ctmpl @@ -0,0 +1,120 @@ +# Telegraf Configuration + +# Global tags can be specified here in key="value" format. +[global_tags] + # dc = "us-east-1" # will tag all metrics with dc=us-east-1 + # rack = "1a" + ## Environment variables can be used as tags, and throughout the config file + # user = "$USER" + + +# Configuration for telegraf agent +[agent] + ## Default data collection interval for all inputs + interval = "10s" + ## Rounds collection interval to 'interval' + ## ie, if interval="10s" then always collect on :00, :10, :20, etc. + round_interval = true + + ## Telegraf will send metrics to outputs in batches of at + ## most metric_batch_size metrics. + metric_batch_size = 1000 + ## For failed writes, telegraf will cache metric_buffer_limit metrics for each + ## output, and will flush this buffer on a successful write. 
Oldest metrics + ## are dropped first when this buffer fills. + metric_buffer_limit = 10000 + + ## Collection jitter is used to jitter the collection by a random amount. + ## Each plugin will sleep for a random time within jitter before collecting. + ## This can be used to avoid many plugins querying things like sysfs at the + ## same time, which can have a measurable effect on the system. + collection_jitter = "0s" + + ## Default flushing interval for all outputs. You shouldn't set this below + ## interval. Maximum flush_interval will be flush_interval + flush_jitter + flush_interval = "10s" + ## Jitter the flush interval by a random amount. This is primarily to avoid + ## large write spikes for users running a large number of telegraf instances. + ## ie, a jitter of 5s and interval 10s means flushes will happen every 10-15s + flush_jitter = "0s" + + ## By default, precision will be set to the same timestamp order as the + ## collection interval, with the maximum being 1s. + ## Precision will NOT be used for service inputs, such as logparser and statsd. + ## Valid values are "ns", "us" (or "µs"), "ms", "s". + precision = "" + ## Run telegraf in debug mode + debug = false + ## Run telegraf in quiet mode + quiet = false + ## Override default hostname, if empty use os.Hostname() + hostname = "" + ## If set to true, do no set the "host" tag in the telegraf agent. + omit_hostname = false + +############################################################################### +# OUTPUT PLUGINS # +############################################################################### + +# Configuration for influxdb server to send metrics to +[[outputs.influxdb]] + ## The full HTTP or UDP endpoint URL for your InfluxDB instance. + ## Multiple urls can be specified as part of the same cluster, + ## this means that only ONE of the urls will be written to each interval. 
+ # urls = ["udp://localhost:8089"] # UDP endpoint example + {{ $influxdbhost := env "INFLUXDB_HOST" }} + {{ if $influxdbhost }} + urls = ["http://{{ $influxdbhost }}:8086"] # required + {{ else }} + urls = ["http://influxdb:8086"] # required + {{ end }} + + ## The target database for metrics (telegraf will create it if not exists). + {{ $influxdbdatabase := env "INFLUXDB_DATABASE" }} + {{ if $influxdbdatabase }} + database = "{{ $influxdbdatabase }}" # required + {{else}} + database = "telegraf" # required (default) + {{ end }} + + ## Retention policy to write to. Empty string writes to the default rp. + retention_policy = "" + ## Write consistency (clusters only), can be: "any", "one", "quorum", "all" + write_consistency = "any" + + ## Write timeout (for the InfluxDB client), formatted as a string. + ## If not provided, will default to 5s. 0s means no timeout (not recommended). + timeout = "5s" + # username = "telegraf" + # password = "metricsmetricsmetricsmetrics" + ## Set the user agent for HTTP POSTs (can be useful for log differentiation) + # user_agent = "telegraf" + ## Set UDP payload size, defaults to InfluxDB UDP Client default (512 bytes) + # udp_payload = 512 + + ## Optional SSL Config + # ssl_ca = "/etc/telegraf/ca.pem" + # ssl_cert = "/etc/telegraf/cert.pem" + # ssl_key = "/etc/telegraf/key.pem" + ## Use SSL but skip chain & host verification + # insecure_skip_verify = false + +############################################################################### +# INPUT PLUGINS # +############################################################################### + +# # Use Prometheus input plugin for Triton Container Monitor metrics +[[inputs.prometheus]] + ## An array of urls to scrape metrics from. 
+ {{ if service "containerpilot" }} + urls = [{{range $i, $e := service "containerpilot"}}{{if ne $i 0}}, {{end}}"http://{{$e.Address}}:{{$e.Port}}/metrics"{{end}}, "http://localhost:9090/metrics"] + {{ else }} + urls = ["http://localhost:9090/metrics"] + {{ end }} + +# ## Optional SSL Config + ssl_ca = "/root/.triton/ca.pem" + ssl_cert = "/root/.triton/cert.pem" + ssl_key = "/root/.triton/key.pem" +# ## Use SSL but skip chain & host verification + insecure_skip_verify = false diff --git a/examples/triton/docker-compose.yml b/examples/triton/docker-compose.yml new file mode 100644 index 0000000..9b1ef7e --- /dev/null +++ b/examples/triton/docker-compose.yml @@ -0,0 +1,63 @@ +version: '2.1' +# Telegraf autopilotpattern demo + +services: + # Telegraf should autodiscover all Autopilot Pattern containers, + # as well as all instances in Triton Container Monitor (https://docs.joyent.com/public-cloud/api-access/prometheus), + # collect telemetry from there, and send output to influxdb. + telegraf: + image: autopilotpattern/telegraf:${TAG:-latest} + # Use a 128MB instance (also see com.joyent.package label below) + mem_limit: 128m + # Joyent recommends setting instances to always restart on Triton + restart: always + labels: + # This label sets the CNS name, Triton's automatic DNS + # Learn more at https://docs.joyent.com/public-cloud/network/cns + - triton.cns.services=telegraf + # This label selects the proper Joyent resource package + # https://www.joyent.com/blog/optimizing-docker-on-triton#ram-cpu-and-disk-resources-for-your-containers + - com.joyent.package=g4-highcpu-128M + network_mode: bridge + ports: + # You may not want these port declarations for production. Without them, Telegraf will only + # listen on the private network. This will also result in a public telegraf CNS record being created, + # in the triton.zone domain. 
+ - 8094 + - 9090 + env_file: _env + environment: + - CONSUL_AGENT=1 + - CONSUL=telegraph-consul.svc.${TRITON_CNS_SEARCH_DOMAIN_PRIVATE} + + # InfluxDB + influxdb: + image: influxdb:latest + mem_limit: 512m + network_mode: bridge + ports: + - "8083:8083" + - "8086:8086" + - "8090:8090" + env_file: + - '_env' + + # Consul is the service catalog + consul: + image: autopilotpattern/consul:0.7.2-r0.8 + command: > + /usr/local/bin/containerpilot + /bin/consul agent -server + -bootstrap-expect 1 + -config-dir=/etc/consul + -ui-dir /ui + # Change "-bootstrap-expect 1" to "-bootstrap-expect 3", then scale to 3 or more to + # turn this into an HA Consul raft. + restart: always + mem_limit: 128m + network_mode: bridge + ports: + # As above, this port declaration should not be made for production. + - 8500 + labels: + - triton.cns.services=telegraph-consul diff --git a/examples/triton/setup.sh b/examples/triton/setup.sh new file mode 100755 index 0000000..0aa976b --- /dev/null +++ b/examples/triton/setup.sh @@ -0,0 +1,124 @@ +#!/bin/bash +set -e -o pipefail + +help() { + echo 'Usage ./setup.sh [-f docker-compose.yml] [-p project]' + echo + echo 'Checks that your Triton and Docker environment is sane and configures' + echo 'an environment file to use.' + echo + echo 'Optional flags:' + echo ' -f use this file as the docker-compose config file' + echo ' -p use this name as the project prefix for docker-compose' +} + + +# default values which can be overridden by -f or -p flags +export COMPOSE_PROJECT_NAME=telegraf +export COMPOSE_FILE= + +# --------------------------------------------------- +# Top-level commands + + +# Check for correct configuration +check() { + + # check for Triton Docker CLI + command -v triton-compose >/dev/null 2>&1 || { + echo + tput rev # reverse + tput bold # bold + echo 'Triton Docker CLI tools are required, but do not appear to be installed.' 
+ tput sgr0 # clear + echo 'See https://github.com/joyent/triton-docker-cli' + exit 1 + } + + # check for Triton CLI tool (it should be installed, given the above, but...) + command -v triton >/dev/null 2>&1 || { + echo + tput rev # reverse + tput bold # bold + echo 'Error! Joyent Triton CLI is required, but does not appear to be installed.' + tput sgr0 # clear + echo 'See https://www.joyent.com/blog/introducing-the-triton-command-line-tool' + exit 1 + } + + # set env vars for everything else that follows + eval "$(triton env ${TRITON_PROFILE})" + TRITON_DC=$(echo $SDC_URL | awk -F"/" '{print $3}' | awk -F'.' '{print $1}') + TRITON_ACCOUNT_UUID=$(triton account get | awk -F": " '/id:/{print $2}') + + # make sure CNS is enabled + local triton_cns_enabled=$(triton account get | awk -F": " '/cns/{print $2}') + if [ ! "true" == "$triton_cns_enabled" ]; then + echo + tput rev # reverse + tput bold # bold + echo 'Error! Triton CNS is required and not enabled.' + tput sgr0 # clear + echo + exit 1 + fi + + + + echo '# Autopilot Pattern Telegraf configuration' > _env + echo >> _env + + echo '# Telegraf output plugin: InfluxDB ' >> _env + echo '# (uncomment to change defaults) ' >> _env + echo '#INFLUXDB_HOST=influxdb # docker alias or real hostname' >> _env + echo '#INFLUXDB_DATABASE=telegraf' >> _env + echo '#INFLUXDB_DATA_ENGINE=tsm1' >> _env + echo >> _env + + echo '# Triton Container Monitor (uses Prometheus input plugin in Telegraf)' >> _env + echo TRITON_ACCOUNT_UUID=${TRITON_ACCOUNT_UUID} >> _env + echo '# This works for Triton Public Cloud, but change it for other clouds:' >> _env + echo TRITON_CNS_SUFFIX=.triton.zone >> _env + echo '# Leave empty or unset and Autopilot Pattern Telegraf will automatically detect the DC:' >> _env + echo '#TRITON_DC=' >> _env + echo >> _env + + echo '# Triton Container Monitor authentication' >> _env + TRITON_CREDS_PATH=/root/.triton + echo TRITON_CREDS_PATH=${TRITON_CREDS_PATH} >> _env + echo TRITON_CA=$(cat 
"${DOCKER_CERT_PATH}"/ca.pem | tr '\n' '#') >> _env + echo TRITON_CA_PATH=${TRITON_CREDS_PATH}/ca.pem >> _env + echo TRITON_KEY=$(cat "${DOCKER_CERT_PATH}"/key.pem | tr '\n' '#') >> _env + echo TRITON_KEY_PATH=${TRITON_CREDS_PATH}/key.pem >> _env + echo TRITON_CERT=$(cat "${DOCKER_CERT_PATH}"/cert.pem | tr '\n' '#') >> _env + echo TRITON_CERT_PATH=${TRITON_CREDS_PATH}/cert.pem >> _env + echo >> _env +} + +# --------------------------------------------------- +# parse arguments + +while getopts "f:p:h" optchar; do + case "${optchar}" in + f) export COMPOSE_FILE=${OPTARG} ;; + p) export COMPOSE_PROJECT_NAME=${OPTARG} ;; + esac +done +shift $(expr $OPTIND - 1 ) + +until + cmd=$1 + if [ ! -z "$cmd" ]; then + shift 1 + $cmd "$@" + if [ $? == 127 ]; then + help + fi + exit + fi +do + echo +done + +# default behavior +check diff --git a/makefile b/makefile new file mode 100644 index 0000000..27f2897 --- /dev/null +++ b/makefile @@ -0,0 +1,51 @@ +# Makefile for building and shipping the container image. + +MAKEFLAGS += --warn-undefined-variables +.DEFAULT_GOAL := build +.PHONY: * + +# we get these from CI environment if available, otherwise from git +GIT_COMMIT ?= $(shell git rev-parse --short HEAD) +GIT_BRANCH ?= $(shell git rev-parse --abbrev-ref HEAD) + +namespace ?= autopilotpattern +tag := branch-$(shell basename $(GIT_BRANCH)) +image := $(namespace)/telegraf + +## Display this help message +help: + @awk '/^##.*$$/,/[a-zA-Z_-]+:/' $(MAKEFILE_LIST) | awk '!(NR%2){print $$0p}{p=$$0}' | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-20s\033[0m %s\n", $$1, $$2}' | sort + + +# ------------------------------------------------ +# Container builds + +## Builds the application container image locally +build: + docker build -t=$(image):$(tag) . 
+ +## Push the current application container images to the Docker Hub +push: + docker push $(image):$(tag) + +## Tag the current images as 'latest' and push them to the Docker Hub +ship: + docker tag $(image):$(tag) $(image):latest + docker push $(image):$(tag) + docker push $(image):latest + + +# ------------------------------------------------ +# Test running + +## Print environment for build debugging +debug: + @echo GIT_COMMIT=$(GIT_COMMIT) + @echo GIT_BRANCH=$(GIT_BRANCH) + @echo namespace=$(namespace) + @echo tag=$(tag) + @echo image=$(image) + +check_var = $(foreach 1,$1,$(__check_var)) +__check_var = $(if $(value $1),,\ + $(error Missing $1 $(if $(value 2),$(strip $2))))