From 622f77cb3aaad2be4223e8fa30cfe1b751da2664 Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Tue, 10 Jun 2025 13:55:41 +0000 Subject: [PATCH 1/9] Add test for quickstart scripts --- .devcontainer/devcontainer.json | 3 +- .github/workflows/observability-docker.yml | 49 +++++++++++++++++ .../examples/full/full-quickstart.sh | 9 +++- .../scrape-configs/exporters/exporters.yml | 10 ++-- observability/examples/simple/quickstart.sh | 4 +- observability/grafana-alloy/Dockerfile | 2 + .../prometheus/Dockerfile.prometheus | 3 ++ observability/test/health-check.py | 53 +++++++++++++++++++ observability/test/requirements.txt | 5 ++ observability/test/test-quickstart-full.sh | 25 +++++++++ observability/test/test-quickstart-simple.sh | 26 +++++++++ 11 files changed, 179 insertions(+), 10 deletions(-) create mode 100644 observability/test/health-check.py create mode 100644 observability/test/requirements.txt create mode 100644 observability/test/test-quickstart-full.sh create mode 100644 observability/test/test-quickstart-simple.sh diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 87a4344..368810b 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -5,7 +5,8 @@ // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile "image": "mcr.microsoft.com/devcontainers/base:jammy", "features": { - "ghcr.io/devcontainers/features/docker-outside-of-docker:1": {} + "ghcr.io/devcontainers/features/docker-outside-of-docker:1": {}, + "ghcr.io/devcontainers/features/python:1": {} }, // Features to add to the dev container. More info: https://containers.dev/features. 
diff --git a/.github/workflows/observability-docker.yml b/.github/workflows/observability-docker.yml index 95927e9..3071eea 100644 --- a/.github/workflows/observability-docker.yml +++ b/.github/workflows/observability-docker.yml @@ -66,3 +66,52 @@ jobs: push: true tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} + + + + + test-observability-quickstart-scripts: + runs-on: ubuntu-latest + name: Test observability Quickstart scripts + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.9' + + - name: Install Python dependencies + run: pip install -r observability/test/requirements.txt + + - name: Run Simple quickstart test + run: | + echo "πŸ§ͺ Running Simple quickstart test..." + bash observability/test/test-quickstart-simple.sh + + - name: Report test result + if: always() + run: | + if [ $? -eq 0 ]; then + echo "βœ… Full quickstart test PASSED" + else + echo "❌ Full quickstart test FAILED" + exit 1 + fi + + - name: Run full quickstart test + run: | + echo "πŸ§ͺ Running full quickstart test..." + ./observability/test/test-quickstart-full.sh + + - name: Report test result + if: always() + run: | + if [ $? 
-eq 0 ]; then + echo "βœ… Full quickstart test PASSED" + else + echo "❌ Full quickstart test FAILED" + exit 1 + \ No newline at end of file diff --git a/observability/examples/full/full-quickstart.sh b/observability/examples/full/full-quickstart.sh index 319c67e..d373932 100644 --- a/observability/examples/full/full-quickstart.sh +++ b/observability/examples/full/full-quickstart.sh @@ -20,10 +20,15 @@ download_to docker-compose.yml download_to exporters.docker-compose.yml download_to exporters.elastic.docker-compose.yml -download_to alloy/probers/probe-internal.yml +download_to alloy/probers/probe-observability.yml download_to alloy/probers/probe-external.yml download_to prometheus/scrape-configs/exporters/exporters.yml download_to prometheus/scrape-configs/recording-rules/slo.yml - echo "Setup complete in cogstack-observability/" + +echo "Starting the observability stack" + +docker compose up -d + +echo "Please open http://localhost/grafana in your browser" \ No newline at end of file diff --git a/observability/examples/full/prometheus/scrape-configs/exporters/exporters.yml b/observability/examples/full/prometheus/scrape-configs/exporters/exporters.yml index 004fc0e..122e733 100644 --- a/observability/examples/full/prometheus/scrape-configs/exporters/exporters.yml +++ b/observability/examples/full/prometheus/scrape-configs/exporters/exporters.yml @@ -1,6 +1,6 @@ # Scrape metrics from any targets that are exposing Prometheus formatted data. Default is to call the /metrics API. 
-- targets: - - cogstack-observability-node-exporter-1:9100 - labels: - job: node_exporter - host: localhost \ No newline at end of file +# - targets: +# - cogstack-observability-node-exporter-1:9100 +# labels: +# job: node_exporter +# host: localhost \ No newline at end of file diff --git a/observability/examples/simple/quickstart.sh b/observability/examples/simple/quickstart.sh index 577430b..8d38c05 100644 --- a/observability/examples/simple/quickstart.sh +++ b/observability/examples/simple/quickstart.sh @@ -9,8 +9,8 @@ curl -fsSL -o docker-compose.yml \ https://raw.githubusercontent.com/CogStack/cogstack-platform-toolkit/main/observability/examples/simple/docker-compose.yml echo "Downloading probe-simple.yml into alloy/probers/..." -curl -fsSL -o probers/probe-observability.yml \ - https://raw.githubusercontent.com/CogStack/cogstack-platform-toolkit/main/observability/examples/simple/probers/probe-observability.yml +curl -fsSL -o alloy/probers/probe-observability.yml \ + https://raw.githubusercontent.com/CogStack/cogstack-platform-toolkit/main/observability/examples/simple/alloy/probers/probe-observability.yml echo "Setup complete in observability-simple/" diff --git a/observability/grafana-alloy/Dockerfile b/observability/grafana-alloy/Dockerfile index 525e85f..c4353de 100644 --- a/observability/grafana-alloy/Dockerfile +++ b/observability/grafana-alloy/Dockerfile @@ -3,6 +3,8 @@ FROM grafana/alloy:latest LABEL traefik.enable="true" \ traefik.http.routers.alloy.rule="PathPrefix(`/alloy`)" +EXPOSE 12345 + RUN mkdir -p /etc/alloy/probers COPY ./defaults /etc/alloy diff --git a/observability/prometheus/Dockerfile.prometheus b/observability/prometheus/Dockerfile.prometheus index a743fcb..2a5b6fe 100644 --- a/observability/prometheus/Dockerfile.prometheus +++ b/observability/prometheus/Dockerfile.prometheus @@ -3,6 +3,9 @@ FROM prom/prometheus LABEL traefik.enable="true" \ traefik.http.routers.prometheus-path-router.rule="PathPrefix(`/prometheus`)" +RUN mkdir -p 
/etc/prometheus/cogstack/site/scrape-targets/probers +RUN mkdir -p /etc/prometheus/cogstack/site/scrape-targets/exporters + COPY ./defaults /etc/prometheus/cogstack/defaults CMD [ \ diff --git a/observability/test/health-check.py b/observability/test/health-check.py new file mode 100644 index 0000000..0fe28cd --- /dev/null +++ b/observability/test/health-check.py @@ -0,0 +1,53 @@ +#!/usr/bin/env python3 +import requests +import time +import sys +from urllib3.exceptions import InsecureRequestWarning + +# Disable SSL warnings for localhost +requests.packages.urllib3.disable_warnings(InsecureRequestWarning) + +def check_service_health(url, service_name, timeout=120): + """Check if a service is responding with HTTP 200""" + print(f"πŸ” Checking {service_name} at {url}...") + + start_time = time.time() + while time.time() - start_time < timeout: + try: + print(f"Calling {url}...") + response = requests.get(url, timeout=10, verify=False) + if response.status_code == 200: + print(f"βœ… {service_name} is responding (HTTP {response.status_code})") + return True + except requests.exceptions.RequestException as e: + print("Failed to call URL", e) + pass + + print(f"⏳ Waiting for {service_name}... 
({int(time.time() - start_time)}s)") + time.sleep(5) + + print(f"❌ {service_name} failed to respond after {timeout}s") + return False + +def main(): + services = [ + ("http://host.docker.internal/grafana", "Grafana"), + ("http://host.docker.internal/prometheus", "Prometheus"), + ("http://host.docker.internal/alloy", "Prometheus"), + ] + + all_healthy = True + for url, name in services: + if not check_service_health(url, name): + all_healthy = False + + if all_healthy: + print("\nπŸŽ‰ All services are running successfully!") + print("\nπŸ“Š Access your services:") + return 0 + else: + print("\n❌ Some services failed to start properly") + return 1 + +if __name__ == "__main__": + sys.exit(main()) \ No newline at end of file diff --git a/observability/test/requirements.txt b/observability/test/requirements.txt new file mode 100644 index 0000000..73c1849 --- /dev/null +++ b/observability/test/requirements.txt @@ -0,0 +1,5 @@ +certifi==2025.4.26 +charset-normalizer==3.4.2 +idna==3.10 +requests==2.32.4 +urllib3==2.4.0 diff --git a/observability/test/test-quickstart-full.sh b/observability/test/test-quickstart-full.sh new file mode 100644 index 0000000..c6a186e --- /dev/null +++ b/observability/test/test-quickstart-full.sh @@ -0,0 +1,25 @@ +#!/bin/bash +set -e +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +echo "πŸš€ Starting deployment test..." + +# Run the quickstart script +echo "πŸ“¦ Running quickstart..." +cd ${SCRIPT_DIR}../examples/full/ +bash ./full-quickstart.sh + +# Run the health check +echo "πŸ” Running health check..." +python ../../test/health-check.py + +# Check if health check was successful +if [ $? -eq 0 ]; then + echo "βœ… Success! All services are running." + docker compose down + exit 0 +else + echo "❌ Health check failed." 
+ exit 1 +fi + diff --git a/observability/test/test-quickstart-simple.sh b/observability/test/test-quickstart-simple.sh new file mode 100644 index 0000000..192ca38 --- /dev/null +++ b/observability/test/test-quickstart-simple.sh @@ -0,0 +1,26 @@ +#!/bin/bash +set -e +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + + +echo "πŸš€ Starting deployment test..." + +# Run the quickstart script +echo "πŸ“¦ Running quickstart..." +cd ${SCRIPT_DIR}/../examples/simple/ +bash ./quickstart.sh + +# Run the health check +echo "πŸ” Running health check..." +python ../../test/health-check.py + +# Check if health check was successful +if [ $? -eq 0 ]; then + echo "βœ… Success! All services are running." + docker compose down + exit 0 +else + echo "❌ Health check failed." + exit 1 +fi + From 0f7827d1a7f6f3566a02e56e7c8ad3b1b2b9162b Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Tue, 10 Jun 2025 14:08:42 +0000 Subject: [PATCH 2/9] Add env var for localhost --- observability/test/health-check.py | 10 +++++++--- observability/test/test-quickstart-full.sh | 2 +- observability/test/test-quickstart-simple.sh | 1 - 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/observability/test/health-check.py b/observability/test/health-check.py index 0fe28cd..b8a37e1 100644 --- a/observability/test/health-check.py +++ b/observability/test/health-check.py @@ -2,6 +2,7 @@ import requests import time import sys +import os from urllib3.exceptions import InsecureRequestWarning # Disable SSL warnings for localhost @@ -30,10 +31,13 @@ def check_service_health(url, service_name, timeout=120): return False def main(): + ''' Check if services are running. 
For use in a container, first export HEALTH_CHECK_URL env var to be host.docker.internal ''' + + localhost_url = os.environ.get('HEALTH_CHECK_URL', "localhost") services = [ - ("http://host.docker.internal/grafana", "Grafana"), - ("http://host.docker.internal/prometheus", "Prometheus"), - ("http://host.docker.internal/alloy", "Prometheus"), + (f"http://{localhost_url}/grafana", "Grafana"), + (f"http://{localhost_url}/prometheus", "Prometheus"), + (f"http://{localhost_url}/alloy", "Prometheus"), ] all_healthy = True diff --git a/observability/test/test-quickstart-full.sh b/observability/test/test-quickstart-full.sh index c6a186e..0a06ca8 100644 --- a/observability/test/test-quickstart-full.sh +++ b/observability/test/test-quickstart-full.sh @@ -6,7 +6,7 @@ echo "πŸš€ Starting deployment test..." # Run the quickstart script echo "πŸ“¦ Running quickstart..." -cd ${SCRIPT_DIR}../examples/full/ +cd ${SCRIPT_DIR}/../examples/full/ bash ./full-quickstart.sh # Run the health check diff --git a/observability/test/test-quickstart-simple.sh b/observability/test/test-quickstart-simple.sh index 192ca38..725a958 100644 --- a/observability/test/test-quickstart-simple.sh +++ b/observability/test/test-quickstart-simple.sh @@ -2,7 +2,6 @@ set -e SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" - echo "πŸš€ Starting deployment test..." 
# Run the quickstart script From 9fff2b2c96b430aa5987f170a2af51aa5a45569e Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Tue, 10 Jun 2025 14:13:59 +0000 Subject: [PATCH 3/9] Add test for documentation examples --- observability/test/health-check.py | 2 +- observability/test/test-quickstart-full.sh | 2 +- observability/test/test-quickstart-simple.sh | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/observability/test/health-check.py b/observability/test/health-check.py index b8a37e1..7fbc896 100644 --- a/observability/test/health-check.py +++ b/observability/test/health-check.py @@ -37,7 +37,7 @@ def main(): services = [ (f"http://{localhost_url}/grafana", "Grafana"), (f"http://{localhost_url}/prometheus", "Prometheus"), - (f"http://{localhost_url}/alloy", "Prometheus"), + # (f"http://{localhost_url}/alloy", "Alloy"), ] all_healthy = True diff --git a/observability/test/test-quickstart-full.sh b/observability/test/test-quickstart-full.sh index 0a06ca8..093a038 100644 --- a/observability/test/test-quickstart-full.sh +++ b/observability/test/test-quickstart-full.sh @@ -11,7 +11,7 @@ bash ./full-quickstart.sh # Run the health check echo "πŸ” Running health check..." -python ../../test/health-check.py +python -u ../../test/health-check.py # Check if health check was successful if [ $? -eq 0 ]; then diff --git a/observability/test/test-quickstart-simple.sh b/observability/test/test-quickstart-simple.sh index 725a958..4e35a31 100644 --- a/observability/test/test-quickstart-simple.sh +++ b/observability/test/test-quickstart-simple.sh @@ -11,7 +11,7 @@ bash ./quickstart.sh # Run the health check echo "πŸ” Running health check..." -python ../../test/health-check.py +python -u ../../test/health-check.py # Check if health check was successful if [ $? 
-eq 0 ]; then From f8ceeb134242a411f91c0d97db38ce497ed06301 Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Tue, 10 Jun 2025 14:19:00 +0000 Subject: [PATCH 4/9] Add test for documentation examples --- .github/workflows/observability-docker.yml | 21 ++------- docs/observability/setup/production-setup.md | 46 ++++++++++---------- observability/test/test-quickstart-full.sh | 25 ----------- observability/test/test-quickstart-simple.sh | 25 ----------- 4 files changed, 26 insertions(+), 91 deletions(-) delete mode 100644 observability/test/test-quickstart-full.sh delete mode 100644 observability/test/test-quickstart-simple.sh diff --git a/.github/workflows/observability-docker.yml b/.github/workflows/observability-docker.yml index 3071eea..bc32193 100644 --- a/.github/workflows/observability-docker.yml +++ b/.github/workflows/observability-docker.yml @@ -89,29 +89,14 @@ jobs: - name: Run Simple quickstart test run: | echo "πŸ§ͺ Running Simple quickstart test..." - bash observability/test/test-quickstart-simple.sh + bash observability/test/test-quickstart.sh - name: Report test result if: always() run: | if [ $? -eq 0 ]; then - echo "βœ… Full quickstart test PASSED" + echo "βœ… quickstart test PASSED" else - echo "❌ Full quickstart test FAILED" + echo "❌ quickstart test FAILED" exit 1 fi - - - name: Run full quickstart test - run: | - echo "πŸ§ͺ Running full quickstart test..." - ./observability/test/test-quickstart-full.sh - - - name: Report test result - if: always() - run: | - if [ $? 
-eq 0 ]; then - echo "βœ… Full quickstart test PASSED" - else - echo "❌ Full quickstart test FAILED" - exit 1 - \ No newline at end of file diff --git a/docs/observability/setup/production-setup.md b/docs/observability/setup/production-setup.md index 054f11b..f5bb772 100644 --- a/docs/observability/setup/production-setup.md +++ b/docs/observability/setup/production-setup.md @@ -7,31 +7,13 @@ If you're new, we recommend completing the [Quickstart Tutorial](../get-started/ By the end of the tutorial, you will have a complete stack offering all the observability features, customized to your usage. We will run the stack and then: -- Configure *Telemetry* like VM memory usage, and Elasticsearch index size, by running Exporters -- Enable *Alerting* based on our availability and a defined Service Level Objective (SLO) - Setup further *Probing* of our running services to get availability metrics - +- Configure *Telemetry* like VM memory usage, and Elasticsearch index size, by running Grafana Alloy +- Enable *Alerting* based on our availability and a defined Service Level Objective (SLO) --- -## Step 1: Understand the Folder Structure - -Your project configuration should follow this structure: - -``` -observability.docker-compose.yml -exporters.docker-compose.yml -alloy/ - probers/ # HTTP endpoints to check availability - blackbox-exporter/ # (Optional) Custom Probe configuration -prometheus/ - scrape-configs/ - exporters/ # Targets that expose metrics (e.g. Elasticsearch, Docker, VMs) - recording-rules/ # Prometheus recording rules (e.g. for SLOs, summaries) -grafana/ # (Optional) Custom Grafana dashboards and config -``` - -## Step 2: Initialise the project +## Step 1: Initialise the project Run: ```bash @@ -56,18 +38,36 @@ Downloads the configurations: -Inspect the results in your local directory, and see that it matches the folder layout defined in step 1. 
+## Step 2: Understand the Folder structure + +Your project configuration will be created with follow this structure: + +``` +observability.docker-compose.yml +exporters.docker-compose.yml +alloy/ + probers/ # HTTP endpoints to check availability + blackbox-exporter/ # (Optional) Custom Probe configurations like auth details +prometheus/ + scrape-configs/ + exporters/ # Targets that expose metrics (e.g. Elasticsearch, Docker, VMs) + recording-rules/ # Prometheus recording rules (e.g. for SLOs, summaries) +grafana/ # (Optional) Custom Grafana dashboards and config +``` + +Inspect the results of the script and see that it matches this layout + ## Step 3: Run the Stack The files come with basic defaults, so we can now run the stack - ``` docker compose up -d ``` This will launch Prometheus, Grafana, and Alloy +Navigate to the dashboard urls on `http://localhost/grafana` to view the dashboards. ## Step 4: Create Site-Specific Config Files diff --git a/observability/test/test-quickstart-full.sh b/observability/test/test-quickstart-full.sh deleted file mode 100644 index 093a038..0000000 --- a/observability/test/test-quickstart-full.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/bash -set -e -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" - -echo "πŸš€ Starting deployment test..." - -# Run the quickstart script -echo "πŸ“¦ Running quickstart..." -cd ${SCRIPT_DIR}/../examples/full/ -bash ./full-quickstart.sh - -# Run the health check -echo "πŸ” Running health check..." -python -u ../../test/health-check.py - -# Check if health check was successful -if [ $? -eq 0 ]; then - echo "βœ… Success! All services are running." - docker compose down - exit 0 -else - echo "❌ Health check failed." 
- exit 1 -fi - diff --git a/observability/test/test-quickstart-simple.sh b/observability/test/test-quickstart-simple.sh deleted file mode 100644 index 4e35a31..0000000 --- a/observability/test/test-quickstart-simple.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/bash -set -e -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" - -echo "πŸš€ Starting deployment test..." - -# Run the quickstart script -echo "πŸ“¦ Running quickstart..." -cd ${SCRIPT_DIR}/../examples/simple/ -bash ./quickstart.sh - -# Run the health check -echo "πŸ” Running health check..." -python -u ../../test/health-check.py - -# Check if health check was successful -if [ $? -eq 0 ]; then - echo "βœ… Success! All services are running." - docker compose down - exit 0 -else - echo "❌ Health check failed." - exit 1 -fi - From aa16a953e3da159e1519443c211ca8235b64a866 Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Tue, 10 Jun 2025 14:19:08 +0000 Subject: [PATCH 5/9] Add test for documentation examples --- observability/test/test-quickstart.sh | 45 +++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 observability/test/test-quickstart.sh diff --git a/observability/test/test-quickstart.sh b/observability/test/test-quickstart.sh new file mode 100644 index 0000000..4c57a3b --- /dev/null +++ b/observability/test/test-quickstart.sh @@ -0,0 +1,45 @@ +#!/bin/bash +set -e +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +echo "πŸš€ Starting deployment test..." + +# Run the simple quickstart script +echo "πŸ“¦ Running quickstart..." +cd ${SCRIPT_DIR}/../examples/simple/ +bash ./quickstart.sh + +# Run the health check +echo "πŸ” Running health check..." +python -u ../../test/health-check.py + +# Check if health check was successful +if [ $? -eq 0 ]; then + echo "βœ… Success! All services are running for Simple Quickstart." + docker compose down + exit 0 +else + echo "❌ Health check failed for simple quickstart." 
+ exit 1 +fi + + +# Run the quickstart script +echo "πŸ“¦ Running full quickstart..." +cd ${SCRIPT_DIR}/../examples/full/ +bash ./full-quickstart.sh + +# Run the health check +echo "πŸ” Running health check..." +python -u ../../test/health-check.py + +# Check if health check was successful +if [ $? -eq 0 ]; then + echo "βœ… Success! All services are running for Full Quickstart." + docker compose down + exit 0 +else + echo "❌ Health check failed for full quickstart." + exit 1 +fi + From 869849a7e824e4c91ba451bf1aa9ffd9622dc8ba Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Tue, 10 Jun 2025 14:30:03 +0000 Subject: [PATCH 6/9] Update documentation --- .../customization/blackbox-exporter-config.md | 15 ++++------ docs/observability/setup/probing.md | 8 +++--- docs/observability/setup/production-setup.md | 8 +++--- docs/observability/setup/telemetry.md | 28 ++++++++++++++++++- 4 files changed, 41 insertions(+), 18 deletions(-) diff --git a/docs/observability/customization/blackbox-exporter-config.md b/docs/observability/customization/blackbox-exporter-config.md index dbc1a24..79e72f2 100644 --- a/docs/observability/customization/blackbox-exporter-config.md +++ b/docs/observability/customization/blackbox-exporter-config.md @@ -11,7 +11,7 @@ You will need to create a new file, and then mount it over the existing provided 1. Create a new file: ``` - prometheus/blackbox-exporter/custom-blackbox-config.yml + alloy/blackbox-exporter.yml ``` 2. Add the existing defaults @@ -67,13 +67,10 @@ In your probe YAML file, reference the module in the `module` field of the `labe You lastly need to mount the new config file and refer to it in docker compose ``` - blackbox-exporter: - image: cogstacksystems/cogstack-observability-blackbox-exporter:latest - restart: unless-stopped - networks: - - observability + alloy: + image: cogstacksystems/cogstack-observability-alloy:latest +... 
volumes: - - ./prometheus/blackbox-exporter:/config - command: - - "--config.file=/config/custom-blackbox-config.yml" + - ${BASE_DIR-.}/alloy/blackbox-exporter.yml:/etc/alloy/blackbox-exporter.yml +... ``` diff --git a/docs/observability/setup/probing.md b/docs/observability/setup/probing.md index 5555986..dd3e51f 100644 --- a/docs/observability/setup/probing.md +++ b/docs/observability/setup/probing.md @@ -1,6 +1,6 @@ # Availability -This guide explains how to configure HTTP probers using Blackbox Exporter to monitor the availability of your services. These probers generate uptime and latency metrics, which can then be visualized in Grafana. +This guide explains how to configure HTTP probers using Blackbox Exporter in Grafana Alloy to monitor the availability of your services. These probers generate uptime and latency metrics, which can then be visualized in Grafana. See the [Reference](../reference/understanding-metrics.md) for an explanation of the metrics this generates. @@ -13,7 +13,7 @@ To add a new prober target: 1. Navigate to the folder: ``` - prometheus/scrape-configs/probers/ + alloy/probers/ ``` 2. Create a new YAML file (e.g., `probe.my-services.yml`) with the following structure: @@ -27,10 +27,10 @@ To add a new prober target: job: my-services # Mandatory - used to group probes in dashboards ip_address: "10.0.0.12" # Optional - IP of the host being probed host: service-hostname # Optional - Human-readable hostname - region: eu-west # Optional - Any additional metadata label + any_custom_field: anything # Optional - Add as many labels as desired here ``` -3. Ensure the folder is mounted in docker under `/etc/prometheus/cogstack/site/prometheus/scrape-configs/probers`, which it should be by default if you've followed the setup guids. Any valid `.yml` files in this folder will be automatically picked up and used as Blackbox targets. +3. 
Ensure the folder is mounted in docker under `/etc/alloy/probers`, which it should be by default if you've followed the setup guides. Any valid `.yml` files in this folder will be automatically picked up and used as Blackbox targets. --- diff --git a/docs/observability/setup/production-setup.md b/docs/observability/setup/production-setup.md index f5bb772..17bb80d 100644 --- a/docs/observability/setup/production-setup.md +++ b/docs/observability/setup/production-setup.md @@ -70,7 +70,7 @@ This will launch Prometheus, Grafana, and Alloy Navigate to the dashboard urls on `http://localhost/grafana` to view the dashboards. -## Step 4: Create Site-Specific Config Files +## Step 4: Create Site-Specific Probing files You must provide your own scrape and recording rules to tell Prometheus what to monitor. This is probably the hardest step: You will actually need to know what is running, and where it is! Building out these config files will give you that inventory, and give a real definition of what is running where. @@ -79,9 +79,6 @@ This is probably the hardest step: You will actually need to know what is runnin - Add files in `alloy/probers/*.yml` - [Configure Probers](./probing.md) -- Telemetry: Run Grafana Alloy on every VM you want telemetry from - - [Configure Telemetry](./probing.md) - - Recording Rules: Define uptime goals or custom aggregations - Add files in `recording-rules/*.yml` - [Enable Alerting](./alerting.md) @@ -94,6 +91,9 @@ Use the example docker compose file in [exporters.docker-compose.yml](../../../o ``` docker compose -f exporters.docker-compose.yml up -d ``` + +See [Configure Telemetry](./telemetry.md) for the full details + --- ## What’s Next? diff --git a/docs/observability/setup/telemetry.md b/docs/observability/setup/telemetry.md index 3544e60..121af5b 100644 --- a/docs/observability/setup/telemetry.md +++ b/docs/observability/setup/telemetry.md @@ -12,6 +12,32 @@ Grafana Alloy is used to get telemetry. 
These features are configured by default ## How to get Telemetry -- Copy this docker compose file: (exporters.docker-compose.yml)[observability/examples/full/exporters.docker-compose.yml] +- Copy this docker compose file: [exporters.docker-compose.yml](observability/examples/full/exporters.docker-compose.yml) - Edit the environment variables to point to your prometheus URL - Run `docker compose -f exporters.docker-compose.yml up -d ` on every VM you want metrics from + + +### Elastic Search Metrics +To get elasticsearch metrics we have to mount an alloy config file into the image. + +- Copy this docker compose file: [exporters.elastic.docker-compose.yml](observability/examples/full/exporters.elastic.docker-compose.yml) +- Copy this configuration file [elasticsearch.alloy](../../../observability/examples/full/alloy/elasticsearch.alloy) into `alloy/elasticsearch.alloy` + +In the docker compose file, we can see there are two changes to the usual exporter: + +```yaml + volumes: + - ${BASE_DIR-.}/alloy/elasticsearch.alloy:/etc/alloy/elasticsearch.alloy # Enable Elastic Exporter + ... + environment: + - ELASTICSEARCH_URL=${ELASTICSEARCH_URL-https://elasticsearch-1:9200} + - ELASTICSEARCH_USERNAME=${ELASTICSEARCH_USERNAME-user} # Used to get metrics from Elasticsearch + - ELASTICSEARCH_PASSWORD=${ELASTICSEARCH_PASSWORD-pass} # Used to get metrics from Elasticsearch +``` + +By adding the alloy config file, and the elasticsearch environment details, Alloy will run and get metrics from elasticsearch. + +This pattern can be used to customize grafana alloy fully: mount any config files you want to in `/etc/alloy` and it will read them. 
+ +## Next Steps +- You can fully customise Grafana Alloy by mounting your own alloy config files \ No newline at end of file From d16103948971fa4f6929646f447550328fc2ac0c Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Tue, 10 Jun 2025 14:38:40 +0000 Subject: [PATCH 7/9] Minor cleanup of docs --- .../customization/blackbox-exporter-config.md | 8 ++++---- docs/observability/reference/quickstart-manual.md | 14 +++++--------- 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/docs/observability/customization/blackbox-exporter-config.md b/docs/observability/customization/blackbox-exporter-config.md index 79e72f2..a44ce24 100644 --- a/docs/observability/customization/blackbox-exporter-config.md +++ b/docs/observability/customization/blackbox-exporter-config.md @@ -16,7 +16,7 @@ You will need to create a new file, and then mount it over the existing provided 2. Add the existing defaults -``` +```yaml modules: http_get_200: prober: http @@ -31,7 +31,7 @@ modules: ``` 3. Add your own module to the modules in that file -``` +```yaml http_2xx_custom: prober: http timeout: 5s @@ -55,7 +55,7 @@ This example adds a module named `http_2xx_custom` that adds some basic auth cre In your probe YAML file, reference the module in the `module` field of the `labels` section: -``` +```yaml - targets: - https://myservice.example.com/health labels: @@ -66,7 +66,7 @@ In your probe YAML file, reference the module in the `module` field of the `labe ### 3. Mount the config file You lastly need to mount the new config file and refer to it in docker compose -``` +```yaml alloy: image: cogstacksystems/cogstack-observability-alloy:latest ... 
diff --git a/docs/observability/reference/quickstart-manual.md b/docs/observability/reference/quickstart-manual.md index 9349606..bbf00f6 100644 --- a/docs/observability/reference/quickstart-manual.md +++ b/docs/observability/reference/quickstart-manual.md @@ -1,5 +1,4 @@ # Manual Quickstart -//TODO The quickstart page uses a script to setup the folders for you. This page instead details how to do it manually, to provide clarity. @@ -7,16 +6,13 @@ This page instead details how to do it manually, to provide clarity. ## Step 1: Setup directory Create the necessary directory structure ``` -mkdir -p observability-simple/prometheus/scrape-configs/probers -mkdir -p observability-simple/prometheus/scrape-configs/exporters +mkdir -p observability-simple/alloy/probers ``` - Something - -Download these two files from github, and place in the right folder +Download these two files, and place in the right folder -- [docker-compose.yml](https://raw.githubusercontent.com/CogStack/cogstack-platform-toolkit/main/observability/examples/simple/docker-compose.yml) in observability-simple/ -- [prometheus/scrape-configs/probers/probe-simple.yml](https://raw.githubusercontent.com/CogStack/cogstack-platform-toolkit/main/observability/examples/simple/prometheus/scrape-configs/probers/probe-simple.yml) +- [docker-compose.yml](../../../observability/examples/simple/docker-compose.yml) in observability-simple/ +- [probe-observability.yml](../../../observability/examples/simple/alloy/probers/probe-observability.yml) into observability-simple/alloy/probers ### Step 2: Start the stack @@ -29,7 +25,7 @@ docker compose up -d ### Step 3: Access the dashboards Open your web browser and go to: -`localhost/grafana` +`http://localhost/grafana` You should see the Grafana dashboard displaying the availability of the sample web page. 
From 5d75f2f15f500adc8c79f0fa5e5976292712841c Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Tue, 10 Jun 2025 14:40:15 +0000 Subject: [PATCH 8/9] Minor cleanup of docs --- docs/observability/setup/telemetry.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/observability/setup/telemetry.md b/docs/observability/setup/telemetry.md index 121af5b..d4f1804 100644 --- a/docs/observability/setup/telemetry.md +++ b/docs/observability/setup/telemetry.md @@ -12,7 +12,7 @@ Grafana Alloy is used to get telemetry. These features are configured by default ## How to get Telemetry -- Copy this docker compose file: [exporters.docker-compose.yml](observability/examples/full/exporters.docker-compose.yml) +- Copy this docker compose file: [exporters.docker-compose.yml](../../../observability/examples/full/exporters.docker-compose.yml) - Edit the environment variables to point to your prometheus URL - Run `docker compose -f exporters.docker-compose.yml up -d ` on every VM you want metrics from @@ -20,7 +20,7 @@ Grafana Alloy is used to get telemetry. These features are configured by default ### Elastic Search Metrics To get elasticsearch metrics we have to mount an alloy config file into the image. 
-- Copy this docker compose file: [exporters.elastic.docker-compose.yml](observability/examples/full/exporters.elastic.docker-compose.yml) +- Copy this docker compose file: [exporters.elastic.docker-compose.yml](../../../observability/examples/full/exporters.elastic.docker-compose.yml) - Copy this configuration file [elasticsearch.alloy](../../../observability/examples/full/alloy/elasticsearch.alloy) into `alloy/elasticsearch.alloy` In the docker compose file, we can see there are two changes to the usual exporter: From 089f0b6474fbc029fef439c46fc50cf5d2d23bc1 Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Tue, 10 Jun 2025 14:43:28 +0000 Subject: [PATCH 9/9] Minor cleanup of docs --- docs/observability/setup/telemetry.md | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/docs/observability/setup/telemetry.md b/docs/observability/setup/telemetry.md index d4f1804..9c7401b 100644 --- a/docs/observability/setup/telemetry.md +++ b/docs/observability/setup/telemetry.md @@ -12,9 +12,23 @@ Grafana Alloy is used to get telemetry. These features are configured by default ## How to get Telemetry +We have to run Grafana Alloy on every single VM to get telemetry. + +Alloy is set up to push metrics to a central Prometheus instance. + - Copy this docker compose file: [exporters.docker-compose.yml](../../../observability/examples/full/exporters.docker-compose.yml) -- Edit the environment variables to point to your prometheus URL -- Run `docker compose -f exporters.docker-compose.yml up -d ` on every VM you want metrics from +- Edit the environment variables to point to your Prometheus URL: + +```yaml + environment: + - PROMETHEUS_URL=http://some-host:9090/prometheus # The URL of your running Prometheus instance. 
+ - ALLOY_HOSTNAME=${ALLOY_HOSTNAME-localhost} # Used to add a label to metrics + - ALLOY_IP_ADDRESS=${ALLOY_IP_ADDRESS-localhost} # Used to add a label to metrics +``` + +Now that you have the setup, you will have to run this on every VM you want metrics from. This is a good opportunity to look into orchestrating deployments from a central place. + +- On each specific VM, run `docker compose -f exporters.docker-compose.yml up -d` ### Elastic Search Metrics